convoviz 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/__init__.py +34 -0
- convoviz/__main__.py +6 -0
- convoviz/analysis/__init__.py +22 -0
- convoviz/analysis/graphs.py +879 -0
- convoviz/analysis/wordcloud.py +204 -0
- convoviz/assets/colormaps.txt +15 -0
- convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- convoviz/assets/stopwords.txt +1 -0
- convoviz/cli.py +149 -0
- convoviz/config.py +120 -0
- convoviz/exceptions.py +47 -0
- convoviz/interactive.py +264 -0
- convoviz/io/__init__.py +21 -0
- convoviz/io/assets.py +109 -0
- convoviz/io/loaders.py +191 -0
- convoviz/io/writers.py +231 -0
- convoviz/logging_config.py +69 -0
- convoviz/models/__init__.py +24 -0
- convoviz/models/collection.py +115 -0
- convoviz/models/conversation.py +158 -0
- convoviz/models/message.py +218 -0
- convoviz/models/node.py +66 -0
- convoviz/pipeline.py +184 -0
- convoviz/py.typed +0 -0
- convoviz/renderers/__init__.py +10 -0
- convoviz/renderers/markdown.py +269 -0
- convoviz/renderers/yaml.py +119 -0
- convoviz/utils.py +155 -0
- convoviz-0.4.1.dist-info/METADATA +215 -0
- convoviz-0.4.1.dist-info/RECORD +62 -0
- convoviz-0.4.1.dist-info/WHEEL +4 -0
- convoviz-0.4.1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,879 @@
|
|
|
1
|
+
"""Graph generation for conversation analytics.
|
|
2
|
+
|
|
3
|
+
Goals:
|
|
4
|
+
- Professional, consistent styling across plots.
|
|
5
|
+
- High-signal summaries by default (avoid output spam).
|
|
6
|
+
- Correct time bucketing (based on *message timestamps*, not conversation creation time).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
from collections.abc import Callable, Iterable
|
|
14
|
+
from datetime import UTC, datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import matplotlib.dates as mdates
|
|
18
|
+
import matplotlib.font_manager as fm
|
|
19
|
+
import matplotlib.ticker as mticker
|
|
20
|
+
from matplotlib.axes import Axes
|
|
21
|
+
from matplotlib.figure import Figure
|
|
22
|
+
from matplotlib.image import AxesImage
|
|
23
|
+
from tqdm import tqdm
|
|
24
|
+
|
|
25
|
+
from convoviz.config import GraphConfig, get_default_config
|
|
26
|
+
from convoviz.models import ConversationCollection
|
|
27
|
+
from convoviz.utils import get_asset_path
|
|
28
|
+
|
|
29
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Weekday names ordered so that the list index equals ``datetime.weekday()``
# (0 = Monday ... 6 = Sunday); the plotting code below indexes it that way.
WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _load_font(config: GraphConfig) -> fm.FontProperties:
    """Build the font properties used for the text of a graph.

    The font file is looked up as ``fonts/<config.font_name>`` through
    ``get_asset_path``. When that path does not exist, default
    ``FontProperties`` are returned instead.
    """
    candidate = get_asset_path(f"fonts/{config.font_name}")
    if not candidate.exists():
        return fm.FontProperties()
    return fm.FontProperties(fname=str(candidate))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _style_axes(ax: Axes, config: GraphConfig) -> None:
    """Apply the shared look to ``ax``.

    Sets a white background, hides the top and right spines, recolors the
    remaining spines and the ticks, and limits the y axis to integer ticks.
    Horizontal grid lines are drawn behind the data when ``config.grid`` is set.
    """
    ax.set_facecolor("white")
    for hidden in ("top", "right"):
        ax.spines[hidden].set_visible(False)
    for kept in ("left", "bottom"):
        ax.spines[kept].set_color("#d0d7de")
    ax.tick_params(colors="#24292f")
    ax.yaxis.set_major_locator(mticker.MaxNLocator(nbins=6, integer=True))

    if not config.grid:
        return
    ax.grid(axis="y", linestyle="-", linewidth=0.8, alpha=0.35, color="#8c959f")
    ax.set_axisbelow(True)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _apply_tick_font(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Set ``font_prop`` on every current x and y tick label of ``ax``."""
    tick_labels = (*ax.get_xticklabels(), *ax.get_yticklabels())
    for tick_label in tick_labels:
        tick_label.set_fontproperties(font_prop)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _setup_single_axes(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
    """Create a one-plot figure that already carries the shared styling.

    Returns:
        A ``(figure, axes, font properties)`` triple. The figure takes its size
        and DPI from ``config``.
    """
    figure = Figure(figsize=config.figsize, dpi=config.dpi, facecolor="white")
    axes: Axes = figure.add_subplot()
    _style_axes(axes, config)
    return figure, axes, _load_font(config)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
|
|
68
|
+
"""Convert epoch timestamps into aware datetimes based on config."""
|
|
69
|
+
dt_utc = datetime.fromtimestamp(ts, UTC)
|
|
70
|
+
return dt_utc if config.timezone == "utc" else dt_utc.astimezone()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _tz_label(config: GraphConfig) -> str:
    """Return the timezone name shown in titles and axis labels.

    ``"UTC"`` when ``config.timezone`` is ``"utc"``, otherwise ``"Local"``.
    """
    if config.timezone == "utc":
        return "UTC"
    return "Local"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _month_start(dt: datetime) -> datetime:
    """Return ``dt`` moved back to midnight on the first day of its month.

    The ``tzinfo`` of ``dt`` is kept unchanged.
    """
    midnight = dt.replace(hour=0, minute=0, second=0, microsecond=0)
    return midnight.replace(day=1)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _year_start(dt: datetime) -> datetime:
    """Return ``dt`` moved back to midnight on January 1st of its year.

    The ``tzinfo`` of ``dt`` is kept unchanged.
    """
    midnight = dt.replace(hour=0, minute=0, second=0, microsecond=0)
    return midnight.replace(month=1, day=1)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _day_start(dt: datetime) -> datetime:
    """Return ``dt`` with its time of day reset to midnight.

    The date and the ``tzinfo`` of ``dt`` are kept unchanged.
    """
    midnight_fields = {"hour": 0, "minute": 0, "second": 0, "microsecond": 0}
    return dt.replace(**midnight_fields)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _iter_month_starts(start: datetime, end: datetime) -> list[datetime]:
    """List the first instant of every calendar month from ``start`` to ``end``.

    Both bounds are inclusive and are compared by their wall-clock
    ``(year, month)``, not as absolute instants. Comparing aware datetimes with
    different UTC offsets as instants (e.g. local times on either side of a DST
    change) can drop the final month.

    Args:
        start: Any moment inside the first month.
        end: Any moment inside the last month.

    Returns:
        Month starts in ascending order, each carrying ``start``'s ``tzinfo``.
        Empty when ``end`` lies in an earlier month than ``start``.
    """
    first = start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    # Number months on one linear axis so the December -> January rollover
    # needs no special case.
    first_index = first.year * 12 + (first.month - 1)
    last_index = end.year * 12 + (end.month - 1)

    months: list[datetime] = []
    for index in range(first_index, last_index + 1):
        year, month_offset = divmod(index, 12)
        months.append(first.replace(year=year, month=month_offset + 1))
    return months


def _fill_missing_months(counts: dict[datetime, int]) -> tuple[list[datetime], list[int]]:
    """Expand sparse per-month counts into a gap-free monthly series.

    Buckets are matched by wall-clock ``(year, month)``. Keys that fall in the
    same calendar month but carry different UTC offsets are summed rather than
    lost; aware datetimes hash and compare by instant, so a plain
    ``counts.get`` lookup would miss them.

    Args:
        counts: Mapping from a datetime inside a month to that month's count.

    Returns:
        ``(months, values)`` of equal length, covering every month between the
        earliest and latest key. Months without a key get the value 0. Both
        lists are empty when ``counts`` is empty.
    """
    if not counts:
        return [], []

    per_month: defaultdict[tuple[int, int], int] = defaultdict(int)
    for bucket, count in counts.items():
        per_month[(bucket.year, bucket.month)] += count

    earliest = min(counts, key=lambda moment: (moment.year, moment.month))
    latest = max(counts, key=lambda moment: (moment.year, moment.month))
    months = _iter_month_starts(earliest, latest)
    return months, [per_month.get((month.year, month.month), 0) for month in months]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _aggregate_counts_by_month(
    timestamps: Iterable[float],
    config: GraphConfig,
) -> dict[datetime, int]:
    """Count how many timestamps fall into each calendar month.

    Each timestamp is converted with ``_ts_to_dt`` (so ``config`` decides the
    timezone) and then keyed by the start of its month.

    Returns:
        A plain dict mapping each month start to its number of timestamps.
    """
    tally: dict[datetime, int] = {}
    for stamp in timestamps:
        bucket = _month_start(_ts_to_dt(stamp, config))
        tally[bucket] = tally.get(bucket, 0) + 1
    return tally
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _moving_average(values: list[int], window: int) -> list[float]:
|
|
121
|
+
if window <= 1:
|
|
122
|
+
return [float(v) for v in values]
|
|
123
|
+
if len(values) < window:
|
|
124
|
+
return []
|
|
125
|
+
out: list[float] = []
|
|
126
|
+
running = sum(values[:window])
|
|
127
|
+
out.append(running / window)
|
|
128
|
+
for i in range(window, len(values)):
|
|
129
|
+
running += values[i] - values[i - window]
|
|
130
|
+
out.append(running / window)
|
|
131
|
+
return out
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall on each day of the week.

    Args:
        timestamps: Unix epoch timestamps, one per counted event.
        title: Prefix of the figure title.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure`` with one bar per weekday, Monday first.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    # ``datetime.weekday()`` is 0 for Monday, which matches the order of WEEKDAYS.
    tallies = [0] * len(WEEKDAYS)
    for stamp in timestamps:
        tallies[_ts_to_dt(stamp, cfg).weekday()] += 1
    positions = list(range(len(WEEKDAYS)))

    bar_container = ax.bar(positions, tallies, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(
            bar_container,
            padding=3,
            fontsize=9,
            fontproperties=font_prop,
            color="#24292f",
        )

    ax.set_title(f"{title} · Weekday pattern", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Weekday", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xticks(positions)
    ax.set_xticklabels(WEEKDAYS, rotation=35, ha="right", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def generate_hour_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall into each hour of the day (0-23).

    Args:
        timestamps: Unix epoch timestamps, one per counted event.
        title: Prefix of the figure title.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure`` with 24 bars; every second hour gets a tick.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)
    zone = _tz_label(cfg)

    per_hour = [0] * 24
    for stamp in timestamps:
        per_hour[_ts_to_dt(stamp, cfg).hour] += 1

    bar_container = ax.bar(range(24), per_hour, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(
            bar_container,
            padding=2,
            fontsize=7,
            fontproperties=font_prop,
            color="#24292f",
        )

    ax.set_title(
        f"{title} · Hourly pattern ({zone})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({zone})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)

    labelled_hours = range(0, 24, 2)
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}:00" for hour in labelled_hours], fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def generate_model_piechart(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a model usage chart.

    Note: the name is kept for backwards compatibility (historically a pie
    chart). The chart is a horizontal bar chart with one bar per model,
    annotated with the conversation count and its share of all conversations.

    Models used by fewer than 5% of conversations are grouped into an "Other"
    bar. At most 10 bars are drawn; when more would be needed, the smallest
    ones are folded into "Other" as well, so the bars always account for every
    conversation.

    Args:
        collection: Conversations whose ``model`` attribute is counted;
            conversations without a model are counted as "Unknown".
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when the collection has
        no conversations.
    """
    cfg = config or get_default_config().graph
    model_counts: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1

    total = sum(model_counts.values())
    fig, ax, font_prop = _setup_single_axes(cfg)

    if total == 0:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    threshold = 0.05
    max_bars = 10

    def by_count(item: tuple[str, int]) -> int:
        return item[1]

    # Group minor models to keep the plot readable.
    major = {model: count for model, count in model_counts.items() if count / total >= threshold}
    other_count = total - sum(major.values())
    ranked = sorted(major.items(), key=by_count, reverse=True)

    # Enforce the bar limit without dropping counts: whatever does not fit
    # moves into "Other", which then needs one of the available slots.
    if len(ranked) > max_bars or (other_count and len(ranked) >= max_bars):
        keep = max_bars - 1
        other_count += sum(count for _, count in ranked[keep:])
        ranked = ranked[:keep]

    if other_count:
        merged = dict(ranked)
        # Add to, rather than overwrite, a model that is literally named "Other".
        merged["Other"] = merged.get("Other", 0) + other_count
        ranked = sorted(merged.items(), key=by_count, reverse=True)

    labels = [model for model, _ in ranked]
    counts = [count for _, count in ranked]
    # Reverse so the largest bar ends up at the top of the chart.
    y = list(range(len(labels)))[::-1]

    bars = ax.barh(y, counts[::-1], color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(y)
    ax.set_yticklabels(labels[::-1], fontproperties=font_prop)
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Model usage", fontproperties=font_prop, fontsize=14, pad=14)

    for bar, count in zip(bars, counts[::-1], strict=True):
        pct = 100 * (count / total)
        ax.text(
            bar.get_width(),
            bar.get_y() + bar.get_height() / 2,
            f" {count} ({pct:.1f}%)",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#24292f",
        )

    _apply_tick_font(ax, font_prop)
    fig.tight_layout()
    return fig
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def generate_length_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot the distribution of user prompts per conversation.

    The x axis is capped at the 95th percentile (but at least 5); longer
    conversations are counted at the cap. Vertical lines mark the median and
    the 90th percentile.

    Args:
        collection: Conversations to measure.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when the collection has
        no conversations.
    """
    cfg = config or get_default_config().graph
    lengths = [conv.message_count("user") for conv in collection.conversations]
    fig, ax, font_prop = _setup_single_axes(cfg)

    if not lengths:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    ordered = sorted(lengths)
    last_index = len(ordered) - 1

    def percentile(fraction: float) -> int:
        # Picks an existing element; the index is truncated, not interpolated.
        return ordered[int(fraction * last_index)]

    p50 = percentile(0.50)
    p90 = percentile(0.90)
    cap = max(int(percentile(0.95)), 5)
    clipped = [min(length, cap) for length in lengths]

    ax.hist(
        clipped,
        bins=min(24, max(10, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )

    markers = (
        (p50, f"median={p50}", "#24292f"),
        (p90, f"p90={p90}", "#cf222e"),
    )
    for position, _, colour in markers:
        ax.axvline(position, color=colour, linewidth=1.2, alpha=0.8)
    for position, caption, colour in markers:
        ax.text(
            position,
            ax.get_ylim()[1] * 0.95,
            caption,
            rotation=90,
            va="top",
            ha="right",
            fontproperties=font_prop,
            fontsize=9,
            color=colour,
        )

    ax.set_title(
        "Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14
    )
    ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def generate_conversation_lifetime_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot the distribution of conversation lifetimes in days.

    A lifetime is ``update_time - create_time``; negative values are treated
    as 0. The x axis is capped at the 95th percentile (but at least one day)
    and longer lifetimes are counted at the cap. Vertical lines mark the median
    and the 90th percentile, which are also printed in the top-right corner.

    Args:
        collection: Conversations to measure.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when the collection has
        no conversations.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    lifetimes_days: list[float] = [
        max(0.0, (conv.update_time - conv.create_time).total_seconds() / 86_400)
        for conv in collection.conversations
    ]

    if not lifetimes_days:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    ordered = sorted(lifetimes_days)
    last_index = len(ordered) - 1
    # Percentiles pick an existing element; the index is truncated.
    p50 = ordered[int(0.50 * last_index)]
    p90 = ordered[int(0.90 * last_index)]
    p95 = ordered[int(0.95 * last_index)]
    cap = max(float(p95), 1.0)

    ax.hist(
        [min(days, cap) for days in lifetimes_days],
        bins=24,
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    for position, colour in ((p50, "#24292f"), (p90, "#cf222e")):
        ax.axvline(position, color=colour, linewidth=1.2, alpha=0.8)

    ax.set_title("Conversation lifetimes (days)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Days between first and last message", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    ax.text(
        0.99,
        0.98,
        f"median={p50:.1f}d\np90={p90:.1f}d",
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontproperties=font_prop,
        fontsize=9,
        color="#57606a",
    )
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def generate_monthly_activity_barplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot the number of user prompts per month, with a 3-month moving average.

    Months are derived from the timestamps of the user messages themselves,
    not from the month a conversation was created in. Months without prompts
    are shown with a count of 0.

    Args:
        collection: Conversations providing the user-message timestamps.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when there are no user
        timestamps.
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")
    fig, ax, font_prop = _setup_single_axes(cfg)

    months: list[datetime] = []
    values: list[int] = []
    if timestamps:
        months, values = _fill_missing_months(_aggregate_counts_by_month(timestamps, cfg))

    if not months:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    positions = mdates.date2num(months)
    ax.bar(positions, values, width=25, color=cfg.color, alpha=0.25, edgecolor="none")

    trend = _moving_average(values, window=3)
    if trend:
        # The first average covers months 0-2, so it is drawn at the third month.
        ax.plot(positions[2:], trend, color=cfg.color, linewidth=2.2, alpha=0.9)

    date_locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(date_locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))

    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def generate_daily_activity_lineplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot the number of user prompts per day, with a 7-day rolling mean.

    Every calendar day between the first and the last prompt is part of the
    series; days without prompts count as 0. This makes the rolling mean span
    seven calendar days rather than the last seven active days, and the thin
    line drops to zero on idle days instead of bridging them.

    Args:
        collection: Conversations providing the user-message timestamps.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when there are no user
        timestamps.
    """
    # Function-scope import: ``timedelta`` is not imported at module level.
    from datetime import timedelta

    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Bucket by naive wall-clock day. Aware datetimes hash and compare by UTC
    # instant, so local midnights on either side of a DST change would not
    # line up with the day grid built below.
    counts: defaultdict[datetime, int] = defaultdict(int)
    for ts in timestamps:
        counts[_day_start(_ts_to_dt(ts, cfg)).replace(tzinfo=None)] += 1

    first_day, last_day = min(counts), max(counts)
    day_span = (last_day - first_day).days
    days = [first_day + timedelta(days=offset) for offset in range(day_span + 1)]
    values = [counts.get(day, 0) for day in days]

    x = mdates.date2num(days)
    ax.bar(x, values, width=0.9, color=cfg.color, alpha=0.18, edgecolor="none")
    ax.plot(x, values, color=cfg.color, linewidth=1.2, alpha=0.25)
    smooth = _moving_average(values, window=7)
    if smooth:
        # The first average covers days 0-6, so it is drawn at the seventh day.
        ax.plot(x[6:], smooth, color=cfg.color, linewidth=2.4, alpha=0.95)

    locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Daily activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def generate_activity_heatmap(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot user prompts as a weekday-by-hour heatmap.

    Rows are weekdays (Monday first), columns are the hours 0-23, and the
    color of a cell encodes the number of prompts sent in that slot.

    Args:
        collection: Conversations providing the user-message timestamps.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        A Matplotlib ``Figure``; it only shows "No data" when there are no user
        timestamps.
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    cells: list[list[int]] = [[0] * 24 for _ in range(7)]
    for stamp in timestamps:
        moment = _ts_to_dt(stamp, cfg)
        cells[moment.weekday()][moment.hour] += 1

    # The image fills the axes, so grid lines and all four spines are removed.
    ax.grid(False)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    image: AxesImage = ax.imshow(cells, aspect="auto", cmap="Blues", interpolation="nearest")

    zone = _tz_label(cfg)
    ax.set_title(
        f"Activity heatmap (weekday × hour, {zone})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({zone})", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)

    labelled_hours = list(range(0, 24, 2))
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}" for hour in labelled_hours], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)

    colorbar = fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
    colorbar.set_label("User prompts", fontproperties=font_prop)
    for tick_label in colorbar.ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _dashboard_no_data(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Replace a dashboard panel with a centered "No data" notice."""
    ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
    ax.set_axis_off()


def _dashboard_header(
    fig: Figure,
    collection: ConversationCollection,
    user_ts: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Write the dashboard title and the one-line totals summary onto ``fig``."""
    fig.text(
        0.01,
        0.985,
        "ChatGPT usage overview",
        fontproperties=font_prop,
        fontsize=18,
        va="top",
        ha="left",
        color="#24292f",
    )

    conv_count = len(collection.conversations)
    prompt_count = len(user_ts)
    if user_ts:
        dts = [_ts_to_dt(ts, cfg) for ts in user_ts]
        date_range = f"{min(dts).date().isoformat()} → {max(dts).date().isoformat()}"
    else:
        date_range = "No activity"

    fig.text(
        0.01,
        0.955,
        f"{conv_count} conversations · {prompt_count} user prompts · {date_range} · {_tz_label(cfg)}",
        fontproperties=font_prop,
        fontsize=10,
        va="top",
        ha="left",
        color="#57606a",
    )


def _dashboard_monthly_panel(
    ax: Axes,
    user_ts: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw monthly user-prompt bars plus a 3-month moving average on ``ax``."""
    if not user_ts:
        _dashboard_no_data(ax, font_prop)
        return

    month_counts = _aggregate_counts_by_month(user_ts, cfg)
    months, values = _fill_missing_months(month_counts)
    x = mdates.date2num(months)
    ax.bar(x, values, width=25, color=cfg.color, alpha=0.20, edgecolor="none")
    smooth = _moving_average(values, window=3)
    if smooth:
        # The first average covers months 0-2, so it is drawn at the third month.
        ax.plot(x[2:], smooth, color=cfg.color, linewidth=2.6, alpha=0.95)

    locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)


def _dashboard_heatmap_panel(
    fig: Figure,
    ax: Axes,
    user_ts: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the weekday-by-hour prompt heatmap and its colorbar on ``ax``."""
    if not user_ts:
        _dashboard_no_data(ax, font_prop)
        return

    grid: list[list[int]] = [[0 for _ in range(24)] for _ in range(7)]
    for ts in user_ts:
        dt = _ts_to_dt(ts, cfg)
        grid[dt.weekday()][dt.hour] += 1

    # The image fills the axes, so grid lines and all four spines are removed.
    ax.grid(False)
    for side in ["top", "right", "left", "bottom"]:
        ax.spines[side].set_visible(False)
    img = ax.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")
    ax.set_title(
        f"Weekday × hour heatmap ({_tz_label(cfg)})",
        fontproperties=font_prop,
        fontsize=13,
        pad=10,
    )
    ax.set_xlabel("Hour", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)
    ax.set_xticks(list(range(0, 24, 3)))
    ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 3)], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)
    cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("Prompts", fontproperties=font_prop)
    for t in cbar.ax.get_yticklabels():
        t.set_fontproperties(font_prop)


def _dashboard_model_panel(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the eight most used models as horizontal bars with percentages."""
    model_counts: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1
    total_models = sum(model_counts.values())
    if not total_models:
        _dashboard_no_data(ax, font_prop)
        return

    items = sorted(model_counts.items(), key=lambda x: x[1], reverse=True)
    labels = [k for k, _ in items][:8]
    counts = [v for _, v in items][:8]
    # Reverse so the largest bar ends up at the top of the panel.
    y = list(range(len(labels)))[::-1]
    bars = ax.barh(y, counts[::-1], color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(y)
    ax.set_yticklabels(labels[::-1], fontproperties=font_prop)
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Models", fontproperties=font_prop, fontsize=13, pad=10)
    for bar, count in zip(bars, counts[::-1], strict=True):
        pct = 100 * (count / total_models)
        ax.text(
            bar.get_width(),
            bar.get_y() + bar.get_height() / 2,
            f" {pct:.0f}%",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#57606a",
        )
    _apply_tick_font(ax, font_prop)


def _dashboard_length_panel(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw a small histogram of user prompts per conversation on ``ax``."""
    lengths = [conv.message_count("user") for conv in collection.conversations]
    if not lengths:
        _dashboard_no_data(ax, font_prop)
        return

    sorted_lengths = sorted(lengths)
    # Cap the x axis at the 95th percentile (at least 5); longer conversations
    # are counted at the cap.
    cap = max(int(sorted_lengths[int(0.95 * (len(sorted_lengths) - 1))]), 5)
    plot_lengths = [min(length, cap) for length in lengths]
    ax.hist(
        plot_lengths,
        bins=min(16, max(8, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    ax.set_title("Conversation length", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_xlabel("User prompts", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)


def generate_summary_dashboard(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a compact, high-signal overview dashboard.

    The 14x9 inch figure has a text header (totals and date range) above four
    panels: monthly activity across the top, the weekday-by-hour heatmap on the
    left, and model usage plus conversation length stacked on the right. A
    panel without data shows "No data" instead.

    Args:
        collection: Conversations to summarize.
        config: Graph settings; the default configuration's graph section is
            used when omitted.

    Returns:
        The assembled Matplotlib ``Figure``.
    """
    cfg = config or get_default_config().graph
    font_prop = _load_font(cfg)

    fig = Figure(figsize=(14, 9), dpi=cfg.dpi, facecolor="white")
    gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1.0, 1.0], width_ratios=[1.25, 1.0])

    ax_ts: Axes = fig.add_subplot(gs[0, :])
    ax_heat: Axes = fig.add_subplot(gs[1:, 0])
    ax_model: Axes = fig.add_subplot(gs[1, 1])
    ax_len: Axes = fig.add_subplot(gs[2, 1])

    # The heatmap axes get their own treatment in the heatmap panel.
    for ax in (ax_ts, ax_model, ax_len):
        _style_axes(ax, cfg)
        _apply_tick_font(ax, font_prop)

    user_ts = collection.timestamps("user")
    _dashboard_header(fig, collection, user_ts, cfg, font_prop)
    _dashboard_monthly_panel(ax_ts, user_ts, cfg, font_prop)
    _dashboard_heatmap_panel(fig, ax_heat, user_ts, cfg, font_prop)
    _dashboard_model_panel(ax_model, collection, cfg, font_prop)
    _dashboard_length_panel(ax_len, collection, cfg, font_prop)

    fig.subplots_adjust(top=0.93, left=0.06, right=0.98, bottom=0.06, hspace=0.4, wspace=0.25)
    return fig
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate all summary-level graphs.

    Each graph is built and saved as a PNG file directly inside
    ``output_dir``. Nothing is written (and ``output_dir`` is not created)
    when the collection has no conversations. The weekday and hourly
    pattern graphs are only produced when the collection has user
    timestamps.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs (created if missing)
        config: Optional graph configuration; falls back to the default
            graph configuration when omitted
        progress_bar: Whether to show a progress bar while saving
    """
    if not collection.conversations:
        return

    cfg = config or get_default_config().graph

    # Callers other than generate_graphs() may hand us a directory that does
    # not exist yet; savefig() would otherwise fail on the first file.
    output_dir.mkdir(parents=True, exist_ok=True)

    user_ts = collection.timestamps("user")
    logger.info(f"Generating summary graphs to {output_dir}")

    # (label, output file name, zero-argument figure builder)
    tasks: list[tuple[str, str, Callable[[], Figure]]] = [
        ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
        (
            "Activity heatmap",
            "activity_heatmap.png",
            lambda: generate_activity_heatmap(collection, cfg),
        ),
        (
            "Daily activity",
            "daily_activity.png",
            lambda: generate_daily_activity_lineplot(collection, cfg),
        ),
        (
            "Monthly activity",
            "monthly_activity.png",
            lambda: generate_monthly_activity_barplot(collection, cfg),
        ),
        ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
        (
            "Conversation lengths",
            "conversation_lengths.png",
            lambda: generate_length_histogram(collection, cfg),
        ),
        (
            "Conversation lifetimes",
            "conversation_lifetimes.png",
            lambda: generate_conversation_lifetime_histogram(collection, cfg),
        ),
    ]

    # The time-of-week / time-of-day patterns need at least one user timestamp.
    if user_ts:
        tasks.extend(
            [
                (
                    "Weekday pattern",
                    "weekday_pattern.png",
                    lambda: generate_week_barplot(user_ts, "All time", cfg),
                ),
                (
                    "Hourly pattern",
                    "hourly_pattern.png",
                    lambda: generate_hour_barplot(user_ts, "All time", cfg),
                ),
            ]
        )

    # Figures are built lazily, one at a time, right before being saved.
    for _, filename, build in tqdm(
        tasks,
        desc="Creating summary graphs",
        disable=not progress_bar,
    ):
        fig = build()
        fig.savefig(output_dir / filename, facecolor="white")
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
def generate_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate weekly, hourly, and summary graphs.

    Summary graphs are always attempted. Per-month and per-year weekday /
    hourly breakdowns are written below ``output_dir / "Breakdowns"`` only
    when the corresponding ``generate_monthly_breakdowns`` /
    ``generate_yearly_breakdowns`` flag of the graph configuration is set
    and the collection has user timestamps.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs (created if missing)
        config: Optional graph configuration; falls back to the default
            graph configuration when omitted
        progress_bar: Whether to show progress bars
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    cfg = config or get_default_config().graph

    # Summary graphs (default: small, high-signal set)
    generate_summary_graphs(collection, output_dir, cfg, progress_bar=progress_bar)

    # Optional breakdowns (can generate lots of files; off by default)
    if not collection.conversations:
        return

    # One row per breakdown kind:
    # (enabled, sub-directory, period bucketing function,
    #  file-name slug format, graph title format, progress description)
    breakdown_specs = [
        (
            cfg.generate_monthly_breakdowns,
            "Monthly",
            _month_start,
            "%Y-%m",
            "%b %Y",
            "Creating monthly breakdown graphs",
        ),
        (
            cfg.generate_yearly_breakdowns,
            "Yearly",
            _year_start,
            "%Y",
            "%Y",
            "Creating yearly breakdown graphs",
        ),
    ]
    enabled_specs = [spec[1:] for spec in breakdown_specs if spec[0]]
    if not enabled_specs:
        # Skip the timestamp extraction entirely when no breakdown is requested.
        return

    timestamps = collection.timestamps("user")
    if not timestamps:
        return

    breakdown_root = output_dir / "Breakdowns"
    for folder, period_start, slug_format, title_format, description in enabled_specs:
        period_dir = breakdown_root / folder
        period_dir.mkdir(parents=True, exist_ok=True)

        # Bucket the raw timestamps by the start of their month / year.
        groups: defaultdict[datetime, list[float]] = defaultdict(list)
        for ts in timestamps:
            groups[period_start(_ts_to_dt(ts, cfg))].append(ts)

        # Emit the periods in chronological order.
        for period, ts_list in tqdm(
            sorted(groups.items(), key=lambda item: item[0]),
            desc=description,
            disable=not progress_bar,
        ):
            slug = period.strftime(slug_format)
            title = period.strftime(title_format)
            generate_week_barplot(ts_list, title, cfg).savefig(
                period_dir / f"{slug}_weekday.png", facecolor="white"
            )
            generate_hour_barplot(ts_list, title, cfg).savefig(
                period_dir / f"{slug}_hourly.png", facecolor="white"
            )
|