convoviz-0.2.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. convoviz/__init__.py +25 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +9 -0
  4. convoviz/analysis/graphs.py +855 -0
  5. convoviz/analysis/wordcloud.py +165 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +117 -0
  40. convoviz/config.py +106 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +247 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +98 -0
  45. convoviz/io/loaders.py +186 -0
  46. convoviz/io/writers.py +227 -0
  47. convoviz/models/__init__.py +24 -0
  48. convoviz/models/collection.py +115 -0
  49. convoviz/models/conversation.py +158 -0
  50. convoviz/models/message.py +218 -0
  51. convoviz/models/node.py +66 -0
  52. convoviz/pipeline.py +167 -0
  53. convoviz/py.typed +0 -0
  54. convoviz/renderers/__init__.py +10 -0
  55. convoviz/renderers/markdown.py +269 -0
  56. convoviz/renderers/yaml.py +119 -0
  57. convoviz/utils.py +155 -0
  58. convoviz-0.2.12.dist-info/METADATA +148 -0
  59. convoviz-0.2.12.dist-info/RECORD +61 -0
  60. convoviz-0.2.12.dist-info/WHEEL +4 -0
  61. convoviz-0.2.12.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,855 @@
1
+ """Graph generation for conversation analytics.
2
+
3
+ Goals:
4
+ - Professional, consistent styling across plots.
5
+ - High-signal summaries by default (avoid output spam).
6
+ - Correct time bucketing (based on *message timestamps*, not conversation creation time).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections import defaultdict
12
+ from collections.abc import Callable, Iterable
13
+ from datetime import UTC, datetime
14
+ from pathlib import Path
15
+
16
+ import matplotlib.dates as mdates
17
+ import matplotlib.font_manager as fm
18
+ import matplotlib.ticker as mticker
19
+ from matplotlib.axes import Axes
20
+ from matplotlib.figure import Figure
21
+ from matplotlib.image import AxesImage
22
+ from tqdm import tqdm
23
+
24
+ from convoviz.config import GraphConfig, get_default_config
25
+ from convoviz.models import ConversationCollection
26
+ from convoviz.utils import get_asset_path
27
+
28
# Weekday names ordered so that index ``datetime.weekday()`` (0 = Monday) picks
# the matching name.
WEEKDAYS = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
29
+
30
+
31
def _load_font(config: GraphConfig) -> fm.FontProperties:
    """Return the font properties used for text in a figure.

    The font file is looked up as ``fonts/<config.font_name>`` through
    ``get_asset_path``. When that path does not exist, a default
    ``FontProperties`` instance is returned instead.
    """
    candidate = get_asset_path(f"fonts/{config.font_name}")
    if not candidate.exists():
        return fm.FontProperties()
    return fm.FontProperties(fname=str(candidate))
34
+
35
+
36
def _style_axes(ax: Axes, config: GraphConfig) -> None:
    """Apply the shared clean look to *ax*.

    Sets a white background, hides the top and right spines, recolours the
    left and bottom spines and the ticks, and limits the y axis to integer
    major ticks. When ``config.grid`` is truthy a horizontal grid is drawn
    and axis artists are placed below the plotted data.
    """
    ax.set_facecolor("white")
    for hidden_side in ("top", "right"):
        ax.spines[hidden_side].set_visible(False)
    for muted_side in ("left", "bottom"):
        ax.spines[muted_side].set_color("#d0d7de")
    ax.tick_params(colors="#24292f")
    ax.yaxis.set_major_locator(mticker.MaxNLocator(nbins=6, integer=True))

    if not config.grid:
        return
    ax.grid(axis="y", linestyle="-", linewidth=0.8, alpha=0.35, color="#8c959f")
    ax.set_axisbelow(True)
49
+
50
+
51
def _apply_tick_font(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Set *font_prop* on every current x- and y-axis tick label of *ax*."""
    tick_labels = [*ax.get_xticklabels(), *ax.get_yticklabels()]
    for tick_label in tick_labels:
        tick_label.set_fontproperties(font_prop)
54
+
55
+
56
def _setup_single_axes(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
    """Create a one-plot figure that is already styled.

    Returns:
        ``(figure, axes, font)``: a white Figure sized from ``config.figsize``
        and ``config.dpi``, its single Axes after ``_style_axes`` has been
        applied, and the font returned by ``_load_font``.
    """
    figure = Figure(figsize=config.figsize, dpi=config.dpi, facecolor="white")
    axes: Axes = figure.add_subplot()
    font = _load_font(config)
    _style_axes(axes, config)
    return figure, axes, font
62
+
63
+
64
+ def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
65
+ """Convert epoch timestamps into aware datetimes based on config."""
66
+ dt_utc = datetime.fromtimestamp(ts, UTC)
67
+ return dt_utc if config.timezone == "utc" else dt_utc.astimezone()
68
+
69
+
70
+ def _tz_label(config: GraphConfig) -> str:
71
+ return "UTC" if config.timezone == "utc" else "Local"
72
+
73
+
74
+ def _month_start(dt: datetime) -> datetime:
75
+ return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
76
+
77
+
78
+ def _year_start(dt: datetime) -> datetime:
79
+ return dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
80
+
81
+
82
+ def _day_start(dt: datetime) -> datetime:
83
+ return dt.replace(hour=0, minute=0, second=0, microsecond=0)
84
+
85
+
86
+ def _iter_month_starts(start: datetime, end: datetime) -> list[datetime]:
87
+ start = _month_start(start)
88
+ end = _month_start(end)
89
+ months: list[datetime] = []
90
+ cur = start
91
+ while cur <= end:
92
+ months.append(cur)
93
+ year, month = cur.year, cur.month
94
+ cur = cur.replace(year=year + 1, month=1) if month == 12 else cur.replace(month=month + 1)
95
+ return months
96
+
97
+
98
+ def _fill_missing_months(counts: dict[datetime, int]) -> tuple[list[datetime], list[int]]:
99
+ if not counts:
100
+ return [], []
101
+ keys = sorted(counts.keys())
102
+ months = _iter_month_starts(keys[0], keys[-1])
103
+ return months, [counts.get(m, 0) for m in months]
104
+
105
+
106
+ def _aggregate_counts_by_month(
107
+ timestamps: Iterable[float],
108
+ config: GraphConfig,
109
+ ) -> dict[datetime, int]:
110
+ counts: defaultdict[datetime, int] = defaultdict(int)
111
+ for ts in timestamps:
112
+ dt = _ts_to_dt(ts, config)
113
+ counts[_month_start(dt)] += 1
114
+ return dict(counts)
115
+
116
+
117
+ def _moving_average(values: list[int], window: int) -> list[float]:
118
+ if window <= 1:
119
+ return [float(v) for v in values]
120
+ if len(values) < window:
121
+ return []
122
+ out: list[float] = []
123
+ running = sum(values[:window])
124
+ out.append(running / window)
125
+ for i in range(window, len(values)):
126
+ running += values[i] - values[i - window]
127
+ out.append(running / window)
128
+ return out
129
+
130
+
131
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a bar chart of how many timestamps fall on each weekday.

    Args:
        timestamps: Epoch timestamps; each one is converted with ``_ts_to_dt``.
        title: Prefix of the plot title (followed by "· Weekday pattern").
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the chart.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    # One slot per weekday; datetime.weekday() is 0 for Monday, like WEEKDAYS.
    tallies = [0] * len(WEEKDAYS)
    for stamp in timestamps:
        tallies[_ts_to_dt(stamp, cfg).weekday()] += 1

    positions = list(range(len(WEEKDAYS)))
    rects = ax.bar(positions, tallies, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(rects, padding=3, fontsize=9, fontproperties=font_prop, color="#24292f")

    ax.set_title(f"{title} · Weekday pattern", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Weekday", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xticks(positions)
    ax.set_xticklabels(WEEKDAYS, rotation=35, ha="right", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
170
+
171
+
172
def generate_hour_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a bar chart of how many timestamps fall in each hour of the day.

    Args:
        timestamps: Epoch timestamps; each one is converted with ``_ts_to_dt``.
        title: Prefix of the plot title (followed by "· Hourly pattern (...)").
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the chart, with one bar per hour 0-23.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    per_hour = [0] * 24
    for stamp in timestamps:
        per_hour[_ts_to_dt(stamp, cfg).hour] += 1

    rects = ax.bar(range(24), per_hour, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(rects, padding=2, fontsize=7, fontproperties=font_prop, color="#24292f")

    zone = _tz_label(cfg)
    ax.set_title(
        f"{title} · Hourly pattern ({zone})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({zone})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)

    # Label every second hour to keep the axis readable.
    labelled_hours = range(0, 24, 2)
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}:00" for hour in labelled_hours], fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
216
+
217
+
218
def generate_model_piechart(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw the number of conversations per model as a horizontal bar chart.

    The name is historical (this used to be a pie chart) and is kept for
    backwards compatibility. Models with less than 5% of all conversations are
    merged into an "Other" bar, and at most ten bars are shown. Conversations
    whose ``model`` is falsy are counted as "Unknown".

    Args:
        collection: Conversations to summarise (``collection.conversations``).
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the chart, or a "No data" placeholder when the
        collection has no conversations.
    """
    cfg = config or get_default_config().graph

    usage: defaultdict[str, int] = defaultdict(int)
    for conversation in collection.conversations:
        usage[conversation.model or "Unknown"] += 1
    total = sum(usage.values())

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not total:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Fold rarely used models into a single bucket to keep the plot readable.
    threshold = 0.05
    shown: dict[str, int] = {name: n for name, n in usage.items() if n / total >= threshold}
    remainder = sum(n for n in usage.values() if n / total < threshold)
    if remainder:
        shown["Other"] = remainder

    ranked = sorted(shown.items(), key=lambda pair: pair[1], reverse=True)[:10]
    labels_reversed = [name for name, _ in reversed(ranked)]
    counts_reversed = [n for _, n in reversed(ranked)]
    y = list(reversed(range(len(ranked))))

    rects = ax.barh(y, counts_reversed, color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(y)
    ax.set_yticklabels(labels_reversed, fontproperties=font_prop)
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Model usage", fontproperties=font_prop, fontsize=14, pad=14)

    # Annotate each bar with its absolute count and its share of all conversations.
    for rect, n in zip(rects, counts_reversed, strict=True):
        share = 100 * (n / total)
        ax.text(
            rect.get_width(),
            rect.get_y() + rect.get_height() / 2,
            f" {n} ({share:.1f}%)",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#24292f",
        )

    _apply_tick_font(ax, font_prop)
    fig.tight_layout()
    return fig
281
+
282
+
283
def generate_length_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a histogram of user prompts per conversation.

    The x axis is capped at the 95th percentile (but never below 5); longer
    conversations are clamped to the cap, so they are counted in the last bin.
    Vertical lines mark the median and the 90th percentile.

    Args:
        collection: Conversations to summarise.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the histogram, or a "No data" placeholder when the
        collection has no conversations.
    """
    cfg = config or get_default_config().graph
    lengths = [conv.message_count("user") for conv in collection.conversations]
    fig, ax, font_prop = _setup_single_axes(cfg)

    if not lengths:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    ordered = sorted(lengths)

    def percentile(fraction: float) -> int:
        # Lower nearest-rank pick: the index is truncated, never interpolated.
        return ordered[int(fraction * (len(ordered) - 1))]

    p50, p90, p95 = percentile(0.50), percentile(0.90), percentile(0.95)
    cap = max(int(p95), 5)
    clamped = [min(length, cap) for length in lengths]

    ax.hist(
        clamped,
        bins=min(24, max(10, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )

    markers = [
        (p50, f"median={p50}", "#24292f"),
        (p90, f"p90={p90}", "#cf222e"),
    ]
    for position, _, colour in markers:
        ax.axvline(position, color=colour, linewidth=1.2, alpha=0.8)
    for position, caption, colour in markers:
        ax.text(
            position,
            ax.get_ylim()[1] * 0.95,
            caption,
            rotation=90,
            va="top",
            ha="right",
            fontproperties=font_prop,
            fontsize=9,
            color=colour,
        )

    ax.set_title("Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
358
+
359
+
360
def generate_conversation_lifetime_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a histogram of conversation lifetimes in days.

    A lifetime is ``update_time - create_time`` of a conversation, floored at
    zero. The x axis is capped at the 95th percentile (at least one day) and
    longer lifetimes are clamped to the cap. Vertical lines mark the median
    and the 90th percentile; both values are also printed in the top-right
    corner.

    Args:
        collection: Conversations to summarise.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the histogram, or a "No data" placeholder when the
        collection has no conversations.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    seconds_per_day = 86_400
    lifetimes_days: list[float] = [
        max(0.0, (conv.update_time - conv.create_time).total_seconds() / seconds_per_day)
        for conv in collection.conversations
    ]

    if not lifetimes_days:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    ordered = sorted(lifetimes_days)
    top_index = len(ordered) - 1
    p50 = ordered[int(0.50 * top_index)]
    p90 = ordered[int(0.90 * top_index)]
    p95 = ordered[int(0.95 * top_index)]
    cap = max(float(p95), 1.0)

    ax.hist(
        [min(days, cap) for days in lifetimes_days],
        bins=24,
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    for position, colour in ((p50, "#24292f"), (p90, "#cf222e")):
        ax.axvline(position, color=colour, linewidth=1.2, alpha=0.8)

    ax.set_title("Conversation lifetimes (days)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Days between first and last message", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    ax.text(
        0.99,
        0.98,
        f"median={p50:.1f}d\np90={p90:.1f}d",
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontproperties=font_prop,
        fontsize=9,
        color="#57606a",
    )
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
416
+
417
+
418
def generate_monthly_activity_barplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw user prompts per month as bars with a 3-month moving average line.

    Months are derived from the values returned by
    ``collection.timestamps("user")``, i.e. from message activity rather than
    from the month in which a conversation was created.

    Args:
        collection: Conversations to summarise.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the chart, or a "No data" placeholder when there
        are no timestamps.
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")
    fig, ax, font_prop = _setup_single_axes(cfg)

    months: list[datetime] = []
    values: list[int] = []
    if timestamps:
        months, values = _fill_missing_months(_aggregate_counts_by_month(timestamps, cfg))
    if not months:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    x = mdates.date2num(months)
    ax.bar(x, values, width=25, color=cfg.color, alpha=0.25, edgecolor="none")

    window = 3
    smooth = _moving_average(values, window=window)
    if smooth:
        # The first average belongs to the last month of the first full window.
        ax.plot(x[window - 1 :], smooth, color=cfg.color, linewidth=2.2, alpha=0.9)

    locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))

    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
461
+
462
+
463
def generate_daily_activity_lineplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot user prompts per calendar day together with a 7-day rolling mean.

    Every calendar day from the first to the last active day is part of the
    series; days without prompts count as zero, so the rolling mean covers
    seven consecutive calendar days instead of seven days that happen to have
    activity. Days are taken from the calendar date of each timestamp in the
    configured timezone (see ``_ts_to_dt``).

    Args:
        collection: Conversations to summarise; ``collection.timestamps("user")``
            supplies the timestamps.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the chart, or a "No data" placeholder when there
        are no timestamps.
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Bucket by the ordinal of the calendar date rather than by an aware
    # "midnight" datetime: aware datetimes hash by absolute instant, so one
    # local day could otherwise split into two buckets when its timestamps
    # carry different UTC offsets.
    per_day: defaultdict[int, int] = defaultdict(int)
    for ts in timestamps:
        per_day[_ts_to_dt(ts, cfg).toordinal()] += 1

    # Zero-fill the gaps so the series has exactly one sample per calendar day.
    ordinals = range(min(per_day), max(per_day) + 1)
    days = [datetime.fromordinal(ordinal) for ordinal in ordinals]
    values = [per_day.get(ordinal, 0) for ordinal in ordinals]

    x = mdates.date2num(days)
    ax.bar(x, values, width=0.9, color=cfg.color, alpha=0.18, edgecolor="none")
    ax.plot(x, values, color=cfg.color, linewidth=1.2, alpha=0.25)

    window = 7
    smooth = _moving_average(values, window=window)
    if smooth:
        # The first average belongs to the last day of the first full window.
        ax.plot(x[window - 1 :], smooth, color=cfg.color, linewidth=2.4, alpha=0.95)

    locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Daily activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
505
+
506
+
507
def generate_activity_heatmap(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a weekday × hour heatmap of user prompts.

    Rows are weekdays (Monday first), columns are hours 0-23, and each cell
    holds the number of timestamps from ``collection.timestamps("user")`` that
    fall into it after conversion with ``_ts_to_dt``.

    Args:
        collection: Conversations to summarise.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the heatmap and its colour bar, or a "No data"
        placeholder when there are no timestamps.
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    cells: list[list[int]] = [[0] * 24 for _ in range(7)]
    for stamp in timestamps:
        moment = _ts_to_dt(stamp, cfg)
        cells[moment.weekday()][moment.hour] += 1

    # The image fills the axes, so switch off the grid and hide every spine.
    ax.grid(False)
    for spine_name in ("top", "right", "left", "bottom"):
        ax.spines[spine_name].set_visible(False)

    img: AxesImage = ax.imshow(cells, aspect="auto", cmap="Blues", interpolation="nearest")

    zone = _tz_label(cfg)
    ax.set_title(
        f"Activity heatmap (weekday × hour, {zone})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({zone})", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)

    labelled_hours = list(range(0, 24, 2))
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}" for hour in labelled_hours], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)

    cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("User prompts", fontproperties=font_prop)
    for tick_label in cbar.ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
554
+
555
+
556
def _dashboard_no_data(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Replace a dashboard panel with a centred "No data" message."""
    ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
    ax.set_axis_off()


def _dashboard_monthly(
    ax: Axes,
    user_ts: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the monthly activity bars and 3-month moving average into *ax*."""
    if not user_ts:
        _dashboard_no_data(ax, font_prop)
        return

    months, values = _fill_missing_months(_aggregate_counts_by_month(user_ts, cfg))
    x = mdates.date2num(months)
    ax.bar(x, values, width=25, color=cfg.color, alpha=0.20, edgecolor="none")

    window = 3
    smooth = _moving_average(values, window=window)
    if smooth:
        # The first average belongs to the last month of the first full window.
        ax.plot(x[window - 1 :], smooth, color=cfg.color, linewidth=2.6, alpha=0.95)

    locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)


def _dashboard_heatmap(
    fig: Figure,
    ax: Axes,
    user_ts: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the weekday × hour heatmap and its colour bar into *ax*."""
    if not user_ts:
        _dashboard_no_data(ax, font_prop)
        return

    grid: list[list[int]] = [[0 for _ in range(24)] for _ in range(7)]
    for ts in user_ts:
        dt = _ts_to_dt(ts, cfg)
        grid[dt.weekday()][dt.hour] += 1

    # The image fills the axes, so switch off the grid and hide every spine.
    ax.grid(False)
    for side in ["top", "right", "left", "bottom"]:
        ax.spines[side].set_visible(False)
    img = ax.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")
    ax.set_title(
        f"Weekday × hour heatmap ({_tz_label(cfg)})",
        fontproperties=font_prop,
        fontsize=13,
        pad=10,
    )
    ax.set_xlabel("Hour", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)
    ax.set_xticks(list(range(0, 24, 3)))
    ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 3)], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)
    cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("Prompts", fontproperties=font_prop)
    for t in cbar.ax.get_yticklabels():
        t.set_fontproperties(font_prop)


def _dashboard_models(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the eight most used models as horizontal bars with percentages into *ax*."""
    model_counts: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1
    total_models = sum(model_counts.values())
    if not total_models:
        _dashboard_no_data(ax, font_prop)
        return

    items = sorted(model_counts.items(), key=lambda x: x[1], reverse=True)
    labels = [k for k, _ in items][:8]
    counts = [v for _, v in items][:8]
    y = list(range(len(labels)))[::-1]
    bars = ax.barh(y, counts[::-1], color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(y)
    ax.set_yticklabels(labels[::-1], fontproperties=font_prop)
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Models", fontproperties=font_prop, fontsize=13, pad=10)
    for bar, count in zip(bars, counts[::-1], strict=True):
        # Percentages are relative to all conversations, not only the bars shown.
        pct = 100 * (count / total_models)
        ax.text(
            bar.get_width(),
            bar.get_y() + bar.get_height() / 2,
            f" {pct:.0f}%",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#57606a",
        )
    _apply_tick_font(ax, font_prop)


def _dashboard_lengths(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the compact conversation-length histogram into *ax*."""
    lengths = [conv.message_count("user") for conv in collection.conversations]
    if not lengths:
        _dashboard_no_data(ax, font_prop)
        return

    sorted_lengths = sorted(lengths)
    # Cap the axis at the 95th percentile (never below 5); longer conversations
    # are clamped to the cap so they are counted in the last bin.
    cap = max(int(sorted_lengths[int(0.95 * (len(sorted_lengths) - 1))]), 5)
    plot_lengths = [min(length, cap) for length in lengths]
    ax.hist(
        plot_lengths,
        bins=min(16, max(8, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    ax.set_title("Conversation length", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_xlabel("User prompts", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)


def generate_summary_dashboard(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a compact, high-signal overview dashboard.

    The 14×9 inch figure has a two-line header (title plus conversation count,
    prompt count, date range and timezone label) and four panels: monthly
    activity across the top, a weekday × hour heatmap on the left, and model
    usage above a conversation-length histogram on the right. A panel without
    data shows "No data" instead.

    Args:
        collection: Conversations to summarise.
        config: Graph settings; ``get_default_config().graph`` when omitted.

    Returns:
        The Figure containing the dashboard.
    """
    cfg = config or get_default_config().graph
    font_prop = _load_font(cfg)

    fig = Figure(figsize=(14, 9), dpi=cfg.dpi, facecolor="white")
    gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1.0, 1.0], width_ratios=[1.25, 1.0])

    ax_ts: Axes = fig.add_subplot(gs[0, :])
    ax_heat: Axes = fig.add_subplot(gs[1:, 0])
    ax_model: Axes = fig.add_subplot(gs[1, 1])
    ax_len: Axes = fig.add_subplot(gs[2, 1])

    # The heatmap panel is styled separately (no grid, no spines).
    for ax in (ax_ts, ax_model, ax_len):
        _style_axes(ax, cfg)
        _apply_tick_font(ax, font_prop)

    # Header
    user_ts = collection.timestamps("user")
    conv_count = len(collection.conversations)
    prompt_count = len(user_ts)

    fig.text(
        0.01,
        0.985,
        "ChatGPT usage overview",
        fontproperties=font_prop,
        fontsize=18,
        va="top",
        ha="left",
        color="#24292f",
    )

    if user_ts:
        dts = [_ts_to_dt(ts, cfg) for ts in user_ts]
        date_range = f"{min(dts).date().isoformat()} → {max(dts).date().isoformat()}"
    else:
        date_range = "No activity"

    fig.text(
        0.01,
        0.955,
        f"{conv_count} conversations · {prompt_count} user prompts · {date_range} · {_tz_label(cfg)}",
        fontproperties=font_prop,
        fontsize=10,
        va="top",
        ha="left",
        color="#57606a",
    )

    # Panels
    _dashboard_monthly(ax_ts, user_ts, cfg, font_prop)
    _dashboard_heatmap(fig, ax_heat, user_ts, cfg, font_prop)
    _dashboard_models(ax_model, collection, cfg, font_prop)
    _dashboard_lengths(ax_len, collection, cfg, font_prop)

    fig.subplots_adjust(top=0.93, left=0.06, right=0.98, bottom=0.06, hspace=0.4, wspace=0.25)
    return fig
719
+
720
+
721
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate all summary-level graphs and save them as PNG files.

    Nothing is written when the collection has no conversations. The weekday
    and hourly pattern graphs are only produced when there is at least one
    user timestamp.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs; created if it does not exist
        config: Optional graph configuration
        progress_bar: Whether to show a progress bar
    """
    if not collection.conversations:
        return

    cfg = config or get_default_config().graph

    # savefig does not create missing directories, so make sure the target
    # exists when this function is called on its own.
    output_dir.mkdir(parents=True, exist_ok=True)

    user_ts = collection.timestamps("user")

    # Each task is (label, file name, zero-argument builder); figures are built
    # lazily, one at a time, inside the loop below.
    tasks: list[tuple[str, str, Callable[[], Figure]]] = [
        ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
        ("Activity heatmap", "activity_heatmap.png", lambda: generate_activity_heatmap(collection, cfg)),
        ("Daily activity", "daily_activity.png", lambda: generate_daily_activity_lineplot(collection, cfg)),
        ("Monthly activity", "monthly_activity.png", lambda: generate_monthly_activity_barplot(collection, cfg)),
        ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
        ("Conversation lengths", "conversation_lengths.png", lambda: generate_length_histogram(collection, cfg)),
        (
            "Conversation lifetimes",
            "conversation_lifetimes.png",
            lambda: generate_conversation_lifetime_histogram(collection, cfg),
        ),
    ]

    if user_ts:
        tasks.extend(
            [
                (
                    "Weekday pattern",
                    "weekday_pattern.png",
                    lambda: generate_week_barplot(user_ts, "All time", cfg),
                ),
                (
                    "Hourly pattern",
                    "hourly_pattern.png",
                    lambda: generate_hour_barplot(user_ts, "All time", cfg),
                ),
            ]
        )

    for _, filename, build in tqdm(
        tasks,
        desc="Creating summary graphs",
        disable=not progress_bar,
    ):
        fig = build()
        fig.savefig(output_dir / filename, facecolor="white")
779
+
780
+
781
def _save_pattern_breakdown(
    timestamps: list[float],
    title: str,
    slug: str,
    directory: Path,
    cfg: GraphConfig,
) -> None:
    """Save the weekday and hourly bar plots of one breakdown period into *directory*."""
    generate_week_barplot(timestamps, title, cfg).savefig(
        directory / f"{slug}_weekday.png", facecolor="white"
    )
    generate_hour_barplot(timestamps, title, cfg).savefig(
        directory / f"{slug}_hourly.png", facecolor="white"
    )


def generate_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate weekly, hourly, and summary graphs.

    Summary graphs are always attempted. Per-month and per-year breakdowns are
    written to ``Breakdowns/Monthly`` and ``Breakdowns/Yearly`` only when the
    matching ``generate_monthly_breakdowns`` / ``generate_yearly_breakdowns``
    flag is set on the graph configuration.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs
        config: Optional graph configuration
        progress_bar: Whether to show progress bars
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    cfg = config or get_default_config().graph

    # Summary graphs (default: small, high-signal set)
    generate_summary_graphs(collection, output_dir, cfg, progress_bar=progress_bar)

    # Optional breakdowns (can generate lots of files; off by default)
    if not collection.conversations:
        return

    timestamps = collection.timestamps("user")
    if not timestamps:
        return

    breakdown_root = output_dir / "Breakdowns"
    if cfg.generate_monthly_breakdowns:
        monthly_dir = breakdown_root / "Monthly"
        monthly_dir.mkdir(parents=True, exist_ok=True)

        # Group by calendar (year, month), not by an aware month-start datetime:
        # aware datetimes hash by absolute instant, so timestamps of one month
        # with different UTC offsets (e.g. around a DST change) would form two
        # groups with the same slug, and the second file would overwrite the first.
        month_groups: defaultdict[tuple[int, int], list[float]] = defaultdict(list)
        for ts in timestamps:
            dt = _ts_to_dt(ts, cfg)
            month_groups[(dt.year, dt.month)].append(ts)

        for (year, month), ts_list in tqdm(
            sorted(month_groups.items(), key=lambda item: item[0]),
            desc="Creating monthly breakdown graphs",
            disable=not progress_bar,
        ):
            period = datetime(year, month, 1)
            _save_pattern_breakdown(
                ts_list,
                period.strftime("%b %Y"),
                period.strftime("%Y-%m"),
                monthly_dir,
                cfg,
            )

    if cfg.generate_yearly_breakdowns:
        yearly_dir = breakdown_root / "Yearly"
        yearly_dir.mkdir(parents=True, exist_ok=True)

        # Same reasoning as above: key on the calendar year so summer-time and
        # winter-time timestamps of one year end up in a single group.
        year_groups: defaultdict[int, list[float]] = defaultdict(list)
        for ts in timestamps:
            year_groups[_ts_to_dt(ts, cfg).year].append(ts)

        for year, ts_list in tqdm(
            sorted(year_groups.items(), key=lambda item: item[0]),
            desc="Creating yearly breakdown graphs",
            disable=not progress_bar,
        ):
            label = datetime(year, 1, 1).strftime("%Y")
            _save_pattern_breakdown(ts_list, label, label, yearly_dir, cfg)