convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. convoviz/__init__.py +34 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +22 -0
  4. convoviz/analysis/graphs.py +879 -0
  5. convoviz/analysis/wordcloud.py +204 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +149 -0
  40. convoviz/config.py +120 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +264 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +109 -0
  45. convoviz/io/loaders.py +191 -0
  46. convoviz/io/writers.py +231 -0
  47. convoviz/logging_config.py +69 -0
  48. convoviz/models/__init__.py +24 -0
  49. convoviz/models/collection.py +115 -0
  50. convoviz/models/conversation.py +158 -0
  51. convoviz/models/message.py +218 -0
  52. convoviz/models/node.py +66 -0
  53. convoviz/pipeline.py +184 -0
  54. convoviz/py.typed +0 -0
  55. convoviz/renderers/__init__.py +10 -0
  56. convoviz/renderers/markdown.py +269 -0
  57. convoviz/renderers/yaml.py +119 -0
  58. convoviz/utils.py +155 -0
  59. convoviz-0.4.1.dist-info/METADATA +215 -0
  60. convoviz-0.4.1.dist-info/RECORD +62 -0
  61. convoviz-0.4.1.dist-info/WHEEL +4 -0
  62. convoviz-0.4.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,879 @@
1
+ """Graph generation for conversation analytics.
2
+
3
+ Goals:
4
+ - Professional, consistent styling across plots.
5
+ - High-signal summaries by default (avoid output spam).
6
+ - Correct time bucketing (based on *message timestamps*, not conversation creation time).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from collections import defaultdict
13
+ from collections.abc import Callable, Iterable
14
+ from datetime import UTC, datetime
15
+ from pathlib import Path
16
+
17
+ import matplotlib.dates as mdates
18
+ import matplotlib.font_manager as fm
19
+ import matplotlib.ticker as mticker
20
+ from matplotlib.axes import Axes
21
+ from matplotlib.figure import Figure
22
+ from matplotlib.image import AxesImage
23
+ from tqdm import tqdm
24
+
25
+ from convoviz.config import GraphConfig, get_default_config
26
+ from convoviz.models import ConversationCollection
27
+ from convoviz.utils import get_asset_path
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
32
+
33
+
34
def _load_font(config: GraphConfig) -> fm.FontProperties:
    """Return font properties for the font named in ``config``.

    The font file is looked up under the ``fonts/`` asset directory. When that
    file does not exist, a default-constructed ``FontProperties`` is returned.
    """
    candidate = get_asset_path(f"fonts/{config.font_name}")
    if not candidate.exists():
        return fm.FontProperties()
    return fm.FontProperties(fname=str(candidate))
37
+
38
+
39
def _style_axes(ax: Axes, config: GraphConfig) -> None:
    """Apply the shared plot styling to ``ax``.

    Sets a white background, hides the top/right spines, mutes the left/bottom
    spines and tick colors, and restricts the y axis to at most six integer
    ticks. A light horizontal grid is drawn behind the data when
    ``config.grid`` is truthy.
    """
    spine_color = "#d0d7de"

    ax.set_facecolor("white")
    for hidden_side in ("top", "right"):
        ax.spines[hidden_side].set_visible(False)
    for visible_side in ("left", "bottom"):
        ax.spines[visible_side].set_color(spine_color)
    ax.tick_params(colors="#24292f")
    ax.yaxis.set_major_locator(mticker.MaxNLocator(nbins=6, integer=True))

    if not config.grid:
        return
    ax.grid(axis="y", linestyle="-", linewidth=0.8, alpha=0.35, color="#8c959f")
    ax.set_axisbelow(True)
52
+
53
+
54
def _apply_tick_font(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Set ``font_prop`` on every x and y tick label of ``ax``."""
    tick_labels = [*ax.get_xticklabels(), *ax.get_yticklabels()]
    for tick_label in tick_labels:
        tick_label.set_fontproperties(font_prop)
57
+
58
+
59
def _setup_single_axes(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
    """Create a white, single-axes figure styled according to ``config``.

    Returns:
        A ``(figure, axes, font_properties)`` tuple: the new figure sized by
        ``config.figsize``/``config.dpi``, its only axes (already styled), and
        the font properties loaded for ``config``.
    """
    figure = Figure(figsize=config.figsize, dpi=config.dpi, facecolor="white")
    axes: Axes = figure.add_subplot()
    font = _load_font(config)
    _style_axes(axes, config)
    return figure, axes, font
65
+
66
+
67
+ def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
68
+ """Convert epoch timestamps into aware datetimes based on config."""
69
+ dt_utc = datetime.fromtimestamp(ts, UTC)
70
+ return dt_utc if config.timezone == "utc" else dt_utc.astimezone()
71
+
72
+
73
def _tz_label(config: GraphConfig) -> str:
    """Return the timezone label shown in titles and axis labels.

    ``"UTC"`` when ``config.timezone`` is ``"utc"``, otherwise ``"Local"``.
    """
    if config.timezone == "utc":
        return "UTC"
    return "Local"
75
+
76
+
77
+ def _month_start(dt: datetime) -> datetime:
78
+ return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
79
+
80
+
81
+ def _year_start(dt: datetime) -> datetime:
82
+ return dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
83
+
84
+
85
+ def _day_start(dt: datetime) -> datetime:
86
+ return dt.replace(hour=0, minute=0, second=0, microsecond=0)
87
+
88
+
89
+ def _iter_month_starts(start: datetime, end: datetime) -> list[datetime]:
90
+ start = _month_start(start)
91
+ end = _month_start(end)
92
+ months: list[datetime] = []
93
+ cur = start
94
+ while cur <= end:
95
+ months.append(cur)
96
+ year, month = cur.year, cur.month
97
+ cur = cur.replace(year=year + 1, month=1) if month == 12 else cur.replace(month=month + 1)
98
+ return months
99
+
100
+
101
+ def _fill_missing_months(counts: dict[datetime, int]) -> tuple[list[datetime], list[int]]:
102
+ if not counts:
103
+ return [], []
104
+ keys = sorted(counts.keys())
105
+ months = _iter_month_starts(keys[0], keys[-1])
106
+ return months, [counts.get(m, 0) for m in months]
107
+
108
+
109
def _aggregate_counts_by_month(
    timestamps: Iterable[float],
    config: GraphConfig,
) -> dict[datetime, int]:
    """Count timestamps per month.

    Each timestamp is converted with ``_ts_to_dt`` and counted under the
    ``_month_start`` of the resulting datetime.

    NOTE(review): in local-time mode the keys keep the per-timestamp UTC
    offset, so one calendar month may appear under more than one key when the
    offset changes within it - confirm consumers merge by calendar month.
    """
    tally: dict[datetime, int] = {}
    for stamp in timestamps:
        bucket = _month_start(_ts_to_dt(stamp, config))
        tally[bucket] = tally.get(bucket, 0) + 1
    return tally
118
+
119
+
120
def _moving_average(values: list[int], window: int) -> list[float]:
    """Return the trailing moving average of ``values`` over ``window`` items.

    A ``window`` of 1 or less yields the values themselves as floats. When
    there are fewer values than ``window`` the result is empty; otherwise it
    has ``len(values) - window + 1`` entries, the first covering
    ``values[:window]``.
    """
    if window <= 1:
        return list(map(float, values))
    if len(values) < window:
        return []

    # Slide the window by adding the entering value and dropping the leaving one.
    total = sum(values[:window])
    averages: list[float] = [total / window]
    for entering, leaving in zip(values[window:], values):
        total += entering - leaving
        averages.append(total / window)
    return averages
132
+
133
+
134
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many of the given timestamps fall on each weekday.

    Args:
        timestamps: Unix timestamps to bucket by weekday.
        title: Text placed at the start of the plot title.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure containing the bar chart.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    # Slot i counts WEEKDAYS[i]; datetime.weekday() returns 0 for Monday.
    per_weekday = [0] * len(WEEKDAYS)
    for stamp in timestamps:
        per_weekday[_ts_to_dt(stamp, cfg).weekday()] += 1

    positions = list(range(len(WEEKDAYS)))
    rects = ax.bar(positions, per_weekday, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(rects, padding=3, fontsize=9, fontproperties=font_prop, color="#24292f")

    ax.set_title(f"{title} · Weekday pattern", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Weekday", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xticks(positions)
    ax.set_xticklabels(WEEKDAYS, rotation=35, ha="right", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
173
+
174
+
175
def generate_hour_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many of the given timestamps fall in each hour of the day (0-23).

    Args:
        timestamps: Unix timestamps to bucket by hour.
        title: Text placed at the start of the plot title.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure containing the bar chart.
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)
    tz_name = _tz_label(cfg)

    per_hour = [0] * 24
    for stamp in timestamps:
        per_hour[_ts_to_dt(stamp, cfg).hour] += 1

    rects = ax.bar(range(24), per_hour, color=cfg.color, alpha=0.9, width=0.72)
    if cfg.show_counts:
        ax.bar_label(rects, padding=2, fontsize=7, fontproperties=font_prop, color="#24292f")

    ax.set_title(
        f"{title} · Hourly pattern ({tz_name})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({tz_name})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)

    # Label every second hour to keep the axis readable.
    labelled_hours = range(0, 24, 2)
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}:00" for hour in labelled_hours], fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
219
+
220
+
221
def generate_model_piechart(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a model usage chart (conversations per model).

    Note: the name is kept for backwards compatibility (historically a pie chart).
    The chart is a horizontal bar chart annotated with counts and percentages.

    Models used by fewer than 5% of conversations are merged into an "Other"
    bar, at most ten bars are drawn, and the most-used entry is placed at the top.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when the collection is empty).
    """
    cfg = config or get_default_config().graph
    model_counts: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1

    total = sum(model_counts.values())
    fig, ax, font_prop = _setup_single_axes(cfg)

    if total == 0:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Group minor models to keep the plot readable.
    threshold = 0.05
    refined_counts: dict[str, int] = {}
    other_count = 0
    for model, count in model_counts.items():
        if count / total < threshold:
            other_count += count
        else:
            refined_counts[model] = count
    if other_count:
        refined_counts["Other"] = other_count

    ranked = sorted(refined_counts.items(), key=lambda item: item[1], reverse=True)[:10]
    labels = [model for model, _ in ranked]
    counts = [count for _, count in ranked]
    positions = list(range(len(ranked)))

    bars = ax.barh(positions, counts, color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels, fontproperties=font_prop)
    # The y axis grows upwards, so flip it to read largest-to-smallest from the top.
    # (Reversing both the positions and the counts cancels out and leaves the
    # largest bar at the bottom.)
    ax.invert_yaxis()
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Model usage", fontproperties=font_prop, fontsize=14, pad=14)

    for bar, count in zip(bars, counts, strict=True):
        pct = 100 * (count / total)
        ax.text(
            bar.get_width(),
            bar.get_y() + bar.get_height() / 2,
            f" {count} ({pct:.1f}%)",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#24292f",
        )

    _apply_tick_font(ax, font_prop)
    fig.tight_layout()
    return fig
284
+
285
+
286
def generate_length_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a histogram of user-prompt counts per conversation.

    Values above the cap (the 95th-percentile length, but at least 5) are
    clipped to the cap, and the x axis ends there, so the plot focuses on
    typical lengths. The median and 90th percentile are marked with vertical lines.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when there are no conversations).
    """
    cfg = config or get_default_config().graph
    lengths = [conv.message_count("user") for conv in collection.conversations]
    fig, ax, font_prop = _setup_single_axes(cfg)

    if not lengths:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Percentiles by truncated rank into the sorted values.
    ordered = sorted(lengths)
    last_index = len(ordered) - 1
    p50, p90, p95 = (ordered[int(q * last_index)] for q in (0.50, 0.90, 0.95))
    cap = max(int(p95), 5)
    clipped = [min(length, cap) for length in lengths]

    ax.hist(
        clipped,
        bins=min(24, max(10, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )

    markers = (("median", p50, "#24292f"), ("p90", p90, "#cf222e"))
    for _, value, color in markers:
        ax.axvline(value, color=color, linewidth=1.2, alpha=0.8)
    for tag, value, color in markers:
        ax.text(
            value,
            ax.get_ylim()[1] * 0.95,
            f"{tag}={value}",
            rotation=90,
            va="top",
            ha="right",
            fontproperties=font_prop,
            fontsize=9,
            color=color,
        )

    ax.set_title(
        "Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14
    )
    ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
363
+
364
+
365
def generate_conversation_lifetime_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a histogram of conversation lifetimes in days.

    A lifetime is ``update_time - create_time`` of a conversation, floored at
    zero. Values above the cap (the 95th percentile, but at least one day) are
    clipped to the cap; the median and 90th percentile are marked and printed.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when there are no conversations).
    """
    cfg = config or get_default_config().graph
    fig, ax, font_prop = _setup_single_axes(cfg)

    seconds_per_day = 86_400
    lifetimes_days: list[float] = [
        max(0.0, (conv.update_time - conv.create_time).total_seconds() / seconds_per_day)
        for conv in collection.conversations
    ]

    if not lifetimes_days:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Percentiles by truncated rank into the sorted values.
    ordered = sorted(lifetimes_days)
    last_index = len(ordered) - 1
    p50, p90, p95 = (ordered[int(q * last_index)] for q in (0.50, 0.90, 0.95))
    cap = max(float(p95), 1.0)
    clipped = [min(days, cap) for days in lifetimes_days]

    ax.hist(
        clipped,
        bins=24,
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    for value, color in ((p50, "#24292f"), (p90, "#cf222e")):
        ax.axvline(value, color=color, linewidth=1.2, alpha=0.8)

    ax.set_title("Conversation lifetimes (days)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel("Days between first and last message", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    # Summary box in the top-right corner, positioned in axes coordinates.
    ax.text(
        0.99,
        0.98,
        f"median={p50:.1f}d\np90={p90:.1f}d",
        transform=ax.transAxes,
        ha="right",
        va="top",
        fontproperties=font_prop,
        fontsize=9,
        color="#57606a",
    )
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
421
+
422
+
423
def generate_monthly_activity_barplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a bar chart of user prompts per month with a 3-month moving average.

    Important: months are derived from the *message timestamps* (actual
    activity), not from the month in which a conversation was created.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when there are no user prompts).
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")
    fig, ax, font_prop = _setup_single_axes(cfg)

    months: list[datetime] = []
    values: list[int] = []
    if timestamps:
        months, values = _fill_missing_months(_aggregate_counts_by_month(timestamps, cfg))

    if not months:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    x = mdates.date2num(months)
    ax.bar(x, values, width=25, color=cfg.color, alpha=0.25, edgecolor="none")

    # Each average covers a month and the two before it, so the line starts at the third month.
    trend = _moving_average(values, window=3)
    if trend:
        ax.plot(x[2:], trend, color=cfg.color, linewidth=2.2, alpha=0.9)

    tick_locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(tick_locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(tick_locator))

    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
466
+
467
+
468
def generate_daily_activity_lineplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a chart of user prompts per calendar day with a 7-day rolling mean.

    Prompts are bucketed by calendar date in the configured timezone. Every
    day between the first and the last active day is included, with 0 for days
    without prompts, so each rolling-mean point covers seven consecutive
    calendar days (not merely seven days that happened to have activity).

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when there are no user prompts).
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Bucket by the ordinal of the calendar date. Aware midnights are not safe
    # keys: two local timestamps on the same date but with different UTC
    # offsets (the day of a DST change) are different instants and would end
    # up in separate buckets.
    counts: defaultdict[int, int] = defaultdict(int)
    for ts in timestamps:
        counts[_ts_to_dt(ts, cfg).toordinal()] += 1

    ordinals = range(min(counts), max(counts) + 1)
    days = [datetime.fromordinal(ordinal) for ordinal in ordinals]
    values = [counts.get(ordinal, 0) for ordinal in ordinals]

    x = mdates.date2num(days)
    ax.bar(x, values, width=0.9, color=cfg.color, alpha=0.18, edgecolor="none")
    ax.plot(x, values, color=cfg.color, linewidth=1.2, alpha=0.25)
    # Each average covers a day and the six before it, so the line starts at the seventh day.
    smooth = _moving_average(values, window=7)
    if smooth:
        ax.plot(x[6:], smooth, color=cfg.color, linewidth=2.4, alpha=0.95)

    locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Daily activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
    ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)

    fig.tight_layout()
    return fig
510
+
511
+
512
def generate_activity_heatmap(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a weekday × hour heatmap of user prompts.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object ("No data" placeholder when there are no user prompts).
    """
    cfg = config or get_default_config().graph
    timestamps = collection.timestamps("user")

    fig, ax, font_prop = _setup_single_axes(cfg)
    if not timestamps:
        ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
        ax.set_axis_off()
        return fig

    # Rows are weekdays (index 0 = Monday), columns are hours 0-23.
    grid: list[list[int]] = [[0] * 24 for _ in range(7)]
    for stamp in timestamps:
        moment = _ts_to_dt(stamp, cfg)
        grid[moment.weekday()][moment.hour] += 1

    # Drop the grid lines and all four spines around the heatmap image.
    ax.grid(False)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    img: AxesImage = ax.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")

    tz_name = _tz_label(cfg)
    ax.set_title(
        f"Activity heatmap (weekday × hour, {tz_name})",
        fontproperties=font_prop,
        fontsize=14,
        pad=14,
    )
    ax.set_xlabel(f"Hour of day ({tz_name})", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)

    labelled_hours = list(range(0, 24, 2))
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{hour:02d}" for hour in labelled_hours], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)

    colorbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
    colorbar.set_label("User prompts", fontproperties=font_prop)
    for tick_label in colorbar.ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
559
+
560
+
561
def _draw_no_data(ax: Axes, font_prop: fm.FontProperties) -> None:
    """Turn ``ax`` into a centered "No data" placeholder with its axis hidden."""
    ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
    ax.set_axis_off()


def _draw_monthly_panel(
    ax: Axes,
    timestamps: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw monthly prompt counts with a 3-month moving average into ``ax``."""
    if not timestamps:
        _draw_no_data(ax, font_prop)
        return

    months, values = _fill_missing_months(_aggregate_counts_by_month(timestamps, cfg))
    x = mdates.date2num(months)
    ax.bar(x, values, width=25, color=cfg.color, alpha=0.20, edgecolor="none")
    # Each average covers a month and the two before it, so the line starts at the third month.
    smooth = _moving_average(values, window=3)
    if smooth:
        ax.plot(x[2:], smooth, color=cfg.color, linewidth=2.6, alpha=0.95)

    locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_ylabel("User prompts", fontproperties=font_prop)
    ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
    _apply_tick_font(ax, font_prop)


def _draw_heatmap_panel(
    fig: Figure,
    ax: Axes,
    timestamps: list[float],
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the weekday × hour prompt heatmap (with colorbar) into ``ax``."""
    if not timestamps:
        _draw_no_data(ax, font_prop)
        return

    # Rows are weekdays (index 0 = Monday), columns are hours 0-23.
    grid: list[list[int]] = [[0] * 24 for _ in range(7)]
    for ts in timestamps:
        dt = _ts_to_dt(ts, cfg)
        grid[dt.weekday()][dt.hour] += 1

    ax.grid(False)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)
    img = ax.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")
    ax.set_title(
        f"Weekday × hour heatmap ({_tz_label(cfg)})",
        fontproperties=font_prop,
        fontsize=13,
        pad=10,
    )
    ax.set_xlabel("Hour", fontproperties=font_prop)
    ax.set_ylabel("Weekday", fontproperties=font_prop)
    labelled_hours = list(range(0, 24, 3))
    ax.set_xticks(labelled_hours)
    ax.set_xticklabels([f"{h:02d}" for h in labelled_hours], fontproperties=font_prop)
    ax.set_yticks(list(range(7)))
    ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)
    cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("Prompts", fontproperties=font_prop)
    for tick_label in cbar.ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)


def _draw_model_panel(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw the eight most-used models as percentage-labelled horizontal bars into ``ax``."""
    model_counts: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1
    total_models = sum(model_counts.values())
    if not total_models:
        _draw_no_data(ax, font_prop)
        return

    ranked = sorted(model_counts.items(), key=lambda item: item[1], reverse=True)[:8]
    labels = [model for model, _ in ranked]
    counts = [count for _, count in ranked]
    positions = list(range(len(ranked)))

    bars = ax.barh(positions, counts, color=cfg.color, alpha=0.9, height=0.6)
    ax.set_yticks(positions)
    ax.set_yticklabels(labels, fontproperties=font_prop)
    # The y axis grows upwards, so flip it to put the most-used model at the top.
    # (Reversing both the positions and the counts cancels out and leaves it at the bottom.)
    ax.invert_yaxis()
    ax.set_xlabel("Conversations", fontproperties=font_prop)
    ax.set_title("Models", fontproperties=font_prop, fontsize=13, pad=10)
    for bar, count in zip(bars, counts, strict=True):
        # Percentages are relative to all conversations, not only the bars shown.
        pct = 100 * (count / total_models)
        ax.text(
            bar.get_width(),
            bar.get_y() + bar.get_height() / 2,
            f" {pct:.0f}%",
            va="center",
            ha="left",
            fontproperties=font_prop,
            fontsize=9,
            color="#57606a",
        )
    _apply_tick_font(ax, font_prop)


def _draw_length_panel(
    ax: Axes,
    collection: ConversationCollection,
    cfg: GraphConfig,
    font_prop: fm.FontProperties,
) -> None:
    """Draw a compact histogram of user prompts per conversation into ``ax``."""
    lengths = [conv.message_count("user") for conv in collection.conversations]
    if not lengths:
        _draw_no_data(ax, font_prop)
        return

    # Clip at the 95th-percentile length (at least 5) so outliers do not stretch the axis.
    ordered = sorted(lengths)
    cap = max(int(ordered[int(0.95 * (len(ordered) - 1))]), 5)
    clipped = [min(length, cap) for length in lengths]
    ax.hist(
        clipped,
        bins=min(16, max(8, cap // 2)),
        color=cfg.color,
        alpha=0.85,
        rwidth=0.9,
        edgecolor="white",
        linewidth=0.5,
    )
    ax.set_title("Conversation length", fontproperties=font_prop, fontsize=13, pad=10)
    ax.set_xlabel("User prompts", fontproperties=font_prop)
    ax.set_ylabel("Conversations", fontproperties=font_prop)
    ax.set_xlim(left=0, right=cap)
    _apply_tick_font(ax, font_prop)


def generate_summary_dashboard(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a compact, high-signal overview dashboard.

    Layout (3 × 2 grid): monthly activity across the top row, the weekday ×
    hour heatmap in the lower-left two cells, model usage and the
    conversation-length histogram stacked in the lower-right column. A header
    line shows conversation count, user-prompt count, date range and timezone.
    Panels without data show a "No data" placeholder.

    Args:
        collection: Collection of conversations.
        config: Graph configuration; the default graph config is used when omitted.

    Returns:
        Matplotlib Figure object.
    """
    cfg = config or get_default_config().graph
    font_prop = _load_font(cfg)

    fig = Figure(figsize=(14, 9), dpi=cfg.dpi, facecolor="white")
    gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1.0, 1.0], width_ratios=[1.25, 1.0])

    ax_ts: Axes = fig.add_subplot(gs[0, :])
    ax_heat: Axes = fig.add_subplot(gs[1:, 0])
    ax_model: Axes = fig.add_subplot(gs[1, 1])
    ax_len: Axes = fig.add_subplot(gs[2, 1])

    # The heatmap axes are intentionally left out: that panel sets its own look.
    for ax in (ax_ts, ax_model, ax_len):
        _style_axes(ax, cfg)
        _apply_tick_font(ax, font_prop)

    # Header
    user_ts = collection.timestamps("user")
    conv_count = len(collection.conversations)
    prompt_count = len(user_ts)

    fig.text(
        0.01,
        0.985,
        "ChatGPT usage overview",
        fontproperties=font_prop,
        fontsize=18,
        va="top",
        ha="left",
        color="#24292f",
    )

    if user_ts:
        dts = [_ts_to_dt(ts, cfg) for ts in user_ts]
        date_range = f"{min(dts).date().isoformat()} → {max(dts).date().isoformat()}"
    else:
        date_range = "No activity"

    fig.text(
        0.01,
        0.955,
        f"{conv_count} conversations · {prompt_count} user prompts · {date_range} · {_tz_label(cfg)}",
        fontproperties=font_prop,
        fontsize=10,
        va="top",
        ha="left",
        color="#57606a",
    )

    _draw_monthly_panel(ax_ts, user_ts, cfg, font_prop)
    _draw_heatmap_panel(fig, ax_heat, user_ts, cfg, font_prop)
    _draw_model_panel(ax_model, collection, cfg, font_prop)
    _draw_length_panel(ax_len, collection, cfg, font_prop)

    # Leave room at the top for the two header lines.
    fig.subplots_adjust(top=0.93, left=0.06, right=0.98, bottom=0.06, hspace=0.4, wspace=0.25)
    return fig
726
+
727
+
728
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate all summary-level graphs and save them as PNG files.

    Nothing is written when the collection has no conversations. The weekday
    and hourly "All time" patterns are only produced when there is at least
    one user prompt timestamp.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs (created if it does not exist)
        config: Optional graph configuration
        progress_bar: Whether to show a progress bar
    """
    if not collection.conversations:
        return

    cfg = config or get_default_config().graph

    user_ts = collection.timestamps("user")
    # This function can be called directly, so do not rely on a caller having
    # created the directory before the figures are saved.
    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Generating summary graphs to %s", output_dir)

    # (label, output filename, zero-argument figure builder); figures are built lazily in the loop.
    tasks: list[tuple[str, str, Callable[[], Figure]]] = [
        ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
        (
            "Activity heatmap",
            "activity_heatmap.png",
            lambda: generate_activity_heatmap(collection, cfg),
        ),
        (
            "Daily activity",
            "daily_activity.png",
            lambda: generate_daily_activity_lineplot(collection, cfg),
        ),
        (
            "Monthly activity",
            "monthly_activity.png",
            lambda: generate_monthly_activity_barplot(collection, cfg),
        ),
        ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
        (
            "Conversation lengths",
            "conversation_lengths.png",
            lambda: generate_length_histogram(collection, cfg),
        ),
        (
            "Conversation lifetimes",
            "conversation_lifetimes.png",
            lambda: generate_conversation_lifetime_histogram(collection, cfg),
        ),
    ]

    if user_ts:
        tasks.extend(
            [
                (
                    "Weekday pattern",
                    "weekday_pattern.png",
                    lambda: generate_week_barplot(user_ts, "All time", cfg),
                ),
                (
                    "Hourly pattern",
                    "hourly_pattern.png",
                    lambda: generate_hour_barplot(user_ts, "All time", cfg),
                ),
            ]
        )

    for _, filename, build in tqdm(
        tasks,
        desc="Creating summary graphs",
        disable=not progress_bar,
    ):
        fig = build()
        fig.savefig(output_dir / filename, facecolor="white")
803
+
804
+
805
def _save_breakdown_graphs(
    groups: dict[tuple[int, int], list[float]],
    target_dir: Path,
    cfg: GraphConfig,
    *,
    desc: str,
    slug_format: str,
    title_format: str,
    progress_bar: bool,
) -> None:
    """Save a weekday and an hourly bar plot for every period in ``groups``.

    Args:
        groups: Timestamps keyed by the calendar ``(year, month)`` that starts the period.
        target_dir: Directory to write the PNG files into (created if missing).
        cfg: Graph configuration passed on to the plot functions.
        desc: Progress-bar description.
        slug_format: ``strftime`` format producing the file-name prefix of a period.
        title_format: ``strftime`` format producing the plot title of a period.
        progress_bar: Whether to show a progress bar.
    """
    target_dir.mkdir(parents=True, exist_ok=True)
    for (year, month), ts_list in tqdm(
        sorted(groups.items()),
        desc=desc,
        disable=not progress_bar,
    ):
        # Naive first-of-period datetime, used only to format the slug and title.
        period = datetime(year, month, 1)
        slug = period.strftime(slug_format)
        title = period.strftime(title_format)
        generate_week_barplot(ts_list, title, cfg).savefig(
            target_dir / f"{slug}_weekday.png", facecolor="white"
        )
        generate_hour_barplot(ts_list, title, cfg).savefig(
            target_dir / f"{slug}_hourly.png", facecolor="white"
        )


def generate_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate the summary graphs and, when enabled, monthly/yearly breakdowns.

    Breakdowns are written below ``output_dir / "Breakdowns"`` and contain one
    weekday and one hourly plot per month (``Monthly``) or per year (``Yearly``).

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs
        config: Optional graph configuration
        progress_bar: Whether to show progress bars
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    cfg = config or get_default_config().graph

    # Summary graphs (small, high-signal set)
    generate_summary_graphs(collection, output_dir, cfg, progress_bar=progress_bar)

    # Optional breakdowns (can generate lots of files; only when enabled in the config)
    if not collection.conversations:
        return

    timestamps = collection.timestamps("user")
    if not timestamps:
        return

    breakdown_root = output_dir / "Breakdowns"

    # Periods are keyed by calendar (year, month) instead of aware period-start
    # datetimes. In local-time mode such datetimes carry the UTC offset of each
    # timestamp, so one month or year could split into several groups that
    # share a file name and overwrite each other's output.
    if cfg.generate_monthly_breakdowns:
        by_month: defaultdict[tuple[int, int], list[float]] = defaultdict(list)
        for ts in timestamps:
            dt = _ts_to_dt(ts, cfg)
            by_month[(dt.year, dt.month)].append(ts)
        _save_breakdown_graphs(
            by_month,
            breakdown_root / "Monthly",
            cfg,
            desc="Creating monthly breakdown graphs",
            slug_format="%Y-%m",
            title_format="%b %Y",
            progress_bar=progress_bar,
        )

    if cfg.generate_yearly_breakdowns:
        by_year: defaultdict[tuple[int, int], list[float]] = defaultdict(list)
        for ts in timestamps:
            # Month is fixed to January: only the year distinguishes yearly periods.
            by_year[(_ts_to_dt(ts, cfg).year, 1)].append(ts)
        _save_breakdown_graphs(
            by_year,
            breakdown_root / "Yearly",
            cfg,
            desc="Creating yearly breakdown graphs",
            slug_format="%Y",
            title_format="%Y",
            progress_bar=progress_bar,
        )