convoviz 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/analysis/graphs.py +610 -242
- convoviz/cli.py +9 -1
- convoviz/config.py +11 -0
- convoviz/io/writers.py +58 -2
- convoviz/pipeline.py +1 -0
- convoviz/renderers/markdown.py +24 -4
- {convoviz-0.2.5.dist-info → convoviz-0.2.7.dist-info}/METADATA +15 -8
- {convoviz-0.2.5.dist-info → convoviz-0.2.7.dist-info}/RECORD +10 -10
- {convoviz-0.2.5.dist-info → convoviz-0.2.7.dist-info}/WHEEL +0 -0
- {convoviz-0.2.5.dist-info → convoviz-0.2.7.dist-info}/entry_points.txt +0 -0
convoviz/analysis/graphs.py
CHANGED
|
@@ -1,65 +1,133 @@
|
|
|
1
|
-
"""Graph generation for conversation analytics.
|
|
1
|
+
"""Graph generation for conversation analytics.
|
|
2
|
+
|
|
3
|
+
Goals:
|
|
4
|
+
- Professional, consistent styling across plots.
|
|
5
|
+
- High-signal summaries by default (avoid output spam).
|
|
6
|
+
- Correct time bucketing (based on *message timestamps*, not conversation creation time).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
2
10
|
|
|
3
11
|
from collections import defaultdict
|
|
12
|
+
from collections.abc import Callable, Iterable
|
|
4
13
|
from datetime import UTC, datetime
|
|
5
14
|
from pathlib import Path
|
|
6
15
|
|
|
7
16
|
import matplotlib.dates as mdates
|
|
8
17
|
import matplotlib.font_manager as fm
|
|
18
|
+
import matplotlib.ticker as mticker
|
|
9
19
|
from matplotlib.axes import Axes
|
|
10
20
|
from matplotlib.figure import Figure
|
|
21
|
+
from matplotlib.image import AxesImage
|
|
11
22
|
from tqdm import tqdm
|
|
12
23
|
|
|
13
24
|
from convoviz.config import GraphConfig, get_default_config
|
|
14
25
|
from convoviz.models import ConversationCollection
|
|
15
26
|
from convoviz.utils import get_asset_path
|
|
16
27
|
|
|
17
|
-
WEEKDAYS = [
|
|
18
|
-
"Monday",
|
|
19
|
-
"Tuesday",
|
|
20
|
-
"Wednesday",
|
|
21
|
-
"Thursday",
|
|
22
|
-
"Friday",
|
|
23
|
-
"Saturday",
|
|
24
|
-
"Sunday",
|
|
25
|
-
]
|
|
26
|
-
|
|
28
|
+
WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
|
|
27
29
|
|
|
28
|
-
def _setup_figure(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
|
|
29
|
-
"""Internal helper to setup a figure with common styling."""
|
|
30
|
-
fig = Figure(figsize=config.figsize, dpi=config.dpi)
|
|
31
|
-
ax: Axes = fig.add_subplot()
|
|
32
30
|
|
|
33
|
-
|
|
31
|
+
def _load_font(config: GraphConfig) -> fm.FontProperties:
|
|
34
32
|
font_path = get_asset_path(f"fonts/{config.font_name}")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
)
|
|
33
|
+
return fm.FontProperties(fname=str(font_path)) if font_path.exists() else fm.FontProperties()
|
|
34
|
+
|
|
38
35
|
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
def _style_axes(ax: Axes, config: GraphConfig) -> None:
|
|
37
|
+
# Clean look
|
|
41
38
|
ax.set_facecolor("white")
|
|
42
39
|
ax.spines["top"].set_visible(False)
|
|
43
40
|
ax.spines["right"].set_visible(False)
|
|
41
|
+
ax.spines["left"].set_color("#d0d7de")
|
|
42
|
+
ax.spines["bottom"].set_color("#d0d7de")
|
|
43
|
+
ax.tick_params(colors="#24292f")
|
|
44
|
+
ax.yaxis.set_major_locator(mticker.MaxNLocator(nbins=6, integer=True))
|
|
45
|
+
|
|
44
46
|
if config.grid:
|
|
45
|
-
ax.grid(axis="y", linestyle="
|
|
47
|
+
ax.grid(axis="y", linestyle="-", linewidth=0.8, alpha=0.35, color="#8c959f")
|
|
46
48
|
ax.set_axisbelow(True)
|
|
47
49
|
|
|
50
|
+
|
|
51
|
+
def _apply_tick_font(ax: Axes, font_prop: fm.FontProperties) -> None:
|
|
52
|
+
for label in ax.get_xticklabels() + ax.get_yticklabels():
|
|
53
|
+
label.set_fontproperties(font_prop)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _setup_single_axes(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
|
|
57
|
+
fig = Figure(figsize=config.figsize, dpi=config.dpi, facecolor="white")
|
|
58
|
+
ax: Axes = fig.add_subplot()
|
|
59
|
+
font_prop = _load_font(config)
|
|
60
|
+
_style_axes(ax, config)
|
|
48
61
|
return fig, ax, font_prop
|
|
49
62
|
|
|
50
63
|
|
|
51
64
|
def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
|
|
52
65
|
"""Convert epoch timestamps into aware datetimes based on config."""
|
|
53
66
|
dt_utc = datetime.fromtimestamp(ts, UTC)
|
|
54
|
-
if config.timezone == "utc"
|
|
55
|
-
return dt_utc
|
|
56
|
-
return dt_utc.astimezone()
|
|
67
|
+
return dt_utc if config.timezone == "utc" else dt_utc.astimezone()
|
|
57
68
|
|
|
58
69
|
|
|
59
70
|
def _tz_label(config: GraphConfig) -> str:
|
|
60
71
|
return "UTC" if config.timezone == "utc" else "Local"
|
|
61
72
|
|
|
62
73
|
|
|
74
|
+
def _month_start(dt: datetime) -> datetime:
|
|
75
|
+
return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _year_start(dt: datetime) -> datetime:
|
|
79
|
+
return dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _day_start(dt: datetime) -> datetime:
|
|
83
|
+
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _iter_month_starts(start: datetime, end: datetime) -> list[datetime]:
|
|
87
|
+
start = _month_start(start)
|
|
88
|
+
end = _month_start(end)
|
|
89
|
+
months: list[datetime] = []
|
|
90
|
+
cur = start
|
|
91
|
+
while cur <= end:
|
|
92
|
+
months.append(cur)
|
|
93
|
+
year, month = cur.year, cur.month
|
|
94
|
+
cur = cur.replace(year=year + 1, month=1) if month == 12 else cur.replace(month=month + 1)
|
|
95
|
+
return months
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _fill_missing_months(counts: dict[datetime, int]) -> tuple[list[datetime], list[int]]:
|
|
99
|
+
if not counts:
|
|
100
|
+
return [], []
|
|
101
|
+
keys = sorted(counts.keys())
|
|
102
|
+
months = _iter_month_starts(keys[0], keys[-1])
|
|
103
|
+
return months, [counts.get(m, 0) for m in months]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _aggregate_counts_by_month(
|
|
107
|
+
timestamps: Iterable[float],
|
|
108
|
+
config: GraphConfig,
|
|
109
|
+
) -> dict[datetime, int]:
|
|
110
|
+
counts: defaultdict[datetime, int] = defaultdict(int)
|
|
111
|
+
for ts in timestamps:
|
|
112
|
+
dt = _ts_to_dt(ts, config)
|
|
113
|
+
counts[_month_start(dt)] += 1
|
|
114
|
+
return dict(counts)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _moving_average(values: list[int], window: int) -> list[float]:
|
|
118
|
+
if window <= 1:
|
|
119
|
+
return [float(v) for v in values]
|
|
120
|
+
if len(values) < window:
|
|
121
|
+
return []
|
|
122
|
+
out: list[float] = []
|
|
123
|
+
running = sum(values[:window])
|
|
124
|
+
out.append(running / window)
|
|
125
|
+
for i in range(window, len(values)):
|
|
126
|
+
running += values[i] - values[i - window]
|
|
127
|
+
out.append(running / window)
|
|
128
|
+
return out
|
|
129
|
+
|
|
130
|
+
|
|
63
131
|
def generate_week_barplot(
|
|
64
132
|
timestamps: list[float],
|
|
65
133
|
title: str,
|
|
@@ -76,40 +144,26 @@ def generate_week_barplot(
|
|
|
76
144
|
Matplotlib Figure object
|
|
77
145
|
"""
|
|
78
146
|
cfg = config or get_default_config().graph
|
|
79
|
-
|
|
147
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
80
148
|
|
|
81
|
-
weekday_counts:
|
|
82
|
-
for
|
|
83
|
-
|
|
149
|
+
weekday_counts: dict[str, int] = dict.fromkeys(WEEKDAYS, 0)
|
|
150
|
+
for ts in timestamps:
|
|
151
|
+
dt = _ts_to_dt(ts, cfg)
|
|
152
|
+
weekday_counts[WEEKDAYS[dt.weekday()]] += 1
|
|
84
153
|
|
|
85
154
|
x = list(range(len(WEEKDAYS)))
|
|
86
|
-
y = [weekday_counts[
|
|
87
|
-
|
|
88
|
-
fig, ax, font_prop = _setup_figure(cfg)
|
|
89
|
-
|
|
90
|
-
bars = ax.bar(x, y, color=cfg.color, alpha=0.85)
|
|
155
|
+
y = [weekday_counts[d] for d in WEEKDAYS]
|
|
91
156
|
|
|
157
|
+
bars = ax.bar(x, y, color=cfg.color, alpha=0.9, width=0.72)
|
|
92
158
|
if cfg.show_counts:
|
|
93
|
-
|
|
94
|
-
height = bar.get_height()
|
|
95
|
-
if height > 0:
|
|
96
|
-
ax.text(
|
|
97
|
-
bar.get_x() + bar.get_width() / 2.0,
|
|
98
|
-
height,
|
|
99
|
-
f"{int(height)}",
|
|
100
|
-
ha="center",
|
|
101
|
-
va="bottom",
|
|
102
|
-
fontproperties=font_prop,
|
|
103
|
-
)
|
|
159
|
+
ax.bar_label(bars, padding=3, fontsize=9, fontproperties=font_prop, color="#24292f")
|
|
104
160
|
|
|
161
|
+
ax.set_title(f"{title} · Weekday pattern", fontproperties=font_prop, fontsize=14, pad=14)
|
|
105
162
|
ax.set_xlabel("Weekday", fontproperties=font_prop)
|
|
106
|
-
ax.set_ylabel("User
|
|
107
|
-
ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
|
|
163
|
+
ax.set_ylabel("User prompts", fontproperties=font_prop)
|
|
108
164
|
ax.set_xticks(x)
|
|
109
|
-
ax.set_xticklabels(WEEKDAYS, rotation=
|
|
110
|
-
|
|
111
|
-
for label in ax.get_yticklabels():
|
|
112
|
-
label.set_fontproperties(font_prop)
|
|
165
|
+
ax.set_xticklabels(WEEKDAYS, rotation=35, ha="right", fontproperties=font_prop)
|
|
166
|
+
_apply_tick_font(ax, font_prop)
|
|
113
167
|
|
|
114
168
|
fig.tight_layout()
|
|
115
169
|
return fig
|
|
@@ -131,41 +185,31 @@ def generate_hour_barplot(
|
|
|
131
185
|
Matplotlib Figure object
|
|
132
186
|
"""
|
|
133
187
|
cfg = config or get_default_config().graph
|
|
134
|
-
|
|
188
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
135
189
|
|
|
136
190
|
hour_counts: dict[int, int] = dict.fromkeys(range(24), 0)
|
|
137
|
-
for
|
|
138
|
-
|
|
191
|
+
for ts in timestamps:
|
|
192
|
+
dt = _ts_to_dt(ts, cfg)
|
|
193
|
+
hour_counts[dt.hour] += 1
|
|
139
194
|
|
|
140
|
-
x = [f"{i:02d}:00" for i in range(24)]
|
|
141
195
|
y = [hour_counts[i] for i in range(24)]
|
|
142
196
|
|
|
143
|
-
|
|
197
|
+
bars = ax.bar(range(24), y, color=cfg.color, alpha=0.9, width=0.72)
|
|
198
|
+
if cfg.show_counts:
|
|
199
|
+
ax.bar_label(bars, padding=2, fontsize=7, fontproperties=font_prop, color="#24292f")
|
|
144
200
|
|
|
145
|
-
|
|
201
|
+
ax.set_title(
|
|
202
|
+
f"{title} · Hourly pattern ({_tz_label(cfg)})",
|
|
203
|
+
fontproperties=font_prop,
|
|
204
|
+
fontsize=14,
|
|
205
|
+
pad=14,
|
|
206
|
+
)
|
|
207
|
+
ax.set_xlabel(f"Hour of day ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
208
|
+
ax.set_ylabel("User prompts", fontproperties=font_prop)
|
|
146
209
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
if height > 0:
|
|
151
|
-
ax.text(
|
|
152
|
-
bar.get_x() + bar.get_width() / 2.0,
|
|
153
|
-
height,
|
|
154
|
-
f"{int(height)}",
|
|
155
|
-
ha="center",
|
|
156
|
-
va="bottom",
|
|
157
|
-
fontproperties=font_prop,
|
|
158
|
-
fontsize=8,
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
ax.set_xlabel(f"Hour of Day ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
162
|
-
ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
|
|
163
|
-
ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
|
|
164
|
-
ax.set_xticks(range(24))
|
|
165
|
-
ax.set_xticklabels(x, rotation=90, fontproperties=font_prop)
|
|
166
|
-
|
|
167
|
-
for label in ax.get_yticklabels():
|
|
168
|
-
label.set_fontproperties(font_prop)
|
|
210
|
+
ax.set_xticks(range(0, 24, 2))
|
|
211
|
+
ax.set_xticklabels([f"{i:02d}:00" for i in range(0, 24, 2)], fontproperties=font_prop)
|
|
212
|
+
_apply_tick_font(ax, font_prop)
|
|
169
213
|
|
|
170
214
|
fig.tight_layout()
|
|
171
215
|
return fig
|
|
@@ -175,16 +219,10 @@ def generate_model_piechart(
|
|
|
175
219
|
collection: ConversationCollection,
|
|
176
220
|
config: GraphConfig | None = None,
|
|
177
221
|
) -> Figure:
|
|
178
|
-
"""Create a
|
|
179
|
-
|
|
180
|
-
Groups models with < 5% usage into "Other".
|
|
222
|
+
"""Create a model usage chart.
|
|
181
223
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
config: Optional graph configuration
|
|
185
|
-
|
|
186
|
-
Returns:
|
|
187
|
-
Matplotlib Figure object
|
|
224
|
+
Note: kept for backwards compatibility (historically a pie chart). We now render a
|
|
225
|
+
more readable horizontal bar chart with percentages.
|
|
188
226
|
"""
|
|
189
227
|
cfg = config or get_default_config().graph
|
|
190
228
|
model_counts: defaultdict[str, int] = defaultdict(int)
|
|
@@ -194,54 +232,50 @@ def generate_model_piechart(
|
|
|
194
232
|
model_counts[model] += 1
|
|
195
233
|
|
|
196
234
|
total = sum(model_counts.values())
|
|
235
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
236
|
+
|
|
197
237
|
if total == 0:
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
|
|
238
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
239
|
+
ax.set_axis_off()
|
|
201
240
|
return fig
|
|
202
241
|
|
|
203
|
-
# Group minor models
|
|
242
|
+
# Group minor models to keep the plot readable.
|
|
204
243
|
threshold = 0.05
|
|
205
244
|
refined_counts: dict[str, int] = {}
|
|
206
245
|
other_count = 0
|
|
207
|
-
|
|
208
246
|
for model, count in model_counts.items():
|
|
209
247
|
if count / total < threshold:
|
|
210
248
|
other_count += count
|
|
211
249
|
else:
|
|
212
250
|
refined_counts[model] = count
|
|
213
|
-
|
|
214
|
-
if other_count > 0:
|
|
251
|
+
if other_count:
|
|
215
252
|
refined_counts["Other"] = other_count
|
|
216
253
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
)
|
|
243
|
-
ax.set_title("Model Usage Distribution", fontproperties=font_prop, fontsize=16, pad=20)
|
|
244
|
-
|
|
254
|
+
items = sorted(refined_counts.items(), key=lambda x: x[1], reverse=True)
|
|
255
|
+
labels = [k for k, _ in items][:10]
|
|
256
|
+
counts = [v for _, v in items][:10]
|
|
257
|
+
y = list(range(len(labels)))[::-1]
|
|
258
|
+
|
|
259
|
+
bars = ax.barh(y, counts[::-1], color=cfg.color, alpha=0.9, height=0.6)
|
|
260
|
+
ax.set_yticks(y)
|
|
261
|
+
ax.set_yticklabels(labels[::-1], fontproperties=font_prop)
|
|
262
|
+
ax.set_xlabel("Conversations", fontproperties=font_prop)
|
|
263
|
+
ax.set_title("Model usage", fontproperties=font_prop, fontsize=14, pad=14)
|
|
264
|
+
|
|
265
|
+
for bar, count in zip(bars, counts[::-1], strict=True):
|
|
266
|
+
pct = 100 * (count / total)
|
|
267
|
+
ax.text(
|
|
268
|
+
bar.get_width(),
|
|
269
|
+
bar.get_y() + bar.get_height() / 2,
|
|
270
|
+
f" {count} ({pct:.1f}%)",
|
|
271
|
+
va="center",
|
|
272
|
+
ha="left",
|
|
273
|
+
fontproperties=font_prop,
|
|
274
|
+
fontsize=9,
|
|
275
|
+
color="#24292f",
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
_apply_tick_font(ax, font_prop)
|
|
245
279
|
fig.tight_layout()
|
|
246
280
|
return fig
|
|
247
281
|
|
|
@@ -263,91 +297,164 @@ def generate_length_histogram(
|
|
|
263
297
|
"""
|
|
264
298
|
cfg = config or get_default_config().graph
|
|
265
299
|
lengths = [conv.message_count("user") for conv in collection.conversations]
|
|
266
|
-
|
|
267
|
-
fig, ax, font_prop = _setup_figure(cfg)
|
|
300
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
268
301
|
|
|
269
302
|
if not lengths:
|
|
270
|
-
ax.text(0.5, 0.5, "No
|
|
303
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
304
|
+
ax.set_axis_off()
|
|
271
305
|
return fig
|
|
272
306
|
|
|
273
|
-
# Cap at 95th percentile to focus on most conversations
|
|
274
307
|
sorted_lengths = sorted(lengths)
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
# Filter lengths for the histogram plot, but keep the data correct
|
|
308
|
+
p50 = sorted_lengths[int(0.50 * (len(sorted_lengths) - 1))]
|
|
309
|
+
p90 = sorted_lengths[int(0.90 * (len(sorted_lengths) - 1))]
|
|
310
|
+
p95 = sorted_lengths[int(0.95 * (len(sorted_lengths) - 1))]
|
|
311
|
+
cap = max(int(p95), 5)
|
|
280
312
|
plot_lengths = [min(L, cap) for L in lengths]
|
|
281
313
|
|
|
282
|
-
|
|
283
|
-
ax.hist(
|
|
314
|
+
bin_count = min(24, max(10, cap // 2))
|
|
315
|
+
ax.hist(
|
|
316
|
+
plot_lengths,
|
|
317
|
+
bins=bin_count,
|
|
318
|
+
color=cfg.color,
|
|
319
|
+
alpha=0.85,
|
|
320
|
+
rwidth=0.9,
|
|
321
|
+
edgecolor="white",
|
|
322
|
+
linewidth=0.5,
|
|
323
|
+
)
|
|
284
324
|
|
|
285
|
-
ax.
|
|
286
|
-
ax.
|
|
287
|
-
ax.
|
|
288
|
-
|
|
325
|
+
ax.axvline(p50, color="#24292f", linewidth=1.2, alpha=0.8)
|
|
326
|
+
ax.axvline(p90, color="#cf222e", linewidth=1.2, alpha=0.8)
|
|
327
|
+
ax.text(
|
|
328
|
+
p50,
|
|
329
|
+
ax.get_ylim()[1] * 0.95,
|
|
330
|
+
f"median={p50}",
|
|
331
|
+
rotation=90,
|
|
332
|
+
va="top",
|
|
333
|
+
ha="right",
|
|
334
|
+
fontproperties=font_prop,
|
|
335
|
+
fontsize=9,
|
|
336
|
+
color="#24292f",
|
|
337
|
+
)
|
|
338
|
+
ax.text(
|
|
339
|
+
p90,
|
|
340
|
+
ax.get_ylim()[1] * 0.95,
|
|
341
|
+
f"p90={p90}",
|
|
342
|
+
rotation=90,
|
|
343
|
+
va="top",
|
|
344
|
+
ha="right",
|
|
289
345
|
fontproperties=font_prop,
|
|
290
|
-
fontsize=
|
|
291
|
-
|
|
346
|
+
fontsize=9,
|
|
347
|
+
color="#cf222e",
|
|
292
348
|
)
|
|
293
349
|
|
|
294
|
-
|
|
295
|
-
|
|
350
|
+
ax.set_title("Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
|
|
351
|
+
ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
|
|
352
|
+
ax.set_ylabel("Conversations", fontproperties=font_prop)
|
|
353
|
+
ax.set_xlim(left=0, right=cap)
|
|
354
|
+
_apply_tick_font(ax, font_prop)
|
|
296
355
|
|
|
297
356
|
fig.tight_layout()
|
|
298
357
|
return fig
|
|
299
358
|
|
|
300
359
|
|
|
301
|
-
def
|
|
360
|
+
def generate_conversation_lifetime_histogram(
|
|
302
361
|
collection: ConversationCollection,
|
|
303
362
|
config: GraphConfig | None = None,
|
|
304
363
|
) -> Figure:
|
|
305
|
-
"""Create a
|
|
364
|
+
"""Create a histogram of conversation lifetimes (update_time - create_time)."""
|
|
365
|
+
cfg = config or get_default_config().graph
|
|
366
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
306
367
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
368
|
+
lifetimes_days: list[float] = []
|
|
369
|
+
for conv in collection.conversations:
|
|
370
|
+
delta = conv.update_time - conv.create_time
|
|
371
|
+
lifetimes_days.append(max(0.0, delta.total_seconds() / 86_400))
|
|
310
372
|
|
|
311
|
-
|
|
312
|
-
|
|
373
|
+
if not lifetimes_days:
|
|
374
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
375
|
+
ax.set_axis_off()
|
|
376
|
+
return fig
|
|
377
|
+
|
|
378
|
+
sorted_vals = sorted(lifetimes_days)
|
|
379
|
+
p50 = sorted_vals[int(0.50 * (len(sorted_vals) - 1))]
|
|
380
|
+
p90 = sorted_vals[int(0.90 * (len(sorted_vals) - 1))]
|
|
381
|
+
p95 = sorted_vals[int(0.95 * (len(sorted_vals) - 1))]
|
|
382
|
+
cap = max(float(p95), 1.0)
|
|
383
|
+
plot_vals = [min(v, cap) for v in lifetimes_days]
|
|
384
|
+
|
|
385
|
+
ax.hist(
|
|
386
|
+
plot_vals,
|
|
387
|
+
bins=24,
|
|
388
|
+
color=cfg.color,
|
|
389
|
+
alpha=0.85,
|
|
390
|
+
rwidth=0.9,
|
|
391
|
+
edgecolor="white",
|
|
392
|
+
linewidth=0.5,
|
|
393
|
+
)
|
|
394
|
+
ax.axvline(p50, color="#24292f", linewidth=1.2, alpha=0.8)
|
|
395
|
+
ax.axvline(p90, color="#cf222e", linewidth=1.2, alpha=0.8)
|
|
396
|
+
|
|
397
|
+
ax.set_title("Conversation lifetimes (days)", fontproperties=font_prop, fontsize=14, pad=14)
|
|
398
|
+
ax.set_xlabel("Days between first and last message", fontproperties=font_prop)
|
|
399
|
+
ax.set_ylabel("Conversations", fontproperties=font_prop)
|
|
400
|
+
ax.set_xlim(left=0, right=cap)
|
|
401
|
+
ax.text(
|
|
402
|
+
0.99,
|
|
403
|
+
0.98,
|
|
404
|
+
f"median={p50:.1f}d\np90={p90:.1f}d",
|
|
405
|
+
transform=ax.transAxes,
|
|
406
|
+
ha="right",
|
|
407
|
+
va="top",
|
|
408
|
+
fontproperties=font_prop,
|
|
409
|
+
fontsize=9,
|
|
410
|
+
color="#57606a",
|
|
411
|
+
)
|
|
412
|
+
_apply_tick_font(ax, font_prop)
|
|
413
|
+
|
|
414
|
+
fig.tight_layout()
|
|
415
|
+
return fig
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def generate_monthly_activity_barplot(
|
|
419
|
+
collection: ConversationCollection,
|
|
420
|
+
config: GraphConfig | None = None,
|
|
421
|
+
) -> Figure:
|
|
422
|
+
"""Create a bar chart showing total prompt count per month.
|
|
423
|
+
|
|
424
|
+
Important: this is computed from *message timestamps* (actual activity), not from
|
|
425
|
+
the conversation creation month.
|
|
313
426
|
"""
|
|
314
427
|
cfg = config or get_default_config().graph
|
|
315
|
-
|
|
316
|
-
|
|
428
|
+
timestamps = collection.timestamps("user")
|
|
429
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
317
430
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
431
|
+
if not timestamps:
|
|
432
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
433
|
+
ax.set_axis_off()
|
|
434
|
+
return fig
|
|
321
435
|
|
|
322
|
-
|
|
436
|
+
month_counts = _aggregate_counts_by_month(timestamps, cfg)
|
|
437
|
+
months, values = _fill_missing_months(month_counts)
|
|
438
|
+
if not months:
|
|
439
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
440
|
+
ax.set_axis_off()
|
|
441
|
+
return fig
|
|
323
442
|
|
|
324
|
-
|
|
325
|
-
|
|
443
|
+
x = mdates.date2num(months)
|
|
444
|
+
ax.bar(x, values, width=25, color=cfg.color, alpha=0.25, edgecolor="none")
|
|
326
445
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
)
|
|
340
|
-
|
|
341
|
-
ax.set_xlabel("Month", fontproperties=font_prop)
|
|
342
|
-
ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
|
|
343
|
-
ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
|
|
344
|
-
tick_step = max(1, len(positions) // 12) # show ~12 labels max
|
|
345
|
-
shown = positions[::tick_step] if positions else []
|
|
346
|
-
ax.set_xticks(shown)
|
|
347
|
-
ax.set_xticklabels([x[i] for i in shown], rotation=45, fontproperties=font_prop)
|
|
348
|
-
|
|
349
|
-
for label in ax.get_yticklabels():
|
|
350
|
-
label.set_fontproperties(font_prop)
|
|
446
|
+
smooth = _moving_average(values, window=3)
|
|
447
|
+
if smooth:
|
|
448
|
+
ax.plot(x[2:], smooth, color=cfg.color, linewidth=2.2, alpha=0.9)
|
|
449
|
+
|
|
450
|
+
locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
|
|
451
|
+
ax.xaxis.set_major_locator(locator)
|
|
452
|
+
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
|
|
453
|
+
|
|
454
|
+
ax.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
|
|
455
|
+
ax.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
456
|
+
ax.set_ylabel("User prompts", fontproperties=font_prop)
|
|
457
|
+
_apply_tick_font(ax, font_prop)
|
|
351
458
|
|
|
352
459
|
fig.tight_layout()
|
|
353
460
|
return fig
|
|
@@ -357,13 +464,14 @@ def generate_daily_activity_lineplot(
|
|
|
357
464
|
collection: ConversationCollection,
|
|
358
465
|
config: GraphConfig | None = None,
|
|
359
466
|
) -> Figure:
|
|
360
|
-
"""Create a line chart showing user prompt count per day."""
|
|
467
|
+
"""Create a line chart showing user prompt count per day (with a rolling mean)."""
|
|
361
468
|
cfg = config or get_default_config().graph
|
|
362
469
|
timestamps = collection.timestamps("user")
|
|
363
470
|
|
|
364
|
-
fig, ax, font_prop =
|
|
471
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
365
472
|
if not timestamps:
|
|
366
|
-
ax.text(0.5, 0.5, "No
|
|
473
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
474
|
+
ax.set_axis_off()
|
|
367
475
|
return fig
|
|
368
476
|
|
|
369
477
|
counts: defaultdict[datetime, int] = defaultdict(int)
|
|
@@ -376,14 +484,18 @@ def generate_daily_activity_lineplot(
|
|
|
376
484
|
values = [counts[d] for d in days]
|
|
377
485
|
|
|
378
486
|
x = mdates.date2num(days)
|
|
379
|
-
ax.
|
|
380
|
-
ax.
|
|
487
|
+
ax.bar(x, values, width=0.9, color=cfg.color, alpha=0.18, edgecolor="none")
|
|
488
|
+
ax.plot(x, values, color=cfg.color, linewidth=1.2, alpha=0.25)
|
|
489
|
+
smooth = _moving_average(values, window=7)
|
|
490
|
+
if smooth:
|
|
491
|
+
ax.plot(x[6:], smooth, color=cfg.color, linewidth=2.4, alpha=0.95)
|
|
492
|
+
|
|
381
493
|
locator = mdates.AutoDateLocator()
|
|
382
494
|
ax.xaxis.set_major_locator(locator)
|
|
383
495
|
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
|
|
384
|
-
ax.set_title("Daily
|
|
496
|
+
ax.set_title("Daily activity (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
|
|
385
497
|
ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
386
|
-
ax.set_ylabel("User
|
|
498
|
+
ax.set_ylabel("User prompts", fontproperties=font_prop)
|
|
387
499
|
|
|
388
500
|
for label in ax.get_xticklabels() + ax.get_yticklabels():
|
|
389
501
|
label.set_fontproperties(font_prop)
|
|
@@ -392,10 +504,226 @@ def generate_daily_activity_lineplot(
|
|
|
392
504
|
return fig
|
|
393
505
|
|
|
394
506
|
|
|
507
|
+
def generate_activity_heatmap(
|
|
508
|
+
collection: ConversationCollection,
|
|
509
|
+
config: GraphConfig | None = None,
|
|
510
|
+
) -> Figure:
|
|
511
|
+
"""Create a heatmap of activity by weekday × hour (user prompts)."""
|
|
512
|
+
cfg = config or get_default_config().graph
|
|
513
|
+
timestamps = collection.timestamps("user")
|
|
514
|
+
|
|
515
|
+
fig, ax, font_prop = _setup_single_axes(cfg)
|
|
516
|
+
if not timestamps:
|
|
517
|
+
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
518
|
+
ax.set_axis_off()
|
|
519
|
+
return fig
|
|
520
|
+
|
|
521
|
+
grid: list[list[int]] = [[0 for _ in range(24)] for _ in range(7)]
|
|
522
|
+
for ts in timestamps:
|
|
523
|
+
dt = _ts_to_dt(ts, cfg)
|
|
524
|
+
grid[dt.weekday()][dt.hour] += 1
|
|
525
|
+
|
|
526
|
+
# Keep the axes frame for the heatmap.
|
|
527
|
+
ax.grid(False)
|
|
528
|
+
for side in ["top", "right", "left", "bottom"]:
|
|
529
|
+
ax.spines[side].set_visible(False)
|
|
530
|
+
|
|
531
|
+
img: AxesImage = ax.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")
|
|
532
|
+
|
|
533
|
+
ax.set_title(
|
|
534
|
+
f"Activity heatmap (weekday × hour, {_tz_label(cfg)})",
|
|
535
|
+
fontproperties=font_prop,
|
|
536
|
+
fontsize=14,
|
|
537
|
+
pad=14,
|
|
538
|
+
)
|
|
539
|
+
ax.set_xlabel(f"Hour of day ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
540
|
+
ax.set_ylabel("Weekday", fontproperties=font_prop)
|
|
541
|
+
|
|
542
|
+
ax.set_xticks(list(range(0, 24, 2)))
|
|
543
|
+
ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 2)], fontproperties=font_prop)
|
|
544
|
+
ax.set_yticks(list(range(7)))
|
|
545
|
+
ax.set_yticklabels(WEEKDAYS, fontproperties=font_prop)
|
|
546
|
+
|
|
547
|
+
cbar = fig.colorbar(img, ax=ax, fraction=0.046, pad=0.04)
|
|
548
|
+
cbar.set_label("User prompts", fontproperties=font_prop)
|
|
549
|
+
for t in cbar.ax.get_yticklabels():
|
|
550
|
+
t.set_fontproperties(font_prop)
|
|
551
|
+
|
|
552
|
+
fig.tight_layout()
|
|
553
|
+
return fig
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
def generate_summary_dashboard(
|
|
557
|
+
collection: ConversationCollection,
|
|
558
|
+
config: GraphConfig | None = None,
|
|
559
|
+
) -> Figure:
|
|
560
|
+
"""Create a compact, high-signal overview dashboard."""
|
|
561
|
+
cfg = config or get_default_config().graph
|
|
562
|
+
font_prop = _load_font(cfg)
|
|
563
|
+
|
|
564
|
+
fig = Figure(figsize=(14, 9), dpi=cfg.dpi, facecolor="white")
|
|
565
|
+
gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1.0, 1.0], width_ratios=[1.25, 1.0])
|
|
566
|
+
|
|
567
|
+
ax_ts: Axes = fig.add_subplot(gs[0, :])
|
|
568
|
+
ax_heat: Axes = fig.add_subplot(gs[1:, 0])
|
|
569
|
+
ax_model: Axes = fig.add_subplot(gs[1, 1])
|
|
570
|
+
ax_len: Axes = fig.add_subplot(gs[2, 1])
|
|
571
|
+
|
|
572
|
+
for ax in (ax_ts, ax_model, ax_len):
|
|
573
|
+
_style_axes(ax, cfg)
|
|
574
|
+
_apply_tick_font(ax, font_prop)
|
|
575
|
+
|
|
576
|
+
# Header
|
|
577
|
+
user_ts = collection.timestamps("user")
|
|
578
|
+
conv_count = len(collection.conversations)
|
|
579
|
+
prompt_count = len(user_ts)
|
|
580
|
+
|
|
581
|
+
fig.text(
|
|
582
|
+
0.01,
|
|
583
|
+
0.985,
|
|
584
|
+
"ChatGPT usage overview",
|
|
585
|
+
fontproperties=font_prop,
|
|
586
|
+
fontsize=18,
|
|
587
|
+
va="top",
|
|
588
|
+
ha="left",
|
|
589
|
+
color="#24292f",
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
if user_ts:
|
|
593
|
+
dts = [_ts_to_dt(ts, cfg) for ts in user_ts]
|
|
594
|
+
date_range = f"{min(dts).date().isoformat()} → {max(dts).date().isoformat()}"
|
|
595
|
+
else:
|
|
596
|
+
date_range = "No activity"
|
|
597
|
+
|
|
598
|
+
fig.text(
|
|
599
|
+
0.01,
|
|
600
|
+
0.955,
|
|
601
|
+
f"{conv_count} conversations · {prompt_count} user prompts · {date_range} · {_tz_label(cfg)}",
|
|
602
|
+
fontproperties=font_prop,
|
|
603
|
+
fontsize=10,
|
|
604
|
+
va="top",
|
|
605
|
+
ha="left",
|
|
606
|
+
color="#57606a",
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
# Monthly activity (timeseries)
|
|
610
|
+
if user_ts:
|
|
611
|
+
month_counts = _aggregate_counts_by_month(user_ts, cfg)
|
|
612
|
+
months, values = _fill_missing_months(month_counts)
|
|
613
|
+
x = mdates.date2num(months)
|
|
614
|
+
ax_ts.bar(x, values, width=25, color=cfg.color, alpha=0.20, edgecolor="none")
|
|
615
|
+
smooth = _moving_average(values, window=3)
|
|
616
|
+
if smooth:
|
|
617
|
+
ax_ts.plot(x[2:], smooth, color=cfg.color, linewidth=2.6, alpha=0.95)
|
|
618
|
+
|
|
619
|
+
locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
|
|
620
|
+
ax_ts.xaxis.set_major_locator(locator)
|
|
621
|
+
ax_ts.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
|
|
622
|
+
ax_ts.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10)
|
|
623
|
+
ax_ts.set_ylabel("User prompts", fontproperties=font_prop)
|
|
624
|
+
ax_ts.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
|
|
625
|
+
_apply_tick_font(ax_ts, font_prop)
|
|
626
|
+
else:
|
|
627
|
+
ax_ts.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
628
|
+
ax_ts.set_axis_off()
|
|
629
|
+
|
|
630
|
+
# Heatmap
|
|
631
|
+
if user_ts:
|
|
632
|
+
grid: list[list[int]] = [[0 for _ in range(24)] for _ in range(7)]
|
|
633
|
+
for ts in user_ts:
|
|
634
|
+
dt = _ts_to_dt(ts, cfg)
|
|
635
|
+
grid[dt.weekday()][dt.hour] += 1
|
|
636
|
+
|
|
637
|
+
ax_heat.grid(False)
|
|
638
|
+
for side in ["top", "right", "left", "bottom"]:
|
|
639
|
+
ax_heat.spines[side].set_visible(False)
|
|
640
|
+
img = ax_heat.imshow(grid, aspect="auto", cmap="Blues", interpolation="nearest")
|
|
641
|
+
ax_heat.set_title(
|
|
642
|
+
f"Weekday × hour heatmap ({_tz_label(cfg)})",
|
|
643
|
+
fontproperties=font_prop,
|
|
644
|
+
fontsize=13,
|
|
645
|
+
pad=10,
|
|
646
|
+
)
|
|
647
|
+
ax_heat.set_xlabel("Hour", fontproperties=font_prop)
|
|
648
|
+
ax_heat.set_ylabel("Weekday", fontproperties=font_prop)
|
|
649
|
+
ax_heat.set_xticks(list(range(0, 24, 3)))
|
|
650
|
+
ax_heat.set_xticklabels([f"{h:02d}" for h in range(0, 24, 3)], fontproperties=font_prop)
|
|
651
|
+
ax_heat.set_yticks(list(range(7)))
|
|
652
|
+
ax_heat.set_yticklabels(WEEKDAYS, fontproperties=font_prop)
|
|
653
|
+
cbar = fig.colorbar(img, ax=ax_heat, fraction=0.046, pad=0.04)
|
|
654
|
+
cbar.set_label("Prompts", fontproperties=font_prop)
|
|
655
|
+
for t in cbar.ax.get_yticklabels():
|
|
656
|
+
t.set_fontproperties(font_prop)
|
|
657
|
+
else:
|
|
658
|
+
ax_heat.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
659
|
+
ax_heat.set_axis_off()
|
|
660
|
+
|
|
661
|
+
# Model usage (reuse existing generator logic by drawing into its own axes)
|
|
662
|
+
model_counts: defaultdict[str, int] = defaultdict(int)
|
|
663
|
+
for conv in collection.conversations:
|
|
664
|
+
model_counts[conv.model or "Unknown"] += 1
|
|
665
|
+
total_models = sum(model_counts.values())
|
|
666
|
+
if total_models:
|
|
667
|
+
items = sorted(model_counts.items(), key=lambda x: x[1], reverse=True)
|
|
668
|
+
labels = [k for k, _ in items][:8]
|
|
669
|
+
counts = [v for _, v in items][:8]
|
|
670
|
+
y = list(range(len(labels)))[::-1]
|
|
671
|
+
bars = ax_model.barh(y, counts[::-1], color=cfg.color, alpha=0.9, height=0.6)
|
|
672
|
+
ax_model.set_yticks(y)
|
|
673
|
+
ax_model.set_yticklabels(labels[::-1], fontproperties=font_prop)
|
|
674
|
+
ax_model.set_xlabel("Conversations", fontproperties=font_prop)
|
|
675
|
+
ax_model.set_title("Models", fontproperties=font_prop, fontsize=13, pad=10)
|
|
676
|
+
for bar, count in zip(bars, counts[::-1], strict=True):
|
|
677
|
+
pct = 100 * (count / total_models)
|
|
678
|
+
ax_model.text(
|
|
679
|
+
bar.get_width(),
|
|
680
|
+
bar.get_y() + bar.get_height() / 2,
|
|
681
|
+
f" {pct:.0f}%",
|
|
682
|
+
va="center",
|
|
683
|
+
ha="left",
|
|
684
|
+
fontproperties=font_prop,
|
|
685
|
+
fontsize=9,
|
|
686
|
+
color="#57606a",
|
|
687
|
+
)
|
|
688
|
+
_apply_tick_font(ax_model, font_prop)
|
|
689
|
+
else:
|
|
690
|
+
ax_model.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
691
|
+
ax_model.set_axis_off()
|
|
692
|
+
|
|
693
|
+
# Conversation length mini-hist
|
|
694
|
+
lengths = [conv.message_count("user") for conv in collection.conversations]
|
|
695
|
+
if lengths:
|
|
696
|
+
sorted_lengths = sorted(lengths)
|
|
697
|
+
cap = max(int(sorted_lengths[int(0.95 * (len(sorted_lengths) - 1))]), 5)
|
|
698
|
+
plot_lengths = [min(L, cap) for L in lengths]
|
|
699
|
+
ax_len.hist(
|
|
700
|
+
plot_lengths,
|
|
701
|
+
bins=min(16, max(8, cap // 2)),
|
|
702
|
+
color=cfg.color,
|
|
703
|
+
alpha=0.85,
|
|
704
|
+
rwidth=0.9,
|
|
705
|
+
edgecolor="white",
|
|
706
|
+
linewidth=0.5,
|
|
707
|
+
)
|
|
708
|
+
ax_len.set_title("Conversation length", fontproperties=font_prop, fontsize=13, pad=10)
|
|
709
|
+
ax_len.set_xlabel("User prompts", fontproperties=font_prop)
|
|
710
|
+
ax_len.set_ylabel("Conversations", fontproperties=font_prop)
|
|
711
|
+
ax_len.set_xlim(left=0, right=cap)
|
|
712
|
+
_apply_tick_font(ax_len, font_prop)
|
|
713
|
+
else:
|
|
714
|
+
ax_len.text(0.5, 0.5, "No data", ha="center", va="center", fontproperties=font_prop)
|
|
715
|
+
ax_len.set_axis_off()
|
|
716
|
+
|
|
717
|
+
fig.subplots_adjust(top=0.93, left=0.06, right=0.98, bottom=0.06, hspace=0.4, wspace=0.25)
|
|
718
|
+
return fig
|
|
719
|
+
|
|
720
|
+
|
|
395
721
|
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate all summary-level graphs.

    Every graph is saved as a PNG file directly inside ``output_dir``.
    Nothing is written, and no directory is created, when the collection
    has no conversations.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs (created if missing)
        config: Optional graph configuration; the default graph
            configuration is used when omitted
        progress_bar: Whether to show a progress bar
    """
    if not collection.conversations:
        return

    cfg = config or get_default_config().graph

    # savefig() does not create missing parent directories, so make sure the
    # target exists even when this function is called on its own rather than
    # through generate_graphs().
    output_dir.mkdir(parents=True, exist_ok=True)

    user_ts = collection.timestamps("user")

    # Each task is (label, output filename, zero-argument figure builder).
    # Builders are deferred so the progress bar advances per rendered figure.
    tasks: list[tuple[str, str, Callable[[], Figure]]] = [
        ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
        ("Activity heatmap", "activity_heatmap.png", lambda: generate_activity_heatmap(collection, cfg)),
        ("Daily activity", "daily_activity.png", lambda: generate_daily_activity_lineplot(collection, cfg)),
        ("Monthly activity", "monthly_activity.png", lambda: generate_monthly_activity_barplot(collection, cfg)),
        ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
        ("Conversation lengths", "conversation_lengths.png", lambda: generate_length_histogram(collection, cfg)),
        (
            "Conversation lifetimes",
            "conversation_lifetimes.png",
            lambda: generate_conversation_lifetime_histogram(collection, cfg),
        ),
    ]

    # The weekday/hourly patterns are only generated when the collection has
    # user timestamps.
    if user_ts:
        tasks.extend(
            [
                (
                    "Weekday pattern",
                    "weekday_pattern.png",
                    lambda: generate_week_barplot(user_ts, "All time", cfg),
                ),
                (
                    "Hourly pattern",
                    "hourly_pattern.png",
                    lambda: generate_hour_barplot(user_ts, "All time", cfg),
                ),
            ]
        )

    for _, filename, build in tqdm(
        tasks,
        desc="Creating summary graphs",
        disable=not progress_bar,
    ):
        fig = build()
        fig.savefig(output_dir / filename, facecolor="white")
|
|
428
779
|
|
|
429
780
|
|
|
430
781
|
def generate_graphs(
|
|
@@ -443,45 +794,62 @@ def generate_graphs(
|
|
|
443
794
|
progress_bar: Whether to show progress bars
|
|
444
795
|
"""
|
|
445
796
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
797
|
+
cfg = config or get_default_config().graph
|
|
446
798
|
|
|
447
|
-
# Summary graphs
|
|
448
|
-
generate_summary_graphs(collection, output_dir,
|
|
449
|
-
|
|
450
|
-
month_groups = collection.group_by_month()
|
|
451
|
-
year_groups = collection.group_by_year()
|
|
799
|
+
# Summary graphs (default: small, high-signal set)
|
|
800
|
+
generate_summary_graphs(collection, output_dir, cfg, progress_bar=progress_bar)
|
|
452
801
|
|
|
453
|
-
#
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
desc="Creating monthly graphs 📈",
|
|
457
|
-
disable=not progress_bar,
|
|
458
|
-
):
|
|
459
|
-
base_name = month.strftime("%Y %B")
|
|
460
|
-
title = month.strftime("%B '%y")
|
|
461
|
-
timestamps = group.timestamps("user")
|
|
462
|
-
|
|
463
|
-
# Weekday distribution
|
|
464
|
-
fig_week = generate_week_barplot(timestamps, title, config)
|
|
465
|
-
fig_week.savefig(output_dir / f"{base_name}_weekly.png")
|
|
466
|
-
|
|
467
|
-
# Hourly distribution
|
|
468
|
-
fig_hour = generate_hour_barplot(timestamps, title, config)
|
|
469
|
-
fig_hour.savefig(output_dir / f"{base_name}_hourly.png")
|
|
470
|
-
|
|
471
|
-
# Year-wise graphs
|
|
472
|
-
for year, group in tqdm(
|
|
473
|
-
year_groups.items(),
|
|
474
|
-
desc="Creating yearly graphs 📈",
|
|
475
|
-
disable=not progress_bar,
|
|
476
|
-
):
|
|
477
|
-
base_name = year.strftime("%Y")
|
|
478
|
-
title = year.strftime("%Y")
|
|
479
|
-
timestamps = group.timestamps("user")
|
|
802
|
+
# Optional breakdowns (can generate lots of files; off by default)
|
|
803
|
+
if not collection.conversations:
|
|
804
|
+
return
|
|
480
805
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
806
|
+
timestamps = collection.timestamps("user")
|
|
807
|
+
if not timestamps:
|
|
808
|
+
return
|
|
484
809
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
810
|
+
breakdown_root = output_dir / "Breakdowns"
|
|
811
|
+
if cfg.generate_monthly_breakdowns:
|
|
812
|
+
monthly_dir = breakdown_root / "Monthly"
|
|
813
|
+
monthly_dir.mkdir(parents=True, exist_ok=True)
|
|
814
|
+
|
|
815
|
+
month_groups: defaultdict[datetime, list[float]] = defaultdict(list)
|
|
816
|
+
for ts in timestamps:
|
|
817
|
+
dt = _ts_to_dt(ts, cfg)
|
|
818
|
+
month_groups[_month_start(dt)].append(ts)
|
|
819
|
+
|
|
820
|
+
for month, ts_list in tqdm(
|
|
821
|
+
sorted(month_groups.items(), key=lambda x: x[0]),
|
|
822
|
+
desc="Creating monthly breakdown graphs",
|
|
823
|
+
disable=not progress_bar,
|
|
824
|
+
):
|
|
825
|
+
slug = month.strftime("%Y-%m")
|
|
826
|
+
title = month.strftime("%b %Y")
|
|
827
|
+
generate_week_barplot(ts_list, title, cfg).savefig(
|
|
828
|
+
monthly_dir / f"{slug}_weekday.png", facecolor="white"
|
|
829
|
+
)
|
|
830
|
+
generate_hour_barplot(ts_list, title, cfg).savefig(
|
|
831
|
+
monthly_dir / f"{slug}_hourly.png", facecolor="white"
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
if cfg.generate_yearly_breakdowns:
|
|
835
|
+
yearly_dir = breakdown_root / "Yearly"
|
|
836
|
+
yearly_dir.mkdir(parents=True, exist_ok=True)
|
|
837
|
+
|
|
838
|
+
year_groups: defaultdict[datetime, list[float]] = defaultdict(list)
|
|
839
|
+
for ts in timestamps:
|
|
840
|
+
dt = _ts_to_dt(ts, cfg)
|
|
841
|
+
year_groups[_year_start(dt)].append(ts)
|
|
842
|
+
|
|
843
|
+
for year, ts_list in tqdm(
|
|
844
|
+
sorted(year_groups.items(), key=lambda x: x[0]),
|
|
845
|
+
desc="Creating yearly breakdown graphs",
|
|
846
|
+
disable=not progress_bar,
|
|
847
|
+
):
|
|
848
|
+
slug = year.strftime("%Y")
|
|
849
|
+
title = year.strftime("%Y")
|
|
850
|
+
generate_week_barplot(ts_list, title, cfg).savefig(
|
|
851
|
+
yearly_dir / f"{slug}_weekday.png", facecolor="white"
|
|
852
|
+
)
|
|
853
|
+
generate_hour_barplot(ts_list, title, cfg).savefig(
|
|
854
|
+
yearly_dir / f"{slug}_hourly.png", facecolor="white"
|
|
855
|
+
)
|