convoviz 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {convoviz-0.2.2 → convoviz-0.2.4}/PKG-INFO +30 -5
  2. {convoviz-0.2.2 → convoviz-0.2.4}/README.md +29 -4
  3. convoviz-0.2.4/convoviz/analysis/graphs.py +487 -0
  4. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/analysis/wordcloud.py +21 -1
  5. convoviz-0.2.4/convoviz/assets/stopwords.txt +75 -0
  6. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/cli.py +18 -15
  7. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/config.py +14 -7
  8. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/interactive.py +40 -11
  9. convoviz-0.2.4/convoviz/io/assets.py +82 -0
  10. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/io/loaders.py +54 -3
  11. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/io/writers.py +17 -2
  12. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/models/__init__.py +0 -4
  13. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/models/collection.py +14 -6
  14. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/models/conversation.py +4 -6
  15. convoviz-0.2.4/convoviz/models/message.py +157 -0
  16. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/pipeline.py +70 -24
  17. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/renderers/markdown.py +91 -24
  18. convoviz-0.2.4/convoviz/renderers/yaml.py +119 -0
  19. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/utils.py +54 -4
  20. {convoviz-0.2.2 → convoviz-0.2.4}/pyproject.toml +2 -3
  21. convoviz-0.2.2/convoviz/analysis/graphs.py +0 -98
  22. convoviz-0.2.2/convoviz/models/message.py +0 -77
  23. convoviz-0.2.2/convoviz/renderers/yaml.py +0 -42
  24. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/__init__.py +0 -0
  25. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/__main__.py +0 -0
  26. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/analysis/__init__.py +0 -0
  27. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/colormaps.txt +0 -0
  28. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  29. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  30. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  31. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  32. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  33. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  34. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  35. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  36. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  37. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  38. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  39. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  40. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  41. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  42. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  43. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  44. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  45. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  46. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  47. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  48. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  49. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  50. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  51. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  52. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  53. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  54. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  55. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  56. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  57. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  58. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  59. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/exceptions.py +0 -0
  60. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/io/__init__.py +0 -0
  61. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/models/node.py +0 -0
  62. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/py.typed +0 -0
  63. {convoviz-0.2.2 → convoviz-0.2.4}/convoviz/renderers/__init__.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: convoviz
- Version: 0.2.2
+ Version: 0.2.4
  Summary: Get analytics and visualizations on your ChatGPT data!
  Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
  Author: Mohamed Cheikh Sidiya
@@ -24,7 +24,7 @@ Requires-Python: >=3.12
  Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
  Description-Content-Type: text/markdown

- # Convoviz 📊: Visualize your entire ChatGPT data !
+ # Convoviz 📊: Visualize your entire ChatGPT data

  Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.

@@ -68,7 +68,7 @@ or pipx:
  pipx install convoviz
  ```

- ### 3. Run the Script 🏃‍♂️
+ ### 3. Run the tool 🏃‍♂️

  Simply run the command and follow the prompts:

@@ -81,9 +81,18 @@ convoviz
  You can provide arguments directly to skip the prompts:

  ```bash
- convoviz --zip path/to/your/export.zip --output path/to/output/folder
+ convoviz --input path/to/your/export.zip --output path/to/output/folder
  ```

+ Inputs can be any of:
+ - A ChatGPT export ZIP (downloaded from OpenAI)
+ - An extracted export directory containing `conversations.json`
+ - A `conversations.json` file directly
+
+ Notes:
+ - `--zip` / `-z` is kept as an alias for `--input` for convenience.
+ - You can force non-interactive mode with `--no-interactive`.
+
  For more options, run:

  ```bash
@@ -118,4 +127,20 @@ It was also a great opportunity to learn more about Python and type annotations.

  It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.

- I'm working on automating it to add new conversations and updating old ones. Had some luck with a JavaScript bookmarklet, still ironing it out tho.
+ ### Offline / reproducible runs
+
+ Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+ ```bash
+ python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ If you’re using `uv` without a global install, you can run:
+
+ ```bash
+ uv run python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ ### Bookmarklet
+
+ There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
@@ -1,4 +1,4 @@
- # Convoviz 📊: Visualize your entire ChatGPT data !
+ # Convoviz 📊: Visualize your entire ChatGPT data

  Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.

@@ -42,7 +42,7 @@ or pipx:
  pipx install convoviz
  ```

- ### 3. Run the Script 🏃‍♂️
+ ### 3. Run the tool 🏃‍♂️

  Simply run the command and follow the prompts:

@@ -55,9 +55,18 @@ convoviz
  You can provide arguments directly to skip the prompts:

  ```bash
- convoviz --zip path/to/your/export.zip --output path/to/output/folder
+ convoviz --input path/to/your/export.zip --output path/to/output/folder
  ```

+ Inputs can be any of:
+ - A ChatGPT export ZIP (downloaded from OpenAI)
+ - An extracted export directory containing `conversations.json`
+ - A `conversations.json` file directly
+
+ Notes:
+ - `--zip` / `-z` is kept as an alias for `--input` for convenience.
+ - You can force non-interactive mode with `--no-interactive`.
+
  For more options, run:

  ```bash
@@ -92,4 +101,20 @@ It was also a great opportunity to learn more about Python and type annotations.

  It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.

- I'm working on automating it to add new conversations and updating old ones. Had some luck with a JavaScript bookmarklet, still ironing it out tho.
+ ### Offline / reproducible runs
+
+ Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+ ```bash
+ python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ If you’re using `uv` without a global install, you can run:
+
+ ```bash
+ uv run python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ ### Bookmarklet
+
+ There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
@@ -0,0 +1,487 @@
+ """Graph generation for conversation analytics."""
+
+ from collections import defaultdict
+ from datetime import UTC, datetime
+ from pathlib import Path
+
+ import matplotlib.dates as mdates
+ import matplotlib.font_manager as fm
+ from matplotlib.axes import Axes
+ from matplotlib.figure import Figure
+ from tqdm import tqdm
+
+ from convoviz.config import GraphConfig, get_default_config
+ from convoviz.models import ConversationCollection
+ from convoviz.utils import get_asset_path
+
+ WEEKDAYS = [
+     "Monday",
+     "Tuesday",
+     "Wednesday",
+     "Thursday",
+     "Friday",
+     "Saturday",
+     "Sunday",
+ ]
+
+
+ def _setup_figure(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
+     """Internal helper to setup a figure with common styling."""
+     fig = Figure(figsize=config.figsize, dpi=config.dpi)
+     ax: Axes = fig.add_subplot()
+
+     # Load custom font if possible
+     font_path = get_asset_path(f"fonts/{config.font_name}")
+     font_prop = (
+         fm.FontProperties(fname=str(font_path)) if font_path.exists() else fm.FontProperties()
+     )
+
+     # Styling
+     fig.set_facecolor("white")
+     ax.set_facecolor("white")
+     ax.spines["top"].set_visible(False)
+     ax.spines["right"].set_visible(False)
+     if config.grid:
+         ax.grid(axis="y", linestyle="--", alpha=0.7)
+         ax.set_axisbelow(True)
+
+     return fig, ax, font_prop
+
+
+ def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
+     """Convert epoch timestamps into aware datetimes based on config."""
+     dt_utc = datetime.fromtimestamp(ts, UTC)
+     if config.timezone == "utc":
+         return dt_utc
+     return dt_utc.astimezone()
+
+
+ def _tz_label(config: GraphConfig) -> str:
+     return "UTC" if config.timezone == "utc" else "Local"
+
+
+ def generate_week_barplot(
+     timestamps: list[float],
+     title: str,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a bar graph showing message distribution across weekdays.
+
+     Args:
+         timestamps: List of Unix timestamps
+         title: Title for the graph
+         config: Optional graph configuration
+
+     Returns:
+         Matplotlib Figure object
+     """
+     cfg = config or get_default_config().graph
+     dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
+
+     weekday_counts: defaultdict[str, int] = defaultdict(int)
+     for date in dates:
+         weekday_counts[WEEKDAYS[date.weekday()]] += 1
+
+     x = list(range(len(WEEKDAYS)))
+     y = [weekday_counts[day] for day in WEEKDAYS]
+
+     fig, ax, font_prop = _setup_figure(cfg)
+
+     bars = ax.bar(x, y, color=cfg.color, alpha=0.85)
+
+     if cfg.show_counts:
+         for bar in bars:
+             height = bar.get_height()
+             if height > 0:
+                 ax.text(
+                     bar.get_x() + bar.get_width() / 2.0,
+                     height,
+                     f"{int(height)}",
+                     ha="center",
+                     va="bottom",
+                     fontproperties=font_prop,
+                 )
+
+     ax.set_xlabel("Weekday", fontproperties=font_prop)
+     ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+     ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
+     ax.set_xticks(x)
+     ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)
+
+     for label in ax.get_yticklabels():
+         label.set_fontproperties(font_prop)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_hour_barplot(
+     timestamps: list[float],
+     title: str,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a bar graph showing message distribution across hours of the day (0-23).
+
+     Args:
+         timestamps: List of Unix timestamps
+         title: Title for the graph
+         config: Optional graph configuration
+
+     Returns:
+         Matplotlib Figure object
+     """
+     cfg = config or get_default_config().graph
+     dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
+
+     hour_counts: dict[int, int] = dict.fromkeys(range(24), 0)
+     for date in dates:
+         hour_counts[date.hour] += 1
+
+     x = [f"{i:02d}:00" for i in range(24)]
+     y = [hour_counts[i] for i in range(24)]
+
+     fig, ax, font_prop = _setup_figure(cfg)
+
+     bars = ax.bar(range(24), y, color=cfg.color, alpha=0.8)
+
+     if cfg.show_counts:
+         for bar in bars:
+             height = bar.get_height()
+             if height > 0:
+                 ax.text(
+                     bar.get_x() + bar.get_width() / 2.0,
+                     height,
+                     f"{int(height)}",
+                     ha="center",
+                     va="bottom",
+                     fontproperties=font_prop,
+                     fontsize=8,
+                 )
+
+     ax.set_xlabel(f"Hour of Day ({_tz_label(cfg)})", fontproperties=font_prop)
+     ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+     ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
+     ax.set_xticks(range(24))
+     ax.set_xticklabels(x, rotation=90, fontproperties=font_prop)
+
+     for label in ax.get_yticklabels():
+         label.set_fontproperties(font_prop)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_model_piechart(
+     collection: ConversationCollection,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a pie chart showing distribution of models used.
+
+     Groups models with < 5% usage into "Other".
+
+     Args:
+         collection: Collection of conversations
+         config: Optional graph configuration
+
+     Returns:
+         Matplotlib Figure object
+     """
+     cfg = config or get_default_config().graph
+     model_counts: defaultdict[str, int] = defaultdict(int)
+
+     for conv in collection.conversations:
+         model = conv.model or "Unknown"
+         model_counts[model] += 1
+
+     total = sum(model_counts.values())
+     if total == 0:
+         # Return empty figure or figure with "No Data"
+         fig, ax, font_prop = _setup_figure(cfg)
+         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+         return fig
+
+     # Group minor models
+     threshold = 0.05
+     refined_counts: dict[str, int] = {}
+     other_count = 0
+
+     for model, count in model_counts.items():
+         if count / total < threshold:
+             other_count += count
+         else:
+             refined_counts[model] = count
+
+     if other_count > 0:
+         refined_counts["Other"] = other_count
+
+     # Sort for consistent display
+     sorted_items = sorted(refined_counts.items(), key=lambda x: x[1], reverse=True)
+     labels = [item[0] for item in sorted_items]
+     sizes = [item[1] for item in sorted_items]
+
+     fig, ax, font_prop = _setup_figure(cfg)
+
+     colors = [
+         "#4A90E2",
+         "#50E3C2",
+         "#F5A623",
+         "#D0021B",
+         "#8B572A",
+         "#417505",
+         "#9013FE",
+         "#BD10E0",
+         "#7F7F7F",
+     ]
+     ax.pie(
+         sizes,
+         labels=labels,
+         autopct="%1.1f%%",
+         startangle=140,
+         colors=colors[: len(labels)],
+         textprops={"fontproperties": font_prop},
+     )
+     ax.set_title("Model Usage Distribution", fontproperties=font_prop, fontsize=16, pad=20)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_length_histogram(
+     collection: ConversationCollection,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a histogram showing distribution of conversation lengths.
+
+     Caps the X-axis at the 95th percentile to focus on typical lengths.
+
+     Args:
+         collection: Collection of conversations
+         config: Optional graph configuration
+
+     Returns:
+         Matplotlib Figure object
+     """
+     cfg = config or get_default_config().graph
+     lengths = [conv.message_count("user") for conv in collection.conversations]
+
+     fig, ax, font_prop = _setup_figure(cfg)
+
+     if not lengths:
+         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+         return fig
+
+     # Cap at 95th percentile to focus on most conversations
+     sorted_lengths = sorted(lengths)
+     idx = int(0.95 * (len(sorted_lengths) - 1))
+     cap = int(sorted_lengths[idx])
+     cap = max(cap, 5)  # Ensure at least some range
+
+     # Filter lengths for the histogram plot, but keep the data correct
+     plot_lengths = [min(L, cap) for L in lengths]
+
+     bins = range(0, cap + 2, max(1, cap // 10))
+     ax.hist(plot_lengths, bins=bins, color=cfg.color, alpha=0.8, rwidth=0.8)
+
+     ax.set_xlabel("Number of User Prompts", fontproperties=font_prop)
+     ax.set_ylabel("Number of Conversations", fontproperties=font_prop)
+     ax.set_title(
+         f"Conversation Length Distribution (Capped at {cap})",
+         fontproperties=font_prop,
+         fontsize=16,
+         pad=20,
+     )
+
+     for label in ax.get_xticklabels() + ax.get_yticklabels():
+         label.set_fontproperties(font_prop)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_monthly_activity_barplot(
+     collection: ConversationCollection,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a bar chart showing total prompt count per month with readable labels.
+
+     Args:
+         collection: Collection of conversations
+         config: Optional graph configuration
+
+     Returns:
+         Matplotlib Figure object
+     """
+     cfg = config or get_default_config().graph
+     month_groups = collection.group_by_month()
+     sorted_months = sorted(month_groups.keys())
+
+     # Format labels as "Feb '23"
+     x = [m.strftime("%b '%y") for m in sorted_months]
+     y = [len(month_groups[m].timestamps("user")) for m in sorted_months]
+
+     fig, ax, font_prop = _setup_figure(cfg)
+
+     positions = list(range(len(x)))
+     bars = ax.bar(positions, y, color=cfg.color, alpha=0.85)
+
+     if cfg.show_counts:
+         for bar in bars:
+             height = bar.get_height()
+             if height > 0:
+                 ax.text(
+                     bar.get_x() + bar.get_width() / 2.0,
+                     height,
+                     f"{int(height)}",
+                     ha="center",
+                     va="bottom",
+                     fontproperties=font_prop,
+                     fontsize=8,
+                 )
+
+     ax.set_xlabel("Month", fontproperties=font_prop)
+     ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+     ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
+     tick_step = max(1, len(positions) // 12)  # show ~12 labels max
+     shown = positions[::tick_step] if positions else []
+     ax.set_xticks(shown)
+     ax.set_xticklabels([x[i] for i in shown], rotation=45, fontproperties=font_prop)
+
+     for label in ax.get_yticklabels():
+         label.set_fontproperties(font_prop)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_daily_activity_lineplot(
+     collection: ConversationCollection,
+     config: GraphConfig | None = None,
+ ) -> Figure:
+     """Create a line chart showing user prompt count per day."""
+     cfg = config or get_default_config().graph
+     timestamps = collection.timestamps("user")
+
+     fig, ax, font_prop = _setup_figure(cfg)
+     if not timestamps:
+         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+         return fig
+
+     counts: defaultdict[datetime, int] = defaultdict(int)
+     for ts in timestamps:
+         dt = _ts_to_dt(ts, cfg)
+         day = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+         counts[day] += 1
+
+     days = sorted(counts.keys())
+     values = [counts[d] for d in days]
+
+     x = mdates.date2num(days)
+     ax.plot(x, values, color=cfg.color, linewidth=2.0)
+     ax.fill_between(x, values, color=cfg.color, alpha=0.15)
+     locator = mdates.AutoDateLocator()
+     ax.xaxis.set_major_locator(locator)
+     ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
+     ax.set_title("Daily Activity History", fontproperties=font_prop, fontsize=16, pad=20)
+     ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
+     ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+
+     for label in ax.get_xticklabels() + ax.get_yticklabels():
+         label.set_fontproperties(font_prop)
+
+     fig.tight_layout()
+     return fig
+
+
+ def generate_summary_graphs(
+     collection: ConversationCollection,
+     output_dir: Path,
+     config: GraphConfig | None = None,
+ ) -> None:
+     """Generate all summary-level graphs.
+
+     Args:
+         collection: Collection of conversations
+         output_dir: Directory to save the graphs
+         config: Optional graph configuration
+     """
+     summary_dir = output_dir / "Summary"
+     summary_dir.mkdir(parents=True, exist_ok=True)
+
+     if not collection.conversations:
+         return
+
+     # Model usage
+     fig_models = generate_model_piechart(collection, config)
+     fig_models.savefig(summary_dir / "model_usage.png")
+
+     # Length distribution
+     fig_length = generate_length_histogram(collection, config)
+     fig_length.savefig(summary_dir / "conversation_lengths.png")
+
+     # Monthly activity
+     fig_activity = generate_monthly_activity_barplot(collection, config)
+     fig_activity.savefig(summary_dir / "monthly_activity.png")
+
+     # Daily activity
+     fig_daily = generate_daily_activity_lineplot(collection, config)
+     fig_daily.savefig(summary_dir / "daily_activity.png")
+
+
+ def generate_graphs(
+     collection: ConversationCollection,
+     output_dir: Path,
+     config: GraphConfig | None = None,
+     *,
+     progress_bar: bool = False,
+ ) -> None:
+     """Generate weekly, hourly, and summary graphs.
+
+     Args:
+         collection: Collection of conversations
+         output_dir: Directory to save the graphs
+         config: Optional graph configuration
+         progress_bar: Whether to show progress bars
+     """
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     # Summary graphs
+     generate_summary_graphs(collection, output_dir, config)
+
+     month_groups = collection.group_by_month()
+     year_groups = collection.group_by_year()
+
+     # Month-wise graphs
+     for month, group in tqdm(
+         month_groups.items(),
+         desc="Creating monthly graphs 📈",
+         disable=not progress_bar,
+     ):
+         base_name = month.strftime("%Y %B")
+         title = month.strftime("%B '%y")
+         timestamps = group.timestamps("user")
+
+         # Weekday distribution
+         fig_week = generate_week_barplot(timestamps, title, config)
+         fig_week.savefig(output_dir / f"{base_name}_weekly.png")
+
+         # Hourly distribution
+         fig_hour = generate_hour_barplot(timestamps, title, config)
+         fig_hour.savefig(output_dir / f"{base_name}_hourly.png")
+
+     # Year-wise graphs
+     for year, group in tqdm(
+         year_groups.items(),
+         desc="Creating yearly graphs 📈",
+         disable=not progress_bar,
+     ):
+         base_name = year.strftime("%Y")
+         title = year.strftime("%Y")
+         timestamps = group.timestamps("user")
+
+         # Weekday distribution
+         fig_week = generate_week_barplot(timestamps, title, config)
+         fig_week.savefig(output_dir / f"{base_name}_weekly.png")
+
+         # Hourly distribution
+         fig_hour = generate_hour_barplot(timestamps, title, config)
+         fig_hour.savefig(output_dir / f"{base_name}_hourly.png")
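
The `graphs.py` module added above can also be used directly as a library. Below is a minimal sketch that exercises only the two timestamp-based plotters whose signatures appear in this diff (`generate_week_barplot`, `generate_hour_barplot`); it assumes convoviz 0.2.4 is installed and sidesteps `ConversationCollection` loading, which lives elsewhere (presumably `convoviz/io/loaders.py`) and is not shown in this hunk.

```python
from datetime import UTC, datetime
from pathlib import Path

from convoviz.analysis.graphs import generate_hour_barplot, generate_week_barplot

# A few made-up prompt timestamps (Unix epoch seconds), purely to exercise the API.
timestamps = [
    datetime(2024, 1, 8, 9, 30, tzinfo=UTC).timestamp(),
    datetime(2024, 1, 9, 22, 15, tzinfo=UTC).timestamp(),
    datetime(2024, 1, 13, 14, 0, tzinfo=UTC).timestamp(),
]

out = Path("graphs")
out.mkdir(exist_ok=True)

# With config=None, both functions fall back to get_default_config().graph.
generate_week_barplot(timestamps, "January '24").savefig(out / "weekly.png")
generate_hour_barplot(timestamps, "January '24").savefig(out / "hourly.png")
```

The collection-level entry point is `generate_graphs(collection, output_dir, config, progress_bar=...)`, which writes the summary, monthly, and yearly PNGs in one call.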
@@ -24,6 +24,23 @@ STOPWORD_LANGUAGES = [
  ]


+ @lru_cache(maxsize=1)
+ def load_programming_stopwords() -> frozenset[str]:
+     """Load programming keywords and types from assets.
+
+     Returns:
+         Frozen set of programming stop words
+     """
+     stopwords_path = Path(__file__).parent.parent / "assets" / "stopwords.txt"
+     if not stopwords_path.exists():
+         return frozenset()
+
+     with open(stopwords_path, encoding="utf-8") as f:
+         return frozenset(
+             line.strip().lower() for line in f if line.strip() and not line.strip().startswith("#")
+         )
+
+
  @lru_cache(maxsize=1)
  def load_nltk_stopwords() -> frozenset[str]:
      """Load and cache NLTK stopwords.
@@ -45,7 +62,7 @@ def load_nltk_stopwords() -> frozenset[str]:
      return frozenset(words)


- def parse_custom_stopwords(stopwords_str: str) -> set[str]:
+ def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
      """Parse a comma-separated string of custom stopwords.

      Args:
@@ -74,6 +91,9 @@ def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
      stopwords = set(load_nltk_stopwords())
      stopwords.update(parse_custom_stopwords(config.custom_stopwords))

+     if config.exclude_programming_keywords:
+         stopwords.update(load_programming_stopwords())
+
      wc = WordCloud(
          font_path=str(config.font_path) if config.font_path else None,
          width=config.width,
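
As an aside on the word-cloud change above: `generate_wordcloud` now merges three stop-word sources: NLTK words, the user's comma-separated custom list, and (when `config.exclude_programming_keywords` is set) the bundled `stopwords.txt` of programming keywords. A small sketch that mirrors that merge using the helpers defined in `convoviz/analysis/wordcloud.py`, assuming the NLTK `stopwords` corpus is already downloaded (see the README note earlier in this diff):

```python
from convoviz.analysis.wordcloud import (
    load_nltk_stopwords,
    load_programming_stopwords,
    parse_custom_stopwords,
)

# Mirror the merge order used inside generate_wordcloud in the hunk above:
# NLTK words, then custom words, then the bundled programming keyword list.
stopwords = set(load_nltk_stopwords())
stopwords.update(parse_custom_stopwords("chatgpt, openai"))  # also accepts None as of 0.2.4
stopwords.update(load_programming_stopwords())

print(f"{len(stopwords)} words will be excluded from the word cloud")
```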