convoviz 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {convoviz-0.2.2 → convoviz-0.2.3}/PKG-INFO +1 -1
  2. convoviz-0.2.3/convoviz/analysis/graphs.py +429 -0
  3. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/analysis/wordcloud.py +20 -0
  4. convoviz-0.2.3/convoviz/assets/stopwords.txt +75 -0
  5. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/cli.py +18 -15
  6. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/config.py +12 -7
  7. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/interactive.py +22 -12
  8. convoviz-0.2.3/convoviz/io/assets.py +82 -0
  9. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/io/loaders.py +30 -2
  10. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/io/writers.py +17 -2
  11. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/models/__init__.py +0 -4
  12. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/models/collection.py +2 -0
  13. convoviz-0.2.3/convoviz/models/message.py +119 -0
  14. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/pipeline.py +42 -19
  15. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/renderers/markdown.py +46 -15
  16. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/utils.py +54 -4
  17. {convoviz-0.2.2 → convoviz-0.2.3}/pyproject.toml +1 -1
  18. convoviz-0.2.2/convoviz/analysis/graphs.py +0 -98
  19. convoviz-0.2.2/convoviz/models/message.py +0 -77
  20. {convoviz-0.2.2 → convoviz-0.2.3}/README.md +0 -0
  21. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/__init__.py +0 -0
  22. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/__main__.py +0 -0
  23. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/analysis/__init__.py +0 -0
  24. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/colormaps.txt +0 -0
  25. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  26. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  27. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  28. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  29. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  30. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  31. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  32. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  33. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  34. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  35. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  36. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  37. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  38. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  39. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  40. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  41. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  42. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  43. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  44. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  45. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  46. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  47. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  48. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  49. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  50. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  51. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  52. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  53. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  54. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  55. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  56. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/exceptions.py +0 -0
  57. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/io/__init__.py +0 -0
  58. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/models/conversation.py +0 -0
  59. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/models/node.py +0 -0
  60. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/py.typed +0 -0
  61. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/renderers/__init__.py +0 -0
  62. {convoviz-0.2.2 → convoviz-0.2.3}/convoviz/renderers/yaml.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: convoviz
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Get analytics and visualizations on your ChatGPT data!
5
5
  Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
6
6
  Author: Mohamed Cheikh Sidiya
@@ -0,0 +1,429 @@
1
+ """Graph generation for conversation analytics."""
2
+
3
+ from collections import defaultdict
4
+ from datetime import UTC, datetime
5
+ from pathlib import Path
6
+
7
+ import matplotlib.font_manager as fm
8
+ from matplotlib.figure import Figure
9
+ from tqdm import tqdm
10
+
11
+ from convoviz.config import GraphConfig, get_default_config
12
+ from convoviz.models import ConversationCollection
13
+ from convoviz.utils import get_asset_path
14
+
15
# Weekday display names, indexed by ``datetime.weekday()`` (Monday is 0),
# which is how the weekday bar plot looks them up.
WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
24
+
25
+
26
def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
    """Build a styled figure with a single axes and resolve the label font.

    Args:
        config: Graph configuration supplying ``figsize``, ``font_name`` and ``grid``

    Returns:
        The new figure and the font properties to use for its text
    """
    figure = Figure(figsize=config.figsize, dpi=300)
    axes = figure.add_subplot()

    # Use the bundled font when the asset is present, else matplotlib's default.
    custom_font = get_asset_path(f"fonts/{config.font_name}")
    if custom_font.exists():
        props = fm.FontProperties(fname=str(custom_font))
    else:
        props = fm.FontProperties()

    # Drop the top/right frame lines; optionally add horizontal guide lines.
    for side in ("top", "right"):
        axes.spines[side].set_visible(False)
    if config.grid:
        axes.grid(axis="y", linestyle="--", alpha=0.7)

    return figure, props
44
+
45
+
46
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall on each weekday (evaluated in UTC).

    Args:
        timestamps: List of Unix timestamps
        title: Title for the graph
        config: Optional graph configuration; the default graph config is used when omitted

    Returns:
        Matplotlib Figure object
    """
    cfg = config or get_default_config().graph

    # One slot per weekday, in the same Monday-first order as WEEKDAYS.
    per_day = [0] * len(WEEKDAYS)
    for stamp in timestamps:
        per_day[datetime.fromtimestamp(stamp, UTC).weekday()] += 1

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(WEEKDAYS, per_day, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        # Annotate every bar (including empty ones) with its count.
        for rect in rects:
            top = rect.get_height()
            ax.text(
                rect.get_x() + rect.get_width() / 2.0,
                top,
                str(int(top)),
                ha="center",
                va="bottom",
                fontproperties=font_prop,
            )

    ax.set_xlabel("Weekday", fontproperties=font_prop)
    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
    ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(range(len(WEEKDAYS)))
    ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
99
+
100
+
101
def generate_hour_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall in each hour of the day (0-23, UTC).

    Args:
        timestamps: List of Unix timestamps
        title: Title prefix; " - Hourly Distribution" is appended to it
        config: Optional graph configuration; the default graph config is used when omitted

    Returns:
        Matplotlib Figure object
    """
    cfg = config or get_default_config().graph
    hours = range(24)

    per_hour = [0] * 24
    for stamp in timestamps:
        per_hour[datetime.fromtimestamp(stamp, UTC).hour] += 1

    tick_text = [f"{i:02d}:00" for i in hours]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(hours, per_hour, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        for rect in rects:
            top = rect.get_height()
            if top <= 0:
                # Empty hours get no annotation.
                continue
            ax.text(
                rect.get_x() + rect.get_width() / 2.0,
                top,
                str(int(top)),
                ha="center",
                va="bottom",
                fontproperties=font_prop,
                fontsize=8,
            )

    ax.set_xlabel("Hour of Day (UTC)", fontproperties=font_prop)
    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
    ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(hours)
    ax.set_xticklabels(tick_text, rotation=90, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
156
+
157
+
158
def generate_model_piechart(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a pie chart showing distribution of models used.

    Each conversation counts once under its ``model`` (or "Unknown" when that
    is falsy). Models with < 5% usage are grouped into "Other".

    Args:
        collection: Collection of conversations
        config: Optional graph configuration; the default graph config is used when omitted

    Returns:
        Matplotlib Figure object (showing a "No Data" message when the
        collection has no conversations)
    """
    cfg = config or get_default_config().graph
    model_counts: defaultdict[str, int] = defaultdict(int)

    for conv in collection.conversations:
        model_counts[conv.model or "Unknown"] += 1

    total = sum(model_counts.values())
    if total == 0:
        # Nothing to chart: return a figure carrying a "No Data" message.
        fig, font_prop = _setup_figure(cfg)
        ax = fig.gca()
        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
        return fig

    # Group minor models
    threshold = 0.05
    refined_counts: dict[str, int] = {}
    other_count = 0

    for model, count in model_counts.items():
        if count / total < threshold:
            other_count += count
        else:
            refined_counts[model] = count

    if other_count > 0:
        # Merge rather than overwrite: a model literally labelled "Other" that
        # met the threshold must keep its own count.
        refined_counts["Other"] = refined_counts.get("Other", 0) + other_count

    # Largest slice first; ties keep their first-seen order (stable sort).
    sorted_items = sorted(refined_counts.items(), key=lambda item: item[1], reverse=True)
    labels = [name for name, _ in sorted_items]
    sizes = [count for _, count in sorted_items]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    colors = [
        "#4A90E2",
        "#50E3C2",
        "#F5A623",
        "#D0021B",
        "#8B572A",
        "#417505",
        "#9013FE",
        "#BD10E0",
        "#7F7F7F",
    ]
    ax.pie(
        sizes,
        labels=labels,
        autopct="%1.1f%%",
        startangle=140,
        colors=colors[: len(labels)],
        textprops={"fontproperties": font_prop},
    )
    ax.set_title("Model Usage Distribution", fontproperties=font_prop, fontsize=16, pad=20)

    fig.tight_layout()
    return fig
233
+
234
+
235
def generate_length_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a histogram showing distribution of conversation lengths.

    Length is the number of user messages per conversation. Values above the
    95th percentile (minimum cap of 5) are clipped to the cap so that long
    outliers land in the last bin instead of stretching the X-axis.

    Args:
        collection: Collection of conversations
        config: Optional graph configuration; the default graph config is used when omitted

    Returns:
        Matplotlib Figure object (showing a "No Data" message when the
        collection has no conversations)
    """
    cfg = config or get_default_config().graph
    lengths = [conv.message_count("user") for conv in collection.conversations]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    if not lengths:
        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
        return fig

    import numpy as np

    # Cap at 95th percentile to focus on most conversations
    cap = max(int(np.percentile(lengths, 95)), 5)  # Ensure at least some range

    # Clip outliers to the cap for plotting; the source data is untouched.
    plot_lengths = [min(length, cap) for length in lengths]

    step = max(1, cap // 10)
    bin_edges = list(range(0, cap + 2, step))
    if bin_edges[-1] < cap:
        # With step > 1 the range can stop short of `cap` (e.g. cap=57,
        # step=5 ends at 55). Values outside the edges are dropped by the
        # histogram, which would discard every clipped conversation, so
        # extend by one more bin to cover the cap.
        bin_edges.append(bin_edges[-1] + step)
    ax.hist(plot_lengths, bins=bin_edges, color=cfg.color, alpha=0.8, rwidth=0.8)

    ax.set_xlabel("Number of User Prompts", fontproperties=font_prop)
    ax.set_ylabel("Number of Conversations", fontproperties=font_prop)
    ax.set_title(
        f"Conversation Length Distribution (Capped at {cap})",
        fontproperties=font_prop,
        fontsize=16,
        pad=20,
    )

    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
286
+
287
+
288
def generate_monthly_activity_barplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot the number of user prompts per month, in chronological order.

    Args:
        collection: Collection of conversations
        config: Optional graph configuration; the default graph config is used when omitted

    Returns:
        Matplotlib Figure object
    """
    cfg = config or get_default_config().graph
    by_month = collection.group_by_month()
    ordered = sorted(by_month.keys())

    # Tick labels look like "Feb '23".
    month_labels = [month.strftime("%b '%y") for month in ordered]
    prompt_totals = [len(by_month[month].timestamps("user")) for month in ordered]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(month_labels, prompt_totals, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        for rect in rects:
            top = rect.get_height()
            if top <= 0:
                # Months without prompts get no annotation.
                continue
            ax.text(
                rect.get_x() + rect.get_width() / 2.0,
                top,
                str(int(top)),
                ha="center",
                va="bottom",
                fontproperties=font_prop,
                fontsize=8,
            )

    ax.set_xlabel("Month", fontproperties=font_prop)
    ax.set_ylabel("Total Prompt Count", fontproperties=font_prop)
    ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(range(len(month_labels)))
    ax.set_xticklabels(month_labels, rotation=45, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
339
+
340
+
341
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
) -> None:
    """Write the collection-wide summary graphs into ``output_dir / "Summary"``.

    The "Summary" directory is always created; the PNG files are written only
    when the collection holds at least one conversation.

    Args:
        collection: Collection of conversations
        output_dir: Directory under which the "Summary" folder is created
        config: Optional graph configuration
    """
    summary_dir = output_dir / "Summary"
    summary_dir.mkdir(parents=True, exist_ok=True)

    if collection.conversations:
        # (figure builder, output file name), rendered in this order.
        outputs = (
            (generate_model_piechart, "model_usage.png"),
            (generate_length_histogram, "conversation_lengths.png"),
            (generate_monthly_activity_barplot, "monthly_activity.png"),
        )
        for build, filename in outputs:
            build(collection, config).savefig(summary_dir / filename)
370
+
371
+
372
def generate_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate summary graphs plus weekly/hourly graphs per month and per year.

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the graphs (created if missing)
        config: Optional graph configuration
        progress_bar: Whether to show progress bars
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    generate_summary_graphs(collection, output_dir, config)

    monthly = collection.group_by_month()
    yearly = collection.group_by_year()

    def _save_period(stem: str, heading: str, subset: ConversationCollection) -> None:
        """Render and save the weekday and hourly plots for one period."""
        user_times = subset.timestamps("user")
        weekly_fig = generate_week_barplot(user_times, heading, config)
        weekly_fig.savefig(output_dir / f"{stem}_weekly.png")
        hourly_fig = generate_hour_barplot(user_times, heading, config)
        hourly_fig.savefig(output_dir / f"{stem}_hourly.png")

    # Month-wise graphs: files are named like "2023 February_weekly.png".
    for month, subset in tqdm(
        monthly.items(),
        desc="Creating monthly graphs 📈",
        disable=not progress_bar,
    ):
        _save_period(month.strftime("%Y %B"), month.strftime("%B '%y"), subset)

    # Year-wise graphs: the four-digit year is both file stem and title.
    for year, subset in tqdm(
        yearly.items(),
        desc="Creating yearly graphs 📈",
        disable=not progress_bar,
    ):
        year_text = year.strftime("%Y")
        _save_period(year_text, year_text, subset)
@@ -24,6 +24,23 @@ STOPWORD_LANGUAGES = [
24
24
  ]
25
25
 
26
26
 
27
@lru_cache(maxsize=1)
def load_programming_stopwords() -> frozenset[str]:
    """Read the bundled programming stop words (cached after the first call).

    Blank lines and lines starting with ``#`` are ignored; every remaining
    entry is stripped and lower-cased.

    Returns:
        Frozen set of programming stop words, empty if the asset file is missing
    """
    stopwords_path = Path(__file__).parent.parent / "assets" / "stopwords.txt"
    if not stopwords_path.exists():
        return frozenset()

    words: set[str] = set()
    with open(stopwords_path, encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry and not entry.startswith("#"):
                words.add(entry.lower())
    return frozenset(words)
42
+
43
+
27
44
  @lru_cache(maxsize=1)
28
45
  def load_nltk_stopwords() -> frozenset[str]:
29
46
  """Load and cache NLTK stopwords.
@@ -74,6 +91,9 @@ def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
74
91
  stopwords = set(load_nltk_stopwords())
75
92
  stopwords.update(parse_custom_stopwords(config.custom_stopwords))
76
93
 
94
+ if config.exclude_programming_keywords:
95
+ stopwords.update(load_programming_stopwords())
96
+
77
97
  wc = WordCloud(
78
98
  font_path=str(config.font_path) if config.font_path else None,
79
99
  width=config.width,
@@ -0,0 +1,75 @@
1
+ # Python
2
+ def
3
+ class
4
+ import
5
+ from
6
+ as
7
+ elif
8
+ finally
9
+ yield
10
+ pass
11
+ lambda
12
+ async
13
+ await
14
+ nonlocal
15
+ assert
16
+ self
17
+ cls
18
+ # JavaScript / TypeScript
19
+ const
20
+ let
21
+ var
22
+ function
23
+ export
24
+ default
25
+ extends
26
+ implements
27
+ static
28
+ # Java / C#
29
+ final
30
+ abstract
31
+ new
32
+ super
33
+ package
34
+ throws
35
+ synchronized
36
+ volatile
37
+ transient
38
+ native
39
+ strictfp
40
+ override
41
+ # C / C++
42
+ unsigned
43
+ signed
44
+ typedef
45
+ sizeof
46
+ extern
47
+ register
48
+ restrict
49
+ inline
50
+ template
51
+ typename
52
+ virtual
53
+ friend
54
+ mutable
55
+ explicit
56
+ operator
57
+ typeid
58
+ # Rust
59
+ mut
60
+ fn
61
+ pub
62
+ mod
63
+ trait
64
+ impl
65
+ where
66
+ loop
67
+ unsafe
68
+ crate
69
+ dyn
70
+ # Go
71
+ func
72
+ chan
73
+ defer
74
+ fallthrough
75
+ goto
@@ -8,7 +8,7 @@ from rich.console import Console
8
8
  from convoviz.config import get_default_config
9
9
  from convoviz.exceptions import ConfigurationError, InvalidZipError
10
10
  from convoviz.interactive import run_interactive_config
11
- from convoviz.io.loaders import find_latest_zip, validate_zip
11
+ from convoviz.io.loaders import find_latest_zip
12
12
  from convoviz.pipeline import run_pipeline
13
13
  from convoviz.utils import default_font_path
14
14
 
@@ -22,14 +22,15 @@ console = Console()
22
22
  @app.callback(invoke_without_command=True)
23
23
  def run(
24
24
  ctx: typer.Context,
25
- zip_path: Path | None = typer.Option(
25
+ input_path: Path | None = typer.Option(
26
26
  None,
27
+ "--input",
27
28
  "--zip",
28
29
  "-z",
29
- help="Path to the ChatGPT export zip file.",
30
+ help="Path to the ChatGPT export zip file, JSON file, or extracted directory.",
30
31
  exists=True,
31
32
  file_okay=True,
32
- dir_okay=False,
33
+ dir_okay=True,
33
34
  ),
34
35
  output_dir: Path | None = typer.Option(
35
36
  None,
@@ -52,13 +53,13 @@ def run(
52
53
  config = get_default_config()
53
54
 
54
55
  # Override with CLI args
55
- if zip_path:
56
- config.zip_filepath = zip_path
56
+ if input_path:
57
+ config.input_path = input_path
57
58
  if output_dir:
58
59
  config.output_folder = output_dir
59
60
 
60
- # Determine mode: interactive if explicitly requested or no zip provided
61
- use_interactive = interactive if interactive is not None else (zip_path is None)
61
+ # Determine mode: interactive if explicitly requested or no input provided
62
+ use_interactive = interactive if interactive is not None else (input_path is None)
62
63
 
63
64
  if use_interactive:
64
65
  console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
@@ -69,21 +70,23 @@ def run(
69
70
  raise typer.Exit(code=0) from None
70
71
  else:
71
72
  # Non-interactive mode: validate we have what we need
72
- if not config.zip_filepath:
73
+ if not config.input_path:
73
74
  # Try to find a default
74
75
  latest = find_latest_zip()
75
76
  if latest:
76
- console.print(f"No zip file specified, using latest found: {latest}")
77
- config.zip_filepath = latest
77
+ console.print(f"No input specified, using latest zip found: {latest}")
78
+ config.input_path = latest
78
79
  else:
79
80
  console.print(
80
- "[bold red]Error:[/bold red] No zip file provided and none found in Downloads."
81
+ "[bold red]Error:[/bold red] No input file provided and none found in Downloads."
81
82
  )
82
83
  raise typer.Exit(code=1)
83
84
 
84
- # Validate the zip
85
- if not validate_zip(config.zip_filepath):
86
- console.print(f"[bold red]Error:[/bold red] Invalid zip file: {config.zip_filepath}")
85
+ # Validate the input (basic check)
86
+ if not config.input_path.exists():
87
+ console.print(
88
+ f"[bold red]Error:[/bold red] Input path does not exist: {config.input_path}"
89
+ )
87
90
  raise typer.Exit(code=1)
88
91
 
89
92
  # Set default font if not set
@@ -19,6 +19,7 @@ class MarkdownConfig(BaseModel):
19
19
  """Configuration for markdown output."""
20
20
 
21
21
  latex_delimiters: Literal["default", "dollars"] = "default"
22
+ flavor: Literal["obsidian", "standard"] = "obsidian"
22
23
 
23
24
 
24
25
  class YAMLConfig(BaseModel):
@@ -53,27 +54,31 @@ class WordCloudConfig(BaseModel):
53
54
  """Configuration for word cloud generation."""
54
55
 
55
56
  font_path: Path | None = None
56
- colormap: str = "magma"
57
+ colormap: str = "RdYlBu"
57
58
  custom_stopwords: str = "use, file, "
59
+ exclude_programming_keywords: bool = True
58
60
  background_color: str | None = None
59
61
  mode: Literal["RGB", "RGBA"] = "RGBA"
60
62
  include_numbers: bool = False
61
- width: int = 1000
62
- height: int = 1000
63
+ width: int = 600
64
+ height: int = 600
63
65
 
64
66
 
65
67
  class GraphConfig(BaseModel):
66
68
  """Configuration for graph generation."""
67
69
 
68
- # Extensible for future graph options
69
- pass
70
+ color: str = "#4A90E2"
71
+ grid: bool = True
72
+ show_counts: bool = True
73
+ font_name: str = "Montserrat-Regular.ttf"
74
+ figsize: tuple[int, int] = (10, 6)
70
75
 
71
76
 
72
77
  class ConvovizConfig(BaseModel):
73
78
  """Main configuration for convoviz."""
74
79
 
75
- zip_filepath: Path | None = None
76
- output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT Data")
80
+ input_path: Path | None = None
81
+ output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
77
82
  message: MessageConfig = Field(default_factory=MessageConfig)
78
83
  conversation: ConversationConfig = Field(default_factory=ConversationConfig)
79
84
  wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)