convoviz 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,9 @@ from collections import defaultdict
  from datetime import UTC, datetime
  from pathlib import Path

+ import matplotlib.dates as mdates
  import matplotlib.font_manager as fm
+ from matplotlib.axes import Axes
  from matplotlib.figure import Figure
  from tqdm import tqdm

@@ -23,10 +25,10 @@ WEEKDAYS = [
  ]


- def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
+ def _setup_figure(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
  """Internal helper to setup a figure with common styling."""
- fig = Figure(figsize=config.figsize, dpi=300)
- ax = fig.add_subplot()
+ fig = Figure(figsize=config.figsize, dpi=config.dpi)
+ ax: Axes = fig.add_subplot()

  # Load custom font if possible
  font_path = get_asset_path(f"fonts/{config.font_name}")
@@ -35,12 +37,27 @@ def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
  )

  # Styling
+ fig.set_facecolor("white")
+ ax.set_facecolor("white")
  ax.spines["top"].set_visible(False)
  ax.spines["right"].set_visible(False)
  if config.grid:
  ax.grid(axis="y", linestyle="--", alpha=0.7)
+ ax.set_axisbelow(True)

- return fig, font_prop
+ return fig, ax, font_prop
+
+
+ def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
+ """Convert epoch timestamps into aware datetimes based on config."""
+ dt_utc = datetime.fromtimestamp(ts, UTC)
+ if config.timezone == "utc":
+ return dt_utc
+ return dt_utc.astimezone()
+
+
+ def _tz_label(config: GraphConfig) -> str:
+ return "UTC" if config.timezone == "utc" else "Local"


  def generate_week_barplot(
@@ -59,37 +76,37 @@ def generate_week_barplot(
  Matplotlib Figure object
  """
  cfg = config or get_default_config().graph
- dates = [datetime.fromtimestamp(ts, UTC) for ts in timestamps]
+ dates = [_ts_to_dt(ts, cfg) for ts in timestamps]

  weekday_counts: defaultdict[str, int] = defaultdict(int)
  for date in dates:
  weekday_counts[WEEKDAYS[date.weekday()]] += 1

- x = WEEKDAYS
+ x = list(range(len(WEEKDAYS)))
  y = [weekday_counts[day] for day in WEEKDAYS]

- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)

- bars = ax.bar(x, y, color=cfg.color, alpha=0.8)
+ bars = ax.bar(x, y, color=cfg.color, alpha=0.85)

  if cfg.show_counts:
  for bar in bars:
  height = bar.get_height()
- ax.text(
- bar.get_x() + bar.get_width() / 2.0,
- height,
- f"{int(height)}",
- ha="center",
- va="bottom",
- fontproperties=font_prop,
- )
+ if height > 0:
+ ax.text(
+ bar.get_x() + bar.get_width() / 2.0,
+ height,
+ f"{int(height)}",
+ ha="center",
+ va="bottom",
+ fontproperties=font_prop,
+ )

  ax.set_xlabel("Weekday", fontproperties=font_prop)
- ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+ ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
  ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
- ax.set_xticks(range(len(x)))
- ax.set_xticklabels(x, rotation=45, fontproperties=font_prop)
+ ax.set_xticks(x)
+ ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)

  for label in ax.get_yticklabels():
  label.set_fontproperties(font_prop)
@@ -114,7 +131,7 @@ def generate_hour_barplot(
  Matplotlib Figure object
  """
  cfg = config or get_default_config().graph
- dates = [datetime.fromtimestamp(ts, UTC) for ts in timestamps]
+ dates = [_ts_to_dt(ts, cfg) for ts in timestamps]

  hour_counts: dict[int, int] = dict.fromkeys(range(24), 0)
  for date in dates:
@@ -123,8 +140,7 @@ def generate_hour_barplot(
  x = [f"{i:02d}:00" for i in range(24)]
  y = [hour_counts[i] for i in range(24)]

- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)

  bars = ax.bar(range(24), y, color=cfg.color, alpha=0.8)

@@ -142,8 +158,8 @@ def generate_hour_barplot(
  fontsize=8,
  )

- ax.set_xlabel("Hour of Day (UTC)", fontproperties=font_prop)
- ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+ ax.set_xlabel(f"Hour of Day ({_tz_label(cfg)})", fontproperties=font_prop)
+ ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
  ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
  ax.set_xticks(range(24))
  ax.set_xticklabels(x, rotation=90, fontproperties=font_prop)
@@ -180,8 +196,7 @@ def generate_model_piechart(
  total = sum(model_counts.values())
  if total == 0:
  # Return empty figure or figure with "No Data"
- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)
  ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
  return fig

@@ -204,8 +219,7 @@ def generate_model_piechart(
  labels = [item[0] for item in sorted_items]
  sizes = [item[1] for item in sorted_items]

- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)

  colors = [
  "#4A90E2",
@@ -250,17 +264,16 @@ def generate_length_histogram(
  cfg = config or get_default_config().graph
  lengths = [conv.message_count("user") for conv in collection.conversations]

- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)

  if not lengths:
  ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
  return fig

- import numpy as np
-
  # Cap at 95th percentile to focus on most conversations
- cap = int(np.percentile(lengths, 95))
+ sorted_lengths = sorted(lengths)
+ idx = int(0.95 * (len(sorted_lengths) - 1))
+ cap = int(sorted_lengths[idx])
  cap = max(cap, 5) # Ensure at least some range

  # Filter lengths for the histogram plot, but keep the data correct
@@ -306,10 +319,10 @@ def generate_monthly_activity_barplot(
  x = [m.strftime("%b '%y") for m in sorted_months]
  y = [len(month_groups[m].timestamps("user")) for m in sorted_months]

- fig, font_prop = _setup_figure(cfg)
- ax = fig.gca()
+ fig, ax, font_prop = _setup_figure(cfg)

- bars = ax.bar(x, y, color=cfg.color, alpha=0.8)
+ positions = list(range(len(x)))
+ bars = ax.bar(positions, y, color=cfg.color, alpha=0.85)

  if cfg.show_counts:
  for bar in bars:
@@ -326,10 +339,12 @@ def generate_monthly_activity_barplot(
  )

  ax.set_xlabel("Month", fontproperties=font_prop)
- ax.set_ylabel("Total Prompt Count", fontproperties=font_prop)
+ ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
  ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
- ax.set_xticks(range(len(x)))
- ax.set_xticklabels(x, rotation=45, fontproperties=font_prop)
+ tick_step = max(1, len(positions) // 12) # show ~12 labels max
+ shown = positions[::tick_step] if positions else []
+ ax.set_xticks(shown)
+ ax.set_xticklabels([x[i] for i in shown], rotation=45, fontproperties=font_prop)

  for label in ax.get_yticklabels():
  label.set_fontproperties(font_prop)
@@ -338,6 +353,45 @@ def generate_monthly_activity_barplot(
  return fig


+ def generate_daily_activity_lineplot(
+ collection: ConversationCollection,
+ config: GraphConfig | None = None,
+ ) -> Figure:
+ """Create a line chart showing user prompt count per day."""
+ cfg = config or get_default_config().graph
+ timestamps = collection.timestamps("user")
+
+ fig, ax, font_prop = _setup_figure(cfg)
+ if not timestamps:
+ ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+ return fig
+
+ counts: defaultdict[datetime, int] = defaultdict(int)
+ for ts in timestamps:
+ dt = _ts_to_dt(ts, cfg)
+ day = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+ counts[day] += 1
+
+ days = sorted(counts.keys())
+ values = [counts[d] for d in days]
+
+ x = mdates.date2num(days)
+ ax.plot(x, values, color=cfg.color, linewidth=2.0)
+ ax.fill_between(x, values, color=cfg.color, alpha=0.15)
+ locator = mdates.AutoDateLocator()
+ ax.xaxis.set_major_locator(locator)
+ ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
+ ax.set_title("Daily Activity History", fontproperties=font_prop, fontsize=16, pad=20)
+ ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
+ ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+
+ for label in ax.get_xticklabels() + ax.get_yticklabels():
+ label.set_fontproperties(font_prop)
+
+ fig.tight_layout()
+ return fig
+
+
  def generate_summary_graphs(
  collection: ConversationCollection,
  output_dir: Path,
@@ -368,6 +422,10 @@ def generate_summary_graphs(
  fig_activity = generate_monthly_activity_barplot(collection, config)
  fig_activity.savefig(summary_dir / "monthly_activity.png")

+ # Daily activity
+ fig_daily = generate_daily_activity_lineplot(collection, config)
+ fig_daily.savefig(summary_dir / "daily_activity.png")
+

  def generate_graphs(
  collection: ConversationCollection,
@@ -62,7 +62,7 @@ def load_nltk_stopwords() -> frozenset[str]:
  return frozenset(words)


- def parse_custom_stopwords(stopwords_str: str) -> set[str]:
+ def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
  """Parse a comma-separated string of custom stopwords.

  Args:
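The timezone handling added above hinges on one standard-library behavior: an epoch timestamp parsed as an aware UTC datetime can be shifted to the machine's local offset with `astimezone()`. A minimal standalone sketch of that conversion (hypothetical function name, not the package's helper):

```python
# Standalone sketch of the UTC-vs-local conversion used by the new graph
# helpers. Requires Python 3.11+ for `datetime.UTC`; the package targets 3.12.
from datetime import UTC, datetime

def ts_to_dt(ts: float, timezone: str = "local") -> datetime:
    """Convert an epoch timestamp to an aware datetime, in UTC or local time."""
    dt_utc = datetime.fromtimestamp(ts, UTC)
    # astimezone() with no argument re-expresses the same instant in the
    # machine's local timezone.
    return dt_utc if timezone == "utc" else dt_utc.astimezone()

print(ts_to_dt(0.0, "utc"))    # 1970-01-01 00:00:00+00:00
print(ts_to_dt(0.0, "local"))  # same instant, shown with the local UTC offset
```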
convoviz/config.py CHANGED
@@ -19,7 +19,7 @@ class MarkdownConfig(BaseModel):
  """Configuration for markdown output."""

  latex_delimiters: Literal["default", "dollars"] = "default"
- flavor: Literal["obsidian", "standard"] = "obsidian"
+ flavor: Literal["obsidian", "standard"] = "standard"


  class YAMLConfig(BaseModel):
@@ -72,6 +72,8 @@ class GraphConfig(BaseModel):
  show_counts: bool = True
  font_name: str = "Montserrat-Regular.ttf"
  figsize: tuple[int, int] = (10, 6)
+ dpi: int = 300
+ timezone: Literal["utc", "local"] = "local"


  class ConvovizConfig(BaseModel):
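The two new `GraphConfig` fields are ordinary pydantic fields with defaults, so older configs keep validating and an unsupported timezone value is rejected up front. A small stand-in model (not the package's class) sketching that behavior:

```python
# Stand-in model illustrating the new dpi/timezone options; the real
# GraphConfig has additional fields not shown here.
from typing import Literal

from pydantic import BaseModel, ValidationError

class GraphConfigSketch(BaseModel):
    dpi: int = 300
    timezone: Literal["utc", "local"] = "local"

print(GraphConfigSketch())                       # dpi=300 timezone='local'
print(GraphConfigSketch(dpi=150, timezone="utc"))

try:
    GraphConfigSketch(timezone="Europe/Paris")   # not one of the allowed literals
except ValidationError:
    print("timezone must be 'utc' or 'local'")
```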
convoviz/interactive.py CHANGED
@@ -1,13 +1,14 @@
  """Interactive configuration prompts using questionary."""

  from pathlib import Path
+ from typing import Literal, Protocol, cast

  from questionary import Choice, Style, checkbox, select
  from questionary import path as qst_path
  from questionary import text as qst_text

  from convoviz.config import ConvovizConfig, get_default_config
- from convoviz.io.loaders import find_latest_zip
+ from convoviz.io.loaders import find_latest_zip, validate_zip
  from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header

  CUSTOM_STYLE = Style(
@@ -25,6 +26,42 @@ CUSTOM_STYLE = Style(
  ]
  )

+ class _QuestionaryPrompt[T](Protocol):
+ def ask(self) -> T | None: ...
+
+
+ def _ask_or_cancel[T](prompt: _QuestionaryPrompt[T]) -> T:
+ """Ask a questionary prompt; treat Ctrl+C/Ctrl+D as cancelling the run.
+
+ questionary's `.ask()` returns `None` on cancellation (Ctrl+C / Ctrl+D). We
+ convert that to `KeyboardInterrupt` so callers can abort the whole
+ interactive session with a single Ctrl+C.
+ """
+
+ result = prompt.ask()
+ if result is None:
+ raise KeyboardInterrupt
+ return result
+
+
+ def _validate_input_path(raw: str) -> bool | str:
+ path = Path(raw)
+ if not path.exists():
+ return "Path must exist"
+
+ if path.is_dir():
+ if (path / "conversations.json").exists():
+ return True
+ return "Directory must contain conversations.json"
+
+ if path.suffix.lower() == ".json":
+ return True
+
+ if path.suffix.lower() == ".zip":
+ return True if validate_zip(path) else "ZIP must contain conversations.json"
+
+ return "Input must be a .zip, a .json, or a directory containing conversations.json"
+

  def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
  """Run interactive prompts to configure convoviz.
@@ -48,22 +85,26 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv

  # Prompt for input path
  input_default = str(config.input_path) if config.input_path else ""
- input_result = qst_path(
- "Enter the path to the zip file or extracted directory:",
- default=input_default,
- validate=lambda p: Path(p).exists() or "Path must exist",
- style=CUSTOM_STYLE,
- ).ask()
+ input_result: str = _ask_or_cancel(
+ qst_path(
+ "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
+ default=input_default,
+ validate=_validate_input_path,
+ style=CUSTOM_STYLE,
+ )
+ )

  if input_result:
  config.input_path = Path(input_result)

  # Prompt for output folder
- output_result = qst_path(
- "Enter the path to the output folder:",
- default=str(config.output_folder),
- style=CUSTOM_STYLE,
- ).ask()
+ output_result: str = _ask_or_cancel(
+ qst_path(
+ "Enter the path to the output folder:",
+ default=str(config.output_folder),
+ style=CUSTOM_STYLE,
+ )
+ )

  if output_result:
  config.output_folder = Path(output_result)
@@ -72,34 +113,46 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
  headers = config.message.author_headers
  for role in ["system", "user", "assistant", "tool"]:
  current = getattr(headers, role)
- result = qst_text(
- f"Enter the message header for '{role}':",
- default=current,
- validate=lambda t: validate_header(t)
- or "Must be a valid markdown header (e.g., # Title)",
- style=CUSTOM_STYLE,
- ).ask()
+ result: str = _ask_or_cancel(
+ qst_text(
+ f"Enter the message header for '{role}':",
+ default=current,
+ validate=lambda t: validate_header(t)
+ or "Must be a valid markdown header (e.g., # Title)",
+ style=CUSTOM_STYLE,
+ )
+ )
  if result:
  setattr(headers, role, result)

  # Prompt for LaTeX delimiters
- latex_result = select(
- "Select the LaTeX math delimiters:",
- choices=["default", "dollars"],
- default=config.conversation.markdown.latex_delimiters,
- style=CUSTOM_STYLE,
- ).ask()
+ latex_result = cast(
+ Literal["default", "dollars"],
+ _ask_or_cancel(
+ select(
+ "Select the LaTeX math delimiters:",
+ choices=["default", "dollars"],
+ default=config.conversation.markdown.latex_delimiters,
+ style=CUSTOM_STYLE,
+ )
+ ),
+ )

  if latex_result:
  config.conversation.markdown.latex_delimiters = latex_result

  # Prompt for markdown flavor
- flavor_result = select(
- "Select the markdown flavor:",
- choices=["obsidian", "standard"],
- default=config.conversation.markdown.flavor,
- style=CUSTOM_STYLE,
- ).ask()
+ flavor_result = cast(
+ Literal["obsidian", "standard"],
+ _ask_or_cancel(
+ select(
+ "Select the markdown flavor:",
+ choices=["obsidian", "standard"],
+ default=config.conversation.markdown.flavor,
+ style=CUSTOM_STYLE,
+ )
+ ),
+ )

  if flavor_result:
  config.conversation.markdown.flavor = flavor_result
@@ -122,27 +175,28 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
  ]
  ]

- selected = checkbox(
- "Select YAML metadata headers to include:",
- choices=yaml_choices,
- style=CUSTOM_STYLE,
- ).ask()
-
- if selected is not None:
- selected_set = set(selected)
- for field_name in [
- "title",
- "tags",
- "chat_link",
- "create_time",
- "update_time",
- "model",
- "used_plugins",
- "message_count",
- "content_types",
- "custom_instructions",
- ]:
- setattr(yaml_config, field_name, field_name in selected_set)
+ selected: list[str] = _ask_or_cancel(
+ checkbox(
+ "Select YAML metadata headers to include:",
+ choices=yaml_choices,
+ style=CUSTOM_STYLE,
+ )
+ )
+
+ selected_set = set(selected)
+ for field_name in [
+ "title",
+ "tags",
+ "chat_link",
+ "create_time",
+ "update_time",
+ "model",
+ "used_plugins",
+ "message_count",
+ "content_types",
+ "custom_instructions",
+ ]:
+ setattr(yaml_config, field_name, field_name in selected_set)

  # Prompt for font
  available_fonts = font_names()
@@ -150,12 +204,14 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
  current_font = (
  config.wordcloud.font_path.stem if config.wordcloud.font_path else available_fonts[0]
  )
- font_result = select(
- "Select the font for word clouds:",
- choices=available_fonts,
- default=current_font if current_font in available_fonts else available_fonts[0],
- style=CUSTOM_STYLE,
- ).ask()
+ font_result: str = _ask_or_cancel(
+ select(
+ "Select the font for word clouds:",
+ choices=available_fonts,
+ default=current_font if current_font in available_fonts else available_fonts[0],
+ style=CUSTOM_STYLE,
+ )
+ )

  if font_result:
  config.wordcloud.font_path = font_path(font_result)
@@ -163,26 +219,29 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
  # Prompt for colormap
  available_colormaps = colormaps()
  if available_colormaps:
- colormap_result = select(
- "Select the color theme for word clouds:",
- choices=available_colormaps,
- default=config.wordcloud.colormap
- if config.wordcloud.colormap in available_colormaps
- else available_colormaps[0],
- style=CUSTOM_STYLE,
- ).ask()
+ colormap_result: str = _ask_or_cancel(
+ select(
+ "Select the color theme for word clouds:",
+ choices=available_colormaps,
+ default=config.wordcloud.colormap
+ if config.wordcloud.colormap in available_colormaps
+ else available_colormaps[0],
+ style=CUSTOM_STYLE,
+ )
+ )

  if colormap_result:
  config.wordcloud.colormap = colormap_result

  # Prompt for custom stopwords
- stopwords_result = qst_text(
- "Enter custom stopwords (comma-separated):",
- default=config.wordcloud.custom_stopwords,
- style=CUSTOM_STYLE,
- ).ask()
-
- if stopwords_result is not None:
- config.wordcloud.custom_stopwords = stopwords_result
+ stopwords_result: str = _ask_or_cancel(
+ qst_text(
+ "Enter custom stopwords (comma-separated):",
+ default=config.wordcloud.custom_stopwords,
+ style=CUSTOM_STYLE,
+ )
+ )
+
+ config.wordcloud.custom_stopwords = stopwords_result

  return config
convoviz/io/loaders.py CHANGED
@@ -1,6 +1,6 @@
  """Loading functions for conversations and collections."""

- from pathlib import Path
+ from pathlib import Path, PurePosixPath
  from zipfile import ZipFile

  from orjson import loads
@@ -9,6 +9,27 @@ from convoviz.exceptions import InvalidZipError
  from convoviz.models import Conversation, ConversationCollection


+ def _is_safe_zip_member_name(name: str) -> bool:
+ """Return True if a ZIP entry name is safe to extract.
+
+ This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
+ separators and rejects absolute paths, drive-letter paths, and ``..`` parts.
+ """
+ normalized = name.replace("\\", "/")
+ member_path = PurePosixPath(normalized)
+
+ # Absolute paths (e.g. "/etc/passwd") or empty names
+ if not normalized or member_path.is_absolute():
+ return False
+
+ # Windows drive letters / UNC-style prefixes stored in the archive
+ first = member_path.parts[0] if member_path.parts else ""
+ if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
+ return False
+
+ return ".." not in member_path.parts
+
+
  def extract_archive(filepath: Path) -> Path:
  """Extract a ZIP file and return the extraction folder path.

@@ -28,15 +49,17 @@ def extract_archive(filepath: Path) -> Path:

  with ZipFile(filepath) as zf:
  for member in zf.infolist():
- # Check for path traversal (Zip-Slip)
- member_path = Path(member.filename)
- if member_path.is_absolute() or ".." in member_path.parts:
+ # Check for path traversal (Zip-Slip) in an OS-agnostic way.
+ # ZIP files are typically POSIX-path-like, but malicious archives can
+ # embed backslashes or drive-letter tricks.
+ if not _is_safe_zip_member_name(member.filename):
  raise InvalidZipError(
  str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
  )

  # Additional check using resolved paths
- target_path = (folder / member.filename).resolve()
+ normalized = member.filename.replace("\\", "/")
+ target_path = (folder / normalized).resolve()
  if not target_path.is_relative_to(folder.resolve()):
  raise InvalidZipError(
  str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
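To make the OS-agnostic Zip-Slip check concrete, here is a slightly simplified standalone re-implementation of the same idea with a few example member names. It is an illustration of the rule described in the docstring above, not the package's code:

```python
# Illustration only: reject ZIP member names that could escape the extraction
# folder, treating both "/" and "\" as separators.
from pathlib import PurePosixPath

def is_safe_member(name: str) -> bool:
    normalized = name.replace("\\", "/")
    member = PurePosixPath(normalized)
    if not normalized or member.is_absolute():
        return False
    first = member.parts[0] if member.parts else ""
    if first.endswith(":") or first.startswith("//"):
        return False
    return ".." not in member.parts

assert is_safe_member("conversations.json")
assert is_safe_member("assets/image.png")
assert not is_safe_member("../outside.txt")                      # parent traversal
assert not is_safe_member("/etc/passwd")                         # absolute path
assert not is_safe_member("..\\..\\windows\\system32\\evil.dll") # backslash traversal
assert not is_safe_member("C:\\Users\\victim\\startup.bat")      # drive-letter prefix
```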
@@ -37,14 +37,20 @@ class ConversationCollection(BaseModel):
  def update(self, other: "ConversationCollection") -> None:
  """Merge another collection into this one.

- Only updates if the other collection has newer content.
+ Merges per-conversation, keeping the newest version when IDs collide.
+
+ Note: We intentionally do *not* gate on ``other.last_updated`` because
+ "new" conversations can still have older timestamps than the most recent
+ conversation in this collection (e.g. bookmarklet downloads).
  """
- if other.last_updated <= self.last_updated:
- return
+ merged: dict[str, Conversation] = dict(self.index)
+
+ for conv_id, incoming in other.index.items():
+ existing = merged.get(conv_id)
+ if existing is None or incoming.update_time > existing.update_time:
+ merged[conv_id] = incoming

- merged_index = self.index
- merged_index.update(other.index)
- self.conversations = list(merged_index.values())
+ self.conversations = list(merged.values())

  def add(self, conversation: Conversation) -> None:
  """Add a conversation to the collection."""
@@ -36,22 +36,29 @@ class Conversation(BaseModel):

  @property
  def all_message_nodes(self) -> list[Node]:
- """Get all nodes that have messages (including all branches)."""
+ """Get all nodes that have messages (including hidden/internal ones)."""
  return [node for node in self.node_mapping.values() if node.has_message]

- def nodes_by_author(self, *authors: AuthorRole) -> list[Node]:
+ @property
+ def visible_message_nodes(self) -> list[Node]:
+ """Get all nodes that have *visible* (non-hidden) messages."""
+ return [
+ node
+ for node in self.node_mapping.values()
+ if node.has_message and node.message is not None and not node.message.is_hidden
+ ]
+
+ def nodes_by_author(self, *authors: AuthorRole, include_hidden: bool = False) -> list[Node]:
  """Get nodes with messages from specified authors.

  Args:
  *authors: Author roles to filter by. Defaults to ("user",) if empty.
+ include_hidden: Whether to include hidden/internal messages.
  """
  if not authors:
  authors = ("user",)
- return [
- node
- for node in self.all_message_nodes
- if node.message and node.message.author.role in authors
- ]
+ nodes = self.all_message_nodes if include_hidden else self.visible_message_nodes
+ return [node for node in nodes if node.message and node.message.author.role in authors]

  @property
  def leaf_count(self) -> int:
@@ -65,9 +72,13 @@ class Conversation(BaseModel):

  @property
  def content_types(self) -> list[str]:
- """Get all unique content types in the conversation."""
+ """Get all unique content types in the conversation (excluding hidden messages)."""
  return list(
- {node.message.content.content_type for node in self.all_message_nodes if node.message}
+ {
+ node.message.content.content_type
+ for node in self.visible_message_nodes
+ if node.message
+ }
  )

  def message_count(self, *authors: AuthorRole) -> int:
@@ -98,12 +109,10 @@ class Conversation(BaseModel):
  def custom_instructions(self) -> dict[str, str]:
  """Get custom instructions used for this conversation."""
  system_nodes = self.nodes_by_author("system")
- if len(system_nodes) < 2:
- return {}
-
- context_message = system_nodes[1].message
- if context_message and context_message.metadata.is_user_system_message:
- return context_message.metadata.user_context_message_data or {}
+ for node in system_nodes:
+ context_message = node.message
+ if context_message and context_message.metadata.is_user_system_message:
+ return context_message.metadata.user_context_message_data or {}
  return {}

  def timestamps(self, *authors: AuthorRole) -> list[float]:
@@ -6,7 +6,7 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
  from datetime import datetime
  from typing import Any, Literal

- from pydantic import BaseModel, ConfigDict
+ from pydantic import BaseModel, ConfigDict, Field

  from convoviz.exceptions import MessageContentError

@@ -18,7 +18,7 @@ class MessageAuthor(BaseModel):

  role: AuthorRole
  name: str | None = None
- metadata: dict[str, Any] = {}
+ metadata: dict[str, Any] = Field(default_factory=dict)


  class MessageContent(BaseModel):
@@ -55,8 +55,8 @@ class Message(BaseModel):
  status: str
  end_turn: bool | None = None
  weight: float
- metadata: MessageMetadata
- recipient: str
+ metadata: MessageMetadata = Field(default_factory=MessageMetadata)
+ recipient: str | None = None

  @property
  def images(self) -> list[str]:
@@ -117,3 +117,41 @@ class Message(BaseModel):
  return bool(
  self.content.parts or self.content.text is not None or self.content.result is not None
  )
+
+ @property
+ def is_empty(self) -> bool:
+ """Check if the message is effectively empty (no text, no images)."""
+ try:
+ return not self.text.strip() and not self.images
+ except MessageContentError:
+ return True
+
+ @property
+ def is_hidden(self) -> bool:
+ """Check if message should be hidden in export.
+
+ Hidden if:
+ 1. It is empty (no text, no images).
+ 2. It is an internal system message (not custom instructions).
+ 3. It is a browser tool output (intermediate search steps).
+ """
+ if self.is_empty:
+ return True
+
+ # Hide internal system messages
+ if self.author.role == "system":
+ # Only show if explicitly marked as user system message (Custom Instructions)
+ return not self.metadata.is_user_system_message
+
+ # Hide browser tool outputs (usually intermediate search steps)
+ if self.author.role == "tool" and self.author.name == "browser":
+ return True
+
+ # Hide assistant calls to browser tool (e.g. "search(...)") or code interpreter
+ if self.author.role == "assistant" and (
+ self.recipient == "browser" or self.content.content_type == "code"
+ ):
+ return True
+
+ # Hide browsing status messages
+ return self.content.content_type == "tether_browsing_display"
convoviz/pipeline.py CHANGED
@@ -19,6 +19,18 @@ from convoviz.io.writers import save_collection, save_custom_instructions
  console = Console()


+ def _safe_uri(path: Path) -> str:
+ """Best-effort URI for printing.
+
+ ``Path.as_uri()`` requires an absolute path; users often provide relative
+ output paths, so we resolve first and fall back to string form.
+ """
+ try:
+ return path.resolve().as_uri()
+ except Exception:
+ return str(path)
+
+
  def run_pipeline(config: ConvovizConfig) -> None:
  """Run the main processing pipeline.

@@ -72,8 +84,14 @@ def run_pipeline(config: ConvovizConfig) -> None:
  managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
  for d in managed_dirs:
  sub_dir = output_folder / d
- if sub_dir.exists() and sub_dir.is_dir():
- rmtree(sub_dir)
+ if sub_dir.exists():
+ # Never follow symlinks; just unlink them.
+ if sub_dir.is_symlink():
+ sub_dir.unlink()
+ elif sub_dir.is_dir():
+ rmtree(sub_dir)
+ else:
+ sub_dir.unlink()
  sub_dir.mkdir(exist_ok=True)

  # Clean specific files we manage
@@ -81,7 +99,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
  for f in managed_files:
  managed_file = output_folder / f
  if managed_file.exists():
- managed_file.unlink()
+ if managed_file.is_symlink() or managed_file.is_file():
+ managed_file.unlink()
+ elif managed_file.is_dir():
+ rmtree(managed_file)
+ else:
+ managed_file.unlink()

  # Save markdown files
  markdown_folder = output_folder / "Markdown"
@@ -94,7 +117,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
  )
  console.print(
  f"\nDone [bold green]✅[/bold green] ! "
- f"Check the output [bold blue]📄[/bold blue] here: {markdown_folder.as_uri()} 🔗\n"
+ f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
  )

  # Generate graphs
@@ -108,7 +131,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
  )
  console.print(
  f"\nDone [bold green]✅[/bold green] ! "
- f"Check the output [bold blue]📈[/bold blue] here: {graph_folder.as_uri()} 🔗\n"
+ f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
  )

  # Generate word clouds
@@ -122,7 +145,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
  )
  console.print(
  f"\nDone [bold green]✅[/bold green] ! "
- f"Check the output [bold blue]🔡☁️[/bold blue] here: {wordcloud_folder.as_uri()} 🔗\n"
+ f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
  )

  # Save custom instructions
@@ -131,12 +154,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
  save_custom_instructions(collection, instructions_path)
  console.print(
  f"\nDone [bold green]✅[/bold green] ! "
- f"Check the output [bold blue]📝[/bold blue] here: {instructions_path.as_uri()} 🔗\n"
+ f"Check the output [bold blue]📝[/bold blue] here: {_safe_uri(instructions_path)} 🔗\n"
  )

  console.print(
  "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
- f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder.as_uri()} 🔗\n\n"
+ f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
  "I hope you enjoy the outcome 🤞.\n\n"
  "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
  "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
@@ -4,6 +4,7 @@ import re
  from collections.abc import Callable

  from convoviz.config import AuthorHeaders, ConversationConfig
+ from convoviz.exceptions import MessageContentError
  from convoviz.models import Conversation, Node
  from convoviz.renderers.yaml import render_yaml_header

@@ -80,7 +81,7 @@ def render_message_header(role: str, headers: AuthorHeaders) -> str:
  return header_map.get(role, f"### {role.title()}")


- def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "obsidian") -> str:
+ def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "standard") -> str:
  """Render the header section of a node.

  Includes the node ID, parent link, and message author header.
@@ -112,7 +113,7 @@ def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "obsidi
  return "\n".join(parts) + "\n"


- def render_node_footer(node: Node, flavor: str = "obsidian") -> str:
+ def render_node_footer(node: Node, flavor: str = "standard") -> str:
  """Render the footer section of a node with child links.

  Args:
@@ -137,7 +138,7 @@ def render_node(
  headers: AuthorHeaders,
  use_dollar_latex: bool = False,
  asset_resolver: Callable[[str], str | None] | None = None,
- flavor: str = "obsidian",
+ flavor: str = "standard",
  ) -> str:
  """Render a complete node as markdown.

@@ -154,32 +155,67 @@ def render_node(
  if node.message is None:
  return ""

+ if node.message.is_hidden:
+ return ""
+
  header = render_node_header(node, headers, flavor=flavor)

  # Get and process content
  try:
- content = close_code_blocks(node.message.text)
- content = f"\n{content}\n" if content else ""
- if use_dollar_latex:
- content = replace_latex_delimiters(content)
-
- # Append images if resolver is provided and images exist
- if asset_resolver and node.message.images:
- for image_id in node.message.images:
- rel_path = asset_resolver(image_id)
- if rel_path:
- # Using standard markdown image syntax.
- # Obsidian handles this well.
- content += f"\n![Image]({rel_path})\n"
-
- except Exception:
- content = ""
+ text = node.message.text
+ except MessageContentError:
+ # Some message types only contain non-text parts; those still may have images.
+ text = ""
+
+ content = close_code_blocks(text)
+ content = f"\n{content}\n" if content else ""
+ if use_dollar_latex:
+ content = replace_latex_delimiters(content)
+
+ # Append images if resolver is provided and images exist
+ if asset_resolver and node.message.images:
+ for image_id in node.message.images:
+ rel_path = asset_resolver(image_id)
+ if rel_path:
+ # Using standard markdown image syntax.
+ # Obsidian handles this well.
+ content += f"\n![Image]({rel_path})\n"

  footer = render_node_footer(node, flavor=flavor)

  return f"\n{header}{content}{footer}\n---\n"


+ def _ordered_nodes(conversation: Conversation) -> list[Node]:
+ """Return nodes in a deterministic depth-first traversal order.
+
+ ChatGPT exports store nodes in a mapping; dict iteration order is not a
+ reliable semantic ordering. For markdown output, we traverse from roots.
+ """
+ mapping = conversation.node_mapping
+ roots = sorted((n for n in mapping.values() if n.parent is None), key=lambda n: n.id)
+
+ visited: set[str] = set()
+ ordered: list[Node] = []
+
+ def dfs(node: Node) -> None:
+ if node.id in visited:
+ return
+ visited.add(node.id)
+ ordered.append(node)
+ for child in node.children_nodes:
+ dfs(child)
+
+ for root in roots:
+ dfs(root)
+
+ # Include any disconnected/orphan nodes deterministically at the end.
+ for node in sorted(mapping.values(), key=lambda n: n.id):
+ dfs(node)
+
+ return ordered
+
+
  def render_conversation(
  conversation: Conversation,
  config: ConversationConfig,
@@ -203,8 +239,8 @@ def render_conversation(
  # Start with YAML header
  markdown = render_yaml_header(conversation, config.yaml)

- # Render all message nodes
- for node in conversation.all_message_nodes:
+ # Render message nodes in a deterministic traversal order.
+ for node in _ordered_nodes(conversation):
  if node.message:
  markdown += render_node(
  node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
@@ -1,8 +1,83 @@
  """YAML frontmatter rendering for conversations."""

+ from __future__ import annotations
+
+ import re
+ from datetime import datetime
+
  from convoviz.config import YAMLConfig
  from convoviz.models import Conversation

+ _TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")
+
+
+ def _to_yaml_scalar(value: object) -> str:
+ if value is None:
+ return "null"
+ if isinstance(value, bool):
+ return "true" if value else "false"
+ if isinstance(value, (int, float)):
+ return str(value)
+ if isinstance(value, datetime):
+ # Frontmatter consumers generally expect ISO 8601 strings
+ return f'"{value.isoformat()}"'
+ if isinstance(value, str):
+ if "\n" in value:
+ # Multiline: use a block scalar
+ indented = "\n".join(f" {line}" for line in value.splitlines())
+ return f"|-\n{indented}"
+ escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+ return f'"{escaped}"'
+
+ # Fallback: stringify and quote
+ escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
+ return f'"{escaped}"'
+
+
+ def _to_yaml(value: object, indent: int = 0) -> str:
+ pad = " " * indent
+
+ if isinstance(value, dict):
+ lines: list[str] = []
+ for k, v in value.items():
+ key = str(k)
+ if isinstance(v, (dict, list)):
+ lines.append(f"{pad}{key}:")
+ lines.append(_to_yaml(v, indent=indent + 2))
+ else:
+ scalar = _to_yaml_scalar(v)
+ # Block scalars already include newline + indentation
+ if scalar.startswith("|-"):
+ lines.append(f"{pad}{key}: {scalar.splitlines()[0]}")
+ lines.extend(f"{pad}{line}" for line in scalar.splitlines()[1:])
+ else:
+ lines.append(f"{pad}{key}: {scalar}")
+ return "\n".join(lines)
+
+ if isinstance(value, list):
+ lines = []
+ for item in value:
+ if isinstance(item, (dict, list)):
+ lines.append(f"{pad}-")
+ lines.append(_to_yaml(item, indent=indent + 2))
+ else:
+ lines.append(f"{pad}- {_to_yaml_scalar(item)}")
+ return "\n".join(lines)
+
+ return f"{pad}{_to_yaml_scalar(value)}"
+
+
+ def _default_tags(conversation: Conversation) -> list[str]:
+ tags: list[str] = ["chatgpt"]
+ tags.extend(conversation.plugins)
+ # Normalize to a tag-friendly form
+ normalized: list[str] = []
+ for t in tags:
+ t2 = _TAG_SAFE_RE.sub("-", t.strip().lower()).strip("-")
+ if t2 and t2 not in normalized:
+ normalized.append(t2)
+ return normalized
+

  def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
  """Render the YAML frontmatter for a conversation.
@@ -18,6 +93,8 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:

  if config.title:
  yaml_fields["title"] = conversation.title
+ if config.tags:
+ yaml_fields["tags"] = _default_tags(conversation)
  if config.chat_link:
  yaml_fields["chat_link"] = conversation.url
  if config.create_time:
@@ -38,5 +115,5 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
  if not yaml_fields:
  return ""

- lines = [f"{key}: {value}" for key, value in yaml_fields.items()]
- return f"---\n{chr(10).join(lines)}\n---\n"
+ body = _to_yaml(yaml_fields)
+ return f"---\n{body}\n---\n"
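The hand-rolled emitter above targets ordinary frontmatter: quoted strings, real YAML lists for tags, and block scalars for multiline values. A rough illustration of the intended output shape (simplified; the package's exact indentation and escaping may differ):

```python
# Minimal frontmatter emitter for illustration; hypothetical field values.
fields = {
    "title": "Zip-Slip: what & why?",  # the colon is why quoting matters
    "tags": ["chatgpt", "browser"],
    "message_count": 12,
}

def to_scalar(value: object) -> str:
    if isinstance(value, str):
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'
    return str(value)

lines = []
for key, value in fields.items():
    if isinstance(value, list):
        lines.append(f"{key}:")
        lines.extend(f"- {to_scalar(item)}" for item in value)
    else:
        lines.append(f"{key}: {to_scalar(value)}")

# ---
# title: "Zip-Slip: what & why?"
# tags:
# - "chatgpt"
# - "browser"
# message_count: 12
# ---
print("---\n" + "\n".join(lines) + "\n---")
```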
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: convoviz
- Version: 0.2.3
+ Version: 0.2.5
  Summary: Get analytics and visualizations on your ChatGPT data!
  Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
  Author: Mohamed Cheikh Sidiya
@@ -24,7 +24,7 @@ Requires-Python: >=3.12
  Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
  Description-Content-Type: text/markdown

- # Convoviz 📊: Visualize your entire ChatGPT data !
+ # Convoviz 📊: Visualize your entire ChatGPT data

  Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.

@@ -68,7 +68,7 @@ or pipx:
  pipx install convoviz
  ```

- ### 3. Run the Script 🏃‍♂️
+ ### 3. Run the tool 🏃‍♂️

  Simply run the command and follow the prompts:

@@ -81,9 +81,18 @@ convoviz
  You can provide arguments directly to skip the prompts:

  ```bash
- convoviz --zip path/to/your/export.zip --output path/to/output/folder
+ convoviz --input path/to/your/export.zip --output path/to/output/folder
  ```

+ Inputs can be any of:
+ - A ChatGPT export ZIP (downloaded from OpenAI)
+ - An extracted export directory containing `conversations.json`
+ - A `conversations.json` file directly
+
+ Notes:
+ - `--zip` / `-z` is kept as an alias for `--input` for convenience.
+ - You can force non-interactive mode with `--no-interactive`.
+

  For more options, run:

@@ -118,4 +127,20 @@ It was also a great opportunity to learn more about Python and type annotations.

  It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.

- I'm working on automating it to add new conversations and updating old ones. Had some luck with a JavaScript bookmarklet, still ironing it out tho.
+ ### Offline / reproducible runs
+
+ Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+ ```bash
+ python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ If you’re using `uv` without a global install, you can run:
+
+ ```bash
+ uv run python -c "import nltk; nltk.download('stopwords')"
+ ```
+
+ ### Bookmarklet
+
+ There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
@@ -1,8 +1,8 @@
  convoviz/__init__.py,sha256=bQLCHO2U9EyMTGqNgsYiCtBQKTKNj4iIM3-TwIkrnRY,612
  convoviz/__main__.py,sha256=1qiGW13_SgL7wJi8iioIN-AAHGkNGnEl5q_RcPUrI0s,143
  convoviz/analysis/__init__.py,sha256=FxgH5JJpyypiLJpMQn_HlM51jnb8lQdP63_C_W3Dlx4,241
- convoviz/analysis/graphs.py,sha256=zzM7Fc39e3DG0f1KRYH7Nkzu0ULBYehgOWyhsYGrgm0,12604
- convoviz/analysis/wordcloud.py,sha256=UflTUUFZQmivNI1sn3OpaYV-BaR_BAbFwn89nUbkvIk,4666
+ convoviz/analysis/graphs.py,sha256=3CV4yhFwfUYb5-CXtq4D-r_vf0jn5cxDXwaPu1P8M8g,14928
+ convoviz/analysis/wordcloud.py,sha256=ZnbA_-rcXHwXIny_xbudfJDQbIuPT7urNFfHcx6QWxQ,4673
  convoviz/assets/colormaps.txt,sha256=59TSGz428AxY14AEvymAH2IJ2RT9Mlp7Sy0N12NEdXQ,108
  convoviz/assets/fonts/AmaticSC-Regular.ttf,sha256=83clh7a3urnTLud0_yZofuIb6BdyC2LMI9jhE6G2LvU,142696
  convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf,sha256=fnrj5_N_SlY2Lj3Ehqz5aKECPZVJlJAflgsOU94_qIM,37756
@@ -37,25 +37,25 @@ convoviz/assets/fonts/YsabeauSC-Thin.ttf,sha256=hZGOZNTRrxbiUPE2VDeLbtnaRwkMOBaV
  convoviz/assets/fonts/Zeyada-Regular.ttf,sha256=fKhkrp9VHt_3Aw8JfkfkPeC2j3CilLWuPUudzBeawPQ,57468
  convoviz/assets/stopwords.txt,sha256=7_ywpxsKYOj3U5CZTh9lP4GqbbkZLMabSOjKAXFk6Wc,539
  convoviz/cli.py,sha256=8HNn-6kmDN8ECb0BspvjeGa_636SQPDffpM0yINgNII,3463
- convoviz/config.py,sha256=CqF0nq0FfIom1rG0Y8JBVw7hvvrJEq4ATkKCoUrLSwk,2657
+ convoviz/config.py,sha256=vjedCcpQ_t-mR6cZ4GJJuyRPDeY95XCIiMXufVIlm9M,2724
  convoviz/exceptions.py,sha256=bQpIKls48uOQpagEJAxpXf5LF7QoagRRfbD0MjWC7Ak,1476
- convoviz/interactive.py,sha256=sMD2TVIwjRcZrOOzDPnlCx2rAxaEhanQlHyDSPwXoNw,5777
+ convoviz/interactive.py,sha256=VXtKgYo9tZGtsoj7zThdnbTrbjSNP5MzAZbdOs3icW4,7424
  convoviz/io/__init__.py,sha256=y70TYypJ36_kaEA04E2wa1EDaKQVjprKItoKR6MMs4M,471
  convoviz/io/assets.py,sha256=BykidWJG6OQAgbVfUByQ3RLTrldzpZ_NeM7HV3a5Tig,2333
- convoviz/io/loaders.py,sha256=c7806lRHxluuifBhGqhloYtbI47WGn3PAVRwPYGq2u8,4765
+ convoviz/io/loaders.py,sha256=RuGiGzpyNcgwTxOM-m2ehhyh2mP1-k1YamK8-VynR3g,5713
  convoviz/io/writers.py,sha256=KaLr0f2F2Pw5XOoQKMA75IeQYXUTT4WbS-HAqRxsp3c,3494
  convoviz/models/__init__.py,sha256=6gAfrk6KJT2QxdvX_v15mUdfIqEw1xKxwQlKSfyA5eI,532
- convoviz/models/collection.py,sha256=sTSRCR4jS94FE02gCmWfNFIrn8t-PCNmHI7wb4Juh4M,4094
- convoviz/models/conversation.py,sha256=G0wxrcIhY5JzWeIOkGtkELSzc7J32W2wxJVbNOord58,5145
- convoviz/models/message.py,sha256=MN5FrQ1kfmdAbcFUwfzg6FX5pKuZmhtNNrZksqMyuD0,3978
+ convoviz/models/collection.py,sha256=L658yKMNC6IZrfxYxZBe-oO9COP_bzVfRznnNon7tfU,4467
+ convoviz/models/conversation.py,sha256=ssx1Z6LM9kJIx3OucQW8JVoAc8zCdxj1iOLtie2B3ak,5678
+ convoviz/models/message.py,sha256=mVnaUG6hypz92Oz3OgFAK1uuTgH3ZOJAWsFiCpLYneY,5459
  convoviz/models/node.py,sha256=1vBAtKVscYsUBDnKAOyLxuZaK9KoVF1dFXiKXRHxUnY,1946
- convoviz/pipeline.py,sha256=E2T82QjKEOmpy_kgY_MXd15MkXP8C2yE7MXG4jgDw1A,5018
+ convoviz/pipeline.py,sha256=Mwg3Xqazk5PrsIHxhVajtWbfq4PgFlIGVHWq8BsW0U0,5750
  convoviz/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  convoviz/renderers/__init__.py,sha256=IQgwD9NqtUgbS9zwyPBNZbBIZcFrbZ9C7WMAV-X3Xdg,261
- convoviz/renderers/markdown.py,sha256=Gvc8x2OecmzcGMAab5i3f20VQHK_ITEepmJw_J-6zSo,6105
- convoviz/renderers/yaml.py,sha256=FqO2zToXp96gQRDrjALgchESWLg49LxuehXP59SEFeU,1522
+ convoviz/renderers/markdown.py,sha256=HDvTYpTJUI87o8QjS5ZfMS1FLRS4zPNBvCDyWzEpi9o,7211
+ convoviz/renderers/yaml.py,sha256=XG1s4VhDdx-TiqekTkgED87RZ1lVQ7IwrbA-sZHrs7k,4056
  convoviz/utils.py,sha256=IQEKYHhWOnYxlr4GwAHoquG0BXTlVRkORL80oUSaIeQ,3417
- convoviz-0.2.3.dist-info/WHEEL,sha256=KSLUh82mDPEPk0Bx0ScXlWL64bc8KmzIPNcpQZFV-6E,79
- convoviz-0.2.3.dist-info/entry_points.txt,sha256=HYsmsw5vt36yYHB05uVU48AK2WLkcwshly7m7KKuZMY,54
- convoviz-0.2.3.dist-info/METADATA,sha256=KPvRv4fDHpbiW7xM-_o5neTCWPEsI35E114Oulu1CRQ,4619
- convoviz-0.2.3.dist-info/RECORD,,
+ convoviz-0.2.5.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
+ convoviz-0.2.5.dist-info/entry_points.txt,sha256=HYsmsw5vt36yYHB05uVU48AK2WLkcwshly7m7KKuZMY,54
+ convoviz-0.2.5.dist-info/METADATA,sha256=nh8J1XdXD9CdGO3REyBLZTdan-LdCP92tofpY7w4Wt0,5309
+ convoviz-0.2.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: uv 0.9.22
+ Generator: uv 0.9.24
  Root-Is-Purelib: true
  Tag: py3-none-any