convoviz 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. convoviz/__init__.py +25 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +9 -0
  4. convoviz/analysis/graphs.py +855 -0
  5. convoviz/analysis/wordcloud.py +165 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +117 -0
  40. convoviz/config.py +106 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +247 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +98 -0
  45. convoviz/io/loaders.py +186 -0
  46. convoviz/io/writers.py +227 -0
  47. convoviz/models/__init__.py +24 -0
  48. convoviz/models/collection.py +115 -0
  49. convoviz/models/conversation.py +158 -0
  50. convoviz/models/message.py +218 -0
  51. convoviz/models/node.py +66 -0
  52. convoviz/pipeline.py +167 -0
  53. convoviz/py.typed +0 -0
  54. convoviz/renderers/__init__.py +10 -0
  55. convoviz/renderers/markdown.py +269 -0
  56. convoviz/renderers/yaml.py +119 -0
  57. convoviz/utils.py +155 -0
  58. convoviz-0.2.12.dist-info/METADATA +148 -0
  59. convoviz-0.2.12.dist-info/RECORD +61 -0
  60. convoviz-0.2.12.dist-info/WHEEL +4 -0
  61. convoviz-0.2.12.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,247 @@
1
+ """Interactive configuration prompts using questionary."""
2
+
3
+ from pathlib import Path
4
+ from typing import Literal, Protocol, cast
5
+
6
+ from questionary import Choice, Style, checkbox, select
7
+ from questionary import path as qst_path
8
+ from questionary import text as qst_text
9
+
10
+ from convoviz.config import ConvovizConfig, get_default_config
11
+ from convoviz.io.loaders import find_latest_zip, validate_zip
12
+ from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
13
+
14
# Shared colour scheme handed as ``style=`` to every prompt built in this module.
# Each entry pairs a style-class name with a "fg:<hex colour> [bold|italic]" spec.
CUSTOM_STYLE = Style(
    [
        ("qmark", "fg:#34eb9b bold"),
        ("question", "bold fg:#e0e0e0"),
        ("answer", "fg:#34ebeb bold"),
        ("pointer", "fg:#e834eb bold"),
        ("highlighted", "fg:#349ceb bold"),
        ("selected", "fg:#34ebeb"),
        ("separator", "fg:#eb3434"),
        ("instruction", "fg:#eb9434"),
        ("text", "fg:#b2eb34"),
        ("disabled", "fg:#858585 italic"),
    ]
)
28
+
29
+ class _QuestionaryPrompt[T](Protocol):
30
+ def ask(self) -> T | None: ...
31
+
32
+
33
+ def _ask_or_cancel[T](prompt: _QuestionaryPrompt[T]) -> T:
34
+ """Ask a questionary prompt; treat Ctrl+C/Ctrl+D as cancelling the run.
35
+
36
+ questionary's `.ask()` returns `None` on cancellation (Ctrl+C / Ctrl+D). We
37
+ convert that to `KeyboardInterrupt` so callers can abort the whole
38
+ interactive session with a single Ctrl+C.
39
+ """
40
+
41
+ result = prompt.ask()
42
+ if result is None:
43
+ raise KeyboardInterrupt
44
+ return result
45
+
46
+
47
+ def _validate_input_path(raw: str) -> bool | str:
48
+ path = Path(raw)
49
+ if not path.exists():
50
+ return "Path must exist"
51
+
52
+ if path.is_dir():
53
+ if (path / "conversations.json").exists():
54
+ return True
55
+ return "Directory must contain conversations.json"
56
+
57
+ if path.suffix.lower() == ".json":
58
+ return True
59
+
60
+ if path.suffix.lower() == ".zip":
61
+ return True if validate_zip(path) else "ZIP must contain conversations.json"
62
+
63
+ return "Input must be a .zip, a .json, or a directory containing conversations.json"
64
+
65
+
66
def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
    """Walk the user through every configurable option, one prompt at a time.

    Prompts appear in this order: input path, output folder, one message header
    per author role, LaTeX delimiters, markdown flavor, YAML metadata fields,
    word-cloud font, word-cloud colormap, custom stopwords. Each prompt goes
    through ``_ask_or_cancel``, so cancelling any of them raises
    ``KeyboardInterrupt``.

    Args:
        initial_config: Configuration to start from; when falsy, the result of
            ``get_default_config()`` is used

    Returns:
        The starting configuration object, updated in place with the answers
    """
    config = initial_config or get_default_config()
    markdown_config = config.conversation.markdown
    yaml_config = config.conversation.yaml
    wordcloud_config = config.wordcloud

    # Pre-fill the values the prompts will offer as defaults.
    if not config.input_path and (latest := find_latest_zip()):
        config.input_path = latest
    if not wordcloud_config.font_path:
        wordcloud_config.font_path = default_font_path()

    # --- Input path -------------------------------------------------------
    input_answer: str = _ask_or_cancel(
        qst_path(
            "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
            default=str(config.input_path) if config.input_path else "",
            validate=_validate_input_path,
            style=CUSTOM_STYLE,
        )
    )
    if input_answer:
        config.input_path = Path(input_answer)

    # --- Output folder ----------------------------------------------------
    output_answer: str = _ask_or_cancel(
        qst_path(
            "Enter the path to the output folder:",
            default=str(config.output_folder),
            style=CUSTOM_STYLE,
        )
    )
    if output_answer:
        config.output_folder = Path(output_answer)

    # --- Author headers ---------------------------------------------------
    def _check_header(text: str):
        # Pass through validate_header's truthy result, else supply the error text.
        return validate_header(text) or "Must be a valid markdown header (e.g., # Title)"

    headers = config.message.author_headers
    for role in ("system", "user", "assistant", "tool"):
        header_answer: str = _ask_or_cancel(
            qst_text(
                f"Enter the message header for '{role}':",
                default=getattr(headers, role),
                validate=_check_header,
                style=CUSTOM_STYLE,
            )
        )
        if header_answer:
            setattr(headers, role, header_answer)

    # --- LaTeX delimiters -------------------------------------------------
    latex_prompt = select(
        "Select the LaTeX math delimiters:",
        choices=["default", "dollars"],
        default=markdown_config.latex_delimiters,
        style=CUSTOM_STYLE,
    )
    latex_choice = cast(Literal["default", "dollars"], _ask_or_cancel(latex_prompt))
    if latex_choice:
        markdown_config.latex_delimiters = latex_choice

    # --- Markdown flavor --------------------------------------------------
    flavor_prompt = select(
        "Select the markdown flavor:",
        choices=["standard", "obsidian"],
        default=markdown_config.flavor,
        style=CUSTOM_STYLE,
    )
    flavor_choice = cast(Literal["standard", "obsidian"], _ask_or_cancel(flavor_prompt))
    if flavor_choice:
        markdown_config.flavor = flavor_choice

    # --- YAML metadata fields ---------------------------------------------
    # One tuple drives both the checkbox entries and the write-back loop.
    yaml_fields = (
        "title",
        "tags",
        "chat_link",
        "create_time",
        "update_time",
        "model",
        "used_plugins",
        "message_count",
        "content_types",
        "custom_instructions",
    )
    yaml_choices = [
        Choice(title=name, checked=getattr(yaml_config, name)) for name in yaml_fields
    ]
    ticked: list[str] = _ask_or_cancel(
        checkbox(
            "Select YAML metadata headers to include:",
            choices=yaml_choices,
            style=CUSTOM_STYLE,
        )
    )
    ticked_names = set(ticked)
    for name in yaml_fields:
        # Every field becomes True/False according to whether it was ticked.
        setattr(yaml_config, name, name in ticked_names)

    # --- Word-cloud font --------------------------------------------------
    fonts = font_names()
    if fonts:
        fallback_font = fonts[0]
        configured_font = wordcloud_config.font_path
        preferred_font = configured_font.stem if configured_font else fallback_font
        if preferred_font not in fonts:
            preferred_font = fallback_font

        font_choice: str = _ask_or_cancel(
            select(
                "Select the font for word clouds:",
                choices=fonts,
                default=preferred_font,
                style=CUSTOM_STYLE,
            )
        )
        if font_choice:
            wordcloud_config.font_path = font_path(font_choice)

    # --- Word-cloud colormap ----------------------------------------------
    palettes = colormaps()
    if palettes:
        preferred_palette = wordcloud_config.colormap
        if preferred_palette not in palettes:
            preferred_palette = palettes[0]

        palette_choice: str = _ask_or_cancel(
            select(
                "Select the color theme for word clouds:",
                choices=palettes,
                default=preferred_palette,
                style=CUSTOM_STYLE,
            )
        )
        if palette_choice:
            wordcloud_config.colormap = palette_choice

    # --- Custom stopwords -------------------------------------------------
    # Stored unconditionally, so an empty answer clears the previous value.
    wordcloud_config.custom_stopwords = _ask_or_cancel(
        qst_text(
            "Enter custom stopwords (comma-separated):",
            default=wordcloud_config.custom_stopwords,
            style=CUSTOM_STYLE,
        )
    )

    return config
@@ -0,0 +1,21 @@
1
+ """I/O operations for convoviz."""
2
+
3
+ from convoviz.io.loaders import (
4
+ load_collection_from_json,
5
+ load_collection_from_zip,
6
+ load_conversation_from_json,
7
+ )
8
+ from convoviz.io.writers import (
9
+ save_collection,
10
+ save_conversation,
11
+ save_custom_instructions,
12
+ )
13
+
14
# Public API of this package: exactly the loader and writer names imported above.
__all__ = [
    "load_collection_from_json",
    "load_collection_from_zip",
    "load_conversation_from_json",
    "save_collection",
    "save_conversation",
    "save_custom_instructions",
]
convoviz/io/assets.py ADDED
@@ -0,0 +1,98 @@
1
+ "Asset management functions."
2
+
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+
7
+ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
8
+ """Find the actual file for a given asset ID in the source directory.
9
+
10
+ Args:
11
+ source_dir: Directory to search in
12
+ asset_id: The asset ID (e.g., "file-uuid")
13
+
14
+ Returns:
15
+ Path to the found file, or None
16
+ """
17
+ if not source_dir.exists():
18
+ return None
19
+
20
+ source_dir = source_dir.resolve()
21
+
22
+ # Safety check for asset_id
23
+ if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
24
+ return None
25
+
26
+ # 1. Try exact match
27
+ exact_path = (source_dir / asset_id).resolve()
28
+ if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
29
+ return exact_path
30
+
31
+ # 2. Try prefix match in root
32
+ try:
33
+ candidates = list(source_dir.glob(f"{asset_id}*"))
34
+ files = [
35
+ p.resolve()
36
+ for p in candidates
37
+ if p.is_file() and p.resolve().is_relative_to(source_dir)
38
+ ]
39
+ if files:
40
+ return files[0]
41
+ except Exception:
42
+ pass
43
+
44
+ # 3. Try prefix match in dalle-generations
45
+ dalle_dir = source_dir / "dalle-generations"
46
+ if dalle_dir.exists() and dalle_dir.is_dir():
47
+ dalle_dir = dalle_dir.resolve()
48
+ try:
49
+ candidates = list(dalle_dir.glob(f"{asset_id}*"))
50
+ files = [
51
+ p.resolve()
52
+ for p in candidates
53
+ if p.is_file() and p.resolve().is_relative_to(dalle_dir)
54
+ ]
55
+ if files:
56
+ return files[0]
57
+ except Exception:
58
+ pass
59
+
60
+ # 4. Try prefix match in user-* directories (new 2025 format)
61
+ try:
62
+ for user_dir in source_dir.glob("user-*"):
63
+ if user_dir.is_dir():
64
+ user_dir = user_dir.resolve()
65
+ candidates = list(user_dir.glob(f"{asset_id}*"))
66
+ files = [
67
+ p.resolve()
68
+ for p in candidates
69
+ if p.is_file() and p.resolve().is_relative_to(user_dir)
70
+ ]
71
+ if files:
72
+ return files[0]
73
+ except Exception:
74
+ pass
75
+
76
+ return None
77
+
78
+
79
def copy_asset(source_path: Path, dest_dir: Path) -> str:
    """Place a copy of an asset under ``dest_dir/assets`` and report its link path.

    Args:
        source_path: File to copy
        dest_dir: Root output directory; the ``assets`` sub-folder is created
            on demand

    Returns:
        ``"assets/<file name>"``, always with a forward slash
    """
    file_name = source_path.name
    target = dest_dir / "assets" / file_name
    target.parent.mkdir(parents=True, exist_ok=True)

    # A file already present under that name is left untouched.
    if not target.exists():
        shutil.copy2(source_path, target)

    # Forward slash on every OS so the link works inside Markdown.
    return f"assets/{file_name}"
convoviz/io/loaders.py ADDED
@@ -0,0 +1,186 @@
1
+ """Loading functions for conversations and collections."""
2
+
3
+ from pathlib import Path, PurePosixPath
4
+ from zipfile import ZipFile
5
+
6
+ from orjson import loads
7
+
8
+ from convoviz.exceptions import InvalidZipError
9
+ from convoviz.models import Conversation, ConversationCollection
10
+
11
+
12
def _is_safe_zip_member_name(name: str) -> bool:
    """Tell whether a ZIP entry name may be extracted safely.

    The check is OS-agnostic: ``\\`` is treated like ``/``, and the name is
    rejected when it is empty, absolute, starts with a drive-letter or
    UNC-style component, or contains a ``..`` component.

    Args:
        name: Entry name exactly as stored in the archive

    Returns:
        True if none of the rejection rules apply
    """
    posix_name = name.replace("\\", "/")
    if not posix_name:
        return False

    entry = PurePosixPath(posix_name)
    # Absolute paths such as "/etc/passwd".
    if entry.is_absolute():
        return False

    parts = entry.parts
    head = parts[0] if parts else ""
    # Drive letters ("C:") and UNC-style prefixes stored in the archive.
    if head.endswith(":") or head.startswith(("//", "\\\\")):
        return False

    return ".." not in parts
31
+
32
+
33
def extract_archive(filepath: Path) -> Path:
    """Extract a ZIP file and return the extraction folder path.

    The target folder is ``filepath`` with its suffix removed. Every entry name
    is vetted for Path Traversal (Zip-Slip) before anything is written, and the
    folder is only created once the whole archive has passed, so a rejected
    archive leaves nothing behind.

    Args:
        filepath: Path to the ZIP file

    Returns:
        Path to the extracted folder

    Raises:
        InvalidZipError: If an entry name is unsafe or would land outside the
            extraction folder
    """
    folder = filepath.with_suffix("")
    # Resolved once: it is the same for every member.
    folder_root = folder.resolve()

    with ZipFile(filepath) as zf:
        for member in zf.infolist():
            # Check for path traversal (Zip-Slip) in an OS-agnostic way.
            # ZIP files are typically POSIX-path-like, but malicious archives can
            # embed backslashes or drive-letter tricks.
            if not _is_safe_zip_member_name(member.filename):
                raise InvalidZipError(
                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
                )

            # Additional check using resolved paths
            normalized = member.filename.replace("\\", "/")
            target_path = (folder / normalized).resolve()
            if not target_path.is_relative_to(folder_root):
                raise InvalidZipError(
                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
                )

        # All members vetted: now it is safe to touch the filesystem.
        folder.mkdir(parents=True, exist_ok=True)
        zf.extractall(folder)
    return folder
70
+
71
+
72
def validate_zip(filepath: Path) -> bool:
    """Check if a ZIP file contains conversations.json.

    The ``.zip`` extension is matched case-insensitively, so ``EXPORT.ZIP`` is
    accepted just like ``export.zip``.

    Args:
        filepath: Path to the ZIP file

    Returns:
        True if *filepath* is an existing file with a ``.zip`` extension whose
        archive has a top-level ``conversations.json`` entry, False otherwise
        (including when the archive cannot be opened)
    """
    if not filepath.is_file() or filepath.suffix.lower() != ".zip":
        return False
    try:
        with ZipFile(filepath) as zf:
            return "conversations.json" in zf.namelist()
    except Exception:
        # Deliberately broad: any failure to open or read means "not a valid export".
        return False
88
+
89
+
90
def load_conversation_from_json(filepath: Path | str) -> Conversation:
    """Build a single ``Conversation`` from a UTF-8 JSON file.

    Args:
        filepath: Location of the JSON file

    Returns:
        ``Conversation`` constructed from the parsed top-level object, whose
        keys are passed as keyword arguments
    """
    raw_text = Path(filepath).read_text(encoding="utf-8")
    return Conversation(**loads(raw_text))
103
+
104
+
105
def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
    """Build a ``ConversationCollection`` from a UTF-8 JSON file.

    The file holds either an array of conversation objects or an object that
    wraps that array under a ``"conversations"`` key.

    Args:
        filepath: Location of the JSON file

    Returns:
        Collection whose ``source_path`` is the directory containing the file
    """
    source = Path(filepath)
    payload = loads(source.read_text(encoding="utf-8"))

    # Unwrap exports that nest the array inside a top-level object.
    is_wrapped = isinstance(payload, dict) and "conversations" in payload
    conversations = payload["conversations"] if is_wrapped else payload

    return ConversationCollection(conversations=conversations, source_path=source.parent)
126
+
127
+
128
def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:
    """Extract a ChatGPT export ZIP and load its ``conversations.json``.

    Args:
        filepath: Location of the ZIP file

    Returns:
        Collection loaded from ``conversations.json`` in the extracted folder

    Raises:
        InvalidZipError: If ``validate_zip`` rejects the file, or extraction
            detects an unsafe entry
    """
    archive = Path(filepath)
    if validate_zip(archive):
        extracted_root = extract_archive(archive)
        return load_collection_from_json(extracted_root / "conversations.json")
    raise InvalidZipError(str(archive))
149
+
150
+
151
+ def find_latest_zip(directory: Path | None = None) -> Path | None:
152
+ """Find the most recently created ZIP file in a directory.
153
+
154
+ Args:
155
+ directory: Directory to search (defaults to ~/Downloads)
156
+
157
+ Returns:
158
+ Path to the most recent ZIP, or None if none found
159
+ """
160
+ if directory is None:
161
+ directory = Path.home() / "Downloads"
162
+
163
+ zip_files = list(directory.glob("*.zip"))
164
+ if not zip_files:
165
+ return None
166
+
167
+ return max(zip_files, key=lambda p: p.stat().st_ctime)
168
+
169
+
170
+ def find_latest_bookmarklet_json(directory: Path | None = None) -> Path | None:
171
+ """Find the most recent bookmarklet JSON file in a directory.
172
+
173
+ Args:
174
+ directory: Directory to search (defaults to ~/Downloads)
175
+
176
+ Returns:
177
+ Path to the most recent bookmarklet JSON, or None if none found
178
+ """
179
+ if directory is None:
180
+ directory = Path.home() / "Downloads"
181
+
182
+ bookmarklet_files = [f for f in directory.glob("*.json") if "bookmarklet" in f.name.lower()]
183
+ if not bookmarklet_files:
184
+ return None
185
+
186
+ return max(bookmarklet_files, key=lambda p: p.stat().st_ctime)