convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. convoviz/__init__.py +34 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +22 -0
  4. convoviz/analysis/graphs.py +879 -0
  5. convoviz/analysis/wordcloud.py +204 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +149 -0
  40. convoviz/config.py +120 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +264 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +109 -0
  45. convoviz/io/loaders.py +191 -0
  46. convoviz/io/writers.py +231 -0
  47. convoviz/logging_config.py +69 -0
  48. convoviz/models/__init__.py +24 -0
  49. convoviz/models/collection.py +115 -0
  50. convoviz/models/conversation.py +158 -0
  51. convoviz/models/message.py +218 -0
  52. convoviz/models/node.py +66 -0
  53. convoviz/pipeline.py +184 -0
  54. convoviz/py.typed +0 -0
  55. convoviz/renderers/__init__.py +10 -0
  56. convoviz/renderers/markdown.py +269 -0
  57. convoviz/renderers/yaml.py +119 -0
  58. convoviz/utils.py +155 -0
  59. convoviz-0.4.1.dist-info/METADATA +215 -0
  60. convoviz-0.4.1.dist-info/RECORD +62 -0
  61. convoviz-0.4.1.dist-info/WHEEL +4 -0
  62. convoviz-0.4.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,269 @@
1
+ """Markdown rendering for conversations."""
2
+
3
+ import re
4
+ from collections.abc import Callable
5
+
6
+ from convoviz.config import AuthorHeaders, ConversationConfig
7
+ from convoviz.exceptions import MessageContentError
8
+ from convoviz.models import Conversation, Node
9
+ from convoviz.renderers.yaml import render_yaml_header
10
+
11
+
12
def close_code_blocks(text: str) -> str:
    """Ensure all code blocks in the text are properly closed.

    The text is scanned line by line. A line starting with three backticks
    opens a block when none is open; a line consisting of exactly three
    backticks (ignoring trailing whitespace) closes the open block. If a
    block is still open at the end, a closing fence is appended.

    Args:
        text: Markdown text that may have unclosed code blocks

    Returns:
        Text with all code blocks properly closed
    """
    open_code_block = False

    for line in text.split("\n"):
        if not open_code_block:
            if line.startswith("```"):
                open_code_block = True
        # Trailing spaces or a "\r" left over from CRLF line endings do not
        # stop a fence from closing the block; without rstrip() such a fence
        # was missed and a spurious extra fence was appended.
        elif line.rstrip() == "```":
            open_code_block = False

    return f"{text}\n```" if open_code_block else text
35
+
36
+
37
def replace_latex_delimiters(text: str) -> str:
    """Convert bracket-style LaTeX delimiters into dollar-sign delimiters.

    Display-math brackets become ``$$`` and inline-math parentheses become ``$``.

    Args:
        text: Text with \\[ \\] \\( \\) delimiters

    Returns:
        Text with $$ and $ delimiters
    """
    substitutions = (
        (r"\\\[", "$$"),
        (r"\\\]", "$$"),
        (r"\\\(", "$"),
        (r"\\\)", "$"),
    )
    for pattern, dollars in substitutions:
        text = re.sub(pattern, dollars, text)
    return text
50
+
51
+
52
def code_block(text: str, lang: str = "python") -> str:
    """Surround text with markdown code fences.

    Args:
        text: The code to wrap
        lang: The language tag placed after the opening fence

    Returns:
        Markdown code block string
    """
    fence = "```"
    opening = f"{fence}{lang}"
    return f"{opening}\n{text}\n{fence}"
63
+
64
+
65
def render_obsidian_callout(
    content: str,
    title: str,
    callout_type: str = "NOTE",
    collapsed: bool = True,
) -> str:
    """Format content as a foldable Obsidian callout.

    Syntax: > [!TYPE]+/- Title
    The fold marker is Obsidian-specific; other markdown renderers show the
    result as a plain blockquote.

    Args:
        content: The content to wrap (surrounding whitespace is stripped)
        title: The callout title
        callout_type: The callout type (NOTE, TIP, WARNING, etc.)
        collapsed: Whether to default to collapsed (-) or expanded (+)

    Returns:
        Markdown callout string
    """
    if collapsed:
        fold_marker = "-"
    else:
        fold_marker = "+"

    heading = f"> [!{callout_type}]{fold_marker} {title}"
    # Every content line, including blank ones, is prefixed with "> ".
    body = [f"> {row}" for row in content.strip().split("\n")]
    return "\n".join([heading, *body])
89
+
90
+
91
def render_message_header(role: str, headers: AuthorHeaders) -> str:
    """Look up the markdown header configured for an author role.

    Args:
        role: The author role (user, assistant, system, tool)
        headers: Configuration for author headers

    Returns:
        The configured header for a known role; for any other role, a
        level-3 header built from the title-cased role name
    """
    configured = dict(
        system=headers.system,
        user=headers.user,
        assistant=headers.assistant,
        tool=headers.tool,
    )
    if role in configured:
        return configured[role]
    return f"### {role.title()}"
108
+
109
+
110
def render_node_header(node: Node, headers: AuthorHeaders) -> str:
    """Build the header line for a node's message.

    Args:
        node: The node to render
        headers: Configuration for author headers

    Returns:
        The author header followed by a newline, or an empty string when
        the node carries no message
    """
    message = node.message
    if message is not None:
        return render_message_header(message.author.role, headers) + "\n"
    return ""
124
+
125
+
126
# Message content types that the Obsidian flavor renders as collapsible
# callouts. Maps content_type -> (callout_type, callout title).
OBSIDIAN_COLLAPSIBLE_TYPES: dict[str, tuple[str, str]] = {
    "reasoning_recap": ("NOTE", "🧠 AI Reasoning"),
    "thoughts": ("NOTE", "💭 AI Thoughts"),
}
132
+
133
+
134
def render_node(
    node: Node,
    headers: AuthorHeaders,
    use_dollar_latex: bool = False,
    asset_resolver: Callable[[str], str | None] | None = None,
    flavor: str = "standard",
) -> str:
    """Render one conversation node as a markdown section.

    Args:
        node: The node to render
        headers: Configuration for author headers
        use_dollar_latex: Whether to convert LaTeX delimiters to dollars
        asset_resolver: Function to resolve asset IDs to paths
        flavor: Markdown flavor ("standard" or "obsidian")

    Returns:
        Markdown for the node, or an empty string when the node has no
        message or its message is hidden
    """
    message = node.message
    if message is None:
        return ""

    def message_text() -> str:
        """Return the message text, or "" when the message has no text."""
        try:
            return message.text
        except MessageContentError:
            # Some message types only contain non-text parts; those may
            # still carry images.
            return ""

    kind = message.content.content_type

    # Obsidian flavor: selected content types become collapsed callouts.
    # No "---" separator is emitted, since callouts are visually distinct
    # and may appear back to back.
    if flavor == "obsidian" and kind in OBSIDIAN_COLLAPSIBLE_TYPES:
        callout_text = message_text()
        if callout_text.strip():
            callout_type, title = OBSIDIAN_COLLAPSIBLE_TYPES[kind]
            callout = render_obsidian_callout(
                content=callout_text,
                title=title,
                callout_type=callout_type,
                collapsed=True,
            )
            return f"\n{callout}\n"

    if message.is_hidden:
        return ""

    header = render_node_header(node, headers)

    body = close_code_blocks(message_text())
    if body:
        body = f"\n{body}\n"
    if use_dollar_latex:
        body = replace_latex_delimiters(body)

    # Images are appended only when a resolver is supplied and it returns
    # a path; standard markdown image syntax is used for both flavors.
    if asset_resolver and message.images:
        resolved = (asset_resolver(image_id) for image_id in message.images)
        body += "".join(f"\n![Image]({rel_path})\n" for rel_path in resolved if rel_path)

    return f"\n{header}{body}\n---\n"
203
+
204
+
205
def _ordered_nodes(conversation: Conversation) -> list[Node]:
    """Return nodes in a deterministic depth-first (pre-order) traversal.

    ChatGPT exports store nodes in a mapping; dict iteration order is not a
    reliable semantic ordering. For markdown output, we traverse from the
    roots (nodes without a parent, sorted by id) and visit each node's
    children in their stored order.

    The traversal uses an explicit stack instead of recursion: a conversation
    is typically one long chain of nodes, so a recursive walk goes one frame
    deeper per message and raises ``RecursionError`` on long conversations.

    Args:
        conversation: The conversation whose ``node_mapping`` is traversed

    Returns:
        Every node in the mapping exactly once, in traversal order, with
        nodes unreachable from any root appended afterwards in id order
    """
    mapping = conversation.node_mapping
    roots = sorted((n for n in mapping.values() if n.parent is None), key=lambda n: n.id)

    visited: set[str] = set()
    ordered: list[Node] = []

    def walk(start: Node) -> None:
        """Append every not-yet-visited node reachable from ``start``."""
        stack = [start]
        while stack:
            node = stack.pop()
            if node.id in visited:
                continue
            visited.add(node.id)
            ordered.append(node)
            # Push children reversed so the first child is popped first,
            # matching the order a recursive pre-order walk would produce.
            stack.extend(reversed(list(node.children_nodes)))

    for root in roots:
        walk(root)

    # Include any disconnected/orphan nodes deterministically at the end;
    # nodes already visited are skipped inside walk().
    for node in sorted(mapping.values(), key=lambda n: n.id):
        walk(node)

    return ordered
233
+
234
+
235
def render_conversation(
    conversation: Conversation,
    config: ConversationConfig,
    headers: AuthorHeaders,
    asset_resolver: Callable[[str], str | None] | None = None,
) -> str:
    """Render a whole conversation as one markdown document.

    The document is the YAML frontmatter followed by each message-bearing
    node, rendered in the deterministic traversal order.

    Args:
        conversation: The conversation to render
        config: Conversation rendering configuration
        headers: Configuration for author headers
        asset_resolver: Function to resolve asset IDs to paths

    Returns:
        Complete markdown document string
    """
    markdown_config = config.markdown
    dollars = markdown_config.latex_delimiters == "dollars"
    flavor = markdown_config.flavor

    # The YAML header comes first, then one section per node.
    sections = [render_yaml_header(conversation, config.yaml)]
    sections.extend(
        render_node(
            node,
            headers,
            dollars,
            asset_resolver=asset_resolver,
            flavor=flavor,
        )
        for node in _ordered_nodes(conversation)
        if node.message
    )
    return "".join(sections)
@@ -0,0 +1,119 @@
1
+ """YAML frontmatter rendering for conversations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime
7
+
8
+ from convoviz.config import YAMLConfig
9
+ from convoviz.models import Conversation
10
+
11
# Matches each run of characters other than lowercase ASCII letters, digits,
# "/", "_" and "-"; such runs are replaced when normalizing tags.
_TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")
12
+
13
+
14
def _to_yaml_scalar(value: object) -> str:
    """Format a single non-container value as a YAML scalar.

    Args:
        value: The value to format

    Returns:
        ``null`` for None, ``true``/``false`` for booleans, the plain
        ``str()`` form for ints and floats, a double-quoted ISO 8601 string
        for datetimes, a ``|-`` block scalar for strings containing a
        newline, and a double-quoted string (backslashes and double quotes
        escaped) for everything else
    """

    def quoted(raw: str) -> str:
        """Double-quote ``raw``, escaping backslashes and double quotes."""
        escaped = raw.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    if value is None:
        return "null"
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, datetime):
        # Frontmatter consumers generally expect ISO 8601 strings
        return f'"{value.isoformat()}"'
    if isinstance(value, str):
        if "\n" not in value:
            return quoted(value)
        # Multiline: use a block scalar with each line indented
        block_lines = [f" {line}" for line in value.splitlines()]
        return "|-\n" + "\n".join(block_lines)

    # Fallback: stringify and quote
    return quoted(str(value))
35
+
36
+
37
def _to_yaml(value: object, indent: int = 0) -> str:
    """Serialize a value as block-style YAML text.

    Dicts become ``key: value`` lines and lists become ``- item`` lines;
    nested containers are rendered recursively, two columns deeper.
    Anything else is formatted by ``_to_yaml_scalar``.

    Empty nested containers are written as ``[]`` / ``{}``. A bare ``key:``
    followed by nothing would be read back as null, silently turning an
    empty list into a missing value. Block scalars (``|-``) are re-indented
    for list items as well as dict values, so their content lines always
    sit deeper than the entry that introduces them.

    Args:
        value: The dict, list, or scalar value to serialize
        indent: Number of spaces to indent every emitted line

    Returns:
        YAML text without a trailing newline; an empty string when ``value``
        itself is an empty dict or list
    """
    pad = " " * indent

    def entry_lines(prefix: str, child: object) -> list[str]:
        """Render ``child`` as the content of an entry introduced by ``prefix``."""
        if isinstance(child, (dict, list)):
            if not child:
                empty = "{}" if isinstance(child, dict) else "[]"
                return [f"{prefix} {empty}"]
            return [prefix, _to_yaml(child, indent=indent + 2)]

        scalar = _to_yaml_scalar(child)
        if not scalar.startswith("|-"):
            return [f"{prefix} {scalar}"]
        # Block scalars already include newline + indentation relative to
        # column 0; add this level's padding to each content line.
        header, *content = scalar.splitlines()
        return [f"{prefix} {header}", *(f"{pad}{line}" for line in content)]

    if isinstance(value, dict):
        lines: list[str] = []
        for k, v in value.items():
            key = str(k)
            lines.extend(entry_lines(f"{pad}{key}:", v))
        return "\n".join(lines)

    if isinstance(value, list):
        lines = []
        for item in value:
            lines.extend(entry_lines(f"{pad}-", item))
        return "\n".join(lines)

    return f"{pad}{_to_yaml_scalar(value)}"
68
+
69
+
70
def _default_tags(conversation: Conversation) -> list[str]:
    """Build the default tag list for a conversation.

    Starts from ``"chatgpt"`` plus the conversation's plugins, then
    normalizes each candidate to a tag-friendly form: stripped, lowercased,
    runs of unsafe characters replaced by ``-``, and leading/trailing ``-``
    removed.

    Args:
        conversation: The conversation whose plugins contribute tags

    Returns:
        Normalized tags in first-seen order, without empties or duplicates
    """
    candidates = ["chatgpt", *conversation.plugins]
    cleaned = (_TAG_SAFE_RE.sub("-", raw.strip().lower()).strip("-") for raw in candidates)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(tag for tag in cleaned if tag))
80
+
81
+
82
def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
    """Build the YAML frontmatter block for a conversation.

    Args:
        conversation: The conversation to render
        config: YAML configuration specifying which fields to include

    Returns:
        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
    """
    # Each frontmatter key doubles as the name of the config flag that
    # enables it. Values are wrapped in lambdas so a conversation attribute
    # is only read when its field is enabled.
    providers = (
        ("title", lambda: conversation.title),
        ("tags", lambda: _default_tags(conversation)),
        ("chat_link", lambda: conversation.url),
        ("create_time", lambda: conversation.create_time),
        ("update_time", lambda: conversation.update_time),
        ("model", lambda: conversation.model),
        ("used_plugins", lambda: conversation.plugins),
        ("message_count", lambda: conversation.message_count("user", "assistant")),
        ("content_types", lambda: conversation.content_types),
        ("custom_instructions", lambda: conversation.custom_instructions),
    )
    yaml_fields: dict[str, object] = {
        field: produce() for field, produce in providers if getattr(config, field)
    }

    if not yaml_fields:
        return ""

    return "---\n" + _to_yaml(yaml_fields) + "\n---\n"
convoviz/utils.py ADDED
@@ -0,0 +1,155 @@
1
+ """Utility functions for convoviz."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+
7
def sanitize(filename: str) -> str:
    """Turn an arbitrary string into one that is safe to use as a filename.

    Runs of invalid characters and ".." sequences are replaced with an
    underscore, Windows reserved device names are wrapped in underscores,
    and the result is cut to 255 characters.

    Args:
        filename: The string to sanitize

    Returns:
        A filename-safe string, or "untitled" if the result is empty
    """
    # Replace each run of invalid characters with a single underscore
    cleaned = re.sub(r'[<>:"/\\|?*\n\r\t\f\v]+', "_", filename.strip())

    # Prevent path traversal
    cleaned = cleaned.replace("..", "_")

    # Windows reserved names: CON, PRN, AUX, NUL, COM1-COM9, LPT1-LPT9
    devices = {"CON", "PRN", "AUX", "NUL"}
    devices.update(f"{port}{number}" for port in ("COM", "LPT") for number in range(1, 10))
    if cleaned.upper() in devices:
        cleaned = f"_{cleaned}_"

    # Enforce length limit (255 is common for many filesystems)
    cleaned = cleaned[:255]

    return cleaned if cleaned else "untitled"
59
+
60
+
61
def validate_header(text: str) -> bool:
    """Tell whether text looks like a markdown header line.

    Args:
        text: The text to validate

    Returns:
        True if the text starts with 1-6 "#" characters followed by
        whitespace and some content
    """
    max_header_level = 6
    if not text.startswith("#"):
        return False

    # Split off the leading marker; a second token means content follows.
    tokens = text.split(maxsplit=1)
    if len(tokens) != 2:
        return False

    marker = tokens[0]
    # The marker must consist solely of "#" and be at most six long.
    return not marker.strip("#") and len(marker) <= max_header_level
80
+
81
+
82
def root_dir() -> Path:
    """Locate the directory that contains this module.

    Returns:
        Path to the package root
    """
    module_file = Path(__file__)
    return module_file.parent
89
+
90
+
91
def get_asset_path(relative_path: str) -> Path:
    """Resolve an asset location against the package directory.

    Args:
        relative_path: Path relative to convoviz root (e.g., "assets/fonts/foo.ttf")

    Returns:
        The package directory joined with ``relative_path``
    """
    package_dir = root_dir()
    return package_dir.joinpath(relative_path)
101
+
102
+
103
def font_dir() -> Path:
    """Locate the bundled fonts directory.

    Returns:
        Path to the assets/fonts directory
    """
    return root_dir().joinpath("assets", "fonts")
110
+
111
+
112
def font_names() -> list[str]:
    """Get available font names.

    Returns:
        Alphabetically sorted list of font names (without .ttf extension);
        empty when the fonts directory does not exist
    """
    fonts_path = font_dir()
    if not fonts_path.exists():
        return []
    # glob() yields entries in filesystem-dependent order; sort so the
    # result is the same on every platform and run.
    return sorted(font.stem for font in fonts_path.glob("*.ttf"))
122
+
123
+
124
def font_path(font_name: str) -> Path:
    """Build the path of a bundled font file.

    Args:
        font_name: Name of the font (without extension)

    Returns:
        Path to ``<font_name>.ttf`` inside the fonts directory
    """
    file_name = f"{font_name}.ttf"
    return font_dir().joinpath(file_name)
134
+
135
+
136
def default_font_path() -> Path:
    """Return the location of the default font.

    Returns:
        Path to Kalam-Regular.ttf
    """
    default_font = "Kalam-Regular"
    return font_path(default_font)
143
+
144
+
145
def colormaps() -> list[str]:
    """List the colormap names shipped with the package.

    Returns:
        The lines of assets/colormaps.txt, or an empty list when the file
        does not exist
    """
    listing = root_dir().joinpath("assets", "colormaps.txt")
    if listing.exists():
        return listing.read_text(encoding="utf-8").splitlines()
    return []