convoviz 0.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/__init__.py +25 -0
- convoviz/__main__.py +6 -0
- convoviz/analysis/__init__.py +9 -0
- convoviz/analysis/graphs.py +855 -0
- convoviz/analysis/wordcloud.py +165 -0
- convoviz/assets/colormaps.txt +15 -0
- convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- convoviz/assets/stopwords.txt +1 -0
- convoviz/cli.py +117 -0
- convoviz/config.py +106 -0
- convoviz/exceptions.py +47 -0
- convoviz/interactive.py +247 -0
- convoviz/io/__init__.py +21 -0
- convoviz/io/assets.py +98 -0
- convoviz/io/loaders.py +186 -0
- convoviz/io/writers.py +227 -0
- convoviz/models/__init__.py +24 -0
- convoviz/models/collection.py +115 -0
- convoviz/models/conversation.py +158 -0
- convoviz/models/message.py +218 -0
- convoviz/models/node.py +66 -0
- convoviz/pipeline.py +167 -0
- convoviz/py.typed +0 -0
- convoviz/renderers/__init__.py +10 -0
- convoviz/renderers/markdown.py +269 -0
- convoviz/renderers/yaml.py +119 -0
- convoviz/utils.py +155 -0
- convoviz-0.2.12.dist-info/METADATA +148 -0
- convoviz-0.2.12.dist-info/RECORD +61 -0
- convoviz-0.2.12.dist-info/WHEEL +4 -0
- convoviz-0.2.12.dist-info/entry_points.txt +3 -0
convoviz/renderers/markdown.py
ADDED

@@ -0,0 +1,269 @@
"""Markdown rendering for conversations."""

import re
from collections.abc import Callable

from convoviz.config import AuthorHeaders, ConversationConfig
from convoviz.exceptions import MessageContentError
from convoviz.models import Conversation, Node
from convoviz.renderers.yaml import render_yaml_header


def close_code_blocks(text: str) -> str:
    """Ensure all code blocks in the text are properly closed.

    Args:
        text: Markdown text that may have unclosed code blocks

    Returns:
        Text with all code blocks properly closed
    """
    open_code_block = False
    lines = text.split("\n")

    for line in lines:
        if line.startswith("```") and not open_code_block:
            open_code_block = True
            continue
        if line == "```" and open_code_block:
            open_code_block = False

    if open_code_block:
        text += "\n```"

    return text
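
A quick, illustrative sanity check of the behaviour above (not part of the package source; the fence string is built programmatically so the snippet itself stays well formed):

```python
from convoviz.renderers.markdown import close_code_blocks

fence = "`" * 3  # a literal triple-backtick fence

# An unclosed block gets a closing fence appended...
unclosed = f"{fence}python\nprint('hi')"
assert close_code_blocks(unclosed) == unclosed + "\n" + fence

# ...while already-balanced text is returned unchanged.
balanced = f"{fence}python\nprint('hi')\n{fence}"
assert close_code_blocks(balanced) == balanced
```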

def replace_latex_delimiters(text: str) -> str:
    """Replace LaTeX bracket delimiters with dollar sign delimiters.

    Args:
        text: Text with \\[ \\] \\( \\) delimiters

    Returns:
        Text with $$ and $ delimiters
    """
    text = re.sub(r"\\\[", "$$", text)
    text = re.sub(r"\\\]", "$$", text)
    text = re.sub(r"\\\(", "$", text)
    return re.sub(r"\\\)", "$", text)
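
For reference, a small check of the delimiter conversion (illustrative only):

```python
from convoviz.renderers.markdown import replace_latex_delimiters

assert replace_latex_delimiters(r"\(E = mc^2\)") == "$E = mc^2$"
assert replace_latex_delimiters(r"\[x + y\]") == "$$x + y$$"
```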

def code_block(text: str, lang: str = "python") -> str:
    """Wrap text in a markdown code block.

    Args:
        text: The code to wrap
        lang: The language for syntax highlighting

    Returns:
        Markdown code block string
    """
    return f"```{lang}\n{text}\n```"


def render_obsidian_callout(
    content: str,
    title: str,
    callout_type: str = "NOTE",
    collapsed: bool = True,
) -> str:
    """Render content as an Obsidian collapsible callout.

    Syntax: > [!TYPE]+/- Title
    This is Obsidian-specific; on GitHub/standard markdown it renders as a blockquote.

    Args:
        content: The content to wrap
        title: The callout title
        callout_type: The callout type (NOTE, TIP, WARNING, etc.)
        collapsed: Whether to default to collapsed (-) or expanded (+)

    Returns:
        Markdown callout string
    """
    fold = "-" if collapsed else "+"
    lines = content.strip().split("\n")
    quoted_lines = [f"> {line}" for line in lines]
    return f"> [!{callout_type}]{fold} {title}\n" + "\n".join(quoted_lines)

def render_message_header(role: str, headers: AuthorHeaders) -> str:
    """Get the markdown header for a message author.

    Args:
        role: The author role (user, assistant, system, tool)
        headers: Configuration for author headers

    Returns:
        The markdown header string
    """
    header_map = {
        "system": headers.system,
        "user": headers.user,
        "assistant": headers.assistant,
        "tool": headers.tool,
    }
    return header_map.get(role, f"### {role.title()}")


def render_node_header(node: Node, headers: AuthorHeaders) -> str:
    """Render the header section of a node.

    Args:
        node: The node to render
        headers: Configuration for author headers

    Returns:
        The header markdown string
    """
    if node.message is None:
        return ""

    return render_message_header(node.message.author.role, headers) + "\n"


# Content types that can be rendered as collapsible callouts in Obsidian
OBSIDIAN_COLLAPSIBLE_TYPES: dict[str, tuple[str, str]] = {
    # content_type: (callout_type, title)
    "reasoning_recap": ("NOTE", "AI Reasoning"),
    "thoughts": ("NOTE", "AI Thoughts"),
}


def render_node(
    node: Node,
    headers: AuthorHeaders,
    use_dollar_latex: bool = False,
    asset_resolver: Callable[[str], str | None] | None = None,
    flavor: str = "standard",
) -> str:
    """Render a complete node as markdown.

    Args:
        node: The node to render
        headers: Configuration for author headers
        use_dollar_latex: Whether to convert LaTeX delimiters to dollars
        asset_resolver: Function to resolve asset IDs to paths
        flavor: Markdown flavor ("standard" or "obsidian")

    Returns:
        Complete markdown string for the node
    """
    if node.message is None:
        return ""

    content_type = node.message.content.content_type

    # For Obsidian flavor, render certain hidden types as collapsible callouts.
    # No separator (---) since these are visually distinct and may appear consecutively.
    if flavor == "obsidian" and content_type in OBSIDIAN_COLLAPSIBLE_TYPES:
        try:
            text = node.message.text
        except MessageContentError:
            text = ""

        if text.strip():
            callout_type, title = OBSIDIAN_COLLAPSIBLE_TYPES[content_type]
            callout = render_obsidian_callout(
                content=text,
                title=title,
                callout_type=callout_type,
                collapsed=True,
            )
            return f"\n{callout}\n"

    if node.message.is_hidden:
        return ""

    header = render_node_header(node, headers)

    # Get and process content
    try:
        text = node.message.text
    except MessageContentError:
        # Some message types only contain non-text parts; those may still have images.
        text = ""

    content = close_code_blocks(text)
    content = f"\n{content}\n" if content else ""
    if use_dollar_latex:
        content = replace_latex_delimiters(content)

    # Append images if a resolver is provided and images exist
    if asset_resolver and node.message.images:
        for image_id in node.message.images:
            rel_path = asset_resolver(image_id)
            if rel_path:
                # Using standard markdown image syntax.
                # Obsidian handles this well.
                content += f"\n![{image_id}]({rel_path})\n"

    return f"\n{header}{content}\n---\n"

def _ordered_nodes(conversation: Conversation) -> list[Node]:
    """Return nodes in a deterministic depth-first traversal order.

    ChatGPT exports store nodes in a mapping; dict iteration order is not a
    reliable semantic ordering. For markdown output, we traverse from roots.
    """
    mapping = conversation.node_mapping
    roots = sorted((n for n in mapping.values() if n.parent is None), key=lambda n: n.id)

    visited: set[str] = set()
    ordered: list[Node] = []

    def dfs(node: Node) -> None:
        if node.id in visited:
            return
        visited.add(node.id)
        ordered.append(node)
        for child in node.children_nodes:
            dfs(child)

    for root in roots:
        dfs(root)

    # Include any disconnected/orphan nodes deterministically at the end.
    for node in sorted(mapping.values(), key=lambda n: n.id):
        dfs(node)

    return ordered
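
To make the traversal order concrete, here is a minimal sketch using stand-in objects (SimpleNamespace doubles for the real Node/Conversation models, which only need `id`, `parent`, `children_nodes`, and `node_mapping` here; `_ordered_nodes` is a private helper, so treat this as illustration only):

```python
from types import SimpleNamespace

from convoviz.renderers.markdown import _ordered_nodes

# Hypothetical stand-ins for convoviz.models.Node / Conversation.
root = SimpleNamespace(id="a", parent=None, children_nodes=[])
child = SimpleNamespace(id="b", parent=root, children_nodes=[])
orphan = SimpleNamespace(id="z", parent=root, children_nodes=[])  # not reachable from the root's children
root.children_nodes.append(child)

convo = SimpleNamespace(node_mapping={"b": child, "z": orphan, "a": root})

# Roots first (depth-first), then any disconnected nodes in id order.
assert [n.id for n in _ordered_nodes(convo)] == ["a", "b", "z"]
```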

def render_conversation(
    conversation: Conversation,
    config: ConversationConfig,
    headers: AuthorHeaders,
    asset_resolver: Callable[[str], str | None] | None = None,
) -> str:
    """Render a complete conversation as markdown.

    Args:
        conversation: The conversation to render
        config: Conversation rendering configuration
        headers: Configuration for author headers
        asset_resolver: Function to resolve asset IDs to paths

    Returns:
        Complete markdown document string
    """
    use_dollar_latex = config.markdown.latex_delimiters == "dollars"
    flavor = config.markdown.flavor

    # Start with the YAML header
    markdown = render_yaml_header(conversation, config.yaml)

    # Render message nodes in a deterministic traversal order.
    for node in _ordered_nodes(conversation):
        if node.message:
            markdown += render_node(
                node,
                headers,
                use_dollar_latex,
                asset_resolver=asset_resolver,
                flavor=flavor,
            )

    return markdown
convoviz/renderers/yaml.py
ADDED

@@ -0,0 +1,119 @@
"""YAML frontmatter rendering for conversations."""

from __future__ import annotations

import re
from datetime import datetime

from convoviz.config import YAMLConfig
from convoviz.models import Conversation

_TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")


def _to_yaml_scalar(value: object) -> str:
    if value is None:
        return "null"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, datetime):
        # Frontmatter consumers generally expect ISO 8601 strings
        return f'"{value.isoformat()}"'
    if isinstance(value, str):
        if "\n" in value:
            # Multiline: use a block scalar
            indented = "\n".join(f" {line}" for line in value.splitlines())
            return f"|-\n{indented}"
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    # Fallback: stringify and quote
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _to_yaml(value: object, indent: int = 0) -> str:
    pad = " " * indent

    if isinstance(value, dict):
        lines: list[str] = []
        for k, v in value.items():
            key = str(k)
            if isinstance(v, (dict, list)):
                lines.append(f"{pad}{key}:")
                lines.append(_to_yaml(v, indent=indent + 2))
            else:
                scalar = _to_yaml_scalar(v)
                # Block scalars already include newline + indentation
                if scalar.startswith("|-"):
                    lines.append(f"{pad}{key}: {scalar.splitlines()[0]}")
                    lines.extend(f"{pad}{line}" for line in scalar.splitlines()[1:])
                else:
                    lines.append(f"{pad}{key}: {scalar}")
        return "\n".join(lines)

    if isinstance(value, list):
        lines = []
        for item in value:
            if isinstance(item, (dict, list)):
                lines.append(f"{pad}-")
                lines.append(_to_yaml(item, indent=indent + 2))
            else:
                lines.append(f"{pad}- {_to_yaml_scalar(item)}")
        return "\n".join(lines)

    return f"{pad}{_to_yaml_scalar(value)}"

def _default_tags(conversation: Conversation) -> list[str]:
    tags: list[str] = ["chatgpt"]
    tags.extend(conversation.plugins)
    # Normalize to a tag-friendly form
    normalized: list[str] = []
    for t in tags:
        t2 = _TAG_SAFE_RE.sub("-", t.strip().lower()).strip("-")
        if t2 and t2 not in normalized:
            normalized.append(t2)
    return normalized


def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
    """Render the YAML frontmatter for a conversation.

    Args:
        conversation: The conversation to render
        config: YAML configuration specifying which fields to include

    Returns:
        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
    """
    yaml_fields: dict[str, object] = {}

    if config.title:
        yaml_fields["title"] = conversation.title
    if config.tags:
        yaml_fields["tags"] = _default_tags(conversation)
    if config.chat_link:
        yaml_fields["chat_link"] = conversation.url
    if config.create_time:
        yaml_fields["create_time"] = conversation.create_time
    if config.update_time:
        yaml_fields["update_time"] = conversation.update_time
    if config.model:
        yaml_fields["model"] = conversation.model
    if config.used_plugins:
        yaml_fields["used_plugins"] = conversation.plugins
    if config.message_count:
        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
    if config.content_types:
        yaml_fields["content_types"] = conversation.content_types
    if config.custom_instructions:
        yaml_fields["custom_instructions"] = conversation.custom_instructions

    if not yaml_fields:
        return ""

    body = _to_yaml(yaml_fields)
    return f"---\n{body}\n---\n"
convoviz/utils.py
ADDED
@@ -0,0 +1,155 @@
"""Utility functions for convoviz."""

import re
from pathlib import Path


def sanitize(filename: str) -> str:
    """Sanitize a string to be safe for use as a filename.

    Replaces invalid characters with underscores, handles reserved names,
    and prevents path traversal characters.

    Args:
        filename: The string to sanitize

    Returns:
        A filename-safe string, or "untitled" if empty or invalid
    """
    # Replace invalid characters
    pattern = re.compile(r'[<>:"/\\|?*\n\r\t\f\v]+')
    result = pattern.sub("_", filename.strip())

    # Prevent path traversal
    result = result.replace("..", "_")

    # Windows reserved names
    reserved = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        "COM1",
        "COM2",
        "COM3",
        "COM4",
        "COM5",
        "COM6",
        "COM7",
        "COM8",
        "COM9",
        "LPT1",
        "LPT2",
        "LPT3",
        "LPT4",
        "LPT5",
        "LPT6",
        "LPT7",
        "LPT8",
        "LPT9",
    }
    if result.upper() in reserved:
        result = f"_{result}_"

    # Enforce length limit (255 is common for many filesystems)
    if len(result) > 255:
        result = result[:255]

    return result or "untitled"
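
A few illustrative inputs and the filenames they map to (not part of the package source):

```python
from convoviz.utils import sanitize

assert sanitize("notes: part 1/2") == "notes_ part 1_2"  # invalid characters become underscores
assert sanitize("CON") == "_CON_"                        # Windows reserved name is wrapped
assert sanitize("   ") == "untitled"                     # empty input falls back to "untitled"
```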

def validate_header(text: str) -> bool:
    """Check if text is a valid markdown header.

    Args:
        text: The text to validate

    Returns:
        True if it's a valid header (1-6 # followed by space and content)
    """
    max_header_level = 6
    if not text.startswith("#"):
        return False

    parts = text.split(maxsplit=1)
    if len(parts) < 2:
        return False

    hashes = parts[0]
    return hashes == "#" * len(hashes) and 1 <= len(hashes) <= max_header_level
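
For instance (illustrative):

```python
from convoviz.utils import validate_header

assert validate_header("## Assistant")
assert not validate_header("####### Too deep")  # more than 6 levels
assert not validate_header("#NoSpace")          # needs a space and some content
```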

def root_dir() -> Path:
    """Get the path to the convoviz package directory.

    Returns:
        Path to the package root
    """
    return Path(__file__).parent


def get_asset_path(relative_path: str) -> Path:
    """Get the absolute path to an asset file.

    Args:
        relative_path: Path relative to convoviz root (e.g., "assets/fonts/foo.ttf")

    Returns:
        Absolute Path to the asset
    """
    return root_dir() / relative_path


def font_dir() -> Path:
    """Get the path to the fonts directory.

    Returns:
        Path to the assets/fonts directory
    """
    return root_dir() / "assets" / "fonts"


def font_names() -> list[str]:
    """Get available font names.

    Returns:
        List of font names (without .ttf extension)
    """
    fonts_path = font_dir()
    if not fonts_path.exists():
        return []
    return [font.stem for font in fonts_path.glob("*.ttf")]


def font_path(font_name: str) -> Path:
    """Get the path to a font file.

    Args:
        font_name: Name of the font (without extension)

    Returns:
        Path to the font file
    """
    return font_dir() / f"{font_name}.ttf"


def default_font_path() -> Path:
    """Get the path to the default font.

    Returns:
        Path to Kalam-Regular.ttf
    """
    return font_path("Kalam-Regular")


def colormaps() -> list[str]:
    """Get available colormap names.

    Returns:
        List of colormap names from colormaps.txt
    """
    colormaps_path = root_dir() / "assets" / "colormaps.txt"
    if not colormaps_path.exists():
        return []
    with colormaps_path.open(encoding="utf-8") as f:
        return f.read().splitlines()
convoviz-0.2.12.dist-info/METADATA
ADDED

@@ -0,0 +1,148 @@
Metadata-Version: 2.4
Name: convoviz
Version: 0.2.12
Summary: Get analytics and visualizations on your ChatGPT data!
Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
Author: Mohamed Cheikh Sidiya
Author-email: Mohamed Cheikh Sidiya <mohamedcheikhsidiya77@gmail.com>
License-Expression: MIT
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Requires-Dist: matplotlib>=3.9.4
Requires-Dist: nltk>=3.9.2
Requires-Dist: orjson>=3.11.5
Requires-Dist: pillow>=11.3.0
Requires-Dist: pydantic>=2.12.5
Requires-Dist: pydantic-settings>=2.7.0
Requires-Dist: questionary>=2.1.1
Requires-Dist: rich>=14.2.0
Requires-Dist: tqdm>=4.67.1
Requires-Dist: typer>=0.21.0
Requires-Dist: wordcloud>=1.9.5
Requires-Python: >=3.12
Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
Description-Content-Type: text/markdown

# Convoviz: Visualize your entire ChatGPT data

Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds, view your prompt history graphs, and access all your custom instructions in a single location.

## Features

- **YAML Headers**: Optional and included by default.
- **Inline Images**: Media attachments rendered directly in Markdown.
- **Data Visualizations**: Word clouds, graphs, and more.

See examples [here](demo).

## How to Use

### 1. Export Your ChatGPT Data

- Sign in at [chat.openai.com](https://chat.openai.com).
- Navigate: Profile Name (bottom left) -> **Settings** -> **Data controls** -> **Export** -> **Confirm export**.
- Await the email from OpenAI and download the `.zip` file.

### 2. Install the tool

Try it without installing using uv ([astral-sh/uv](https://github.com/astral-sh/uv)):

```bash
uvx convoviz
```

Or install it with uv (recommended):

```bash
uv tool install convoviz
```

or with pipx:

```bash
pipx install convoviz
```

### 3. Run the tool

Simply run the command and follow the prompts:

```bash
convoviz
```

#### Command Line Arguments

You can provide arguments directly to skip the prompts:

```bash
convoviz --input path/to/your/export.zip --output path/to/output/folder
```

Notes:

- `--zip` / `-z` is kept as an alias for `--input` for convenience.
- You can force non-interactive mode with `--no-interactive`.

For more options, run:

```bash
convoviz --help
```

### 4. Check the Output

And that's it! After running the script, head over to the output folder to see your neatly formatted Markdown files and visualizations.

The main outputs are:

- **`Markdown/`**: one `.md` file per conversation
- **`Graphs/`**: a small set of high-signal plots, including:
  - `overview.png` (dashboard)
  - `activity_heatmap.png` (weekday × hour)
  - `daily_activity.png` / `monthly_activity.png`
  - `model_usage.png`, `conversation_lengths.png`
  - `weekday_pattern.png`, `hourly_pattern.png`, `conversation_lifetimes.png`
- **`Word-Clouds/`**: weekly/monthly/yearly word clouds
- **`custom_instructions.json`**: extracted custom instructions

## Share Your Feedback!

I hope you find this tool useful. I'm continuously looking to improve on it, but I need your help for that.

Whether you're a tech wizard or new to all this, I'd love to hear about your journey with the tool. Found a quirk? Have a suggestion? Or just want to send some good vibes? I'm all ears!

**Here's how you can share your thoughts:**

1. **GitHub Issues**: For more specific feedback or if you've stumbled upon a bug, please open an [issue](https://github.com/mohamed-chs/chatgpt-history-export-to-md/issues). This helps me track and address them effectively.

2. **GitHub Discussions**: If you just want to share your general experience, have a suggestion, or maybe a cool idea for a new feature, jump into the [discussions](https://github.com/mohamed-chs/chatgpt-history-export-to-md/discussions) page. It's a more casual space where we can chat.

And if you've had a great experience, consider giving the project a star. It keeps me motivated and helps others discover it!

## Notes

This is just a small thing I coded to help me see my convos in beautiful markdown. It was originally built with [Obsidian](https://obsidian.md/) (my go-to note-taking app) in mind, but the default output is standard Markdown.

You can choose Obsidian-flavored Markdown in the CLI to get extra features like:

- model reasoning (`reasoning_recap`, `thoughts`) rendered as collapsible `> [!NOTE]-` callouts instead of being hidden (see the example below).
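
In the generated note, such a block ends up looking roughly like this (the body text here is a placeholder):

```markdown
> [!NOTE]- AI Reasoning
> Considered a couple of approaches before settling on the final answer...
```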

I wasn't a fan of the clunky, and sometimes paid, browser extensions.

It was also a great opportunity to learn more about Python and type annotations. I had mypy, pyright, and ruff all on strict mode, 'twas fun.

It should(?) also work as a library, so you can import and use the models and functions. I need to add more documentation for that though. Feel free to reach out if you need help.
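
As a rough sketch of library use, limited to helpers that appear in this diff (treat it as illustrative rather than a documented API):

```python
from convoviz.renderers.markdown import render_obsidian_callout
from convoviz.utils import font_names, sanitize

filename = sanitize("Trip planning: Japan 2024?") + ".md"  # -> "Trip planning_ Japan 2024_.md"
callout = render_obsidian_callout("Draft thoughts...", title="Scratchpad", collapsed=False)
print(font_names()[:3])  # names of bundled fonts, e.g. ["AmaticSC-Regular", ...]
```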

### Offline / reproducible runs

Convoviz uses NLTK stopwords for word clouds. If you're offline and NLTK data isn't already installed, pre-download it once:

```bash
uv run python -c "import nltk; nltk.download('stopwords')"
```

### Bookmarklet

There's also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.