convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/analysis/graphs.py +410 -21
- convoviz/analysis/wordcloud.py +21 -1
- convoviz/assets/stopwords.txt +75 -0
- convoviz/cli.py +18 -15
- convoviz/config.py +14 -7
- convoviz/interactive.py +40 -11
- convoviz/io/assets.py +82 -0
- convoviz/io/loaders.py +54 -3
- convoviz/io/writers.py +17 -2
- convoviz/models/__init__.py +0 -4
- convoviz/models/collection.py +14 -6
- convoviz/models/conversation.py +4 -6
- convoviz/models/message.py +87 -7
- convoviz/pipeline.py +70 -24
- convoviz/renderers/markdown.py +91 -24
- convoviz/renderers/yaml.py +79 -2
- convoviz/utils.py +54 -4
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/METADATA +30 -5
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/RECORD +21 -19
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/WHEEL +1 -1
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/entry_points.txt +0 -0
convoviz/pipeline.py
CHANGED
|
@@ -5,7 +5,7 @@ from shutil import rmtree
|
|
|
5
5
|
|
|
6
6
|
from rich.console import Console
|
|
7
7
|
|
|
8
|
-
from convoviz.analysis.graphs import
|
|
8
|
+
from convoviz.analysis.graphs import generate_graphs
|
|
9
9
|
from convoviz.analysis.wordcloud import generate_wordclouds
|
|
10
10
|
from convoviz.config import ConvovizConfig
|
|
11
11
|
from convoviz.exceptions import InvalidZipError
|
|
@@ -19,6 +19,18 @@ from convoviz.io.writers import save_collection, save_custom_instructions
|
|
|
19
19
|
# Module-level Rich console; run_pipeline prints its progress messages through it.
console = Console()
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
def _safe_uri(path: Path) -> str:
    """Return a best-effort ``file://`` URI for ``path``, for printing only.

    ``Path.as_uri()`` requires an absolute path; users often provide relative
    output paths, so the path is resolved first. When no URI can be built the
    plain string form of ``path`` is returned instead of raising.

    Args:
        path: Filesystem location to present to the user. Any path-like value
            is accepted; it is coerced with ``Path()`` before resolving.

    Returns:
        A ``file://`` URI when the path can be resolved, otherwise ``str(path)``.
    """
    try:
        return Path(path).resolve().as_uri()
    except (OSError, RuntimeError, TypeError, ValueError):
        # Only the failures that building the URI can produce are swallowed:
        #   TypeError    - the value is not path-like at all
        #   OSError      - the filesystem refused to resolve the path
        #   RuntimeError - resolution hit a symlink loop (older Pythons)
        #   ValueError   - the path has no absolute form / contains a NUL byte
        # Anything else is a genuine bug and should surface.
        return str(path)
|
|
32
|
+
|
|
33
|
+
|
|
22
34
|
def run_pipeline(config: ConvovizConfig) -> None:
|
|
23
35
|
"""Run the main processing pipeline.
|
|
24
36
|
|
|
@@ -26,20 +38,32 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
26
38
|
config: Complete configuration for the pipeline
|
|
27
39
|
|
|
28
40
|
Raises:
|
|
29
|
-
InvalidZipError: If the
|
|
41
|
+
InvalidZipError: If the input is invalid
|
|
30
42
|
ConfigurationError: If configuration is incomplete
|
|
31
43
|
"""
|
|
32
|
-
if not config.
|
|
33
|
-
raise InvalidZipError("", reason="No
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if not
|
|
37
|
-
raise InvalidZipError(str(
|
|
38
|
-
|
|
39
|
-
console.print("Loading data [bold yellow]📂[/bold yellow] ...\n")
|
|
40
|
-
|
|
41
|
-
# Load
|
|
42
|
-
|
|
44
|
+
if not config.input_path:
|
|
45
|
+
raise InvalidZipError("", reason="No input path specified")
|
|
46
|
+
|
|
47
|
+
input_path = Path(config.input_path)
|
|
48
|
+
if not input_path.exists():
|
|
49
|
+
raise InvalidZipError(str(input_path), reason="File does not exist")
|
|
50
|
+
|
|
51
|
+
console.print(f"Loading data from {input_path} [bold yellow]📂[/bold yellow] ...\n")
|
|
52
|
+
|
|
53
|
+
# Load collection based on input type
|
|
54
|
+
if input_path.is_dir():
|
|
55
|
+
# Check for conversations.json inside
|
|
56
|
+
json_path = input_path / "conversations.json"
|
|
57
|
+
if not json_path.exists():
|
|
58
|
+
raise InvalidZipError(
|
|
59
|
+
str(input_path), reason="Directory must contain conversations.json"
|
|
60
|
+
)
|
|
61
|
+
collection = load_collection_from_json(json_path)
|
|
62
|
+
elif input_path.suffix == ".json":
|
|
63
|
+
collection = load_collection_from_json(input_path)
|
|
64
|
+
else:
|
|
65
|
+
# Assume zip
|
|
66
|
+
collection = load_collection_from_zip(input_path)
|
|
43
67
|
|
|
44
68
|
# Try to merge bookmarklet data if available
|
|
45
69
|
bookmarklet_json = find_latest_bookmarklet_json()
|
|
@@ -54,12 +78,34 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
54
78
|
)
|
|
55
79
|
|
|
56
80
|
output_folder = config.output_folder
|
|
57
|
-
|
|
58
|
-
# Clean and recreate output folder
|
|
59
|
-
if output_folder.exists() and output_folder.is_dir():
|
|
60
|
-
rmtree(output_folder)
|
|
61
81
|
output_folder.mkdir(parents=True, exist_ok=True)
|
|
62
82
|
|
|
83
|
+
# Clean only specific sub-directories we manage
|
|
84
|
+
managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
|
|
85
|
+
for d in managed_dirs:
|
|
86
|
+
sub_dir = output_folder / d
|
|
87
|
+
if sub_dir.exists():
|
|
88
|
+
# Never follow symlinks; just unlink them.
|
|
89
|
+
if sub_dir.is_symlink():
|
|
90
|
+
sub_dir.unlink()
|
|
91
|
+
elif sub_dir.is_dir():
|
|
92
|
+
rmtree(sub_dir)
|
|
93
|
+
else:
|
|
94
|
+
sub_dir.unlink()
|
|
95
|
+
sub_dir.mkdir(exist_ok=True)
|
|
96
|
+
|
|
97
|
+
# Clean specific files we manage
|
|
98
|
+
managed_files = ["custom_instructions.json"]
|
|
99
|
+
for f in managed_files:
|
|
100
|
+
managed_file = output_folder / f
|
|
101
|
+
if managed_file.exists():
|
|
102
|
+
if managed_file.is_symlink() or managed_file.is_file():
|
|
103
|
+
managed_file.unlink()
|
|
104
|
+
elif managed_file.is_dir():
|
|
105
|
+
rmtree(managed_file)
|
|
106
|
+
else:
|
|
107
|
+
managed_file.unlink()
|
|
108
|
+
|
|
63
109
|
# Save markdown files
|
|
64
110
|
markdown_folder = output_folder / "Markdown"
|
|
65
111
|
save_collection(
|
|
@@ -71,13 +117,13 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
71
117
|
)
|
|
72
118
|
console.print(
|
|
73
119
|
f"\nDone [bold green]✅[/bold green] ! "
|
|
74
|
-
f"Check the output [bold blue]📄[/bold blue] here: {markdown_folder
|
|
120
|
+
f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
|
|
75
121
|
)
|
|
76
122
|
|
|
77
123
|
# Generate graphs
|
|
78
124
|
graph_folder = output_folder / "Graphs"
|
|
79
125
|
graph_folder.mkdir(parents=True, exist_ok=True)
|
|
80
|
-
|
|
126
|
+
generate_graphs(
|
|
81
127
|
collection,
|
|
82
128
|
graph_folder,
|
|
83
129
|
config.graph,
|
|
@@ -85,11 +131,11 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
85
131
|
)
|
|
86
132
|
console.print(
|
|
87
133
|
f"\nDone [bold green]✅[/bold green] ! "
|
|
88
|
-
f"Check the output [bold blue]📈[/bold blue] here: {graph_folder
|
|
134
|
+
f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
|
|
89
135
|
)
|
|
90
136
|
|
|
91
137
|
# Generate word clouds
|
|
92
|
-
wordcloud_folder = output_folder / "Word
|
|
138
|
+
wordcloud_folder = output_folder / "Word-Clouds"
|
|
93
139
|
wordcloud_folder.mkdir(parents=True, exist_ok=True)
|
|
94
140
|
generate_wordclouds(
|
|
95
141
|
collection,
|
|
@@ -99,7 +145,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
99
145
|
)
|
|
100
146
|
console.print(
|
|
101
147
|
f"\nDone [bold green]✅[/bold green] ! "
|
|
102
|
-
f"Check the output [bold blue]🔡☁️[/bold blue] here: {wordcloud_folder
|
|
148
|
+
f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
|
|
103
149
|
)
|
|
104
150
|
|
|
105
151
|
# Save custom instructions
|
|
@@ -108,12 +154,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
108
154
|
save_custom_instructions(collection, instructions_path)
|
|
109
155
|
console.print(
|
|
110
156
|
f"\nDone [bold green]✅[/bold green] ! "
|
|
111
|
-
f"Check the output [bold blue]📝[/bold blue] here: {instructions_path
|
|
157
|
+
f"Check the output [bold blue]📝[/bold blue] here: {_safe_uri(instructions_path)} 🔗\n"
|
|
112
158
|
)
|
|
113
159
|
|
|
114
160
|
console.print(
|
|
115
161
|
"ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
|
|
116
|
-
f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder
|
|
162
|
+
f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
|
|
117
163
|
"I hope you enjoy the outcome 🤞.\n\n"
|
|
118
164
|
"If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
|
|
119
165
|
"➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
|
convoviz/renderers/markdown.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Markdown rendering for conversations."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
from collections.abc import Callable
|
|
4
5
|
|
|
5
6
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
7
|
+
from convoviz.exceptions import MessageContentError
|
|
6
8
|
from convoviz.models import Conversation, Node
|
|
7
9
|
from convoviz.renderers.yaml import render_yaml_header
|
|
8
10
|
|
|
@@ -79,7 +81,7 @@ def render_message_header(role: str, headers: AuthorHeaders) -> str:
|
|
|
79
81
|
return header_map.get(role, f"### {role.title()}")
|
|
80
82
|
|
|
81
83
|
|
|
82
|
-
def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
84
|
+
def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "obsidian") -> str:
|
|
83
85
|
"""Render the header section of a node.
|
|
84
86
|
|
|
85
87
|
Includes the node ID, parent link, and message author header.
|
|
@@ -87,6 +89,7 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
|
87
89
|
Args:
|
|
88
90
|
node: The node to render
|
|
89
91
|
headers: Configuration for author headers
|
|
92
|
+
flavor: Markdown flavor (obsidian, standard)
|
|
90
93
|
|
|
91
94
|
Returns:
|
|
92
95
|
The header markdown string
|
|
@@ -94,45 +97,57 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
|
94
97
|
if node.message is None:
|
|
95
98
|
return ""
|
|
96
99
|
|
|
97
|
-
|
|
100
|
+
if flavor == "standard":
|
|
101
|
+
return render_message_header(node.message.author.role, headers) + "\n"
|
|
102
|
+
|
|
103
|
+
# Obsidian flavor
|
|
104
|
+
parts = []
|
|
98
105
|
|
|
99
106
|
# Add parent link if parent has a message
|
|
100
107
|
if node.parent_node and node.parent_node.message:
|
|
101
|
-
parts.append(f"[
|
|
108
|
+
parts.append(f"[⬆️](#^{node.parent_node.id})")
|
|
102
109
|
|
|
103
|
-
|
|
110
|
+
author_header = render_message_header(node.message.author.role, headers)
|
|
111
|
+
parts.append(f"{author_header} ^{node.id}")
|
|
104
112
|
|
|
105
113
|
return "\n".join(parts) + "\n"
|
|
106
114
|
|
|
107
115
|
|
|
108
|
-
def render_node_footer(node: Node) -> str:
|
|
116
|
+
def render_node_footer(node: Node, flavor: str = "obsidian") -> str:
    """Build the navigation footer that links a node to its children.

    Args:
        node: The node whose children should be linked
        flavor: Markdown flavor (obsidian, standard)

    Returns:
        An empty string for the "standard" flavor or for a node without
        children; otherwise a line of block-reference links, one per child,
        wrapped in leading and trailing newlines
    """
    # The standard flavor carries no block-reference navigation at all.
    if flavor == "standard":
        return ""

    children = node.children_nodes
    if not children:
        return ""

    if len(children) == 1:
        # A single continuation gets a bare arrow without a number.
        only_child = children[0]
        body = f"[⬇️](#^{only_child.id})"
    else:
        # Branch point: number the alternatives starting from 1.
        numbered = (
            f"[{position} ⬇️](#^{child.id})"
            for position, child in enumerate(children, start=1)
        )
        body = " | ".join(numbered)

    return f"\n{body}\n"
|
|
127
134
|
|
|
128
135
|
|
|
129
|
-
def render_node(
|
|
136
|
+
def render_node(
|
|
137
|
+
node: Node,
|
|
138
|
+
headers: AuthorHeaders,
|
|
139
|
+
use_dollar_latex: bool = False,
|
|
140
|
+
asset_resolver: Callable[[str], str | None] | None = None,
|
|
141
|
+
flavor: str = "obsidian",
|
|
142
|
+
) -> str:
|
|
130
143
|
"""Render a complete node as markdown.
|
|
131
144
|
|
|
132
145
|
Args:
|
|
133
146
|
node: The node to render
|
|
134
147
|
headers: Configuration for author headers
|
|
135
148
|
use_dollar_latex: Whether to convert LaTeX delimiters to dollars
|
|
149
|
+
asset_resolver: Function to resolve asset IDs to paths
|
|
150
|
+
flavor: Markdown flavor (obsidian, standard)
|
|
136
151
|
|
|
137
152
|
Returns:
|
|
138
153
|
Complete markdown string for the node
|
|
@@ -140,24 +155,72 @@ def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = Fal
|
|
|
140
155
|
if node.message is None:
|
|
141
156
|
return ""
|
|
142
157
|
|
|
143
|
-
|
|
158
|
+
if node.message.is_hidden:
|
|
159
|
+
return ""
|
|
160
|
+
|
|
161
|
+
header = render_node_header(node, headers, flavor=flavor)
|
|
144
162
|
|
|
145
163
|
# Get and process content
|
|
146
164
|
try:
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
165
|
+
text = node.message.text
|
|
166
|
+
except MessageContentError:
|
|
167
|
+
# Some message types only contain non-text parts; those still may have images.
|
|
168
|
+
text = ""
|
|
169
|
+
|
|
170
|
+
content = close_code_blocks(text)
|
|
171
|
+
content = f"\n{content}\n" if content else ""
|
|
172
|
+
if use_dollar_latex:
|
|
173
|
+
content = replace_latex_delimiters(content)
|
|
174
|
+
|
|
175
|
+
# Append images if resolver is provided and images exist
|
|
176
|
+
if asset_resolver and node.message.images:
|
|
177
|
+
for image_id in node.message.images:
|
|
178
|
+
rel_path = asset_resolver(image_id)
|
|
179
|
+
if rel_path:
|
|
180
|
+
# Using standard markdown image syntax.
|
|
181
|
+
# Obsidian handles this well.
|
|
182
|
+
content += f"\n\n"
|
|
183
|
+
|
|
184
|
+
footer = render_node_footer(node, flavor=flavor)
|
|
155
185
|
|
|
156
186
|
return f"\n{header}{content}{footer}\n---\n"
|
|
157
187
|
|
|
158
188
|
|
|
189
|
+
def _ordered_nodes(conversation: Conversation) -> list[Node]:
    """Return nodes in a deterministic depth-first (pre-order) traversal order.

    ChatGPT exports store nodes in a mapping; dict iteration order is not a
    reliable semantic ordering. For markdown output, we traverse from roots.

    The walk uses an explicit stack instead of recursion: a conversation adds
    one level of nesting per message, so a recursive walk over a long chat
    could exceed the interpreter's recursion limit.

    Args:
        conversation: Conversation whose ``node_mapping`` values are traversed.

    Returns:
        Each visited node once (deduplicated by ``id``): roots (nodes whose
        ``parent`` is ``None``) in ``id`` order, every root followed by its
        descendants with children in their stored order, then any nodes not
        reachable from a root, also in ``id`` order.
    """
    nodes_by_id = sorted(conversation.node_mapping.values(), key=lambda n: n.id)

    visited: set[str] = set()
    ordered: list[Node] = []

    def walk(start: Node) -> None:
        """Append ``start`` and its unvisited descendants in pre-order."""
        stack = [start]
        while stack:
            node = stack.pop()
            if node.id in visited:
                continue
            visited.add(node.id)
            ordered.append(node)
            # Push children reversed so the first child is popped (visited) first,
            # matching the order a recursive traversal would produce.
            stack.extend(reversed(node.children_nodes))

    for node in nodes_by_id:
        if node.parent is None:
            walk(node)

    # Include any disconnected/orphan nodes deterministically at the end.
    for node in nodes_by_id:
        walk(node)

    return ordered
|
|
217
|
+
|
|
218
|
+
|
|
159
219
|
def render_conversation(
|
|
160
|
-
conversation: Conversation,
|
|
220
|
+
conversation: Conversation,
|
|
221
|
+
config: ConversationConfig,
|
|
222
|
+
headers: AuthorHeaders,
|
|
223
|
+
asset_resolver: Callable[[str], str | None] | None = None,
|
|
161
224
|
) -> str:
|
|
162
225
|
"""Render a complete conversation as markdown.
|
|
163
226
|
|
|
@@ -165,18 +228,22 @@ def render_conversation(
|
|
|
165
228
|
conversation: The conversation to render
|
|
166
229
|
config: Conversation rendering configuration
|
|
167
230
|
headers: Configuration for author headers
|
|
231
|
+
asset_resolver: Function to resolve asset IDs to paths
|
|
168
232
|
|
|
169
233
|
Returns:
|
|
170
234
|
Complete markdown document string
|
|
171
235
|
"""
|
|
172
236
|
use_dollar_latex = config.markdown.latex_delimiters == "dollars"
|
|
237
|
+
flavor = config.markdown.flavor
|
|
173
238
|
|
|
174
239
|
# Start with YAML header
|
|
175
240
|
markdown = render_yaml_header(conversation, config.yaml)
|
|
176
241
|
|
|
177
|
-
# Render
|
|
178
|
-
for node in conversation
|
|
242
|
+
# Render message nodes in a deterministic traversal order.
|
|
243
|
+
for node in _ordered_nodes(conversation):
|
|
179
244
|
if node.message:
|
|
180
|
-
markdown += render_node(
|
|
245
|
+
markdown += render_node(
|
|
246
|
+
node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
|
|
247
|
+
)
|
|
181
248
|
|
|
182
249
|
return markdown
|
convoviz/renderers/yaml.py
CHANGED
|
@@ -1,8 +1,83 @@
|
|
|
1
1
|
"""YAML frontmatter rendering for conversations."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
3
8
|
from convoviz.config import YAMLConfig
|
|
4
9
|
from convoviz.models import Conversation
|
|
5
10
|
|
|
11
|
+
# Matches each run of characters other than lowercase ASCII letters, digits,
# "/", "_" and "-"; _default_tags substitutes "-" for every such run.
_TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _to_yaml_scalar(value: object) -> str:
    """Render one Python value as a YAML scalar.

    Args:
        value: The value to render. ``None``, ``bool``, numbers, ``datetime``
            and ``str`` are handled explicitly; anything else is stringified.

    Returns:
        ``null`` / ``true`` / ``false`` / a bare number / ``.nan`` / ``.inf`` /
        ``-.inf``, a double-quoted string (ISO 8601 for datetimes), or - for a
        string containing a newline - a ``|-`` block scalar whose content
        lines follow the header, separated by newlines.
    """
    import math  # local import: only needed for the non-finite float check

    if value is None:
        return "null"
    if isinstance(value, bool):
        # bool is a subclass of int, so it has to be tested before the numbers.
        return "true" if value else "false"
    if isinstance(value, float) and not math.isfinite(value):
        # Python prints "nan"/"inf", which YAML would read back as plain
        # strings; YAML's own spellings start with a dot.
        if math.isnan(value):
            return ".nan"
        return ".inf" if value > 0 else "-.inf"
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, datetime):
        # Frontmatter consumers generally expect ISO 8601 strings
        return f'"{value.isoformat()}"'
    if isinstance(value, str) and "\n" in value:
        # Multiline: use a block scalar
        indented = "\n".join(f" {line}" for line in value.splitlines())
        return f"|-\n{indented}"

    # Single-line string, or fallback for any other type: stringify and quote.
    text = value if isinstance(value, str) else str(value)
    escaped = (
        text.replace("\\", "\\\\")
        .replace('"', '\\"')
        # A raw line break inside a double-quoted scalar is folded by YAML
        # parsers, so the value would not round-trip; emit escapes instead.
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return f'"{escaped}"'
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _to_yaml(value: object, indent: int = 0) -> str:
    """Render a nested structure of dicts, lists and scalars as YAML text.

    Args:
        value: A dict, a list, or any value ``_to_yaml_scalar`` accepts.
        indent: Number of spaces every emitted line is indented by. Nested
            containers are rendered two spaces deeper than their parent.

    Returns:
        The YAML lines joined with newlines, without a trailing newline.
        Dict keys are emitted via ``str()`` and are not quoted.
    """
    pad = " " * indent

    def entry(prefix: str, item: object) -> list[str]:
        """Render ``item`` after ``prefix`` ("key: ", "- " or "") as lines."""
        if isinstance(item, (dict, list)):
            if not item:
                # An empty container must be spelled out in flow style; a bare
                # "key:" would be read back as null.
                return [f"{pad}{prefix}{'{}' if isinstance(item, dict) else '[]'}"]
            return [f"{pad}{prefix.rstrip()}", _to_yaml(item, indent=indent + 2)]

        scalar = _to_yaml_scalar(item)
        if scalar.startswith("|-"):
            # Block scalars already carry their own indentation; add this
            # level's padding so the content stays nested under its parent.
            head, *tail = scalar.splitlines()
            return [f"{pad}{prefix}{head}", *(f"{pad}{line}" for line in tail)]
        return [f"{pad}{prefix}{scalar}"]

    if isinstance(value, dict):
        return "\n".join(
            line for key, item in value.items() for line in entry(f"{key!s}: ", item)
        )

    if isinstance(value, list):
        return "\n".join(line for item in value for line in entry("- ", item))

    return "\n".join(entry("", value))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _default_tags(conversation: Conversation) -> list[str]:
    """Build the default frontmatter tags for a conversation.

    The list always starts from "chatgpt" and is followed by the entries of
    ``conversation.plugins``. Each candidate is stripped, lowercased, has
    every run matched by ``_TAG_SAFE_RE`` replaced with "-", and loses any
    leading/trailing "-". Empty results and repeats are dropped; first
    occurrence order is kept.

    Args:
        conversation: Conversation whose ``plugins`` contribute extra tags.

    Returns:
        The normalized, de-duplicated tag list.
    """
    candidates = ["chatgpt", *conversation.plugins]
    cleaned = (
        _TAG_SAFE_RE.sub("-", raw.strip().lower()).strip("-") for raw in candidates
    )
    # dict.fromkeys keeps the first occurrence of each tag, in order.
    return list(dict.fromkeys(tag for tag in cleaned if tag))
|
|
80
|
+
|
|
6
81
|
|
|
7
82
|
def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
|
|
8
83
|
"""Render the YAML frontmatter for a conversation.
|
|
@@ -18,6 +93,8 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
|
|
|
18
93
|
|
|
19
94
|
if config.title:
|
|
20
95
|
yaml_fields["title"] = conversation.title
|
|
96
|
+
if config.tags:
|
|
97
|
+
yaml_fields["tags"] = _default_tags(conversation)
|
|
21
98
|
if config.chat_link:
|
|
22
99
|
yaml_fields["chat_link"] = conversation.url
|
|
23
100
|
if config.create_time:
|
|
@@ -38,5 +115,5 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
|
|
|
38
115
|
if not yaml_fields:
|
|
39
116
|
return ""
|
|
40
117
|
|
|
41
|
-
|
|
42
|
-
return f"---\n{
|
|
118
|
+
body = _to_yaml(yaml_fields)
|
|
119
|
+
return f"---\n{body}\n---\n"
|
convoviz/utils.py
CHANGED
|
@@ -7,16 +7,54 @@ from pathlib import Path
|
|
|
7
7
|
def sanitize(filename: str) -> str:
    """Sanitize a string to be safe for use as a filename.

    Replaces invalid characters with underscores, handles reserved names,
    and prevents path traversal characters.

    Args:
        filename: The string to sanitize

    Returns:
        A filename-safe string of at most 255 characters, or "untitled" if
        the result is empty
    """
    # Replace characters that are invalid in file names. The control range
    # \x00-\x1f covers newlines/tabs as well as NUL, which no common
    # filesystem accepts.
    result = re.sub(r'[<>:"/\\|?*\x00-\x1f]+', "_", filename.strip())

    # Prevent path traversal
    result = result.replace("..", "_")

    # Windows reserved device names. They stay reserved when followed by an
    # extension ("CON.txt"), so only the part before the first dot is checked.
    reserved = {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        *(f"COM{n}" for n in range(1, 10)),
        *(f"LPT{n}" for n in range(1, 10)),
    }
    stem = result.split(".", 1)[0].rstrip(" ")
    if stem.upper() in reserved:
        result = f"_{result}_"

    # Enforce length limit (255 is common for many filesystems)
    if len(result) > 255:
        result = result[:255]

    return result or "untitled"
|
|
21
59
|
|
|
22
60
|
|
|
@@ -50,6 +88,18 @@ def root_dir() -> Path:
|
|
|
50
88
|
return Path(__file__).parent
|
|
51
89
|
|
|
52
90
|
|
|
91
|
+
def get_asset_path(relative_path: str) -> Path:
    """Locate an asset file inside the convoviz package.

    Args:
        relative_path: Path relative to convoviz root (e.g., "assets/fonts/foo.ttf")

    Returns:
        ``root_dir()`` joined with ``relative_path``
    """
    package_root = root_dir()
    return package_root / relative_path
|
|
101
|
+
|
|
102
|
+
|
|
53
103
|
def font_dir() -> Path:
|
|
54
104
|
"""Get the path to the fonts directory.
|
|
55
105
|
|
|
@@ -87,9 +137,9 @@ def default_font_path() -> Path:
|
|
|
87
137
|
"""Get the path to the default font.
|
|
88
138
|
|
|
89
139
|
Returns:
|
|
90
|
-
Path to
|
|
140
|
+
Path to Kalam-Regular.ttf
|
|
91
141
|
"""
|
|
92
|
-
return font_path("
|
|
142
|
+
return font_path("Kalam-Regular")
|
|
93
143
|
|
|
94
144
|
|
|
95
145
|
def colormaps() -> list[str]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: convoviz
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Get analytics and visualizations on your ChatGPT data!
|
|
5
5
|
Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
|
|
6
6
|
Author: Mohamed Cheikh Sidiya
|
|
@@ -24,7 +24,7 @@ Requires-Python: >=3.12
|
|
|
24
24
|
Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
|
|
27
|
-
# Convoviz 📊: Visualize your entire ChatGPT data
|
|
27
|
+
# Convoviz 📊: Visualize your entire ChatGPT data
|
|
28
28
|
|
|
29
29
|
Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.
|
|
30
30
|
|
|
@@ -68,7 +68,7 @@ or pipx:
|
|
|
68
68
|
pipx install convoviz
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
-
### 3. Run the
|
|
71
|
+
### 3. Run the tool 🏃♂️
|
|
72
72
|
|
|
73
73
|
Simply run the command and follow the prompts:
|
|
74
74
|
|
|
@@ -81,9 +81,18 @@ convoviz
|
|
|
81
81
|
You can provide arguments directly to skip the prompts:
|
|
82
82
|
|
|
83
83
|
```bash
|
|
84
|
-
convoviz --
|
|
84
|
+
convoviz --input path/to/your/export.zip --output path/to/output/folder
|
|
85
85
|
```
|
|
86
86
|
|
|
87
|
+
Inputs can be any of:
|
|
88
|
+
- A ChatGPT export ZIP (downloaded from OpenAI)
|
|
89
|
+
- An extracted export directory containing `conversations.json`
|
|
90
|
+
- A `conversations.json` file directly
|
|
91
|
+
|
|
92
|
+
Notes:
|
|
93
|
+
- `--zip` / `-z` is kept as an alias for `--input` for convenience.
|
|
94
|
+
- You can force non-interactive mode with `--no-interactive`.
|
|
95
|
+
|
|
87
96
|
For more options, run:
|
|
88
97
|
|
|
89
98
|
```bash
|
|
@@ -118,4 +127,20 @@ It was also a great opportunity to learn more about Python and type annotations.
|
|
|
118
127
|
|
|
119
128
|
It should(?) also work as a library, so you can import and use the models and functions. I need to add more documentation for that, though. Feel free to reach out if you need help.
|
|
120
129
|
|
|
121
|
-
|
|
130
|
+
### Offline / reproducible runs
|
|
131
|
+
|
|
132
|
+
Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
python -c "import nltk; nltk.download('stopwords')"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
If you’re using `uv` without a global install, you can run:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
uv run python -c "import nltk; nltk.download('stopwords')"
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Bookmarklet
|
|
145
|
+
|
|
146
|
+
There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
convoviz/__init__.py,sha256=bQLCHO2U9EyMTGqNgsYiCtBQKTKNj4iIM3-TwIkrnRY,612
|
|
2
2
|
convoviz/__main__.py,sha256=1qiGW13_SgL7wJi8iioIN-AAHGkNGnEl5q_RcPUrI0s,143
|
|
3
3
|
convoviz/analysis/__init__.py,sha256=FxgH5JJpyypiLJpMQn_HlM51jnb8lQdP63_C_W3Dlx4,241
|
|
4
|
-
convoviz/analysis/graphs.py,sha256=
|
|
5
|
-
convoviz/analysis/wordcloud.py,sha256=
|
|
4
|
+
convoviz/analysis/graphs.py,sha256=3CV4yhFwfUYb5-CXtq4D-r_vf0jn5cxDXwaPu1P8M8g,14928
|
|
5
|
+
convoviz/analysis/wordcloud.py,sha256=ZnbA_-rcXHwXIny_xbudfJDQbIuPT7urNFfHcx6QWxQ,4673
|
|
6
6
|
convoviz/assets/colormaps.txt,sha256=59TSGz428AxY14AEvymAH2IJ2RT9Mlp7Sy0N12NEdXQ,108
|
|
7
7
|
convoviz/assets/fonts/AmaticSC-Regular.ttf,sha256=83clh7a3urnTLud0_yZofuIb6BdyC2LMI9jhE6G2LvU,142696
|
|
8
8
|
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf,sha256=fnrj5_N_SlY2Lj3Ehqz5aKECPZVJlJAflgsOU94_qIM,37756
|
|
@@ -35,25 +35,27 @@ convoviz/assets/fonts/YsabeauOffice-Regular.ttf,sha256=RnW2erC5p6s2YxvWmwa019hYT
|
|
|
35
35
|
convoviz/assets/fonts/YsabeauSC-Regular.ttf,sha256=G4lkq34KKqZOaoomtxFz_KlwVmxg56UbFXFnWgijkDM,116980
|
|
36
36
|
convoviz/assets/fonts/YsabeauSC-Thin.ttf,sha256=hZGOZNTRrxbiUPE2VDeLbtnaRwkMOBaVQbq7Fwx-34c,116932
|
|
37
37
|
convoviz/assets/fonts/Zeyada-Regular.ttf,sha256=fKhkrp9VHt_3Aw8JfkfkPeC2j3CilLWuPUudzBeawPQ,57468
|
|
38
|
-
convoviz/
|
|
39
|
-
convoviz/
|
|
38
|
+
convoviz/assets/stopwords.txt,sha256=7_ywpxsKYOj3U5CZTh9lP4GqbbkZLMabSOjKAXFk6Wc,539
|
|
39
|
+
convoviz/cli.py,sha256=8HNn-6kmDN8ECb0BspvjeGa_636SQPDffpM0yINgNII,3463
|
|
40
|
+
convoviz/config.py,sha256=EbkMl5DNcExJiUSVB8Yg1cftpduMp45-Qabg6DBFoKQ,2724
|
|
40
41
|
convoviz/exceptions.py,sha256=bQpIKls48uOQpagEJAxpXf5LF7QoagRRfbD0MjWC7Ak,1476
|
|
41
|
-
convoviz/interactive.py,sha256=
|
|
42
|
+
convoviz/interactive.py,sha256=hnla88hUqRjN-YV6zcauohMwxgQwbV3Y0UMT-FfXEMw,6350
|
|
42
43
|
convoviz/io/__init__.py,sha256=y70TYypJ36_kaEA04E2wa1EDaKQVjprKItoKR6MMs4M,471
|
|
43
|
-
convoviz/io/
|
|
44
|
-
convoviz/io/
|
|
45
|
-
convoviz/
|
|
46
|
-
convoviz/models/
|
|
47
|
-
convoviz/models/
|
|
48
|
-
convoviz/models/
|
|
44
|
+
convoviz/io/assets.py,sha256=BykidWJG6OQAgbVfUByQ3RLTrldzpZ_NeM7HV3a5Tig,2333
|
|
45
|
+
convoviz/io/loaders.py,sha256=RuGiGzpyNcgwTxOM-m2ehhyh2mP1-k1YamK8-VynR3g,5713
|
|
46
|
+
convoviz/io/writers.py,sha256=KaLr0f2F2Pw5XOoQKMA75IeQYXUTT4WbS-HAqRxsp3c,3494
|
|
47
|
+
convoviz/models/__init__.py,sha256=6gAfrk6KJT2QxdvX_v15mUdfIqEw1xKxwQlKSfyA5eI,532
|
|
48
|
+
convoviz/models/collection.py,sha256=L658yKMNC6IZrfxYxZBe-oO9COP_bzVfRznnNon7tfU,4467
|
|
49
|
+
convoviz/models/conversation.py,sha256=5Xw1po0N92AdgpnbwFd6Ukb_io34OzSfDGeYDwyuPDk,5123
|
|
50
|
+
convoviz/models/message.py,sha256=mVnaUG6hypz92Oz3OgFAK1uuTgH3ZOJAWsFiCpLYneY,5459
|
|
49
51
|
convoviz/models/node.py,sha256=1vBAtKVscYsUBDnKAOyLxuZaK9KoVF1dFXiKXRHxUnY,1946
|
|
50
|
-
convoviz/pipeline.py,sha256=
|
|
52
|
+
convoviz/pipeline.py,sha256=Mwg3Xqazk5PrsIHxhVajtWbfq4PgFlIGVHWq8BsW0U0,5750
|
|
51
53
|
convoviz/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
54
|
convoviz/renderers/__init__.py,sha256=IQgwD9NqtUgbS9zwyPBNZbBIZcFrbZ9C7WMAV-X3Xdg,261
|
|
53
|
-
convoviz/renderers/markdown.py,sha256=
|
|
54
|
-
convoviz/renderers/yaml.py,sha256=
|
|
55
|
-
convoviz/utils.py,sha256=
|
|
56
|
-
convoviz-0.2.
|
|
57
|
-
convoviz-0.2.
|
|
58
|
-
convoviz-0.2.
|
|
59
|
-
convoviz-0.2.
|
|
55
|
+
convoviz/renderers/markdown.py,sha256=kBeHqDH8yEiVN0N03dUUSJ-JbmdRmdoiC863NI83gXo,7211
|
|
56
|
+
convoviz/renderers/yaml.py,sha256=XG1s4VhDdx-TiqekTkgED87RZ1lVQ7IwrbA-sZHrs7k,4056
|
|
57
|
+
convoviz/utils.py,sha256=IQEKYHhWOnYxlr4GwAHoquG0BXTlVRkORL80oUSaIeQ,3417
|
|
58
|
+
convoviz-0.2.4.dist-info/WHEEL,sha256=eycQt0QpYmJMLKpE3X9iDk8R04v2ZF0x82ogq-zP6bQ,79
|
|
59
|
+
convoviz-0.2.4.dist-info/entry_points.txt,sha256=HYsmsw5vt36yYHB05uVU48AK2WLkcwshly7m7KKuZMY,54
|
|
60
|
+
convoviz-0.2.4.dist-info/METADATA,sha256=DuwAsh5Sei0B-5Q-cMCzcXH0bUO9ZyzvbAfoV8Ury2M,5309
|
|
61
|
+
convoviz-0.2.4.dist-info/RECORD,,
|