convoviz 0.4.5__tar.gz → 0.4.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {convoviz-0.4.5 → convoviz-0.4.7}/PKG-INFO +3 -3
- {convoviz-0.4.5 → convoviz-0.4.7}/README.md +2 -2
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/config.py +3 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/interactive.py +16 -29
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/conversation.py +18 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/message.py +81 -5
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/renderers/markdown.py +96 -3
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/renderers/yaml.py +4 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/pyproject.toml +1 -1
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/__init__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/__main__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/analysis/__init__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/analysis/graphs.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/analysis/wordcloud.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/colormaps.txt +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/assets/stopwords.txt +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/cli.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/exceptions.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/io/__init__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/io/assets.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/io/loaders.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/io/writers.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/logging_config.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/__init__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/collection.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/node.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/pipeline.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/py.typed +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/renderers/__init__.py +0 -0
- {convoviz-0.4.5 → convoviz-0.4.7}/convoviz/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: convoviz
|
|
3
|
-
Version: 0.4.5
|
|
3
|
+
Version: 0.4.7
|
|
4
4
|
Summary: Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs.
|
|
5
5
|
Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
|
|
6
6
|
Author: Mohamed Cheikh Sidiya
|
|
@@ -170,6 +170,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
|
|
|
170
170
|
- 📝 Neatly formatted Markdown files
|
|
171
171
|
- 📊 Visualizations and graphs
|
|
172
172
|
|
|
173
|
+
If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
174
|
+
|
|
173
175
|

|
|
174
176
|
|
|
175
177
|
---
|
|
@@ -182,8 +184,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
|
|
|
182
184
|
|
|
183
185
|
👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
|
|
184
186
|
|
|
185
|
-
And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
186
|
-
|
|
187
187
|
---
|
|
188
188
|
|
|
189
189
|
## 🤝 Contributing
|
|
@@ -145,6 +145,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
|
|
|
145
145
|
- 📝 Neatly formatted Markdown files
|
|
146
146
|
- 📊 Visualizations and graphs
|
|
147
147
|
|
|
148
|
+
If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
149
|
+
|
|
148
150
|

|
|
149
151
|
|
|
150
152
|
---
|
|
@@ -157,8 +159,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
|
|
|
157
159
|
|
|
158
160
|
👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
|
|
159
161
|
|
|
160
|
-
And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
161
|
-
|
|
162
162
|
---
|
|
163
163
|
|
|
164
164
|
## 🤝 Contributing
|
|
@@ -54,7 +54,10 @@ class YAMLConfig(BaseModel):
|
|
|
54
54
|
used_plugins: bool = False
|
|
55
55
|
message_count: bool = True
|
|
56
56
|
content_types: bool = False
|
|
57
|
+
content_types: bool = False
|
|
57
58
|
custom_instructions: bool = False
|
|
59
|
+
is_starred: bool = False
|
|
60
|
+
voice: bool = False
|
|
58
61
|
|
|
59
62
|
|
|
60
63
|
class ConversationConfig(BaseModel):
|
|
@@ -8,10 +8,16 @@ from questionary import Choice, Style, checkbox, select
|
|
|
8
8
|
from questionary import path as qst_path
|
|
9
9
|
from questionary import text as qst_text
|
|
10
10
|
|
|
11
|
-
from convoviz.config import ConvovizConfig, OutputKind, get_default_config
|
|
11
|
+
from convoviz.config import ConvovizConfig, OutputKind, YAMLConfig, get_default_config
|
|
12
12
|
from convoviz.io.loaders import find_latest_zip, validate_zip
|
|
13
13
|
from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
|
|
14
14
|
|
|
15
|
+
OUTPUT_TITLES = {
|
|
16
|
+
OutputKind.MARKDOWN: "Markdown conversations",
|
|
17
|
+
OutputKind.GRAPHS: "Graphs (usage analytics)",
|
|
18
|
+
OutputKind.WORDCLOUDS: "Word clouds",
|
|
19
|
+
}
|
|
20
|
+
|
|
15
21
|
CUSTOM_STYLE = Style(
|
|
16
22
|
[
|
|
17
23
|
("qmark", "fg:#34eb9b bold"),
|
|
@@ -118,9 +124,12 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
118
124
|
|
|
119
125
|
# Prompt for outputs to generate
|
|
120
126
|
output_choices = [
|
|
121
|
-
Choice(
|
|
122
|
-
|
|
123
|
-
|
|
127
|
+
Choice(
|
|
128
|
+
title=OUTPUT_TITLES.get(kind, kind.value.title()),
|
|
129
|
+
value=kind,
|
|
130
|
+
checked=kind in config.outputs,
|
|
131
|
+
)
|
|
132
|
+
for kind in OutputKind
|
|
124
133
|
]
|
|
125
134
|
|
|
126
135
|
selected_outputs: list[OutputKind] = _ask_or_cancel(
|
|
@@ -172,20 +181,9 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
172
181
|
|
|
173
182
|
# Prompt for YAML headers
|
|
174
183
|
yaml_config = config.conversation.yaml
|
|
184
|
+
yaml_fields = list(YAMLConfig.model_fields.keys())
|
|
175
185
|
yaml_choices = [
|
|
176
|
-
Choice(title=field, checked=getattr(yaml_config, field))
|
|
177
|
-
for field in [
|
|
178
|
-
"title",
|
|
179
|
-
"tags",
|
|
180
|
-
"chat_link",
|
|
181
|
-
"create_time",
|
|
182
|
-
"update_time",
|
|
183
|
-
"model",
|
|
184
|
-
"used_plugins",
|
|
185
|
-
"message_count",
|
|
186
|
-
"content_types",
|
|
187
|
-
"custom_instructions",
|
|
188
|
-
]
|
|
186
|
+
Choice(title=field, checked=getattr(yaml_config, field)) for field in yaml_fields
|
|
189
187
|
]
|
|
190
188
|
|
|
191
189
|
selected: list[str] = _ask_or_cancel(
|
|
@@ -197,18 +195,7 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
197
195
|
)
|
|
198
196
|
|
|
199
197
|
selected_set = set(selected)
|
|
200
|
-
for field_name in
|
|
201
|
-
"title",
|
|
202
|
-
"tags",
|
|
203
|
-
"chat_link",
|
|
204
|
-
"create_time",
|
|
205
|
-
"update_time",
|
|
206
|
-
"model",
|
|
207
|
-
"used_plugins",
|
|
208
|
-
"message_count",
|
|
209
|
-
"content_types",
|
|
210
|
-
"custom_instructions",
|
|
211
|
-
]:
|
|
198
|
+
for field_name in yaml_fields:
|
|
212
199
|
setattr(yaml_config, field_name, field_name in selected_set)
|
|
213
200
|
|
|
214
201
|
# Prompt for wordcloud settings (only if wordclouds output is selected)
|
|
@@ -24,6 +24,8 @@ class Conversation(BaseModel):
|
|
|
24
24
|
mapping: dict[str, Node]
|
|
25
25
|
moderation_results: list[Any] = Field(default_factory=list)
|
|
26
26
|
current_node: str
|
|
27
|
+
is_starred: bool | None = None
|
|
28
|
+
voice: str | dict[str, Any] | None = None
|
|
27
29
|
plugin_ids: list[str] | None = None
|
|
28
30
|
conversation_id: str
|
|
29
31
|
conversation_template_id: str | None = None
|
|
@@ -156,3 +158,19 @@ class Conversation(BaseModel):
|
|
|
156
158
|
def year_start(self) -> datetime:
|
|
157
159
|
"""Get January 1st of the year this conversation was created."""
|
|
158
160
|
return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
164
|
+
"""Aggregate citation metadata from all messages in the conversation.
|
|
165
|
+
|
|
166
|
+
Traverses all nodes (including hidden ones) to collect embedded citation definitions
|
|
167
|
+
from tool outputs (e.g. search results).
|
|
168
|
+
"""
|
|
169
|
+
aggregated_map = {}
|
|
170
|
+
for node in self.all_message_nodes:
|
|
171
|
+
if not node.message:
|
|
172
|
+
continue
|
|
173
|
+
# Extract citations from message parts
|
|
174
|
+
if hasattr(node.message, "internal_citation_map"):
|
|
175
|
+
aggregated_map.update(node.message.internal_citation_map)
|
|
176
|
+
return aggregated_map
|
|
@@ -46,6 +46,9 @@ class MessageMetadata(BaseModel):
|
|
|
46
46
|
is_user_system_message: bool | None = None
|
|
47
47
|
is_visually_hidden_from_conversation: bool | None = None
|
|
48
48
|
user_context_message_data: dict[str, Any] | None = None
|
|
49
|
+
citations: list[dict[str, Any]] | None = None
|
|
50
|
+
search_result_groups: list[dict[str, Any]] | None = None
|
|
51
|
+
content_references: list[dict[str, Any]] | None = None
|
|
49
52
|
|
|
50
53
|
model_config = ConfigDict(protected_namespaces=())
|
|
51
54
|
|
|
@@ -179,11 +182,12 @@ class Message(BaseModel):
|
|
|
179
182
|
1. It is empty (no text, no images).
|
|
180
183
|
2. Explicitly marked as visually hidden.
|
|
181
184
|
3. It is an internal system message (not custom instructions).
|
|
182
|
-
4. It is a browser tool output (intermediate search steps).
|
|
185
|
+
4. It is a browser tool output (intermediate search steps) UNLESS it is a tether_quote.
|
|
183
186
|
5. It is an assistant message targeting a tool (internal call).
|
|
184
187
|
6. It is code interpreter input (content_type="code").
|
|
185
|
-
7. It is browsing status (
|
|
186
|
-
8. It is
|
|
188
|
+
7. It is browsing status, internal reasoning (o1/o3), or massive web scraps (sonic_webpage).
|
|
189
|
+
8. It is a redundant DALL-E textual status update.
|
|
190
|
+
9. It is from internal bio (memory) or web.run orchestration tools.
|
|
187
191
|
"""
|
|
188
192
|
if self.is_empty:
|
|
189
193
|
return True
|
|
@@ -197,10 +201,29 @@ class Message(BaseModel):
|
|
|
197
201
|
# Only show if explicitly marked as user system message (Custom Instructions)
|
|
198
202
|
return not self.metadata.is_user_system_message
|
|
199
203
|
|
|
200
|
-
# Hide
|
|
201
|
-
if self.
|
|
204
|
+
# Hide sonic_webpage (massive scraped text) and system_error
|
|
205
|
+
if self.content.content_type in ("sonic_webpage", "system_error"):
|
|
202
206
|
return True
|
|
203
207
|
|
|
208
|
+
if self.author.role == "tool":
|
|
209
|
+
# Hide memory updates (bio) and internal search orchestration (web.run)
|
|
210
|
+
if self.author.name in ("bio", "web.run"):
|
|
211
|
+
return True
|
|
212
|
+
|
|
213
|
+
# Hide browser tool outputs (intermediate search steps)
|
|
214
|
+
# EXCEPTION: tether_quote (citations) should remain visible
|
|
215
|
+
if self.author.name == "browser":
|
|
216
|
+
return self.content.content_type != "tether_quote"
|
|
217
|
+
|
|
218
|
+
# Hide DALL-E textual status ("DALL·E displayed 1 images...")
|
|
219
|
+
if (
|
|
220
|
+
self.author.name == "dalle.text2im"
|
|
221
|
+
and self.content.content_type == "text"
|
|
222
|
+
# Check if it doesn't have images (just in case they attach images to text logic)
|
|
223
|
+
and not self.images
|
|
224
|
+
):
|
|
225
|
+
return True
|
|
226
|
+
|
|
204
227
|
# Hide assistant messages targeting tools (e.g., search(...), code input)
|
|
205
228
|
# recipient="all" or None means it's for the user; anything else is internal
|
|
206
229
|
if self.author.role == "assistant" and self.recipient not in ("all", None):
|
|
@@ -216,3 +239,56 @@ class Message(BaseModel):
|
|
|
216
239
|
"thoughts",
|
|
217
240
|
"reasoning_recap",
|
|
218
241
|
)
|
|
242
|
+
|
|
243
|
+
@property
|
|
244
|
+
def internal_citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
245
|
+
"""Extract a map of citation IDs to metadata from content parts.
|
|
246
|
+
|
|
247
|
+
Used for resolving embedded citations (e.g. citeturn0search18).
|
|
248
|
+
Key format: "turn{turn_index}search{ref_index}"
|
|
249
|
+
"""
|
|
250
|
+
if not self.content.parts:
|
|
251
|
+
return {}
|
|
252
|
+
|
|
253
|
+
citation_mapping = {}
|
|
254
|
+
|
|
255
|
+
# Helper to process a single search result entry
|
|
256
|
+
def process_entry(entry: dict[str, Any]) -> None:
|
|
257
|
+
ref_id = entry.get("ref_id")
|
|
258
|
+
if not ref_id:
|
|
259
|
+
return
|
|
260
|
+
# Only care about search results for now
|
|
261
|
+
if ref_id.get("ref_type") != "search":
|
|
262
|
+
return
|
|
263
|
+
|
|
264
|
+
turn_idx = ref_id.get("turn_index")
|
|
265
|
+
ref_idx = ref_id.get("ref_index")
|
|
266
|
+
|
|
267
|
+
if turn_idx is not None and ref_idx is not None:
|
|
268
|
+
# turn_idx is int, ref_idx is int
|
|
269
|
+
key = f"turn{turn_idx}search{ref_idx}"
|
|
270
|
+
citation_mapping[key] = {
|
|
271
|
+
"title": entry.get("title"),
|
|
272
|
+
"url": entry.get("url"),
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
# 1. Extract from self.content.parts
|
|
276
|
+
if self.content and self.content.parts:
|
|
277
|
+
for part in self.content.parts:
|
|
278
|
+
if isinstance(part, dict):
|
|
279
|
+
if part.get("type") == "search_result":
|
|
280
|
+
process_entry(part)
|
|
281
|
+
elif part.get("type") == "search_result_group":
|
|
282
|
+
for entry in part.get("entries", []):
|
|
283
|
+
process_entry(entry)
|
|
284
|
+
|
|
285
|
+
# 2. Extract from metadata.search_result_groups (if present)
|
|
286
|
+
if self.metadata and self.metadata.search_result_groups:
|
|
287
|
+
for group in self.metadata.search_result_groups:
|
|
288
|
+
if isinstance(group, dict):
|
|
289
|
+
# Groups might have 'entries' or be flat?
|
|
290
|
+
# Based on name 'groups', likely similar to part structure
|
|
291
|
+
for entry in group.get("entries", []):
|
|
292
|
+
process_entry(entry)
|
|
293
|
+
|
|
294
|
+
return citation_mapping
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
+
from typing import Any
|
|
5
6
|
|
|
6
7
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
7
8
|
from convoviz.exceptions import MessageContentError
|
|
@@ -9,6 +10,82 @@ from convoviz.models import Conversation, Node
|
|
|
9
10
|
from convoviz.renderers.yaml import render_yaml_header
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
def replace_citations(
|
|
14
|
+
text: str,
|
|
15
|
+
citations: list[dict[str, Any]] | None = None,
|
|
16
|
+
citation_map: dict[str, dict[str, str | None]] | None = None,
|
|
17
|
+
) -> str:
|
|
18
|
+
"""Replace citation placeholders in text with markdown links.
|
|
19
|
+
|
|
20
|
+
Supports two formats:
|
|
21
|
+
1. Tether v4 (metadata.citations): Placed at specific indices (【...】 placeholders).
|
|
22
|
+
2. Embedded (Tether v3?): Unicode markers citeturnXsearchY.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
text: The original message text
|
|
26
|
+
citations: List of tether v4 citation objects (start_ix/end_ix)
|
|
27
|
+
citation_map: Map of internal citation IDs to metadata (turnXsearchY -> {title, url})
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Text with all placeholders replaced by markdown links
|
|
31
|
+
"""
|
|
32
|
+
# 1. Handle Tether v4 (Index-based replacements)
|
|
33
|
+
if citations:
|
|
34
|
+
# Sort citations by start_ix descending to replace safely from end
|
|
35
|
+
sorted_citations = sorted(citations, key=lambda c: c.get("start_ix", 0), reverse=True)
|
|
36
|
+
|
|
37
|
+
for cit in sorted_citations:
|
|
38
|
+
start = cit.get("start_ix")
|
|
39
|
+
end = cit.get("end_ix")
|
|
40
|
+
meta = cit.get("metadata", {})
|
|
41
|
+
|
|
42
|
+
if start is None or end is None:
|
|
43
|
+
continue
|
|
44
|
+
|
|
45
|
+
replacement = _format_link(meta.get("title"), meta.get("url"))
|
|
46
|
+
|
|
47
|
+
# Only replace if strictly positive indices and bounds check
|
|
48
|
+
if 0 <= start < end <= len(text):
|
|
49
|
+
text = text[:start] + replacement + text[end:]
|
|
50
|
+
|
|
51
|
+
# 2. Handle Embedded Citations (Regex-based)
|
|
52
|
+
# Pattern: cite (key)+
|
|
53
|
+
# Codepoints: \uE200 (Start), \uE202 (Sep), \uE201 (End)
|
|
54
|
+
if citation_map is not None:
|
|
55
|
+
pattern = re.compile(r"\uE200cite((?:\uE202[a-zA-Z0-9]+)+)\uE201")
|
|
56
|
+
|
|
57
|
+
def replacer(match: re.Match) -> str:
|
|
58
|
+
# Group 1 contains string like: turn0search18turn0search3
|
|
59
|
+
# Split by separator \uE202 (first item will be empty string)
|
|
60
|
+
raw_keys = match.group(1).split("\ue202")
|
|
61
|
+
keys = [k for k in raw_keys if k]
|
|
62
|
+
|
|
63
|
+
links = []
|
|
64
|
+
for key in keys:
|
|
65
|
+
if key in citation_map:
|
|
66
|
+
data = citation_map[key]
|
|
67
|
+
link = _format_link(data.get("title"), data.get("url"))
|
|
68
|
+
if link:
|
|
69
|
+
links.append(link)
|
|
70
|
+
|
|
71
|
+
return "".join(links)
|
|
72
|
+
|
|
73
|
+
text = pattern.sub(replacer, text)
|
|
74
|
+
|
|
75
|
+
return text
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _format_link(title: str | None, url: str | None) -> str:
|
|
79
|
+
"""Format a title and URL into a concise markdown link."""
|
|
80
|
+
if title and url:
|
|
81
|
+
return f" [[{title}]({url})]"
|
|
82
|
+
elif url:
|
|
83
|
+
return f" [[Source]({url})]"
|
|
84
|
+
elif title:
|
|
85
|
+
return f" [{title}]"
|
|
86
|
+
return ""
|
|
87
|
+
|
|
88
|
+
|
|
12
89
|
def close_code_blocks(text: str) -> str:
|
|
13
90
|
"""Ensure all code blocks in the text are properly closed.
|
|
14
91
|
|
|
@@ -137,6 +214,7 @@ def render_node(
|
|
|
137
214
|
use_dollar_latex: bool = False,
|
|
138
215
|
asset_resolver: Callable[[str], str | None] | None = None,
|
|
139
216
|
flavor: str = "standard",
|
|
217
|
+
citation_map: dict[str, dict[str, str | None]] | None = None,
|
|
140
218
|
) -> str:
|
|
141
219
|
"""Render a complete node as markdown.
|
|
142
220
|
|
|
@@ -146,9 +224,7 @@ def render_node(
|
|
|
146
224
|
use_dollar_latex: Whether to convert LaTeX delimiters to dollars
|
|
147
225
|
asset_resolver: Function to resolve asset IDs to paths
|
|
148
226
|
flavor: Markdown flavor ("standard" or "obsidian")
|
|
149
|
-
|
|
150
|
-
Returns:
|
|
151
|
-
Complete markdown string for the node
|
|
227
|
+
citation_map: Global map of citations
|
|
152
228
|
"""
|
|
153
229
|
if node.message is None:
|
|
154
230
|
return ""
|
|
@@ -185,6 +261,19 @@ def render_node(
|
|
|
185
261
|
# Some message types only contain non-text parts; those still may have images.
|
|
186
262
|
text = ""
|
|
187
263
|
|
|
264
|
+
# Process citations if present (Tether v4 metadata or Embedded v3)
|
|
265
|
+
# Use global citation_map if provided, merging/falling back to local if needed.
|
|
266
|
+
# Actually, local internal map is subset of global map if we aggregated correctly.
|
|
267
|
+
# So we prefer the passed global map.
|
|
268
|
+
effective_map = citation_map or node.message.internal_citation_map
|
|
269
|
+
|
|
270
|
+
if node.message.metadata.citations or effective_map:
|
|
271
|
+
text = replace_citations(
|
|
272
|
+
text,
|
|
273
|
+
citations=node.message.metadata.citations,
|
|
274
|
+
citation_map=effective_map,
|
|
275
|
+
)
|
|
276
|
+
|
|
188
277
|
content = close_code_blocks(text)
|
|
189
278
|
content = f"\n{content}\n" if content else ""
|
|
190
279
|
if use_dollar_latex:
|
|
@@ -255,6 +344,9 @@ def render_conversation(
|
|
|
255
344
|
# Start with YAML header
|
|
256
345
|
markdown = render_yaml_header(conversation, config.yaml)
|
|
257
346
|
|
|
347
|
+
# Pre-calculate citation map for the conversation
|
|
348
|
+
citation_map = conversation.citation_map
|
|
349
|
+
|
|
258
350
|
# Render message nodes in a deterministic traversal order.
|
|
259
351
|
for node in _ordered_nodes(conversation):
|
|
260
352
|
if node.message:
|
|
@@ -264,6 +356,7 @@ def render_conversation(
|
|
|
264
356
|
use_dollar_latex,
|
|
265
357
|
asset_resolver=asset_resolver,
|
|
266
358
|
flavor=flavor,
|
|
359
|
+
citation_map=citation_map,
|
|
267
360
|
)
|
|
268
361
|
|
|
269
362
|
return markdown
|
|
@@ -111,6 +111,10 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
|
|
|
111
111
|
yaml_fields["content_types"] = conversation.content_types
|
|
112
112
|
if config.custom_instructions:
|
|
113
113
|
yaml_fields["custom_instructions"] = conversation.custom_instructions
|
|
114
|
+
if config.is_starred:
|
|
115
|
+
yaml_fields["is_starred"] = conversation.is_starred
|
|
116
|
+
if config.voice:
|
|
117
|
+
yaml_fields["voice"] = conversation.voice
|
|
114
118
|
|
|
115
119
|
if not yaml_fields:
|
|
116
120
|
return ""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "convoviz"
|
|
3
|
-
version = "0.4.5"
|
|
3
|
+
version = "0.4.7"
|
|
4
4
|
description = "Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|