convoviz 0.4.6__tar.gz → 0.4.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {convoviz-0.4.6 → convoviz-0.4.8}/PKG-INFO +5 -5
- {convoviz-0.4.6 → convoviz-0.4.8}/README.md +4 -4
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/config.py +3 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/assets.py +8 -4
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/writers.py +2 -2
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/conversation.py +18 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/message.py +82 -5
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/renderers/markdown.py +116 -9
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/renderers/yaml.py +4 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/pyproject.toml +1 -1
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/__init__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/__main__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/analysis/__init__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/analysis/graphs.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/analysis/wordcloud.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/colormaps.txt +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/assets/stopwords.txt +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/cli.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/exceptions.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/interactive.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/__init__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/loaders.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/logging_config.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/__init__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/collection.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/node.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/pipeline.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/py.typed +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/renderers/__init__.py +0 -0
- {convoviz-0.4.6 → convoviz-0.4.8}/convoviz/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: convoviz
|
|
3
|
-
Version: 0.4.6
|
|
3
|
+
Version: 0.4.8
|
|
4
4
|
Summary: Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs.
|
|
5
5
|
Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
|
|
6
6
|
Author: Mohamed Cheikh Sidiya
|
|
@@ -24,8 +24,7 @@ Provides-Extra: viz
|
|
|
24
24
|
Description-Content-Type: text/markdown
|
|
25
25
|
|
|
26
26
|
<p align="center">
|
|
27
|
-
<h1 align="center">Convoviz
|
|
28
|
-
<p align="center"><strong>Visualize your entire ChatGPT data</strong></p>
|
|
27
|
+
<h1 align="center">Convoviz</h1>
|
|
29
28
|
<p align="center">
|
|
30
29
|
Convert your ChatGPT history into clean, readable Markdown (text files).
|
|
31
30
|
</p>
|
|
@@ -52,6 +51,7 @@ Description-Content-Type: text/markdown
|
|
|
52
51
|
|---------|-------------|
|
|
53
52
|
| 📝 **Markdown Export** | Clean, well-formatted Markdown with optional YAML headers |
|
|
54
53
|
| 🖼️ **Inline Images** | Media attachments rendered directly in your Markdown files |
|
|
54
|
+
| 🔗 **Citations** | Web search results and source links accurately preserved |
|
|
55
55
|
| ☁️ **Word Clouds** | Visual breakdowns of your most-used words and phrases |
|
|
56
56
|
| 📈 **Usage Graphs** | Bar plots and charts showing your conversation patterns |
|
|
57
57
|
|
|
@@ -170,6 +170,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
|
|
|
170
170
|
- 📝 Neatly formatted Markdown files
|
|
171
171
|
- 📊 Visualizations and graphs
|
|
172
172
|
|
|
173
|
+
If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
174
|
+
|
|
173
175
|

|
|
174
176
|
|
|
175
177
|
---
|
|
@@ -182,8 +184,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
|
|
|
182
184
|
|
|
183
185
|
👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
|
|
184
186
|
|
|
185
|
-
And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
186
|
-
|
|
187
187
|
---
|
|
188
188
|
|
|
189
189
|
## 🤝 Contributing
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
<p align="center">
|
|
2
|
-
<h1 align="center">Convoviz
|
|
3
|
-
<p align="center"><strong>Visualize your entire ChatGPT data</strong></p>
|
|
2
|
+
<h1 align="center">Convoviz</h1>
|
|
4
3
|
<p align="center">
|
|
5
4
|
Convert your ChatGPT history into clean, readable Markdown (text files).
|
|
6
5
|
</p>
|
|
@@ -27,6 +26,7 @@
|
|
|
27
26
|
|---------|-------------|
|
|
28
27
|
| 📝 **Markdown Export** | Clean, well-formatted Markdown with optional YAML headers |
|
|
29
28
|
| 🖼️ **Inline Images** | Media attachments rendered directly in your Markdown files |
|
|
29
|
+
| 🔗 **Citations** | Web search results and source links accurately preserved |
|
|
30
30
|
| ☁️ **Word Clouds** | Visual breakdowns of your most-used words and phrases |
|
|
31
31
|
| 📈 **Usage Graphs** | Bar plots and charts showing your conversation patterns |
|
|
32
32
|
|
|
@@ -145,6 +145,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
|
|
|
145
145
|
- 📝 Neatly formatted Markdown files
|
|
146
146
|
- 📊 Visualizations and graphs
|
|
147
147
|
|
|
148
|
+
If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
149
|
+
|
|
148
150
|

|
|
149
151
|
|
|
150
152
|
---
|
|
@@ -157,8 +159,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
|
|
|
157
159
|
|
|
158
160
|
👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
|
|
159
161
|
|
|
160
|
-
And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
|
|
161
|
-
|
|
162
162
|
---
|
|
163
163
|
|
|
164
164
|
## 🤝 Contributing
|
|
@@ -54,7 +54,10 @@ class YAMLConfig(BaseModel):
|
|
|
54
54
|
used_plugins: bool = False
|
|
55
55
|
message_count: bool = True
|
|
56
56
|
content_types: bool = False
|
|
57
|
+
content_types: bool = False
|
|
57
58
|
custom_instructions: bool = False
|
|
59
|
+
is_starred: bool = False
|
|
60
|
+
voice: bool = False
|
|
58
61
|
|
|
59
62
|
|
|
60
63
|
class ConversationConfig(BaseModel):
|
|
@@ -4,6 +4,8 @@ import logging
|
|
|
4
4
|
import shutil
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
+
from convoviz.utils import sanitize
|
|
8
|
+
|
|
7
9
|
logger = logging.getLogger(__name__)
|
|
8
10
|
|
|
9
11
|
|
|
@@ -83,12 +85,13 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
|
83
85
|
return None
|
|
84
86
|
|
|
85
87
|
|
|
86
|
-
def copy_asset(source_path: Path, dest_dir: Path) -> str:
|
|
88
|
+
def copy_asset(source_path: Path, dest_dir: Path, target_name: str | None = None) -> str:
|
|
87
89
|
"""Copy an asset to the destination directory.
|
|
88
90
|
|
|
89
91
|
Args:
|
|
90
92
|
source_path: The source file path
|
|
91
93
|
dest_dir: The root output directory (assets will be in dest_dir/assets)
|
|
94
|
+
target_name: Optional name to rename the file to
|
|
92
95
|
|
|
93
96
|
Returns:
|
|
94
97
|
Relative path to the asset (e.g., "assets/image.png")
|
|
@@ -96,14 +99,15 @@ def copy_asset(source_path: Path, dest_dir: Path) -> str:
|
|
|
96
99
|
assets_dir = dest_dir / "assets"
|
|
97
100
|
assets_dir.mkdir(parents=True, exist_ok=True)
|
|
98
101
|
|
|
99
|
-
|
|
102
|
+
filename = sanitize(target_name) if target_name else source_path.name
|
|
103
|
+
dest_path = assets_dir / filename
|
|
100
104
|
|
|
101
105
|
if not dest_path.exists():
|
|
102
106
|
try:
|
|
103
107
|
shutil.copy2(source_path, dest_path)
|
|
104
|
-
logger.debug(f"Copied asset: {source_path.name}")
|
|
108
|
+
logger.debug(f"Copied asset: {source_path.name} -> {filename}")
|
|
105
109
|
except Exception as e:
|
|
106
110
|
logger.warning(f"Failed to copy asset {source_path}: {e}")
|
|
107
111
|
|
|
108
112
|
# Return forward-slash path for Markdown compatibility even on Windows
|
|
109
|
-
return f"assets/{
|
|
113
|
+
return f"assets/{filename}"
|
|
@@ -90,7 +90,7 @@ def save_conversation(
|
|
|
90
90
|
final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
|
|
91
91
|
|
|
92
92
|
# Define asset resolver
|
|
93
|
-
def asset_resolver(asset_id: str) -> str | None:
|
|
93
|
+
def asset_resolver(asset_id: str, target_name: str | None = None) -> str | None:
|
|
94
94
|
if not source_path:
|
|
95
95
|
return None
|
|
96
96
|
|
|
@@ -99,7 +99,7 @@ def save_conversation(
|
|
|
99
99
|
return None
|
|
100
100
|
|
|
101
101
|
# Copy to output directory (relative to the markdown file's directory)
|
|
102
|
-
return copy_asset(src_file, final_path.parent)
|
|
102
|
+
return copy_asset(src_file, final_path.parent, target_name)
|
|
103
103
|
|
|
104
104
|
# Render and write
|
|
105
105
|
markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
|
|
@@ -24,6 +24,8 @@ class Conversation(BaseModel):
|
|
|
24
24
|
mapping: dict[str, Node]
|
|
25
25
|
moderation_results: list[Any] = Field(default_factory=list)
|
|
26
26
|
current_node: str
|
|
27
|
+
is_starred: bool | None = None
|
|
28
|
+
voice: str | dict[str, Any] | None = None
|
|
27
29
|
plugin_ids: list[str] | None = None
|
|
28
30
|
conversation_id: str
|
|
29
31
|
conversation_template_id: str | None = None
|
|
@@ -156,3 +158,19 @@ class Conversation(BaseModel):
|
|
|
156
158
|
def year_start(self) -> datetime:
|
|
157
159
|
"""Get January 1st of the year this conversation was created."""
|
|
158
160
|
return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
164
|
+
"""Aggregate citation metadata from all messages in the conversation.
|
|
165
|
+
|
|
166
|
+
Traverses all nodes (including hidden ones) to collect embedded citation definitions
|
|
167
|
+
from tool outputs (e.g. search results).
|
|
168
|
+
"""
|
|
169
|
+
aggregated_map = {}
|
|
170
|
+
for node in self.all_message_nodes:
|
|
171
|
+
if not node.message:
|
|
172
|
+
continue
|
|
173
|
+
# Extract citations from message parts
|
|
174
|
+
if hasattr(node.message, "internal_citation_map"):
|
|
175
|
+
aggregated_map.update(node.message.internal_citation_map)
|
|
176
|
+
return aggregated_map
|
|
@@ -46,6 +46,10 @@ class MessageMetadata(BaseModel):
|
|
|
46
46
|
is_user_system_message: bool | None = None
|
|
47
47
|
is_visually_hidden_from_conversation: bool | None = None
|
|
48
48
|
user_context_message_data: dict[str, Any] | None = None
|
|
49
|
+
citations: list[dict[str, Any]] | None = None
|
|
50
|
+
search_result_groups: list[dict[str, Any]] | None = None
|
|
51
|
+
content_references: list[dict[str, Any]] | None = None
|
|
52
|
+
attachments: list[dict[str, Any]] | None = None
|
|
49
53
|
|
|
50
54
|
model_config = ConfigDict(protected_namespaces=())
|
|
51
55
|
|
|
@@ -179,11 +183,12 @@ class Message(BaseModel):
|
|
|
179
183
|
1. It is empty (no text, no images).
|
|
180
184
|
2. Explicitly marked as visually hidden.
|
|
181
185
|
3. It is an internal system message (not custom instructions).
|
|
182
|
-
4. It is a browser tool output (intermediate search steps).
|
|
186
|
+
4. It is a browser tool output (intermediate search steps) UNLESS it is a tether_quote.
|
|
183
187
|
5. It is an assistant message targeting a tool (internal call).
|
|
184
188
|
6. It is code interpreter input (content_type="code").
|
|
185
|
-
7. It is browsing status (
|
|
186
|
-
8. It is
|
|
189
|
+
7. It is browsing status, internal reasoning (o1/o3), or massive web scraps (sonic_webpage).
|
|
190
|
+
8. It is a redundant DALL-E textual status update.
|
|
191
|
+
9. It is from internal bio (memory) or web.run orchestration tools.
|
|
187
192
|
"""
|
|
188
193
|
if self.is_empty:
|
|
189
194
|
return True
|
|
@@ -197,10 +202,29 @@ class Message(BaseModel):
|
|
|
197
202
|
# Only show if explicitly marked as user system message (Custom Instructions)
|
|
198
203
|
return not self.metadata.is_user_system_message
|
|
199
204
|
|
|
200
|
-
# Hide
|
|
201
|
-
if self.
|
|
205
|
+
# Hide sonic_webpage (massive scraped text) and system_error
|
|
206
|
+
if self.content.content_type in ("sonic_webpage", "system_error"):
|
|
202
207
|
return True
|
|
203
208
|
|
|
209
|
+
if self.author.role == "tool":
|
|
210
|
+
# Hide memory updates (bio) and internal search orchestration (web.run)
|
|
211
|
+
if self.author.name in ("bio", "web.run"):
|
|
212
|
+
return True
|
|
213
|
+
|
|
214
|
+
# Hide browser tool outputs (intermediate search steps)
|
|
215
|
+
# EXCEPTION: tether_quote (citations) should remain visible
|
|
216
|
+
if self.author.name == "browser":
|
|
217
|
+
return self.content.content_type != "tether_quote"
|
|
218
|
+
|
|
219
|
+
# Hide DALL-E textual status ("DALL·E displayed 1 images...")
|
|
220
|
+
if (
|
|
221
|
+
self.author.name == "dalle.text2im"
|
|
222
|
+
and self.content.content_type == "text"
|
|
223
|
+
# Check if it doesn't have images (just in case they attach images to text logic)
|
|
224
|
+
and not self.images
|
|
225
|
+
):
|
|
226
|
+
return True
|
|
227
|
+
|
|
204
228
|
# Hide assistant messages targeting tools (e.g., search(...), code input)
|
|
205
229
|
# recipient="all" or None means it's for the user; anything else is internal
|
|
206
230
|
if self.author.role == "assistant" and self.recipient not in ("all", None):
|
|
@@ -216,3 +240,56 @@ class Message(BaseModel):
|
|
|
216
240
|
"thoughts",
|
|
217
241
|
"reasoning_recap",
|
|
218
242
|
)
|
|
243
|
+
|
|
244
|
+
@property
|
|
245
|
+
def internal_citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
246
|
+
"""Extract a map of citation IDs to metadata from content parts.
|
|
247
|
+
|
|
248
|
+
Used for resolving embedded citations (e.g. citeturn0search18).
|
|
249
|
+
Key format: "turn{turn_index}search{ref_index}"
|
|
250
|
+
"""
|
|
251
|
+
if not self.content.parts:
|
|
252
|
+
return {}
|
|
253
|
+
|
|
254
|
+
citation_mapping = {}
|
|
255
|
+
|
|
256
|
+
# Helper to process a single search result entry
|
|
257
|
+
def process_entry(entry: dict[str, Any]) -> None:
|
|
258
|
+
ref_id = entry.get("ref_id")
|
|
259
|
+
if not ref_id:
|
|
260
|
+
return
|
|
261
|
+
# Only care about search results for now
|
|
262
|
+
if ref_id.get("ref_type") != "search":
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
turn_idx = ref_id.get("turn_index")
|
|
266
|
+
ref_idx = ref_id.get("ref_index")
|
|
267
|
+
|
|
268
|
+
if turn_idx is not None and ref_idx is not None:
|
|
269
|
+
# turn_idx is int, ref_idx is int
|
|
270
|
+
key = f"turn{turn_idx}search{ref_idx}"
|
|
271
|
+
citation_mapping[key] = {
|
|
272
|
+
"title": entry.get("title"),
|
|
273
|
+
"url": entry.get("url"),
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
# 1. Extract from self.content.parts
|
|
277
|
+
if self.content and self.content.parts:
|
|
278
|
+
for part in self.content.parts:
|
|
279
|
+
if isinstance(part, dict):
|
|
280
|
+
if part.get("type") == "search_result":
|
|
281
|
+
process_entry(part)
|
|
282
|
+
elif part.get("type") == "search_result_group":
|
|
283
|
+
for entry in part.get("entries", []):
|
|
284
|
+
process_entry(entry)
|
|
285
|
+
|
|
286
|
+
# 2. Extract from metadata.search_result_groups (if present)
|
|
287
|
+
if self.metadata and self.metadata.search_result_groups:
|
|
288
|
+
for group in self.metadata.search_result_groups:
|
|
289
|
+
if isinstance(group, dict):
|
|
290
|
+
# Groups might have 'entries' or be flat?
|
|
291
|
+
# Based on name 'groups', likely similar to part structure
|
|
292
|
+
for entry in group.get("entries", []):
|
|
293
|
+
process_entry(entry)
|
|
294
|
+
|
|
295
|
+
return citation_mapping
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
+
from typing import Any
|
|
6
|
+
from urllib.parse import quote
|
|
5
7
|
|
|
6
8
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
7
9
|
from convoviz.exceptions import MessageContentError
|
|
@@ -9,6 +11,82 @@ from convoviz.models import Conversation, Node
|
|
|
9
11
|
from convoviz.renderers.yaml import render_yaml_header
|
|
10
12
|
|
|
11
13
|
|
|
14
|
+
def replace_citations(
|
|
15
|
+
text: str,
|
|
16
|
+
citations: list[dict[str, Any]] | None = None,
|
|
17
|
+
citation_map: dict[str, dict[str, str | None]] | None = None,
|
|
18
|
+
) -> str:
|
|
19
|
+
"""Replace citation placeholders in text with markdown links.
|
|
20
|
+
|
|
21
|
+
Supports two formats:
|
|
22
|
+
1. Tether v4 (metadata.citations): Placed at specific indices (【...】 placeholders).
|
|
23
|
+
2. Embedded (Tether v3?): Unicode markers citeturnXsearchY.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
text: The original message text
|
|
27
|
+
citations: List of tether v4 citation objects (start_ix/end_ix)
|
|
28
|
+
citation_map: Map of internal citation IDs to metadata (turnXsearchY -> {title, url})
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Text with all placeholders replaced by markdown links
|
|
32
|
+
"""
|
|
33
|
+
# 1. Handle Tether v4 (Index-based replacements)
|
|
34
|
+
if citations:
|
|
35
|
+
# Sort citations by start_ix descending to replace safely from end
|
|
36
|
+
sorted_citations = sorted(citations, key=lambda c: c.get("start_ix", 0), reverse=True)
|
|
37
|
+
|
|
38
|
+
for cit in sorted_citations:
|
|
39
|
+
start = cit.get("start_ix")
|
|
40
|
+
end = cit.get("end_ix")
|
|
41
|
+
meta = cit.get("metadata", {})
|
|
42
|
+
|
|
43
|
+
if start is None or end is None:
|
|
44
|
+
continue
|
|
45
|
+
|
|
46
|
+
replacement = _format_link(meta.get("title"), meta.get("url"))
|
|
47
|
+
|
|
48
|
+
# Only replace if strictly positive indices and bounds check
|
|
49
|
+
if 0 <= start < end <= len(text):
|
|
50
|
+
text = text[:start] + replacement + text[end:]
|
|
51
|
+
|
|
52
|
+
# 2. Handle Embedded Citations (Regex-based)
|
|
53
|
+
# Pattern: cite (key)+
|
|
54
|
+
# Codepoints: \uE200 (Start), \uE202 (Sep), \uE201 (End)
|
|
55
|
+
if citation_map is not None:
|
|
56
|
+
pattern = re.compile(r"\uE200cite((?:\uE202[a-zA-Z0-9]+)+)\uE201")
|
|
57
|
+
|
|
58
|
+
def replacer(match: re.Match) -> str:
|
|
59
|
+
# Group 1 contains string like: turn0search18turn0search3
|
|
60
|
+
# Split by separator \uE202 (first item will be empty string)
|
|
61
|
+
raw_keys = match.group(1).split("\ue202")
|
|
62
|
+
keys = [k for k in raw_keys if k]
|
|
63
|
+
|
|
64
|
+
links = []
|
|
65
|
+
for key in keys:
|
|
66
|
+
if key in citation_map:
|
|
67
|
+
data = citation_map[key]
|
|
68
|
+
link = _format_link(data.get("title"), data.get("url"))
|
|
69
|
+
if link:
|
|
70
|
+
links.append(link)
|
|
71
|
+
|
|
72
|
+
return "".join(links)
|
|
73
|
+
|
|
74
|
+
text = pattern.sub(replacer, text)
|
|
75
|
+
|
|
76
|
+
return text
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _format_link(title: str | None, url: str | None) -> str:
|
|
80
|
+
"""Format a title and URL into a concise markdown link."""
|
|
81
|
+
if title and url:
|
|
82
|
+
return f" [[{title}]({url})]"
|
|
83
|
+
elif url:
|
|
84
|
+
return f" [[Source]({url})]"
|
|
85
|
+
elif title:
|
|
86
|
+
return f" [{title}]"
|
|
87
|
+
return ""
|
|
88
|
+
|
|
89
|
+
|
|
12
90
|
def close_code_blocks(text: str) -> str:
|
|
13
91
|
"""Ensure all code blocks in the text are properly closed.
|
|
14
92
|
|
|
@@ -135,8 +213,9 @@ def render_node(
|
|
|
135
213
|
node: Node,
|
|
136
214
|
headers: AuthorHeaders,
|
|
137
215
|
use_dollar_latex: bool = False,
|
|
138
|
-
asset_resolver: Callable[[str], str | None] | None = None,
|
|
216
|
+
asset_resolver: Callable[[str, str | None], str | None] | None = None,
|
|
139
217
|
flavor: str = "standard",
|
|
218
|
+
citation_map: dict[str, dict[str, str | None]] | None = None,
|
|
140
219
|
) -> str:
|
|
141
220
|
"""Render a complete node as markdown.
|
|
142
221
|
|
|
@@ -144,11 +223,9 @@ def render_node(
|
|
|
144
223
|
node: The node to render
|
|
145
224
|
headers: Configuration for author headers
|
|
146
225
|
use_dollar_latex: Whether to convert LaTeX delimiters to dollars
|
|
147
|
-
asset_resolver: Function to resolve asset IDs to paths
|
|
226
|
+
asset_resolver: Function to resolve asset IDs to paths, optionally renaming them
|
|
148
227
|
flavor: Markdown flavor ("standard" or "obsidian")
|
|
149
|
-
|
|
150
|
-
Returns:
|
|
151
|
-
Complete markdown string for the node
|
|
228
|
+
citation_map: Global map of citations
|
|
152
229
|
"""
|
|
153
230
|
if node.message is None:
|
|
154
231
|
return ""
|
|
@@ -185,6 +262,19 @@ def render_node(
|
|
|
185
262
|
# Some message types only contain non-text parts; those still may have images.
|
|
186
263
|
text = ""
|
|
187
264
|
|
|
265
|
+
# Process citations if present (Tether v4 metadata or Embedded v3)
|
|
266
|
+
# Use global citation_map if provided, merging/falling back to local if needed.
|
|
267
|
+
# Actually, local internal map is subset of global map if we aggregated correctly.
|
|
268
|
+
# So we prefer the passed global map.
|
|
269
|
+
effective_map = citation_map or node.message.internal_citation_map
|
|
270
|
+
|
|
271
|
+
if node.message.metadata.citations or effective_map:
|
|
272
|
+
text = replace_citations(
|
|
273
|
+
text,
|
|
274
|
+
citations=node.message.metadata.citations,
|
|
275
|
+
citation_map=effective_map,
|
|
276
|
+
)
|
|
277
|
+
|
|
188
278
|
content = close_code_blocks(text)
|
|
189
279
|
content = f"\n{content}\n" if content else ""
|
|
190
280
|
if use_dollar_latex:
|
|
@@ -192,12 +282,25 @@ def render_node(
|
|
|
192
282
|
|
|
193
283
|
# Append images if resolver is provided and images exist
|
|
194
284
|
if asset_resolver and node.message.images:
|
|
285
|
+
# Build map of file-id -> desired name from metadata.attachments
|
|
286
|
+
attachment_map = {}
|
|
287
|
+
if node.message.metadata.attachments:
|
|
288
|
+
for att in node.message.metadata.attachments:
|
|
289
|
+
if (att_id := att.get("id")) and (name := att.get("name")):
|
|
290
|
+
attachment_map[att_id] = name
|
|
291
|
+
|
|
195
292
|
for image_id in node.message.images:
|
|
196
|
-
|
|
293
|
+
# Pass the desired name if we have one for this ID
|
|
294
|
+
target_name = attachment_map.get(image_id)
|
|
295
|
+
rel_path = asset_resolver(image_id, target_name)
|
|
197
296
|
if rel_path:
|
|
297
|
+
# URL-encode the path to handle spaces/special characters in Markdown links
|
|
298
|
+
# We only encode the filename part if we want to be safe, but rel_path is "assets/..."
|
|
299
|
+
# quote() by default doesn't encode / which is good.
|
|
300
|
+
encoded_path = quote(rel_path)
|
|
198
301
|
# Using standard markdown image syntax.
|
|
199
302
|
# Obsidian handles this well.
|
|
200
|
-
content += f"\n\n"
|
|
201
304
|
|
|
202
305
|
return f"\n{header}{content}\n---\n"
|
|
203
306
|
|
|
@@ -236,7 +339,7 @@ def render_conversation(
|
|
|
236
339
|
conversation: Conversation,
|
|
237
340
|
config: ConversationConfig,
|
|
238
341
|
headers: AuthorHeaders,
|
|
239
|
-
asset_resolver: Callable[[str], str | None] | None = None,
|
|
342
|
+
asset_resolver: Callable[[str, str | None], str | None] | None = None,
|
|
240
343
|
) -> str:
|
|
241
344
|
"""Render a complete conversation as markdown.
|
|
242
345
|
|
|
@@ -244,7 +347,7 @@ def render_conversation(
|
|
|
244
347
|
conversation: The conversation to render
|
|
245
348
|
config: Conversation rendering configuration
|
|
246
349
|
headers: Configuration for author headers
|
|
247
|
-
asset_resolver: Function to resolve asset IDs to paths
|
|
350
|
+
asset_resolver: Function to resolve asset IDs to paths, optionally renaming them
|
|
248
351
|
|
|
249
352
|
Returns:
|
|
250
353
|
Complete markdown document string
|
|
@@ -255,6 +358,9 @@ def render_conversation(
|
|
|
255
358
|
# Start with YAML header
|
|
256
359
|
markdown = render_yaml_header(conversation, config.yaml)
|
|
257
360
|
|
|
361
|
+
# Pre-calculate citation map for the conversation
|
|
362
|
+
citation_map = conversation.citation_map
|
|
363
|
+
|
|
258
364
|
# Render message nodes in a deterministic traversal order.
|
|
259
365
|
for node in _ordered_nodes(conversation):
|
|
260
366
|
if node.message:
|
|
@@ -264,6 +370,7 @@ def render_conversation(
|
|
|
264
370
|
use_dollar_latex,
|
|
265
371
|
asset_resolver=asset_resolver,
|
|
266
372
|
flavor=flavor,
|
|
373
|
+
citation_map=citation_map,
|
|
267
374
|
)
|
|
268
375
|
|
|
269
376
|
return markdown
|
|
@@ -111,6 +111,10 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
|
|
|
111
111
|
yaml_fields["content_types"] = conversation.content_types
|
|
112
112
|
if config.custom_instructions:
|
|
113
113
|
yaml_fields["custom_instructions"] = conversation.custom_instructions
|
|
114
|
+
if config.is_starred:
|
|
115
|
+
yaml_fields["is_starred"] = conversation.is_starred
|
|
116
|
+
if config.voice:
|
|
117
|
+
yaml_fields["voice"] = conversation.voice
|
|
114
118
|
|
|
115
119
|
if not yaml_fields:
|
|
116
120
|
return ""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "convoviz"
|
|
3
|
-
version = "0.4.6"
|
|
3
|
+
version = "0.4.8"
|
|
4
4
|
description = "Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|