PyPI - convoviz - Versions diffs - 0.4.6__tar.gz → 0.4.8__tar.gz - Mend

convoviz 0.4.6__tar.gz → 0.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

{convoviz-0.4.6 → convoviz-0.4.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: convoviz
-Version: 0.4.6
+Version: 0.4.8
 Summary: Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs.
 Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
 Author: Mohamed Cheikh Sidiya
@@ -24,8 +24,7 @@ Provides-Extra: viz
 Description-Content-Type: text/markdown
 <p align="center">
-  <h1 align="center">Convoviz 📊</h1>
-  <p align="center"><strong>Visualize your entire ChatGPT data</strong></p>
+  <h1 align="center">Convoviz</h1>
   <p align="center">
     Convert your ChatGPT history into clean, readable Markdown (text files).
   </p>
@@ -52,6 +51,7 @@ Description-Content-Type: text/markdown
 |---------|-------------|
 | 📝 **Markdown Export** | Clean, well-formatted Markdown with optional YAML headers |
 | 🖼️ **Inline Images** | Media attachments rendered directly in your Markdown files |
+| 🔗 **Citations** | Web search results and source links accurately preserved |
 | ☁️ **Word Clouds** | Visual breakdowns of your most-used words and phrases |
 | 📈 **Usage Graphs** | Bar plots and charts showing your conversation patterns |
@@ -170,6 +170,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
 - 📝 Neatly formatted Markdown files
 - 📊 Visualizations and graphs
+If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ![wordcloud example](https://raw.githubusercontent.com/mohamed-chs/convoviz/main/demo/wordcloud-example.png)
 ---
@@ -182,8 +184,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
 👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
-And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ---
 ## 🤝 Contributing

{convoviz-0.4.6 → convoviz-0.4.8}/README.md RENAMED Viewed

@@ -1,6 +1,5 @@
 <p align="center">
-  <h1 align="center">Convoviz 📊</h1>
-  <p align="center"><strong>Visualize your entire ChatGPT data</strong></p>
+  <h1 align="center">Convoviz</h1>
   <p align="center">
     Convert your ChatGPT history into clean, readable Markdown (text files).
   </p>
@@ -27,6 +26,7 @@
 |---------|-------------|
 | 📝 **Markdown Export** | Clean, well-formatted Markdown with optional YAML headers |
 | 🖼️ **Inline Images** | Media attachments rendered directly in your Markdown files |
+| 🔗 **Citations** | Web search results and source links accurately preserved |
 | ☁️ **Word Clouds** | Visual breakdowns of your most-used words and phrases |
 | 📈 **Usage Graphs** | Bar plots and charts showing your conversation patterns |
@@ -145,6 +145,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
 - 📝 Neatly formatted Markdown files
 - 📊 Visualizations and graphs
+If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ![wordcloud example](https://raw.githubusercontent.com/mohamed-chs/convoviz/main/demo/wordcloud-example.png)
 ---
@@ -157,8 +159,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
 👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
-And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ---
 ## 🤝 Contributing

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/config.py RENAMED Viewed

@@ -54,7 +54,10 @@ class YAMLConfig(BaseModel):
     used_plugins: bool = False
     message_count: bool = True
     content_types: bool = False
+    content_types: bool = False
     custom_instructions: bool = False
+    is_starred: bool = False
+    voice: bool = False
 class ConversationConfig(BaseModel):

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/assets.py RENAMED Viewed

@@ -4,6 +4,8 @@ import logging
 import shutil
 from pathlib import Path
+from convoviz.utils import sanitize
 logger = logging.getLogger(__name__)
@@ -83,12 +85,13 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
     return None
-def copy_asset(source_path: Path, dest_dir: Path) -> str:
+def copy_asset(source_path: Path, dest_dir: Path, target_name: str | None = None) -> str:
     """Copy an asset to the destination directory.
     Args:
         source_path: The source file path
         dest_dir: The root output directory (assets will be in dest_dir/assets)
+        target_name: Optional name to rename the file to
     Returns:
         Relative path to the asset (e.g., "assets/image.png")
@@ -96,14 +99,15 @@ def copy_asset(source_path: Path, dest_dir: Path) -> str:
     assets_dir = dest_dir / "assets"
     assets_dir.mkdir(parents=True, exist_ok=True)
-    dest_path = assets_dir / source_path.name
+    filename = sanitize(target_name) if target_name else source_path.name
+    dest_path = assets_dir / filename
     if not dest_path.exists():
         try:
             shutil.copy2(source_path, dest_path)
-            logger.debug(f"Copied asset: {source_path.name}")
+            logger.debug(f"Copied asset: {source_path.name} -> {filename}")
         except Exception as e:
             logger.warning(f"Failed to copy asset {source_path}: {e}")
     # Return forward-slash path for Markdown compatibility even on Windows
-    return f"assets/{source_path.name}"
+    return f"assets/{filename}"

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/io/writers.py RENAMED Viewed

@@ -90,7 +90,7 @@ def save_conversation(
         final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
     # Define asset resolver
-    def asset_resolver(asset_id: str) -> str | None:
+    def asset_resolver(asset_id: str, target_name: str | None = None) -> str | None:
         if not source_path:
             return None
@@ -99,7 +99,7 @@ def save_conversation(
             return None
         # Copy to output directory (relative to the markdown file's directory)
-        return copy_asset(src_file, final_path.parent)
+        return copy_asset(src_file, final_path.parent, target_name)
     # Render and write
     markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/conversation.py RENAMED Viewed

@@ -24,6 +24,8 @@ class Conversation(BaseModel):
     mapping: dict[str, Node]
     moderation_results: list[Any] = Field(default_factory=list)
     current_node: str
+    is_starred: bool | None = None
+    voice: str | dict[str, Any] | None = None
     plugin_ids: list[str] | None = None
     conversation_id: str
     conversation_template_id: str | None = None
@@ -156,3 +158,19 @@ class Conversation(BaseModel):
     def year_start(self) -> datetime:
         """Get January 1st of the year this conversation was created."""
         return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def citation_map(self) -> dict[str, dict[str, str | None]]:
+        """Aggregate citation metadata from all messages in the conversation.
+        Traverses all nodes (including hidden ones) to collect embedded citation definitions
+        from tool outputs (e.g. search results).
+        """
+        aggregated_map = {}
+        for node in self.all_message_nodes:
+            if not node.message:
+                continue
+            # Extract citations from message parts
+            if hasattr(node.message, "internal_citation_map"):
+                aggregated_map.update(node.message.internal_citation_map)
+        return aggregated_map

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/models/message.py RENAMED Viewed

@@ -46,6 +46,10 @@ class MessageMetadata(BaseModel):
     is_user_system_message: bool | None = None
     is_visually_hidden_from_conversation: bool | None = None
     user_context_message_data: dict[str, Any] | None = None
+    citations: list[dict[str, Any]] | None = None
+    search_result_groups: list[dict[str, Any]] | None = None
+    content_references: list[dict[str, Any]] | None = None
+    attachments: list[dict[str, Any]] | None = None
     model_config = ConfigDict(protected_namespaces=())
@@ -179,11 +183,12 @@ class Message(BaseModel):
         1. It is empty (no text, no images).
         2. Explicitly marked as visually hidden.
         3. It is an internal system message (not custom instructions).
-        4. It is a browser tool output (intermediate search steps).
+        4. It is a browser tool output (intermediate search steps) UNLESS it is a tether_quote.
         5. It is an assistant message targeting a tool (internal call).
         6. It is code interpreter input (content_type="code").
-        7. It is browsing status (tether_browsing_display).
-        8. It is internal reasoning (thoughts, reasoning_recap from o1/o3).
+        7. It is browsing status, internal reasoning (o1/o3), or massive web scraps (sonic_webpage).
+        8. It is a redundant DALL-E textual status update.
+        9. It is from internal bio (memory) or web.run orchestration tools.
         """
         if self.is_empty:
             return True
@@ -197,10 +202,29 @@ class Message(BaseModel):
             # Only show if explicitly marked as user system message (Custom Instructions)
             return not self.metadata.is_user_system_message
-        # Hide browser tool outputs (intermediate search steps)
-        if self.author.role == "tool" and self.author.name == "browser":
+        # Hide sonic_webpage (massive scraped text) and system_error
+        if self.content.content_type in ("sonic_webpage", "system_error"):
             return True
+        if self.author.role == "tool":
+            # Hide memory updates (bio) and internal search orchestration (web.run)
+            if self.author.name in ("bio", "web.run"):
+                return True
+            # Hide browser tool outputs (intermediate search steps)
+            # EXCEPTION: tether_quote (citations) should remain visible
+            if self.author.name == "browser":
+                return self.content.content_type != "tether_quote"
+            # Hide DALL-E textual status ("DALL·E displayed 1 images...")
+            if (
+                self.author.name == "dalle.text2im"
+                and self.content.content_type == "text"
+                # Check if it doesn't have images (just in case they attach images to text logic)
+                and not self.images
+            ):
+                return True
         # Hide assistant messages targeting tools (e.g., search(...), code input)
         # recipient="all" or None means it's for the user; anything else is internal
         if self.author.role == "assistant" and self.recipient not in ("all", None):
@@ -216,3 +240,56 @@ class Message(BaseModel):
             "thoughts",
             "reasoning_recap",
         )
+    @property
+    def internal_citation_map(self) -> dict[str, dict[str, str | None]]:
+        """Extract a map of citation IDs to metadata from content parts.
+        Used for resolving embedded citations (e.g. citeturn0search18).
+        Key format: "turn{turn_index}search{ref_index}"
+        """
+        if not self.content.parts:
+            return {}
+        citation_mapping = {}
+        # Helper to process a single search result entry
+        def process_entry(entry: dict[str, Any]) -> None:
+            ref_id = entry.get("ref_id")
+            if not ref_id:
+                return
+            # Only care about search results for now
+            if ref_id.get("ref_type") != "search":
+                return
+            turn_idx = ref_id.get("turn_index")
+            ref_idx = ref_id.get("ref_index")
+            if turn_idx is not None and ref_idx is not None:
+                # turn_idx is int, ref_idx is int
+                key = f"turn{turn_idx}search{ref_idx}"
+                citation_mapping[key] = {
+                    "title": entry.get("title"),
+                    "url": entry.get("url"),
+                }
+        # 1. Extract from self.content.parts
+        if self.content and self.content.parts:
+            for part in self.content.parts:
+                if isinstance(part, dict):
+                    if part.get("type") == "search_result":
+                        process_entry(part)
+                    elif part.get("type") == "search_result_group":
+                        for entry in part.get("entries", []):
+                            process_entry(entry)
+        # 2. Extract from metadata.search_result_groups (if present)
+        if self.metadata and self.metadata.search_result_groups:
+            for group in self.metadata.search_result_groups:
+                if isinstance(group, dict):
+                    # Groups might have 'entries' or be flat?
+                    # Based on name 'groups', likely similar to part structure
+                    for entry in group.get("entries", []):
+                        process_entry(entry)
+        return citation_mapping

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/renderers/markdown.py RENAMED Viewed

@@ -2,6 +2,8 @@
 import re
 from collections.abc import Callable
+from typing import Any
+from urllib.parse import quote
 from convoviz.config import AuthorHeaders, ConversationConfig
 from convoviz.exceptions import MessageContentError
@@ -9,6 +11,82 @@ from convoviz.models import Conversation, Node
 from convoviz.renderers.yaml import render_yaml_header
+def replace_citations(
+    text: str,
+    citations: list[dict[str, Any]] | None = None,
+    citation_map: dict[str, dict[str, str | None]] | None = None,
+) -> str:
+    """Replace citation placeholders in text with markdown links.
+    Supports two formats:
+    1. Tether v4 (metadata.citations): Placed at specific indices (【...】 placeholders).
+    2. Embedded (Tether v3?): Unicode markers citeturnXsearchY.
+    Args:
+        text: The original message text
+        citations: List of tether v4 citation objects (start_ix/end_ix)
+        citation_map: Map of internal citation IDs to metadata (turnXsearchY -> {title, url})
+    Returns:
+        Text with all placeholders replaced by markdown links
+    """
+    # 1. Handle Tether v4 (Index-based replacements)
+    if citations:
+        # Sort citations by start_ix descending to replace safely from end
+        sorted_citations = sorted(citations, key=lambda c: c.get("start_ix", 0), reverse=True)
+        for cit in sorted_citations:
+            start = cit.get("start_ix")
+            end = cit.get("end_ix")
+            meta = cit.get("metadata", {})
+            if start is None or end is None:
+                continue
+            replacement = _format_link(meta.get("title"), meta.get("url"))
+            # Only replace if strictly positive indices and bounds check
+            if 0 <= start < end <= len(text):
+                text = text[:start] + replacement + text[end:]
+    # 2. Handle Embedded Citations (Regex-based)
+    # Pattern: cite (key)+ 
+    # Codepoints: \uE200 (Start), \uE202 (Sep), \uE201 (End)
+    if citation_map is not None:
+        pattern = re.compile(r"\uE200cite((?:\uE202[a-zA-Z0-9]+)+)\uE201")
+        def replacer(match: re.Match) -> str:
+            # Group 1 contains string like: turn0search18turn0search3
+            # Split by separator \uE202 (first item will be empty string)
+            raw_keys = match.group(1).split("\ue202")
+            keys = [k for k in raw_keys if k]
+            links = []
+            for key in keys:
+                if key in citation_map:
+                    data = citation_map[key]
+                    link = _format_link(data.get("title"), data.get("url"))
+                    if link:
+                        links.append(link)
+            return "".join(links)
+        text = pattern.sub(replacer, text)
+    return text
+def _format_link(title: str | None, url: str | None) -> str:
+    """Format a title and URL into a concise markdown link."""
+    if title and url:
+        return f" [[{title}]({url})]"
+    elif url:
+        return f" [[Source]({url})]"
+    elif title:
+        return f" [{title}]"
+    return ""
 def close_code_blocks(text: str) -> str:
     """Ensure all code blocks in the text are properly closed.
@@ -135,8 +213,9 @@ def render_node(
     node: Node,
     headers: AuthorHeaders,
     use_dollar_latex: bool = False,
-    asset_resolver: Callable[[str], str | None] | None = None,
+    asset_resolver: Callable[[str, str | None], str | None] | None = None,
     flavor: str = "standard",
+    citation_map: dict[str, dict[str, str | None]] | None = None,
 ) -> str:
     """Render a complete node as markdown.
@@ -144,11 +223,9 @@ def render_node(
         node: The node to render
         headers: Configuration for author headers
         use_dollar_latex: Whether to convert LaTeX delimiters to dollars
-        asset_resolver: Function to resolve asset IDs to paths
+        asset_resolver: Function to resolve asset IDs to paths, optionally renaming them
         flavor: Markdown flavor ("standard" or "obsidian")
-    Returns:
-        Complete markdown string for the node
+        citation_map: Global map of citations
     """
     if node.message is None:
         return ""
@@ -185,6 +262,19 @@ def render_node(
         # Some message types only contain non-text parts; those still may have images.
         text = ""
+    # Process citations if present (Tether v4 metadata or Embedded v3)
+    # Use global citation_map if provided, merging/falling back to local if needed.
+    # Actually, local internal map is subset of global map if we aggregated correctly.
+    # So we prefer the passed global map.
+    effective_map = citation_map or node.message.internal_citation_map
+    if node.message.metadata.citations or effective_map:
+        text = replace_citations(
+            text,
+            citations=node.message.metadata.citations,
+            citation_map=effective_map,
+        )
     content = close_code_blocks(text)
     content = f"\n{content}\n" if content else ""
     if use_dollar_latex:
@@ -192,12 +282,25 @@ def render_node(
     # Append images if resolver is provided and images exist
     if asset_resolver and node.message.images:
+        # Build map of file-id -> desired name from metadata.attachments
+        attachment_map = {}
+        if node.message.metadata.attachments:
+            for att in node.message.metadata.attachments:
+                if (att_id := att.get("id")) and (name := att.get("name")):
+                    attachment_map[att_id] = name
         for image_id in node.message.images:
-            rel_path = asset_resolver(image_id)
+            # Pass the desired name if we have one for this ID
+            target_name = attachment_map.get(image_id)
+            rel_path = asset_resolver(image_id, target_name)
             if rel_path:
+                # URL-encode the path to handle spaces/special characters in Markdown links
+                # We only encode the filename part if we want to be safe, but rel_path is "assets/..."
+                # quote() by default doesn't encode / which is good.
+                encoded_path = quote(rel_path)
                 # Using standard markdown image syntax.
                 # Obsidian handles this well.
-                content += f"\n![Image]({rel_path})\n"
+                content += f"\n![Image]({encoded_path})\n"
     return f"\n{header}{content}\n---\n"
@@ -236,7 +339,7 @@ def render_conversation(
     conversation: Conversation,
     config: ConversationConfig,
     headers: AuthorHeaders,
-    asset_resolver: Callable[[str], str | None] | None = None,
+    asset_resolver: Callable[[str, str | None], str | None] | None = None,
 ) -> str:
     """Render a complete conversation as markdown.
@@ -244,7 +347,7 @@ def render_conversation(
         conversation: The conversation to render
         config: Conversation rendering configuration
         headers: Configuration for author headers
-        asset_resolver: Function to resolve asset IDs to paths
+        asset_resolver: Function to resolve asset IDs to paths, optionally renaming them
     Returns:
         Complete markdown document string
@@ -255,6 +358,9 @@ def render_conversation(
     # Start with YAML header
     markdown = render_yaml_header(conversation, config.yaml)
+    # Pre-calculate citation map for the conversation
+    citation_map = conversation.citation_map
     # Render message nodes in a deterministic traversal order.
     for node in _ordered_nodes(conversation):
         if node.message:
@@ -264,6 +370,7 @@ def render_conversation(
                 use_dollar_latex,
                 asset_resolver=asset_resolver,
                 flavor=flavor,
+                citation_map=citation_map,
             )
     return markdown

{convoviz-0.4.6 → convoviz-0.4.8}/convoviz/renderers/yaml.py RENAMED Viewed

@@ -111,6 +111,10 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
         yaml_fields["content_types"] = conversation.content_types
     if config.custom_instructions:
         yaml_fields["custom_instructions"] = conversation.custom_instructions
+    if config.is_starred:
+        yaml_fields["is_starred"] = conversation.is_starred
+    if config.voice:
+        yaml_fields["voice"] = conversation.voice
     if not yaml_fields:
         return ""

{convoviz-0.4.6 → convoviz-0.4.8}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "convoviz"
-version = "0.4.6"
+version = "0.4.8"
 description = "Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs."
 readme = "README.md"
 requires-python = ">=3.12"