PyPI - convoviz - Versions diffs - 0.4.5__tar.gz → 0.4.7__tar.gz - Mend

convoviz 0.4.5.tar.gz → 0.4.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{convoviz-0.4.5 → convoviz-0.4.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: convoviz
-Version: 0.4.5
+Version: 0.4.7
 Summary: Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs.
 Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
 Author: Mohamed Cheikh Sidiya
@@ -170,6 +170,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
 - 📝 Neatly formatted Markdown files
 - 📊 Visualizations and graphs
+If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ![wordcloud example](https://raw.githubusercontent.com/mohamed-chs/convoviz/main/demo/wordcloud-example.png)
 ---
@@ -182,8 +184,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
 👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
-And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ---
 ## 🤝 Contributing

{convoviz-0.4.5 → convoviz-0.4.7}/README.md RENAMED Viewed

@@ -145,6 +145,8 @@ After running the script, head to your output folder (defaults to `Documents/Cha
 - 📝 Neatly formatted Markdown files
 - 📊 Visualizations and graphs
+If you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ![wordcloud example](https://raw.githubusercontent.com/mohamed-chs/convoviz/main/demo/wordcloud-example.png)
 ---
@@ -157,8 +159,6 @@ Whether you're a tech wizard or you're new to all this, I'd love to hear about y
 👉 **[Open an Issue](https://github.com/mohamed-chs/convoviz/issues)**
-And if you've had a great experience, consider giving the project a ⭐ **star**! It keeps me motivated and helps others discover it!
 ---
 ## 🤝 Contributing

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/config.py RENAMED Viewed

@@ -54,7 +54,10 @@ class YAMLConfig(BaseModel):
     used_plugins: bool = False
     message_count: bool = True
     content_types: bool = False
+    content_types: bool = False
     custom_instructions: bool = False
+    is_starred: bool = False
+    voice: bool = False
 class ConversationConfig(BaseModel):

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/interactive.py RENAMED Viewed

@@ -8,10 +8,16 @@ from questionary import Choice, Style, checkbox, select
 from questionary import path as qst_path
 from questionary import text as qst_text
-from convoviz.config import ConvovizConfig, OutputKind, get_default_config
+from convoviz.config import ConvovizConfig, OutputKind, YAMLConfig, get_default_config
 from convoviz.io.loaders import find_latest_zip, validate_zip
 from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
+OUTPUT_TITLES = {
+    OutputKind.MARKDOWN: "Markdown conversations",
+    OutputKind.GRAPHS: "Graphs (usage analytics)",
+    OutputKind.WORDCLOUDS: "Word clouds",
+}
 CUSTOM_STYLE = Style(
     [
         ("qmark", "fg:#34eb9b bold"),
@@ -118,9 +124,12 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     # Prompt for outputs to generate
     output_choices = [
-        Choice(title="Markdown conversations", value=OutputKind.MARKDOWN, checked=True),
-        Choice(title="Graphs (usage analytics)", value=OutputKind.GRAPHS, checked=True),
-        Choice(title="Word clouds", value=OutputKind.WORDCLOUDS, checked=True),
+        Choice(
+            title=OUTPUT_TITLES.get(kind, kind.value.title()),
+            value=kind,
+            checked=kind in config.outputs,
+        )
+        for kind in OutputKind
     ]
     selected_outputs: list[OutputKind] = _ask_or_cancel(
@@ -172,20 +181,9 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
         # Prompt for YAML headers
         yaml_config = config.conversation.yaml
+        yaml_fields = list(YAMLConfig.model_fields.keys())
         yaml_choices = [
-            Choice(title=field, checked=getattr(yaml_config, field))
-            for field in [
-                "title",
-                "tags",
-                "chat_link",
-                "create_time",
-                "update_time",
-                "model",
-                "used_plugins",
-                "message_count",
-                "content_types",
-                "custom_instructions",
-            ]
+            Choice(title=field, checked=getattr(yaml_config, field)) for field in yaml_fields
         ]
         selected: list[str] = _ask_or_cancel(
@@ -197,18 +195,7 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
         )
         selected_set = set(selected)
-        for field_name in [
-            "title",
-            "tags",
-            "chat_link",
-            "create_time",
-            "update_time",
-            "model",
-            "used_plugins",
-            "message_count",
-            "content_types",
-            "custom_instructions",
-        ]:
+        for field_name in yaml_fields:
             setattr(yaml_config, field_name, field_name in selected_set)
     # Prompt for wordcloud settings (only if wordclouds output is selected)

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/conversation.py RENAMED Viewed

@@ -24,6 +24,8 @@ class Conversation(BaseModel):
     mapping: dict[str, Node]
     moderation_results: list[Any] = Field(default_factory=list)
     current_node: str
+    is_starred: bool | None = None
+    voice: str | dict[str, Any] | None = None
     plugin_ids: list[str] | None = None
     conversation_id: str
     conversation_template_id: str | None = None
@@ -156,3 +158,19 @@ class Conversation(BaseModel):
     def year_start(self) -> datetime:
         """Get January 1st of the year this conversation was created."""
         return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def citation_map(self) -> dict[str, dict[str, str | None]]:
+        """Aggregate citation metadata from all messages in the conversation.
+        Traverses all nodes (including hidden ones) to collect embedded citation definitions
+        from tool outputs (e.g. search results).
+        """
+        aggregated_map = {}
+        for node in self.all_message_nodes:
+            if not node.message:
+                continue
+            # Extract citations from message parts
+            if hasattr(node.message, "internal_citation_map"):
+                aggregated_map.update(node.message.internal_citation_map)
+        return aggregated_map

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/models/message.py RENAMED Viewed

@@ -46,6 +46,9 @@ class MessageMetadata(BaseModel):
     is_user_system_message: bool | None = None
     is_visually_hidden_from_conversation: bool | None = None
     user_context_message_data: dict[str, Any] | None = None
+    citations: list[dict[str, Any]] | None = None
+    search_result_groups: list[dict[str, Any]] | None = None
+    content_references: list[dict[str, Any]] | None = None
     model_config = ConfigDict(protected_namespaces=())
@@ -179,11 +182,12 @@ class Message(BaseModel):
         1. It is empty (no text, no images).
         2. Explicitly marked as visually hidden.
         3. It is an internal system message (not custom instructions).
-        4. It is a browser tool output (intermediate search steps).
+        4. It is a browser tool output (intermediate search steps) UNLESS it is a tether_quote.
         5. It is an assistant message targeting a tool (internal call).
         6. It is code interpreter input (content_type="code").
-        7. It is browsing status (tether_browsing_display).
-        8. It is internal reasoning (thoughts, reasoning_recap from o1/o3).
+        7. It is browsing status, internal reasoning (o1/o3), or massive web scraps (sonic_webpage).
+        8. It is a redundant DALL-E textual status update.
+        9. It is from internal bio (memory) or web.run orchestration tools.
         """
         if self.is_empty:
             return True
@@ -197,10 +201,29 @@ class Message(BaseModel):
             # Only show if explicitly marked as user system message (Custom Instructions)
             return not self.metadata.is_user_system_message
-        # Hide browser tool outputs (intermediate search steps)
-        if self.author.role == "tool" and self.author.name == "browser":
+        # Hide sonic_webpage (massive scraped text) and system_error
+        if self.content.content_type in ("sonic_webpage", "system_error"):
             return True
+        if self.author.role == "tool":
+            # Hide memory updates (bio) and internal search orchestration (web.run)
+            if self.author.name in ("bio", "web.run"):
+                return True
+            # Hide browser tool outputs (intermediate search steps)
+            # EXCEPTION: tether_quote (citations) should remain visible
+            if self.author.name == "browser":
+                return self.content.content_type != "tether_quote"
+            # Hide DALL-E textual status ("DALL·E displayed 1 images...")
+            if (
+                self.author.name == "dalle.text2im"
+                and self.content.content_type == "text"
+                # Check if it doesn't have images (just in case they attach images to text logic)
+                and not self.images
+            ):
+                return True
         # Hide assistant messages targeting tools (e.g., search(...), code input)
         # recipient="all" or None means it's for the user; anything else is internal
         if self.author.role == "assistant" and self.recipient not in ("all", None):
@@ -216,3 +239,56 @@ class Message(BaseModel):
             "thoughts",
             "reasoning_recap",
         )
+    @property
+    def internal_citation_map(self) -> dict[str, dict[str, str | None]]:
+        """Extract a map of citation IDs to metadata from content parts.
+        Used for resolving embedded citations (e.g. the marker U+E200 cite U+E202 turn0search18 U+E201).
+        Key format: "turn{turn_index}search{ref_index}"
+        """
+        if not self.content.parts:
+            return {}
+        citation_mapping = {}
+        # Helper to process a single search result entry
+        def process_entry(entry: dict[str, Any]) -> None:
+            ref_id = entry.get("ref_id")
+            if not ref_id:
+                return
+            # Only care about search results for now
+            if ref_id.get("ref_type") != "search":
+                return
+            turn_idx = ref_id.get("turn_index")
+            ref_idx = ref_id.get("ref_index")
+            if turn_idx is not None and ref_idx is not None:
+                # turn_idx is int, ref_idx is int
+                key = f"turn{turn_idx}search{ref_idx}"
+                citation_mapping[key] = {
+                    "title": entry.get("title"),
+                    "url": entry.get("url"),
+                }
+        # 1. Extract from self.content.parts
+        if self.content and self.content.parts:
+            for part in self.content.parts:
+                if isinstance(part, dict):
+                    if part.get("type") == "search_result":
+                        process_entry(part)
+                    elif part.get("type") == "search_result_group":
+                        for entry in part.get("entries", []):
+                            process_entry(entry)
+        # 2. Extract from metadata.search_result_groups (if present)
+        if self.metadata and self.metadata.search_result_groups:
+            for group in self.metadata.search_result_groups:
+                if isinstance(group, dict):
+                    # Groups might have 'entries' or be flat?
+                    # Based on name 'groups', likely similar to part structure
+                    for entry in group.get("entries", []):
+                        process_entry(entry)
+        return citation_mapping

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/renderers/markdown.py RENAMED Viewed

@@ -2,6 +2,7 @@
 import re
 from collections.abc import Callable
+from typing import Any
 from convoviz.config import AuthorHeaders, ConversationConfig
 from convoviz.exceptions import MessageContentError
@@ -9,6 +10,82 @@ from convoviz.models import Conversation, Node
 from convoviz.renderers.yaml import render_yaml_header
+def replace_citations(
+    text: str,
+    citations: list[dict[str, Any]] | None = None,
+    citation_map: dict[str, dict[str, str | None]] | None = None,
+) -> str:
+    """Replace citation placeholders in text with markdown links.
+    Supports two formats:
+    1. Tether v4 (metadata.citations): Placed at specific indices (【...】 placeholders).
+    2. Embedded (Tether v3?): Unicode markers citeturnXsearchY.
+    Args:
+        text: The original message text
+        citations: List of tether v4 citation objects (start_ix/end_ix)
+        citation_map: Map of internal citation IDs to metadata (turnXsearchY -> {title, url})
+    Returns:
+        Text with all placeholders replaced by markdown links
+    """
+    # 1. Handle Tether v4 (Index-based replacements)
+    if citations:
+        # Sort citations by start_ix descending to replace safely from end
+        sorted_citations = sorted(citations, key=lambda c: c.get("start_ix", 0), reverse=True)
+        for cit in sorted_citations:
+            start = cit.get("start_ix")
+            end = cit.get("end_ix")
+            meta = cit.get("metadata", {})
+            if start is None or end is None:
+                continue
+            replacement = _format_link(meta.get("title"), meta.get("url"))
+            # Only replace when the indices are non-negative, ordered, and within the text bounds
+            if 0 <= start < end <= len(text):
+                text = text[:start] + replacement + text[end:]
+    # 2. Handle Embedded Citations (Regex-based)
+    # Pattern: \uE200 cite (\uE202 key)+ \uE201
+    # Codepoints: \uE200 (Start), \uE202 (Sep), \uE201 (End)
+    if citation_map is not None:
+        pattern = re.compile(r"\uE200cite((?:\uE202[a-zA-Z0-9]+)+)\uE201")
+        def replacer(match: re.Match) -> str:
+            # Group 1 contains string like: \uE202turn0search18\uE202turn0search3
+            # Split by separator \uE202 (first item will be empty string)
+            raw_keys = match.group(1).split("\ue202")
+            keys = [k for k in raw_keys if k]
+            links = []
+            for key in keys:
+                if key in citation_map:
+                    data = citation_map[key]
+                    link = _format_link(data.get("title"), data.get("url"))
+                    if link:
+                        links.append(link)
+            return "".join(links)
+        text = pattern.sub(replacer, text)
+    return text
+def _format_link(title: str | None, url: str | None) -> str:
+    """Format a title and URL into a concise markdown link."""
+    if title and url:
+        return f" [[{title}]({url})]"
+    elif url:
+        return f" [[Source]({url})]"
+    elif title:
+        return f" [{title}]"
+    return ""
 def close_code_blocks(text: str) -> str:
     """Ensure all code blocks in the text are properly closed.
@@ -137,6 +214,7 @@ def render_node(
     use_dollar_latex: bool = False,
     asset_resolver: Callable[[str], str | None] | None = None,
     flavor: str = "standard",
+    citation_map: dict[str, dict[str, str | None]] | None = None,
 ) -> str:
     """Render a complete node as markdown.
@@ -146,9 +224,7 @@ def render_node(
         use_dollar_latex: Whether to convert LaTeX delimiters to dollars
         asset_resolver: Function to resolve asset IDs to paths
         flavor: Markdown flavor ("standard" or "obsidian")
-    Returns:
-        Complete markdown string for the node
+        citation_map: Global map of citations
     """
     if node.message is None:
         return ""
@@ -185,6 +261,19 @@ def render_node(
         # Some message types only contain non-text parts; those still may have images.
         text = ""
+    # Process citations if present (Tether v4 metadata or Embedded v3)
+    # Prefer the conversation-wide citation_map passed in by the caller.
+    # If it is missing or empty, fall back to this message's own internal map
+    # (a subset of the global map when the aggregation covered every node).
+    effective_map = citation_map or node.message.internal_citation_map
+    if node.message.metadata.citations or effective_map:
+        text = replace_citations(
+            text,
+            citations=node.message.metadata.citations,
+            citation_map=effective_map,
+        )
     content = close_code_blocks(text)
     content = f"\n{content}\n" if content else ""
     if use_dollar_latex:
@@ -255,6 +344,9 @@ def render_conversation(
     # Start with YAML header
     markdown = render_yaml_header(conversation, config.yaml)
+    # Pre-calculate citation map for the conversation
+    citation_map = conversation.citation_map
     # Render message nodes in a deterministic traversal order.
     for node in _ordered_nodes(conversation):
         if node.message:
@@ -264,6 +356,7 @@ def render_conversation(
                 use_dollar_latex,
                 asset_resolver=asset_resolver,
                 flavor=flavor,
+                citation_map=citation_map,
             )
     return markdown

{convoviz-0.4.5 → convoviz-0.4.7}/convoviz/renderers/yaml.py RENAMED Viewed

@@ -111,6 +111,10 @@ def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
         yaml_fields["content_types"] = conversation.content_types
     if config.custom_instructions:
         yaml_fields["custom_instructions"] = conversation.custom_instructions
+    if config.is_starred:
+        yaml_fields["is_starred"] = conversation.is_starred
+    if config.voice:
+        yaml_fields["voice"] = conversation.voice
     if not yaml_fields:
         return ""

{convoviz-0.4.5 → convoviz-0.4.7}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "convoviz"
-version = "0.4.5"
+version = "0.4.7"
 description = "Convert your ChatGPT export (ZIP) into clean Markdown text files with inline media, and generate data visualizations like word clouds and usage graphs."
 readme = "README.md"
 requires-python = ">=3.12"