PyPI - convoviz - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

convoviz/analysis/graphs.py +410 -21
convoviz/analysis/wordcloud.py +21 -1
convoviz/assets/stopwords.txt +75 -0
convoviz/cli.py +18 -15
convoviz/config.py +14 -7
convoviz/interactive.py +40 -11
convoviz/io/assets.py +82 -0
convoviz/io/loaders.py +54 -3
convoviz/io/writers.py +17 -2
convoviz/models/__init__.py +0 -4
convoviz/models/collection.py +14 -6
convoviz/models/conversation.py +4 -6
convoviz/models/message.py +87 -7
convoviz/pipeline.py +70 -24
convoviz/renderers/markdown.py +91 -24
convoviz/renderers/yaml.py +79 -2
convoviz/utils.py +54 -4
{convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/METADATA +30 -5
{convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/RECORD +21 -19
{convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/WHEEL +1 -1
{convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/entry_points.txt +0 -0

convoviz/config.py CHANGED Viewed

@@ -19,6 +19,7 @@ class MarkdownConfig(BaseModel):
     """Configuration for markdown output."""
     latex_delimiters: Literal["default", "dollars"] = "default"
+    flavor: Literal["obsidian", "standard"] = "obsidian"
 class YAMLConfig(BaseModel):
@@ -53,27 +54,33 @@ class WordCloudConfig(BaseModel):
     """Configuration for word cloud generation."""
     font_path: Path | None = None
-    colormap: str = "magma"
+    colormap: str = "RdYlBu"
     custom_stopwords: str = "use, file, "
+    exclude_programming_keywords: bool = True
     background_color: str | None = None
     mode: Literal["RGB", "RGBA"] = "RGBA"
     include_numbers: bool = False
-    width: int = 1000
-    height: int = 1000
+    width: int = 600
+    height: int = 600
 class GraphConfig(BaseModel):
     """Configuration for graph generation."""
-    # Extensible for future graph options
-    pass
+    color: str = "#4A90E2"
+    grid: bool = True
+    show_counts: bool = True
+    font_name: str = "Montserrat-Regular.ttf"
+    figsize: tuple[int, int] = (10, 6)
+    dpi: int = 300
+    timezone: Literal["utc", "local"] = "local"
 class ConvovizConfig(BaseModel):
     """Main configuration for convoviz."""
-    zip_filepath: Path | None = None
-    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT Data")
+    input_path: Path | None = None
+    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
     message: MessageConfig = Field(default_factory=MessageConfig)
     conversation: ConversationConfig = Field(default_factory=ConversationConfig)
     wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)

convoviz/interactive.py CHANGED Viewed

@@ -26,6 +26,25 @@ CUSTOM_STYLE = Style(
 )
+def _validate_input_path(raw: str) -> bool | str:
+    path = Path(raw)
+    if not path.exists():
+        return "Path must exist"
+    if path.is_dir():
+        if (path / "conversations.json").exists():
+            return True
+        return "Directory must contain conversations.json"
+    if path.suffix.lower() == ".json":
+        return True
+    if path.suffix.lower() == ".zip":
+        return True if validate_zip(path) else "ZIP must contain conversations.json"
+    return "Input must be a .zip, a .json, or a directory containing conversations.json"
 def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
     """Run interactive prompts to configure convoviz.
@@ -38,26 +57,25 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     config = initial_config or get_default_config()
     # Set sensible defaults if not already set
-    if not config.zip_filepath:
+    if not config.input_path:
         latest = find_latest_zip()
         if latest:
-            config.zip_filepath = latest
+            config.input_path = latest
     if not config.wordcloud.font_path:
         config.wordcloud.font_path = default_font_path()
-    # Prompt for zip file path
-    zip_default = str(config.zip_filepath) if config.zip_filepath else ""
-    zip_result = qst_path(
-        "Enter the path to the zip file:",
-        default=zip_default,
-        validate=lambda p: validate_zip(Path(p))
-        or "Invalid zip file (must contain conversations.json)",
+    # Prompt for input path
+    input_default = str(config.input_path) if config.input_path else ""
+    input_result = qst_path(
+        "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
+        default=input_default,
+        validate=_validate_input_path,
         style=CUSTOM_STYLE,
     ).ask()
-    if zip_result:
-        config.zip_filepath = Path(zip_result)
+    if input_result:
+        config.input_path = Path(input_result)
     # Prompt for output folder
     output_result = qst_path(
@@ -94,6 +112,17 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     if latex_result:
         config.conversation.markdown.latex_delimiters = latex_result
+    # Prompt for markdown flavor
+    flavor_result = select(
+        "Select the markdown flavor:",
+        choices=["obsidian", "standard"],
+        default=config.conversation.markdown.flavor,
+        style=CUSTOM_STYLE,
+    ).ask()
+    if flavor_result:
+        config.conversation.markdown.flavor = flavor_result
     # Prompt for YAML headers
     yaml_config = config.conversation.yaml
     yaml_choices = [

convoviz/io/assets.py ADDED Viewed

@@ -0,0 +1,82 @@
+"Asset management functions."
+import shutil
+from pathlib import Path
+def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
+    """Find the actual file for a given asset ID in the source directory.
+    Args:
+        source_dir: Directory to search in
+        asset_id: The asset ID (e.g., "file-uuid")
+    Returns:
+        Path to the found file, or None
+    """
+    if not source_dir.exists():
+        return None
+    source_dir = source_dir.resolve()
+    # Safety check for asset_id
+    if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
+        return None
+    # 1. Try exact match
+    exact_path = (source_dir / asset_id).resolve()
+    if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
+        return exact_path
+    # 2. Try prefix match in root
+    try:
+        candidates = list(source_dir.glob(f"{asset_id}*"))
+        files = [
+            p.resolve()
+            for p in candidates
+            if p.is_file() and p.resolve().is_relative_to(source_dir)
+        ]
+        if files:
+            return files[0]
+    except Exception:
+        pass
+    # 3. Try prefix match in dalle-generations
+    dalle_dir = source_dir / "dalle-generations"
+    if dalle_dir.exists() and dalle_dir.is_dir():
+        dalle_dir = dalle_dir.resolve()
+        try:
+            candidates = list(dalle_dir.glob(f"{asset_id}*"))
+            files = [
+                p.resolve()
+                for p in candidates
+                if p.is_file() and p.resolve().is_relative_to(dalle_dir)
+            ]
+            if files:
+                return files[0]
+        except Exception:
+            pass
+    return None
+def copy_asset(source_path: Path, dest_dir: Path) -> str:
+    """Copy an asset to the destination directory.
+    Args:
+        source_path: The source file path
+        dest_dir: The root output directory (assets will be in dest_dir/assets)
+    Returns:
+        Relative path to the asset (e.g., "assets/image.png")
+    """
+    assets_dir = dest_dir / "assets"
+    assets_dir.mkdir(parents=True, exist_ok=True)
+    dest_path = assets_dir / source_path.name
+    if not dest_path.exists():
+        shutil.copy2(source_path, dest_path)
+    # Return forward-slash path for Markdown compatibility even on Windows
+    return f"assets/{source_path.name}"

convoviz/io/loaders.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Loading functions for conversations and collections."""
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from zipfile import ZipFile
 from orjson import loads
@@ -9,17 +9,62 @@ from convoviz.exceptions import InvalidZipError
 from convoviz.models import Conversation, ConversationCollection
+def _is_safe_zip_member_name(name: str) -> bool:
+    """Return True if a ZIP entry name is safe to extract.
+    This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
+    separators and rejects absolute paths, drive-letter paths, and ``..`` parts.
+    """
+    normalized = name.replace("\\", "/")
+    member_path = PurePosixPath(normalized)
+    # Absolute paths (e.g. "/etc/passwd") or empty names
+    if not normalized or member_path.is_absolute():
+        return False
+    # Windows drive letters / UNC-style prefixes stored in the archive
+    first = member_path.parts[0] if member_path.parts else ""
+    if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
+        return False
+    return ".." not in member_path.parts
 def extract_archive(filepath: Path) -> Path:
     """Extract a ZIP file and return the extraction folder path.
+    Includes safety checks to prevent Path Traversal (Zip-Slip).
     Args:
         filepath: Path to the ZIP file
     Returns:
         Path to the extracted folder
+    Raises:
+        InvalidZipError: If extraction fails or a security risk is detected
     """
     folder = filepath.with_suffix("")
+    folder.mkdir(parents=True, exist_ok=True)
     with ZipFile(filepath) as zf:
+        for member in zf.infolist():
+            # Check for path traversal (Zip-Slip) in an OS-agnostic way.
+            # ZIP files are typically POSIX-path-like, but malicious archives can
+            # embed backslashes or drive-letter tricks.
+            if not _is_safe_zip_member_name(member.filename):
+                raise InvalidZipError(
+                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
+                )
+            # Additional check using resolved paths
+            normalized = member.filename.replace("\\", "/")
+            target_path = (folder / normalized).resolve()
+            if not target_path.is_relative_to(folder.resolve()):
+                raise InvalidZipError(
+                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
+                )
         zf.extractall(folder)
     return folder
@@ -60,7 +105,8 @@ def load_conversation_from_json(filepath: Path | str) -> Conversation:
 def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
     """Load a conversation collection from a JSON file.
-    The JSON file should contain an array of conversation objects.
+    The JSON file should contain an array of conversation objects,
+    or an object with a "conversations" key.
     Args:
         filepath: Path to the JSON file
@@ -71,7 +117,12 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
     filepath = Path(filepath)
     with filepath.open(encoding="utf-8") as f:
         data = loads(f.read())
-    return ConversationCollection(conversations=data)
+    # Handle case where export is wrapped in a top-level object
+    if isinstance(data, dict) and "conversations" in data:
+        data = data["conversations"]
+    return ConversationCollection(conversations=data, source_path=filepath.parent)
 def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:

convoviz/io/writers.py CHANGED Viewed

@@ -7,6 +7,7 @@ from orjson import OPT_INDENT_2, dumps
 from tqdm import tqdm
 from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.io.assets import copy_asset, resolve_asset_path
 from convoviz.models import Conversation, ConversationCollection
 from convoviz.renderers import render_conversation
 from convoviz.utils import sanitize
@@ -17,6 +18,7 @@ def save_conversation(
     filepath: Path,
     config: ConversationConfig,
     headers: AuthorHeaders,
+    source_path: Path | None = None,
 ) -> Path:
     """Save a conversation to a markdown file.
@@ -28,6 +30,7 @@ def save_conversation(
         filepath: Target file path
         config: Conversation rendering configuration
         headers: Author header configuration
+        source_path: Path to the source directory containing assets
     Returns:
         The actual path the file was saved to (may differ if there was a conflict)
@@ -41,8 +44,20 @@ def save_conversation(
         counter += 1
         final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
+    # Define asset resolver
+    def asset_resolver(asset_id: str) -> str | None:
+        if not source_path:
+            return None
+        src_file = resolve_asset_path(source_path, asset_id)
+        if not src_file:
+            return None
+        # Copy to output directory (relative to the markdown file's directory)
+        return copy_asset(src_file, final_path.parent)
     # Render and write
-    markdown = render_conversation(conversation, config, headers)
+    markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
     with final_path.open("w", encoding="utf-8") as f:
         f.write(markdown)
@@ -78,7 +93,7 @@ def save_collection(
         disable=not progress_bar,
     ):
         filepath = directory / f"{sanitize(conv.title)}.md"
-        save_conversation(conv, filepath, config, headers)
+        save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
 def save_custom_instructions(

convoviz/models/__init__.py CHANGED Viewed

@@ -11,14 +11,10 @@ from convoviz.models.message import (
 )
 from convoviz.models.node import Node, build_node_tree
-# Backward compatibility alias
-ConversationSet = ConversationCollection
 __all__ = [
     "AuthorRole",
     "Conversation",
     "ConversationCollection",
-    "ConversationSet",
     "Message",
     "MessageAuthor",
     "MessageContent",

convoviz/models/collection.py CHANGED Viewed

@@ -4,6 +4,7 @@ This is a pure data model - I/O and visualization logic are in separate modules.
 """
 from datetime import datetime
+from pathlib import Path
 from typing import Any
 from pydantic import BaseModel, Field
@@ -19,6 +20,7 @@ class ConversationCollection(BaseModel):
     """
     conversations: list[Conversation] = Field(default_factory=list)
+    source_path: Path | None = None
     @property
     def index(self) -> dict[str, Conversation]:
@@ -35,14 +37,20 @@ class ConversationCollection(BaseModel):
     def update(self, other: "ConversationCollection") -> None:
         """Merge another collection into this one.
-        Only updates if the other collection has newer content.
+        Merges per-conversation, keeping the newest version when IDs collide.
+        Note: We intentionally do *not* gate on ``other.last_updated`` because
+        "new" conversations can still have older timestamps than the most recent
+        conversation in this collection (e.g. bookmarklet downloads).
         """
-        if other.last_updated <= self.last_updated:
-            return
+        merged: dict[str, Conversation] = dict(self.index)
+        for conv_id, incoming in other.index.items():
+            existing = merged.get(conv_id)
+            if existing is None or incoming.update_time > existing.update_time:
+                merged[conv_id] = incoming
-        merged_index = self.index
-        merged_index.update(other.index)
-        self.conversations = list(merged_index.values())
+        self.conversations = list(merged.values())
     def add(self, conversation: Conversation) -> None:
         """Add a conversation to the collection."""

convoviz/models/conversation.py CHANGED Viewed

@@ -98,12 +98,10 @@ class Conversation(BaseModel):
     def custom_instructions(self) -> dict[str, str]:
         """Get custom instructions used for this conversation."""
         system_nodes = self.nodes_by_author("system")
-        if len(system_nodes) < 2:
-            return {}
-        context_message = system_nodes[1].message
-        if context_message and context_message.metadata.is_user_system_message:
-            return context_message.metadata.user_context_message_data or {}
+        for node in system_nodes:
+            context_message = node.message
+            if context_message and context_message.metadata.is_user_system_message:
+                return context_message.metadata.user_context_message_data or {}
         return {}
     def timestamps(self, *authors: AuthorRole) -> list[float]:

convoviz/models/message.py CHANGED Viewed

@@ -6,11 +6,11 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
 from datetime import datetime
 from typing import Any, Literal
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 from convoviz.exceptions import MessageContentError
-AuthorRole = Literal["user", "assistant", "system", "tool"]
+AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
 class MessageAuthor(BaseModel):
@@ -18,14 +18,14 @@ class MessageAuthor(BaseModel):
     role: AuthorRole
     name: str | None = None
-    metadata: dict[str, Any] = {}
+    metadata: dict[str, Any] = Field(default_factory=dict)
 class MessageContent(BaseModel):
     """Content of a message."""
     content_type: str
-    parts: list[str] | None = None
+    parts: list[Any] | None = None
     text: str | None = None
     result: str | None = None
@@ -55,14 +55,56 @@ class Message(BaseModel):
     status: str
     end_turn: bool | None = None
     weight: float
-    metadata: MessageMetadata
-    recipient: str
+    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
+    recipient: str | None = None
+    @property
+    def images(self) -> list[str]:
+        """Extract image asset pointers from the message content."""
+        if not self.content.parts:
+            return []
+        image_ids = []
+        for part in self.content.parts:
+            if isinstance(part, dict) and part.get("content_type") == "image_asset_pointer":
+                pointer = part.get("asset_pointer", "")
+                # Strip prefixes like "file-service://" or "sediment://"
+                if pointer.startswith("file-service://"):
+                    pointer = pointer[len("file-service://") :]
+                elif pointer.startswith("sediment://"):
+                    pointer = pointer[len("sediment://") :]
+                if pointer:
+                    image_ids.append(pointer)
+        return image_ids
     @property
     def text(self) -> str:
         """Extract the text content of the message."""
         if self.content.parts is not None:
-            return str(self.content.parts[0]) if self.content.parts else ""
+            # Handle multimodal content where parts can be mixed strings and dicts
+            text_parts = []
+            for part in self.content.parts:
+                if isinstance(part, str):
+                    text_parts.append(part)
+                elif isinstance(part, dict) and "text" in part:
+                    # Some parts might be dicts wrapping text (e.g. code interpreter?)
+                    # But based on spec, usually text is just a string in the list.
+                    # We'll stick to string extraction for now.
+                    pass
+            # If we found string parts, join them.
+            # If parts existed but no strings (e.g. only images), return empty string?
+            # Or should we return a placeholder? For now, let's return joined text.
+            if text_parts:
+                return "".join(text_parts)
+            # If parts list is not empty but contains no strings, we might want to fall through
+            # or return empty string if we consider it "handled".
+            # The original code returned "" if parts was empty list.
+            if self.content.parts:
+                return ""
         if self.content.text is not None:
             return self.content.text
         if self.content.result is not None:
@@ -75,3 +117,41 @@ class Message(BaseModel):
         return bool(
             self.content.parts or self.content.text is not None or self.content.result is not None
         )
+    @property
+    def is_empty(self) -> bool:
+        """Check if the message is effectively empty (no text, no images)."""
+        try:
+            return not self.text.strip() and not self.images
+        except MessageContentError:
+            return True
+    @property
+    def is_hidden(self) -> bool:
+        """Check if message should be hidden in export.
+        Hidden if:
+        1. It is empty (no text, no images).
+        2. It is an internal system message (not custom instructions).
+        3. It is a browser tool output (intermediate search steps).
+        """
+        if self.is_empty:
+            return True
+        # Hide internal system messages
+        if self.author.role == "system":
+            # Only show if explicitly marked as user system message (Custom Instructions)
+            return not self.metadata.is_user_system_message
+        # Hide browser tool outputs (usually intermediate search steps)
+        if self.author.role == "tool" and self.author.name == "browser":
+            return True
+        # Hide assistant calls to browser tool (e.g. "search(...)") or code interpreter
+        if self.author.role == "assistant" and (
+            self.recipient == "browser" or self.content.content_type == "code"
+        ):
+            return True
+        # Hide browsing status messages
+        return self.content.content_type == "tether_browsing_display"

convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl