PyPI - convoviz - Versions diffs - 0.4.1__py3-none-any.whl - Mend

convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

convoviz/__init__.py +34 -0
convoviz/__main__.py +6 -0
convoviz/analysis/__init__.py +22 -0
convoviz/analysis/graphs.py +879 -0
convoviz/analysis/wordcloud.py +204 -0
convoviz/assets/colormaps.txt +15 -0
convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
convoviz/assets/fonts/Borel-Regular.ttf +0 -0
convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
convoviz/assets/stopwords.txt +1 -0
convoviz/cli.py +149 -0
convoviz/config.py +120 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +264 -0
convoviz/io/__init__.py +21 -0
convoviz/io/assets.py +109 -0
convoviz/io/loaders.py +191 -0
convoviz/io/writers.py +231 -0
convoviz/logging_config.py +69 -0
convoviz/models/__init__.py +24 -0
convoviz/models/collection.py +115 -0
convoviz/models/conversation.py +158 -0
convoviz/models/message.py +218 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +184 -0
convoviz/py.typed +0 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +269 -0
convoviz/renderers/yaml.py +119 -0
convoviz/utils.py +155 -0
convoviz-0.4.1.dist-info/METADATA +215 -0
convoviz-0.4.1.dist-info/RECORD +62 -0
convoviz-0.4.1.dist-info/WHEEL +4 -0
convoviz-0.4.1.dist-info/entry_points.txt +3 -0

convoviz/models/message.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""Message model - pure data class.
+Object path: conversations.json -> conversation -> mapping -> mapping node -> message
+"""
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, ConfigDict, Field
+from convoviz.exceptions import MessageContentError
+# Closed set of author roles; used as the type of ``MessageAuthor.role``.
+AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
+class MessageAuthor(BaseModel):
+    """Author information for a message.
+    Holds a required role, an optional name and a free-form metadata mapping.
+    """
+    # Required; must be one of the literal values listed in ``AuthorRole``.
+    role: AuthorRole
+    # Optional author name; ``Message.is_hidden`` compares it to "browser" for tool authors.
+    name: str | None = None
+    # Arbitrary extra author data; defaults to a new empty dict per instance.
+    metadata: dict[str, Any] = Field(default_factory=dict)
+class MessageContent(BaseModel):
+    """Content of a message.
+    Only ``content_type`` is required; every payload field is optional and
+    defaults to None. ``Message.text`` decides which of them is read.
+    """
+    # Content kind; values checked elsewhere in this module include "tether_quote",
+    # "code", "tether_browsing_display", "thoughts" and "reasoning_recap".
+    content_type: str
+    # Mixed list: ``Message.text`` joins the str items, ``Message.images`` inspects the dict items.
+    parts: list[Any] | None = None
+    # Plain text payload; also the quoted text when content_type is "tether_quote".
+    text: str | None = None
+    # Consulted by ``Message.text`` after ``text``.
+    result: str | None = None
+    # reasoning_recap content type
+    content: str | None = None
+    # thoughts content type (list of thought objects with summary/content/finished)
+    thoughts: list[Any] | None = None
+    # tether_quote content type
+    # (url/domain/title feed the attribution line built by ``Message._render_tether_quote``)
+    url: str | None = None
+    domain: str | None = None
+    title: str | None = None
+class MessageMetadata(BaseModel):
+    """Metadata for a message.
+    All fields are optional and default to None.
+    """
+    model_slug: str | None = None
+    invoked_plugin: dict[str, Any] | None = None
+    # When truthy, a system-role message is kept visible by ``Message.is_hidden``.
+    is_user_system_message: bool | None = None
+    # When truthy, ``Message.is_hidden`` returns True.
+    is_visually_hidden_from_conversation: bool | None = None
+    user_context_message_data: dict[str, Any] | None = None
+    # NOTE(review): presumably clears pydantic's protected "model_" namespace so that the
+    # ``model_slug`` field name does not trigger a warning - confirm against the pydantic docs.
+    model_config = ConfigDict(protected_namespaces=())
+class Message(BaseModel):
+    """A single message in a conversation.
+    This is a pure data model - rendering logic is in the renderers module.
+    """
+    id: str
+    author: MessageAuthor
+    create_time: datetime | None = None
+    update_time: datetime | None = None
+    content: MessageContent
+    status: str
+    end_turn: bool | None = None
+    weight: float
+    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
+    recipient: str | None = None
+    @property
+    def images(self) -> list[str]:
+        """Extract image asset pointers from the message content."""
+        if not self.content.parts:
+            return []
+        image_ids = []
+        for part in self.content.parts:
+            if isinstance(part, dict) and part.get("content_type") == "image_asset_pointer":
+                pointer = part.get("asset_pointer", "")
+                # Strip prefixes like "file-service://" or "sediment://"
+                if pointer.startswith("file-service://"):
+                    pointer = pointer[len("file-service://") :]
+                elif pointer.startswith("sediment://"):
+                    pointer = pointer[len("sediment://") :]
+                if pointer:
+                    image_ids.append(pointer)
+        return image_ids
+    @property
+    def text(self) -> str:
+        """Extract the text content of the message."""
+        if self.content.parts is not None:
+            # Handle multimodal content where parts can be mixed strings and dicts
+            text_parts = []
+            for part in self.content.parts:
+                if isinstance(part, str):
+                    text_parts.append(part)
+                elif isinstance(part, dict) and "text" in part:
+                    # Some parts might be dicts wrapping text (e.g. code interpreter?)
+                    # But based on spec, usually text is just a string in the list.
+                    # We'll stick to string extraction for now.
+                    pass
+            # If we found string parts, join them.
+            # If parts existed but no strings (e.g. only images), return empty string?
+            # Or should we return a placeholder? For now, let's return joined text.
+            if text_parts:
+                return "".join(text_parts)
+            # If parts list is not empty but contains no strings, we might want to fall through
+            # or return empty string if we consider it "handled".
+            # The original code returned "" if parts was empty list.
+            if self.content.parts:
+                return ""
+        # tether_quote: render as a blockquote with attribution (check before .text)
+        if self.content.content_type == "tether_quote":
+            return self._render_tether_quote()
+        if self.content.text is not None:
+            return self.content.text
+        if self.content.result is not None:
+            return self.content.result
+        # reasoning_recap content type uses 'content' field
+        if self.content.content is not None:
+            return self.content.content
+        # thoughts content type uses 'thoughts' field (list of thought objects)
+        if self.content.thoughts is not None:
+            return self._render_thoughts()
+        raise MessageContentError(self.id)
+    def _render_thoughts(self) -> str:
+        """Render thoughts content (list of thought objects with summary/content)."""
+        if not self.content.thoughts:
+            return ""
+        summaries = []
+        for thought in self.content.thoughts:
+            if isinstance(thought, dict) and (summary := thought.get("summary")):
+                summaries.append(summary)
+        return "\n".join(summaries) if summaries else ""
+    def _render_tether_quote(self) -> str:
+        """Render tether_quote content as a blockquote."""
+        quote_text = self.content.text or ""
+        if not quote_text.strip():
+            return ""
+        # Format as blockquote with source
+        lines = [f"> {line}" for line in quote_text.strip().split("\n")]
+        blockquote = "\n".join(lines)
+        # Add attribution if we have title/domain/url
+        if self.content.title and self.content.url:
+            blockquote += f"\n> — [{self.content.title}]({self.content.url})"
+        elif self.content.domain and self.content.url:
+            blockquote += f"\n> — [{self.content.domain}]({self.content.url})"
+        elif self.content.url:
+            blockquote += f"\n> — <{self.content.url}>"
+        return blockquote
+    @property
+    def has_content(self) -> bool:
+        """Check if the message has extractable content."""
+        return bool(
+            self.content.parts or self.content.text is not None or self.content.result is not None
+        )
+    @property
+    def is_empty(self) -> bool:
+        """Check if the message is effectively empty (no text, no images)."""
+        try:
+            return not self.text.strip() and not self.images
+        except MessageContentError:
+            return True
+    @property
+    def is_hidden(self) -> bool:
+        """Check if message should be hidden in export.
+        Hidden if:
+        1. It is empty (no text, no images).
+        2. Explicitly marked as visually hidden.
+        3. It is an internal system message (not custom instructions).
+        4. It is a browser tool output (intermediate search steps).
+        5. It is an assistant message targeting a tool (internal call).
+        6. It is code interpreter input (content_type="code").
+        7. It is browsing status (tether_browsing_display).
+        8. It is internal reasoning (thoughts, reasoning_recap from o1/o3).
+        """
+        if self.is_empty:
+            return True
+        # Explicitly marked as hidden by OpenAI
+        if self.metadata.is_visually_hidden_from_conversation:
+            return True
+        # Hide internal system messages
+        if self.author.role == "system":
+            # Only show if explicitly marked as user system message (Custom Instructions)
+            return not self.metadata.is_user_system_message
+        # Hide browser tool outputs (intermediate search steps)
+        if self.author.role == "tool" and self.author.name == "browser":
+            return True
+        # Hide assistant messages targeting tools (e.g., search(...), code input)
+        # recipient="all" or None means it's for the user; anything else is internal
+        if self.author.role == "assistant" and self.recipient not in ("all", None):
+            return True
+        # Hide code interpreter input (content_type="code")
+        if self.author.role == "assistant" and self.content.content_type == "code":
+            return True
+        # Hide browsing status and internal reasoning steps (o1/o3 models)
+        return self.content.content_type in (
+            "tether_browsing_display",
+            "thoughts",
+            "reasoning_recap",
+        )

convoviz/models/node.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""Node model - pure data class.
+Object path: conversations.json -> conversation -> mapping -> mapping node
+Nodes form a tree structure representing conversation branches.
+"""
+from pydantic import BaseModel, Field
+from convoviz.models.message import Message
+class Node(BaseModel):
+    """A node in the conversation tree.
+    Each node can have a message and links to parent/children nodes.
+    This is a pure data model - rendering logic is in the renderers module.
+    """
+    id: str
+    message: Message | None = None
+    parent: str | None = None
+    children: list[str] = Field(default_factory=list)
+    # Runtime-populated references (not from JSON)
+    parent_node: "Node | None" = None
+    children_nodes: list["Node"] = Field(default_factory=list)
+    def add_child(self, node: "Node") -> None:
+        """Add a child node and set up bidirectional references."""
+        self.children_nodes.append(node)
+        node.parent_node = self
+    @property
+    def has_message(self) -> bool:
+        """Check if this node contains a message."""
+        return self.message is not None
+    @property
+    def is_leaf(self) -> bool:
+        """Check if this node is a leaf (no children)."""
+        return len(self.children_nodes) == 0
+def build_node_tree(mapping: dict[str, Node]) -> dict[str, Node]:
+    """Build the node tree by connecting parent/child references.
+    Args:
+        mapping: Dictionary of node_id -> Node
+    Returns:
+        The same dictionary with nodes connected via parent_node/children_nodes
+    """
+    # Reset connections to avoid duplicates on repeated calls
+    for node in mapping.values():
+        node.children_nodes = []
+        node.parent_node = None
+    # Build connections
+    for node in mapping.values():
+        for child_id in node.children:
+            if child_id in mapping:
+                child_node = mapping[child_id]
+                node.add_child(child_node)
+    return mapping

convoviz/pipeline.py ADDED Viewed

@@ -0,0 +1,184 @@
+"""Main processing pipeline for convoviz."""
+import logging
+from pathlib import Path
+from shutil import rmtree
+from rich.console import Console
+from convoviz.config import ConvovizConfig, OutputKind
+from convoviz.exceptions import ConfigurationError, InvalidZipError
+from convoviz.io.loaders import (
+    find_latest_bookmarklet_json,
+    load_collection_from_json,
+    load_collection_from_zip,
+)
+from convoviz.io.writers import save_collection
+# Rich console used by this module for user-facing status output.
+console = Console()
+# Module-level logger for diagnostic messages.
+logger = logging.getLogger(__name__)
+def _safe_uri(path: Path) -> str:
+    """Best-effort URI for printing.
+    ``Path.as_uri()`` requires an absolute path; users often provide relative
+    output paths, so we resolve first and fall back to string form.
+    """
+    try:
+        return path.resolve().as_uri()
+    except Exception:
+        return str(path)
+def run_pipeline(config: ConvovizConfig) -> None:
+    """Run the main processing pipeline.
+    Args:
+        config: Complete configuration for the pipeline
+    Raises:
+        InvalidZipError: If the input is invalid
+        ConfigurationError: If configuration is incomplete
+    """
+    if not config.input_path:
+        raise InvalidZipError("", reason="No input path specified")
+    input_path = Path(config.input_path)
+    if not input_path.exists():
+        raise InvalidZipError(str(input_path), reason="File does not exist")
+    logger.info(f"Starting pipeline with input: {input_path}")
+    console.print(f"Loading data from {input_path} [bold yellow]📂[/bold yellow] ...\n")
+    # Load collection based on input type
+    if input_path.is_dir():
+        # Check for conversations.json inside
+        json_path = input_path / "conversations.json"
+        if not json_path.exists():
+            raise InvalidZipError(
+                str(input_path), reason="Directory must contain conversations.json"
+            )
+        collection = load_collection_from_json(json_path)
+    elif input_path.suffix == ".json":
+        collection = load_collection_from_json(input_path)
+    else:
+        # Assume zip
+        collection = load_collection_from_zip(input_path)
+    logger.info(f"Loaded collection with {len(collection.conversations)} conversations")
+    # Try to merge bookmarklet data if available
+    bookmarklet_json = find_latest_bookmarklet_json()
+    if bookmarklet_json:
+        console.print("Found bookmarklet download, loading [bold yellow]📂[/bold yellow] ...\n")
+        try:
+            bookmarklet_collection = load_collection_from_json(bookmarklet_json)
+            collection.update(bookmarklet_collection)
+            logger.info("Merged bookmarklet data")
+        except Exception as e:
+            console.print(
+                f"[bold yellow]Warning:[/bold yellow] Failed to load bookmarklet data: {e}"
+            )
+    output_folder = config.output_folder
+    output_folder.mkdir(parents=True, exist_ok=True)
+    # Determine which outputs are selected
+    selected_outputs = config.outputs
+    # Build mapping of output kind -> directory name
+    output_dir_map: dict[OutputKind, str] = {
+        OutputKind.MARKDOWN: "Markdown",
+        OutputKind.GRAPHS: "Graphs",
+        OutputKind.WORDCLOUDS: "Word-Clouds",
+    }
+    # Clean only specific sub-directories we manage (only for selected outputs)
+    for output_kind, dir_name in output_dir_map.items():
+        if output_kind not in selected_outputs:
+            continue
+        sub_dir = output_folder / dir_name
+        if sub_dir.exists():
+            # Never follow symlinks; just unlink them.
+            if sub_dir.is_symlink():
+                sub_dir.unlink()
+            elif sub_dir.is_dir():
+                rmtree(sub_dir)
+            else:
+                sub_dir.unlink()
+        sub_dir.mkdir(exist_ok=True)
+    # Save markdown files (if selected)
+    if OutputKind.MARKDOWN in selected_outputs:
+        markdown_folder = output_folder / "Markdown"
+        save_collection(
+            collection,
+            markdown_folder,
+            config.conversation,
+            config.message.author_headers,
+            folder_organization=config.folder_organization,
+            progress_bar=True,
+        )
+        logger.info("Markdown generation complete")
+        console.print(
+            f"\nDone [bold green]✅[/bold green] ! "
+            f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
+        )
+    # Generate graphs (if selected)
+    if OutputKind.GRAPHS in selected_outputs:
+        # Lazy import to allow markdown-only usage without matplotlib
+        try:
+            from convoviz.analysis.graphs import generate_graphs
+        except ModuleNotFoundError as e:
+            raise ConfigurationError(
+                "Graph generation requires matplotlib. "
+                'Reinstall with the [viz] extra: uv tool install "convoviz[viz]"'
+            ) from e
+        graph_folder = output_folder / "Graphs"
+        graph_folder.mkdir(parents=True, exist_ok=True)
+        generate_graphs(
+            collection,
+            graph_folder,
+            config.graph,
+            progress_bar=True,
+        )
+        logger.info("Graph generation complete")
+        console.print(
+            f"\nDone [bold green]✅[/bold green] ! "
+            f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
+        )
+    # Generate word clouds (if selected)
+    if OutputKind.WORDCLOUDS in selected_outputs:
+        # Lazy import to allow markdown-only usage without wordcloud/nltk
+        try:
+            from convoviz.analysis.wordcloud import generate_wordclouds
+        except ModuleNotFoundError as e:
+            raise ConfigurationError(
+                "Word cloud generation requires wordcloud and nltk. "
+                'Reinstall with the [viz] extra: uv tool install "convoviz[viz]"'
+            ) from e
+        wordcloud_folder = output_folder / "Word-Clouds"
+        wordcloud_folder.mkdir(parents=True, exist_ok=True)
+        generate_wordclouds(
+            collection,
+            wordcloud_folder,
+            config.wordcloud,
+            progress_bar=True,
+        )
+        logger.info("Wordcloud generation complete")
+        console.print(
+            f"\nDone [bold green]✅[/bold green] ! "
+            f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
+        )
+    console.print(
+        "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
+        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
+        "I hope you enjoy the outcome 🤞.\n\n"
+        "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
+        "➡️ https://github.com/mohamed-chs/convoviz 🔗\n\n"
+    )

convoviz/py.typed ADDED Viewed

File without changes

convoviz/renderers/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Rendering utilities for conversations."""
+from convoviz.renderers.markdown import render_conversation, render_node
+from convoviz.renderers.yaml import render_yaml_header
+# Public names of this package, re-exported from the markdown and yaml renderer modules.
+__all__ = [
+    "render_conversation",
+    "render_node",
+    "render_yaml_header",
+]