PyPI - convoviz - Versions diffs - 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

convoviz 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

convoviz/__init__.py +25 -5
convoviz/__main__.py +6 -5
convoviz/analysis/__init__.py +9 -0
convoviz/analysis/graphs.py +98 -0
convoviz/analysis/wordcloud.py +142 -0
convoviz/assets/colormaps.txt +15 -16
convoviz/cli.py +101 -94
convoviz/config.py +88 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +178 -0
convoviz/io/__init__.py +21 -0
convoviz/io/loaders.py +135 -0
convoviz/io/writers.py +96 -0
convoviz/models/__init__.py +26 -6
convoviz/models/collection.py +107 -0
convoviz/models/conversation.py +149 -0
convoviz/models/message.py +77 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +120 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +182 -0
convoviz/renderers/yaml.py +42 -0
convoviz/utils.py +68 -237
{convoviz-0.1.7.dist-info → convoviz-0.2.0.dist-info}/METADATA +61 -42
{convoviz-0.1.7.dist-info → convoviz-0.2.0.dist-info}/RECORD +27 -17
convoviz-0.2.0.dist-info/WHEEL +4 -0
convoviz-0.2.0.dist-info/entry_points.txt +3 -0
convoviz/configuration.py +0 -125
convoviz/data_analysis.py +0 -119
convoviz/long_runs.py +0 -93
convoviz/models/_conversation.py +0 -289
convoviz/models/_conversation_set.py +0 -191
convoviz/models/_message.py +0 -89
convoviz/models/_node.py +0 -74
convoviz-0.1.7.dist-info/LICENSE +0 -21
convoviz-0.1.7.dist-info/WHEEL +0 -4

convoviz/models/conversation.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""Conversation model - pure data class.
+Object path: conversations.json -> conversation (one of the list items)
+"""
+from datetime import datetime, timedelta
+from typing import Any
+from pydantic import BaseModel, Field
+from convoviz.models.message import AuthorRole
+from convoviz.models.node import Node, build_node_tree
+class Conversation(BaseModel):
+    """A single ChatGPT conversation.
+    This is a pure data model - rendering and I/O logic are in separate modules.
+    All derived views (message nodes, counts, model, plugins, ...) are computed
+    on demand from ``mapping``.
+    """
+    title: str
+    create_time: datetime
+    update_time: datetime
+    mapping: dict[str, Node]
+    moderation_results: list[Any] = Field(default_factory=list)
+    current_node: str
+    plugin_ids: list[str] | None = None
+    conversation_id: str
+    conversation_template_id: str | None = None
+    id: str | None = None
+    @property
+    def node_mapping(self) -> dict[str, Node]:
+        """Get the connected node tree.
+        Returns:
+            ``mapping`` itself, after ``build_node_tree`` has rebuilt the
+            parent/child references on its nodes.
+        """
+        return build_node_tree(self.mapping)
+    @property
+    def all_message_nodes(self) -> list[Node]:
+        """Get all nodes that have messages (including all branches)."""
+        return [node for node in self.node_mapping.values() if node.has_message]
+    def nodes_by_author(self, *authors: AuthorRole) -> list[Node]:
+        """Get nodes with messages from specified authors.
+        Args:
+            *authors: Author roles to filter by. Defaults to ("user",) if empty.
+        Returns:
+            The message nodes whose author role is one of ``authors``.
+        """
+        if not authors:
+            authors = ("user",)
+        return [
+            node
+            for node in self.all_message_nodes
+            if node.message and node.message.author.role in authors
+        ]
+    @property
+    def leaf_count(self) -> int:
+        """Count the number of leaf nodes (conversation endpoints)."""
+        return sum(1 for node in self.all_message_nodes if node.is_leaf)
+    @property
+    def url(self) -> str:
+        """Get the ChatGPT URL for this conversation."""
+        return f"https://chat.openai.com/c/{self.conversation_id}"
+    @property
+    def content_types(self) -> list[str]:
+        """Get all unique content types in the conversation.
+        Returns:
+            The distinct content types, sorted so that repeated calls (and
+            repeated exports) produce the same order.
+        """
+        return sorted(
+            {node.message.content.content_type for node in self.all_message_nodes if node.message}
+        )
+    def message_count(self, *authors: AuthorRole) -> int:
+        """Count messages from specified authors.
+        Args:
+            *authors: Author roles to count. Defaults to ("user",) if empty.
+        """
+        return len(self.nodes_by_author(*authors))
+    @property
+    def model(self) -> str | None:
+        """Get the ChatGPT model used for this conversation.
+        Returns:
+            The ``model_slug`` of the first assistant message that records one,
+            or None when no assistant message does.
+        """
+        for node in self.nodes_by_author("assistant"):
+            # Keep looking past assistant messages that carry no slug
+            if node.message and node.message.metadata.model_slug is not None:
+                return node.message.metadata.model_slug
+        return None
+    @property
+    def plugins(self) -> list[str]:
+        """Get all plugins used in this conversation.
+        Returns:
+            The distinct ``namespace`` values of the plugins invoked by tool
+            messages, in sorted order. Invocations without a namespace are
+            skipped instead of raising ``KeyError``.
+        """
+        namespaces = set()
+        for node in self.nodes_by_author("tool"):
+            invoked = node.message.metadata.invoked_plugin if node.message else None
+            if not invoked:
+                continue
+            namespace = invoked.get("namespace")
+            if namespace is not None:
+                namespaces.add(namespace)
+        # key=str keeps the sort total even if a namespace is not a string
+        return sorted(namespaces, key=str)
+    @property
+    def custom_instructions(self) -> dict[str, str]:
+        """Get custom instructions used for this conversation.
+        Returns:
+            The ``user_context_message_data`` of the second system message when
+            that message is flagged ``is_user_system_message``; otherwise an
+            empty dict.
+        """
+        system_nodes = self.nodes_by_author("system")
+        if len(system_nodes) < 2:
+            return {}
+        context_message = system_nodes[1].message
+        if context_message and context_message.metadata.is_user_system_message:
+            return context_message.metadata.user_context_message_data or {}
+        return {}
+    def timestamps(self, *authors: AuthorRole) -> list[float]:
+        """Get message timestamps from specified authors.
+        Useful for generating time-based visualizations.
+        Args:
+            *authors: Author roles to include. Defaults to ("user",) if empty.
+        Returns:
+            POSIX timestamps of the matching messages that have a create time.
+        """
+        # nodes_by_author already applies the ("user",) default
+        return [
+            node.message.create_time.timestamp()
+            for node in self.nodes_by_author(*authors)
+            if node.message and node.message.create_time
+        ]
+    def plaintext(self, *authors: AuthorRole) -> str:
+        """Get concatenated plain text from specified authors.
+        Useful for word cloud generation.
+        Args:
+            *authors: Author roles to include. Defaults to ("user",) if empty.
+        Returns:
+            The text of every matching message that has content, joined with
+            newlines.
+        """
+        # nodes_by_author already applies the ("user",) default
+        return "\n".join(
+            node.message.text
+            for node in self.nodes_by_author(*authors)
+            if node.message and node.message.has_content
+        )
+    @property
+    def week_start(self) -> datetime:
+        """Get the Monday of the week this conversation was created."""
+        start_of_week = self.create_time - timedelta(days=self.create_time.weekday())
+        return start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def month_start(self) -> datetime:
+        """Get the first day of the month this conversation was created."""
+        return self.create_time.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def year_start(self) -> datetime:
+        """Get January 1st of the year this conversation was created."""
+        return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)

convoviz/models/message.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""Message model - pure data class.
+Object path: conversations.json -> conversation -> mapping -> mapping node -> message
+"""
+from datetime import datetime
+from typing import Any, Literal
+from pydantic import BaseModel, ConfigDict
+from convoviz.exceptions import MessageContentError
+# The author roles a message may carry; used to type MessageAuthor.role
+AuthorRole = Literal["user", "assistant", "system", "tool"]
+class MessageAuthor(BaseModel):
+    """Author information for a message."""
+    # One of the roles permitted by AuthorRole
+    role: AuthorRole
+    # Optional author name (None when not given)
+    name: str | None = None
+    # Free-form extra author data; empty dict when not given
+    metadata: dict[str, Any] = {}
+class MessageContent(BaseModel):
+    """Content of a message.
+    Message.text reads ``parts`` first, then ``text``, then ``result``.
+    """
+    content_type: str
+    # List of string parts; Message.text uses only the first entry
+    parts: list[str] | None = None
+    # Plain text payload, used when ``parts`` is None
+    text: str | None = None
+    # Result payload, used when both ``parts`` and ``text`` are None
+    result: str | None = None
+class MessageMetadata(BaseModel):
+    """Metadata for a message."""
+    # Model identifier; read by Conversation.model
+    model_slug: str | None = None
+    # Plugin invocation details; Conversation.plugins reads its "namespace" key
+    invoked_plugin: dict[str, Any] | None = None
+    # Flag checked by Conversation.custom_instructions
+    is_user_system_message: bool | None = None
+    # Payload returned by Conversation.custom_instructions when the flag is set
+    user_context_message_data: dict[str, Any] | None = None
+    # Clears pydantic's protected-namespace list so the "model_"-prefixed field
+    # name above is accepted
+    model_config = ConfigDict(protected_namespaces=())
+class Message(BaseModel):
+    """One message attached to a conversation node.
+    Holds data only - rendering is handled by the renderers module.
+    """
+    id: str
+    author: MessageAuthor
+    create_time: datetime | None = None
+    update_time: datetime | None = None
+    content: MessageContent
+    status: str
+    end_turn: bool | None = None
+    weight: float
+    metadata: MessageMetadata
+    recipient: str
+    @property
+    def text(self) -> str:
+        """Return the textual content of the message.
+        The first entry of ``content.parts`` wins (an empty list yields ""),
+        then ``content.text``, then ``content.result``.
+        Raises:
+            MessageContentError: If none of the three fields is set.
+        """
+        body = self.content
+        if body.parts is not None:
+            if not body.parts:
+                return ""
+            return str(body.parts[0])
+        for candidate in (body.text, body.result):
+            if candidate is not None:
+                return candidate
+        raise MessageContentError(self.id)
+    @property
+    def has_content(self) -> bool:
+        """Tell whether the message carries extractable content.
+        True when ``content.parts`` is non-empty, or when ``content.text`` or
+        ``content.result`` is set.
+        """
+        body = self.content
+        if body.parts:
+            return True
+        return body.text is not None or body.result is not None

convoviz/models/node.py ADDED Viewed

@@ -0,0 +1,66 @@
+"""Node model - pure data class.
+Object path: conversations.json -> conversation -> mapping -> mapping node
+Nodes form a tree structure representing conversation branches.
+"""
+from pydantic import BaseModel, Field
+from convoviz.models.message import Message
+class Node(BaseModel):
+    """One entry of the conversation tree.
+    A node may carry a message and refers to its parent and children by id.
+    Holds data only - rendering is handled by the renderers module.
+    """
+    id: str
+    message: Message | None = None
+    parent: str | None = None
+    children: list[str] = Field(default_factory=list)
+    # Object references filled in at runtime (not present in the JSON)
+    parent_node: "Node | None" = None
+    children_nodes: list["Node"] = Field(default_factory=list)
+    def add_child(self, node: "Node") -> None:
+        """Link ``node`` under this node in both directions.
+        Args:
+            node: The node to register as a child of this one
+        """
+        node.parent_node = self
+        self.children_nodes.append(node)
+    @property
+    def has_message(self) -> bool:
+        """Tell whether a message is attached to this node."""
+        return self.message is not None
+    @property
+    def is_leaf(self) -> bool:
+        """Tell whether this node has no linked children."""
+        return not self.children_nodes
+def build_node_tree(mapping: dict[str, Node]) -> dict[str, Node]:
+    """Connect the nodes of a mapping through parent/child references.
+    Args:
+        mapping: Dictionary of node_id -> Node
+    Returns:
+        The very same dictionary, its nodes now linked via
+        parent_node/children_nodes
+    """
+    nodes = mapping.values()
+    # Drop links left over from an earlier call so children are not duplicated
+    for entry in nodes:
+        entry.children_nodes = []
+        entry.parent_node = None
+    # Wire each parent to those of its listed children that exist in the mapping
+    for parent in nodes:
+        for child_id in parent.children:
+            child = mapping.get(child_id)
+            if child is not None:
+                parent.add_child(child)
+    return mapping

convoviz/pipeline.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Main processing pipeline for convoviz."""
+from pathlib import Path
+from shutil import rmtree
+from rich.console import Console
+from convoviz.analysis.graphs import generate_week_barplots
+from convoviz.analysis.wordcloud import generate_wordclouds
+from convoviz.config import ConvovizConfig
+from convoviz.exceptions import InvalidZipError
+from convoviz.io.loaders import (
+    find_latest_bookmarklet_json,
+    load_collection_from_json,
+    load_collection_from_zip,
+)
+from convoviz.io.writers import save_collection, save_custom_instructions
+# Shared Rich console used by run_pipeline for all status output
+console = Console()
+def run_pipeline(config: ConvovizConfig) -> None:
+    """Run the main processing pipeline.
+    Loads the export zip, merges the latest bookmarklet JSON when one is
+    found, recreates the output folder, then writes the markdown files, the
+    weekly bar plots, the word clouds and the custom-instructions JSON,
+    printing progress to the console along the way.
+    Args:
+        config: Complete configuration for the pipeline
+    Raises:
+        InvalidZipError: If no zip path is configured or the path does not exist
+        ConfigurationError: If configuration is incomplete
+            (NOTE(review): not raised directly in this function - presumably
+            raised by a callee; confirm)
+    """
+    if not config.zip_filepath:
+        raise InvalidZipError("", reason="No zip file specified")
+    zip_path = Path(config.zip_filepath)
+    if not zip_path.exists():
+        raise InvalidZipError(str(zip_path), reason="File does not exist")
+    console.print("Loading data [bold yellow]📂[/bold yellow] ...\n")
+    # Load main collection from zip
+    collection = load_collection_from_zip(zip_path)
+    # Try to merge bookmarklet data if available
+    bookmarklet_json = find_latest_bookmarklet_json()
+    if bookmarklet_json:
+        console.print("Found bookmarklet download, loading [bold yellow]📂[/bold yellow] ...\n")
+        try:
+            bookmarklet_collection = load_collection_from_json(bookmarklet_json)
+            collection.update(bookmarklet_collection)
+        except Exception as e:
+            # Best effort: a broken bookmarklet file only produces a warning
+            # and the pipeline carries on with the zip data
+            console.print(
+                f"[bold yellow]Warning:[/bold yellow] Failed to load bookmarklet data: {e}"
+            )
+    output_folder = config.output_folder
+    # Clean and recreate output folder
+    # NOTE: an existing output directory is deleted wholesale, contents included
+    if output_folder.exists() and output_folder.is_dir():
+        rmtree(output_folder)
+    output_folder.mkdir(parents=True, exist_ok=True)
+    # Save markdown files
+    markdown_folder = output_folder / "Markdown"
+    save_collection(
+        collection,
+        markdown_folder,
+        config.conversation,
+        config.message.author_headers,
+        progress_bar=True,
+    )
+    console.print(
+        f"\nDone [bold green]✅[/bold green] ! "
+        f"Check the output [bold blue]📄[/bold blue] here: {markdown_folder.as_uri()} 🔗\n"
+    )
+    # Generate graphs
+    graph_folder = output_folder / "Graphs"
+    graph_folder.mkdir(parents=True, exist_ok=True)
+    generate_week_barplots(
+        collection,
+        graph_folder,
+        config.graph,
+        progress_bar=True,
+    )
+    console.print(
+        f"\nDone [bold green]✅[/bold green] ! "
+        f"Check the output [bold blue]📈[/bold blue] here: {graph_folder.as_uri()} 🔗\n"
+    )
+    # Generate word clouds
+    wordcloud_folder = output_folder / "Word Clouds"
+    wordcloud_folder.mkdir(parents=True, exist_ok=True)
+    generate_wordclouds(
+        collection,
+        wordcloud_folder,
+        config.wordcloud,
+        progress_bar=True,
+    )
+    console.print(
+        f"\nDone [bold green]✅[/bold green] ! "
+        f"Check the output [bold blue]🔡☁️[/bold blue] here: {wordcloud_folder.as_uri()} 🔗\n"
+    )
+    # Save custom instructions
+    console.print("Writing custom instructions [bold blue]📝[/bold blue] ...\n")
+    instructions_path = output_folder / "custom_instructions.json"
+    save_custom_instructions(collection, instructions_path)
+    console.print(
+        f"\nDone [bold green]✅[/bold green] ! "
+        f"Check the output [bold blue]📝[/bold blue] here: {instructions_path.as_uri()} 🔗\n"
+    )
+    # Final summary pointing at the root of everything that was written
+    console.print(
+        "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
+        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder.as_uri()} 🔗\n\n"
+        "I hope you enjoy the outcome 🤞.\n\n"
+        "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
+        "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
+    )

convoviz/renderers/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Rendering utilities for conversations."""
+from convoviz.renderers.markdown import render_conversation, render_node
+from convoviz.renderers.yaml import render_yaml_header
+# Public names re-exported from the markdown and yaml renderer modules
+__all__ = [
+    "render_conversation",
+    "render_node",
+    "render_yaml_header",
+]

convoviz/renderers/markdown.py ADDED Viewed

@@ -0,0 +1,182 @@
+"""Markdown rendering for conversations."""
+import re
+from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.models import Conversation, Node
+from convoviz.renderers.yaml import render_yaml_header
+def close_code_blocks(text: str) -> str:
+    """Ensure all code blocks in the text are properly closed.
+    A line starting with ``` opens a block; a line consisting of ``` alone
+    (trailing whitespace, including a stray carriage return, is ignored)
+    closes it.
+    Args:
+        text: Markdown text that may have unclosed code blocks
+    Returns:
+        The text unchanged if every block is closed, otherwise the text with
+        a closing fence appended on a new line
+    """
+    open_code_block = False
+    for line in text.split("\n"):
+        if not open_code_block:
+            open_code_block = line.startswith("```")
+        elif line.rstrip() == "```":
+            # Accept "``` " and CRLF-terminated fences; an exact comparison
+            # would leave the block looking open and add a spurious fence
+            open_code_block = False
+    if open_code_block:
+        text += "\n```"
+    return text
+def replace_latex_delimiters(text: str) -> str:
+    """Swap LaTeX bracket delimiters for dollar-sign delimiters.
+    Args:
+        text: Text using \\[ \\] \\( \\) delimiters
+    Returns:
+        The text with \\[ and \\] turned into $$, and \\( and \\) turned into $
+    """
+    substitutions = (
+        (r"\\\[", "$$"),
+        (r"\\\]", "$$"),
+        (r"\\\(", "$"),
+        (r"\\\)", "$"),
+    )
+    for pattern, dollars in substitutions:
+        text = re.sub(pattern, dollars, text)
+    return text
+def code_block(text: str, lang: str = "python") -> str:
+    """Surround text with a fenced markdown code block.
+    Args:
+        text: The code to place inside the fence
+        lang: Language tag written after the opening fence
+    Returns:
+        The fenced block: opening fence with tag, the text, closing fence
+    """
+    opening_fence = f"```{lang}"
+    return f"{opening_fence}\n{text}\n```"
+def render_message_header(role: str, headers: AuthorHeaders) -> str:
+    """Pick the markdown header to show for a message author.
+    Args:
+        role: The author role (user, assistant, system, tool)
+        headers: Configuration holding one header per known role
+    Returns:
+        The configured header for a known role, otherwise a level-3 heading
+        built from the title-cased role
+    """
+    match role:
+        case "system":
+            return headers.system
+        case "user":
+            return headers.user
+        case "assistant":
+            return headers.assistant
+        case "tool":
+            return headers.tool
+        case _:
+            # Roles without a configured header get a generic heading
+            return f"### {role.title()}"
+def render_node_header(node: Node, headers: AuthorHeaders) -> str:
+    """Build the header lines shown above a node's content.
+    The header holds the node ID, a link to the parent (when the parent has a
+    message) and the author header.
+    Args:
+        node: The node to render
+        headers: Configuration for author headers
+    Returns:
+        The header markdown, or an empty string for a node without a message
+    """
+    message = node.message
+    if message is None:
+        return ""
+    lines = [f"###### {node.id}"]
+    parent = node.parent_node
+    # Only link upwards when the parent is itself rendered (i.e. has a message)
+    if parent and parent.message:
+        lines.append(f"[parent ⬆️](#{parent.id})")
+    lines.append(render_message_header(message.author.role, headers))
+    return "\n".join(lines) + "\n"
+def render_node_footer(node: Node) -> str:
+    """Build the navigation links shown below a node's content.
+    Args:
+        node: The node to render
+    Returns:
+        An empty string when the node has no children, a single "child" link
+        for one child, or numbered links separated by " | " for several
+    """
+    children = node.children_nodes
+    if not children:
+        return ""
+    if len(children) == 1:
+        links = f"[child ⬇️](#{children[0].id})"
+    else:
+        links = " | ".join(
+            f"[child {position} ⬇️](#{child.id})"
+            for position, child in enumerate(children, start=1)
+        )
+    return f"\n{links}\n"
+def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = False) -> str:
+    """Render a complete node as markdown.
+    Args:
+        node: The node to render
+        headers: Configuration for author headers
+        use_dollar_latex: Whether to convert LaTeX delimiters to dollars
+    Returns:
+        Complete markdown string for the node (header, content, footer and a
+        trailing horizontal rule), or an empty string if the node has no message
+    """
+    # Local import: this module does not import convoviz.exceptions at the top
+    from convoviz.exceptions import MessageContentError
+    if node.message is None:
+        return ""
+    header = render_node_header(node, headers)
+    # Only text extraction is expected to fail; catching just that error keeps
+    # genuine bugs in the steps below from being silently turned into blanks
+    try:
+        raw_text = node.message.text
+    except MessageContentError:
+        # A message with no parts/text/result is rendered with an empty body
+        raw_text = ""
+    content = close_code_blocks(raw_text)
+    content = f"\n{content}\n" if content else ""
+    if use_dollar_latex:
+        content = replace_latex_delimiters(content)
+    footer = render_node_footer(node)
+    return f"\n{header}{content}{footer}\n---\n"
+def render_conversation(
+    conversation: Conversation, config: ConversationConfig, headers: AuthorHeaders
+) -> str:
+    """Turn a whole conversation into one markdown document.
+    Args:
+        conversation: The conversation to render
+        config: Conversation rendering configuration
+        headers: Configuration for author headers
+    Returns:
+        The YAML header followed by every rendered message node
+    """
+    dollars = config.markdown.latex_delimiters == "dollars"
+    # The YAML header comes first, then one section per message node
+    sections = [render_yaml_header(conversation, config.yaml)]
+    sections.extend(
+        render_node(node, headers, dollars)
+        for node in conversation.all_message_nodes
+        if node.message
+    )
+    return "".join(sections)

convoviz/renderers/yaml.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""YAML frontmatter rendering for conversations."""
+from convoviz.config import YAMLConfig
+from convoviz.models import Conversation
+def _yaml_value(value: object) -> str:
+    """Format one frontmatter value as YAML-safe text.
+    Strings, lists and dicts are written in JSON form, which YAML accepts as
+    quoted scalars and flow collections, so characters such as ":" or "#"
+    cannot break the document. None becomes ``null``; anything else (for
+    example datetimes and numbers) keeps its ``str()`` form.
+    """
+    import json
+    if value is None:
+        return "null"
+    if isinstance(value, (str, list, dict)):
+        # default=str covers nested values json cannot encode natively
+        return json.dumps(value, ensure_ascii=False, default=str)
+    return str(value)
+def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
+    """Render the YAML frontmatter for a conversation.
+    Args:
+        conversation: The conversation to render
+        config: YAML configuration specifying which fields to include
+    Returns:
+        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
+    """
+    yaml_fields: dict[str, object] = {}
+    if config.title:
+        yaml_fields["title"] = conversation.title
+    if config.chat_link:
+        yaml_fields["chat_link"] = conversation.url
+    if config.create_time:
+        yaml_fields["create_time"] = conversation.create_time
+    if config.update_time:
+        yaml_fields["update_time"] = conversation.update_time
+    if config.model:
+        yaml_fields["model"] = conversation.model
+    if config.used_plugins:
+        yaml_fields["used_plugins"] = conversation.plugins
+    if config.message_count:
+        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
+    if config.content_types:
+        yaml_fields["content_types"] = conversation.content_types
+    if config.custom_instructions:
+        yaml_fields["custom_instructions"] = conversation.custom_instructions
+    if not yaml_fields:
+        return ""
+    # Escape every value so titles and instructions with YAML-significant
+    # characters still produce a parseable header
+    body = "\n".join(f"{key}: {_yaml_value(value)}" for key, value in yaml_fields.items())
+    return f"---\n{body}\n---\n"

convoviz 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

convoviz 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl