PyPI - convoviz - Versions diffs - 0.4.1__py3-none-any.whl - Mend

convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

convoviz/__init__.py +34 -0
convoviz/__main__.py +6 -0
convoviz/analysis/__init__.py +22 -0
convoviz/analysis/graphs.py +879 -0
convoviz/analysis/wordcloud.py +204 -0
convoviz/assets/colormaps.txt +15 -0
convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
convoviz/assets/fonts/Borel-Regular.ttf +0 -0
convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
convoviz/assets/stopwords.txt +1 -0
convoviz/cli.py +149 -0
convoviz/config.py +120 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +264 -0
convoviz/io/__init__.py +21 -0
convoviz/io/assets.py +109 -0
convoviz/io/loaders.py +191 -0
convoviz/io/writers.py +231 -0
convoviz/logging_config.py +69 -0
convoviz/models/__init__.py +24 -0
convoviz/models/collection.py +115 -0
convoviz/models/conversation.py +158 -0
convoviz/models/message.py +218 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +184 -0
convoviz/py.typed +0 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +269 -0
convoviz/renderers/yaml.py +119 -0
convoviz/utils.py +155 -0
convoviz-0.4.1.dist-info/METADATA +215 -0
convoviz-0.4.1.dist-info/RECORD +62 -0
convoviz-0.4.1.dist-info/WHEEL +4 -0
convoviz-0.4.1.dist-info/entry_points.txt +3 -0

convoviz/io/writers.py ADDED Viewed

@@ -0,0 +1,231 @@
+"""Writing functions for conversations and collections."""
+import logging
+from os import utime as os_utime
+from pathlib import Path
+from urllib.parse import quote
+from orjson import OPT_INDENT_2, dumps
+from tqdm import tqdm
+from convoviz.config import AuthorHeaders, ConversationConfig, FolderOrganization
+from convoviz.io.assets import copy_asset, resolve_asset_path
+from convoviz.models import Conversation, ConversationCollection
+from convoviz.renderers import render_conversation
+from convoviz.utils import sanitize
+# Module-level logger, named after this module via __name__
+logger = logging.getLogger(__name__)
+# English month names in calendar order (index 0 is "January").
+# get_date_folder_path indexes this list with ``month - 1`` to build
+# folder names such as "03-March".
+_MONTH_NAMES = [
+    "January",
+    "February",
+    "March",
+    "April",
+    "May",
+    "June",
+    "July",
+    "August",
+    "September",
+    "October",
+    "November",
+    "December",
+]
+def get_date_folder_path(conversation: Conversation) -> Path:
+    """Get the date-based folder path for a conversation.
+    Creates a nested structure: year/month
+    Example: 2024/03-March/
+    Args:
+        conversation: The conversation to get the path for
+    Returns:
+        Relative path for the date-based folder structure
+    """
+    create_time = conversation.create_time
+    # Year folder: "2024"
+    year = str(create_time.year)
+    # Month folder: "03-March"
+    month_num = create_time.month
+    month_name = _MONTH_NAMES[month_num - 1]
+    month = f"{month_num:02d}-{month_name}"
+    return Path(year) / month
+def save_conversation(
+    conversation: Conversation,
+    filepath: Path,
+    config: ConversationConfig,
+    headers: AuthorHeaders,
+    source_path: Path | None = None,
+) -> Path:
+    """Save a conversation to a markdown file.
+    Handles filename conflicts by appending a counter. Sets the file's
+    modification time to match the conversation's update time.
+    Args:
+        conversation: The conversation to save
+        filepath: Target file path
+        config: Conversation rendering configuration
+        headers: Author header configuration
+        source_path: Path to the source directory containing assets
+    Returns:
+        The actual path the file was saved to (may differ if there was a conflict)
+    """
+    # Handle filename conflicts
+    base_name = sanitize(filepath.stem)
+    final_path = filepath
+    counter = 0
+    while final_path.exists():
+        counter += 1
+        final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
+    # Define asset resolver
+    def asset_resolver(asset_id: str) -> str | None:
+        if not source_path:
+            return None
+        src_file = resolve_asset_path(source_path, asset_id)
+        if not src_file:
+            return None
+        # Copy to output directory (relative to the markdown file's directory)
+        return copy_asset(src_file, final_path.parent)
+    # Render and write
+    markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
+    with final_path.open("w", encoding="utf-8") as f:
+        f.write(markdown)
+    logger.debug(f"Saved conversation: {final_path}")
+    # Set modification time
+    timestamp = conversation.update_time.timestamp()
+    os_utime(final_path, (timestamp, timestamp))
+    return final_path
+def _generate_year_index(year_dir: Path, year: str) -> None:
+    """Generate a _index.md file for a year folder.
+    Args:
+        year_dir: Path to the year directory
+        year: The year string (e.g., "2024")
+    """
+    months = sorted(
+        [d.name for d in year_dir.iterdir() if d.is_dir()],
+        key=lambda m: int(m.split("-")[0]),
+    )
+    lines = [
+        f"# {year}",
+        "",
+        "## Months",
+        "",
+    ]
+    for month in months:
+        month_name = month.split("-", 1)[1] if "-" in month else month
+        lines.append(f"- [{month_name}]({month}/_index.md)")
+    index_path = year_dir / "_index.md"
+    index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    logger.debug(f"Generated year index: {index_path}")
+def _generate_month_index(month_dir: Path, year: str, month: str) -> None:
+    """Generate a _index.md file for a month folder.
+    Args:
+        month_dir: Path to the month directory
+        year: The year string (e.g., "2024")
+        month: The month folder name (e.g., "03-March")
+    """
+    month_name = month.split("-", 1)[1] if "-" in month else month
+    files = sorted([f.name for f in month_dir.glob("*.md") if f.name != "_index.md"])
+    lines = [
+        f"# {month_name} {year}",
+        "",
+        "## Conversations",
+        "",
+    ]
+    for file in files:
+        title = file[:-3]  # Remove .md extension
+        encoded_file = quote(file)
+        lines.append(f"- [{title}]({encoded_file})")
+    index_path = month_dir / "_index.md"
+    index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    logger.debug(f"Generated month index: {index_path}")
+def save_collection(
+    collection: ConversationCollection,
+    directory: Path,
+    config: ConversationConfig,
+    headers: AuthorHeaders,
+    *,
+    folder_organization: FolderOrganization = FolderOrganization.FLAT,
+    progress_bar: bool = False,
+) -> None:
+    """Save all conversations in a collection to markdown files.
+    Args:
+        collection: The collection to save
+        directory: Target directory
+        config: Conversation rendering configuration
+        headers: Author header configuration
+        folder_organization: How to organize files in folders (flat or by date)
+        progress_bar: Whether to show a progress bar
+    """
+    directory.mkdir(parents=True, exist_ok=True)
+    for conv in tqdm(
+        collection.conversations,
+        desc="Writing Markdown 📄 files",
+        disable=not progress_bar,
+    ):
+        # Determine target directory based on organization setting
+        if folder_organization == FolderOrganization.DATE:
+            target_dir = directory / get_date_folder_path(conv)
+            target_dir.mkdir(parents=True, exist_ok=True)
+        else:
+            target_dir = directory
+        filepath = target_dir / f"{sanitize(conv.title)}.md"
+        save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
+    # Generate index files for date organization
+    if folder_organization == FolderOrganization.DATE:
+        for year_dir in directory.iterdir():
+            if year_dir.is_dir() and year_dir.name.isdigit():
+                for month_dir in year_dir.iterdir():
+                    if month_dir.is_dir():
+                        _generate_month_index(month_dir, year_dir.name, month_dir.name)
+                _generate_year_index(year_dir, year_dir.name)
+def save_custom_instructions(
+    collection: ConversationCollection,
+    filepath: Path,
+) -> None:
+    """Save all custom instructions from a collection to a JSON file.
+    Args:
+        collection: The collection to extract instructions from
+        filepath: Target JSON file path
+    """
+    instructions = collection.custom_instructions
+    with filepath.open("w", encoding="utf-8") as f:
+        f.write(dumps(instructions, option=OPT_INDENT_2).decode())

convoviz/logging_config.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Logging configuration for convoviz."""
+import logging
+import tempfile
+from pathlib import Path
+from rich.logging import RichHandler
+def setup_logging(
+    verbosity: int = 0,
+    log_file: Path | None = None,
+) -> Path:
+    """Set up logging configuration.
+    Args:
+        verbosity: Level of verbosity (0=WARNING, 1=INFO, 2=DEBUG)
+        log_file: Path to log file. If None, a temporary file is created.
+    Returns:
+        Path to the log file used.
+    """
+    # clear existing handlers
+    root_logger = logging.getLogger()
+    root_logger.handlers.clear()
+    # Determine log level for console
+    if verbosity >= 2:
+        console_level = logging.DEBUG
+    elif verbosity >= 1:
+        console_level = logging.INFO
+    else:
+        console_level = logging.WARNING
+    # Console handler (Rich)
+    console_handler = RichHandler(
+        rich_tracebacks=True,
+        markup=True,
+        show_time=False,
+        show_path=False,
+    )
+    console_handler.setLevel(console_level)
+    # File handler
+    if log_file is None:
+        # Create temp file if not specified
+        with tempfile.NamedTemporaryFile(prefix="convoviz_", suffix=".log", delete=False) as tf:
+            log_file = Path(tf.name)
+    # Ensure parent dir exists
+    if not log_file.parent.exists():
+        log_file.parent.mkdir(parents=True, exist_ok=True)
+    file_handler = logging.FileHandler(log_file, encoding="utf-8")
+    file_handler.setLevel(logging.DEBUG)  # Always log DEBUG to file
+    file_formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+    file_handler.setFormatter(file_formatter)
+    # Configure root logger
+    # We set root level to DEBUG so that the handlers can filter as they please
+    root_logger.setLevel(logging.DEBUG)
+    root_logger.addHandler(console_handler)
+    root_logger.addHandler(file_handler)
+    # Reduce noise from explicit libraries if necessary
+    logging.getLogger("matplotlib").setLevel(logging.WARNING)
+    logging.getLogger("PIL").setLevel(logging.WARNING)
+    return log_file

convoviz/models/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Data models for convoviz."""
+from convoviz.models.collection import ConversationCollection
+from convoviz.models.conversation import Conversation
+from convoviz.models.message import (
+    AuthorRole,
+    Message,
+    MessageAuthor,
+    MessageContent,
+    MessageMetadata,
+)
+from convoviz.models.node import Node, build_node_tree
+# Names exported by ``from convoviz.models import *``; each one is imported
+# from its submodule above.
+__all__ = [
+    "AuthorRole",
+    "Conversation",
+    "ConversationCollection",
+    "Message",
+    "MessageAuthor",
+    "MessageContent",
+    "MessageMetadata",
+    "Node",
+    "build_node_tree",
+]

convoviz/models/collection.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""ConversationCollection model - manages a set of conversations.
+This is a pure data model - I/O and visualization logic are in separate modules.
+"""
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+from pydantic import BaseModel, Field
+from convoviz.models.conversation import Conversation
+from convoviz.models.message import AuthorRole
+class ConversationCollection(BaseModel):
+    """A collection of ChatGPT conversations.
+    Provides grouping and aggregation operations over conversations.
+    """
+    conversations: list[Conversation] = Field(default_factory=list)
+    source_path: Path | None = None
+    @property
+    def index(self) -> dict[str, Conversation]:
+        """Get conversations indexed by conversation_id."""
+        return {conv.conversation_id: conv for conv in self.conversations}
+    @property
+    def last_updated(self) -> datetime:
+        """Get the most recent update time across all conversations."""
+        if not self.conversations:
+            return datetime.min
+        return max(conv.update_time for conv in self.conversations)
+    def update(self, other: "ConversationCollection") -> None:
+        """Merge another collection into this one.
+        Merges per-conversation, keeping the newest version when IDs collide.
+        Note: We intentionally do *not* gate on ``other.last_updated`` because
+        "new" conversations can still have older timestamps than the most recent
+        conversation in this collection (e.g. bookmarklet downloads).
+        """
+        merged: dict[str, Conversation] = dict(self.index)
+        for conv_id, incoming in other.index.items():
+            existing = merged.get(conv_id)
+            if existing is None or incoming.update_time > existing.update_time:
+                merged[conv_id] = incoming
+        self.conversations = list(merged.values())
+    def add(self, conversation: Conversation) -> None:
+        """Add a conversation to the collection."""
+        self.conversations.append(conversation)
+    @property
+    def custom_instructions(self) -> list[dict[str, Any]]:
+        """Get all custom instructions from all conversations."""
+        instructions: list[dict[str, Any]] = []
+        for conv in self.conversations:
+            if not conv.custom_instructions:
+                continue
+            instructions.append(
+                {
+                    "chat_title": conv.title,
+                    "chat_link": conv.url,
+                    "time": conv.create_time,
+                    "custom_instructions": conv.custom_instructions,
+                }
+            )
+        return instructions
+    def timestamps(self, *authors: AuthorRole) -> list[float]:
+        """Get all message timestamps from specified authors."""
+        result: list[float] = []
+        for conv in self.conversations:
+            result.extend(conv.timestamps(*authors))
+        return result
+    def plaintext(self, *authors: AuthorRole) -> str:
+        """Get concatenated plain text from all conversations."""
+        return "\n".join(conv.plaintext(*authors) for conv in self.conversations)
+    def group_by_week(self) -> dict[datetime, "ConversationCollection"]:
+        """Group conversations by the week they were created."""
+        groups: dict[datetime, ConversationCollection] = {}
+        for conv in self.conversations:
+            week_start = conv.week_start
+            if week_start not in groups:
+                groups[week_start] = ConversationCollection()
+            groups[week_start].add(conv)
+        return groups
+    def group_by_month(self) -> dict[datetime, "ConversationCollection"]:
+        """Group conversations by the month they were created."""
+        groups: dict[datetime, ConversationCollection] = {}
+        for conv in self.conversations:
+            month_start = conv.month_start
+            if month_start not in groups:
+                groups[month_start] = ConversationCollection()
+            groups[month_start].add(conv)
+        return groups
+    def group_by_year(self) -> dict[datetime, "ConversationCollection"]:
+        """Group conversations by the year they were created."""
+        groups: dict[datetime, ConversationCollection] = {}
+        for conv in self.conversations:
+            year_start = conv.year_start
+            if year_start not in groups:
+                groups[year_start] = ConversationCollection()
+            groups[year_start].add(conv)
+        return groups

convoviz/models/conversation.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""Conversation model - pure data class.
+Object path: conversations.json -> conversation (one of the list items)
+"""
+from datetime import datetime, timedelta
+from typing import Any
+from pydantic import BaseModel, Field
+from convoviz.models.message import AuthorRole
+from convoviz.models.node import Node, build_node_tree
+class Conversation(BaseModel):
+    """A single ChatGPT conversation.
+    This is a pure data model - rendering and I/O logic are in separate modules.
+    """
+    title: str
+    create_time: datetime
+    update_time: datetime
+    mapping: dict[str, Node]
+    moderation_results: list[Any] = Field(default_factory=list)
+    current_node: str
+    plugin_ids: list[str] | None = None
+    conversation_id: str
+    conversation_template_id: str | None = None
+    id: str | None = None
+    @property
+    def node_mapping(self) -> dict[str, Node]:
+        """Get the connected node tree."""
+        return build_node_tree(self.mapping)
+    @property
+    def all_message_nodes(self) -> list[Node]:
+        """Get all nodes that have messages (including hidden/internal ones)."""
+        return [node for node in self.node_mapping.values() if node.has_message]
+    @property
+    def visible_message_nodes(self) -> list[Node]:
+        """Get all nodes that have *visible* (non-hidden) messages."""
+        return [
+            node
+            for node in self.node_mapping.values()
+            if node.has_message and node.message is not None and not node.message.is_hidden
+        ]
+    def nodes_by_author(self, *authors: AuthorRole, include_hidden: bool = False) -> list[Node]:
+        """Get nodes with messages from specified authors.
+        Args:
+            *authors: Author roles to filter by. Defaults to ("user",) if empty.
+            include_hidden: Whether to include hidden/internal messages.
+        """
+        if not authors:
+            authors = ("user",)
+        nodes = self.all_message_nodes if include_hidden else self.visible_message_nodes
+        return [node for node in nodes if node.message and node.message.author.role in authors]
+    @property
+    def leaf_count(self) -> int:
+        """Count the number of leaf nodes (conversation endpoints)."""
+        return sum(1 for node in self.all_message_nodes if node.is_leaf)
+    @property
+    def url(self) -> str:
+        """Get the ChatGPT URL for this conversation."""
+        return f"https://chat.openai.com/c/{self.conversation_id}"
+    @property
+    def content_types(self) -> list[str]:
+        """Get all unique content types in the conversation (excluding hidden messages)."""
+        return list(
+            {
+                node.message.content.content_type
+                for node in self.visible_message_nodes
+                if node.message
+            }
+        )
+    def message_count(self, *authors: AuthorRole) -> int:
+        """Count messages from specified authors."""
+        return len(self.nodes_by_author(*authors))
+    @property
+    def model(self) -> str | None:
+        """Get the ChatGPT model used for this conversation."""
+        assistant_nodes = self.nodes_by_author("assistant")
+        if not assistant_nodes:
+            return None
+        message = assistant_nodes[0].message
+        return message.metadata.model_slug if message else None
+    @property
+    def plugins(self) -> list[str]:
+        """Get all plugins used in this conversation."""
+        return list(
+            {
+                node.message.metadata.invoked_plugin["namespace"]
+                for node in self.nodes_by_author("tool")
+                if node.message and node.message.metadata.invoked_plugin
+            }
+        )
+    @property
+    def custom_instructions(self) -> dict[str, str]:
+        """Get custom instructions used for this conversation."""
+        system_nodes = self.nodes_by_author("system")
+        for node in system_nodes:
+            context_message = node.message
+            if context_message and context_message.metadata.is_user_system_message:
+                return context_message.metadata.user_context_message_data or {}
+        return {}
+    def timestamps(self, *authors: AuthorRole) -> list[float]:
+        """Get message timestamps from specified authors.
+        Useful for generating time-based visualizations.
+        """
+        if not authors:
+            authors = ("user",)
+        return [
+            node.message.create_time.timestamp()
+            for node in self.nodes_by_author(*authors)
+            if node.message and node.message.create_time
+        ]
+    def plaintext(self, *authors: AuthorRole) -> str:
+        """Get concatenated plain text from specified authors.
+        Useful for word cloud generation.
+        """
+        if not authors:
+            authors = ("user",)
+        return "\n".join(
+            node.message.text
+            for node in self.nodes_by_author(*authors)
+            if node.message and node.message.has_content
+        )
+    @property
+    def week_start(self) -> datetime:
+        """Get the Monday of the week this conversation was created."""
+        start_of_week = self.create_time - timedelta(days=self.create_time.weekday())
+        return start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def month_start(self) -> datetime:
+        """Get the first day of the month this conversation was created."""
+        return self.create_time.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
+    @property
+    def year_start(self) -> datetime:
+        """Get January 1st of the year this conversation was created."""
+        return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)