PyPI - convoviz - Versions diffs - 0.4.1__py3-none-any.whl - Mend

convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

convoviz/__init__.py +34 -0
convoviz/__main__.py +6 -0
convoviz/analysis/__init__.py +22 -0
convoviz/analysis/graphs.py +879 -0
convoviz/analysis/wordcloud.py +204 -0
convoviz/assets/colormaps.txt +15 -0
convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
convoviz/assets/fonts/Borel-Regular.ttf +0 -0
convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
convoviz/assets/stopwords.txt +1 -0
convoviz/cli.py +149 -0
convoviz/config.py +120 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +264 -0
convoviz/io/__init__.py +21 -0
convoviz/io/assets.py +109 -0
convoviz/io/loaders.py +191 -0
convoviz/io/writers.py +231 -0
convoviz/logging_config.py +69 -0
convoviz/models/__init__.py +24 -0
convoviz/models/collection.py +115 -0
convoviz/models/conversation.py +158 -0
convoviz/models/message.py +218 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +184 -0
convoviz/py.typed +0 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +269 -0
convoviz/renderers/yaml.py +119 -0
convoviz/utils.py +155 -0
convoviz-0.4.1.dist-info/METADATA +215 -0
convoviz-0.4.1.dist-info/RECORD +62 -0
convoviz-0.4.1.dist-info/WHEEL +4 -0
convoviz-0.4.1.dist-info/entry_points.txt +3 -0

convoviz/renderers/markdown.py ADDED Viewed

@@ -0,0 +1,269 @@
+"""Markdown rendering for conversations."""
+import re
+from collections.abc import Callable
+from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.exceptions import MessageContentError
+from convoviz.models import Conversation, Node
+from convoviz.renderers.yaml import render_yaml_header
def close_code_blocks(text: str) -> str:
    """Ensure all code blocks in the text are properly closed.

    A block is opened by any line that starts with three backticks
    (optionally followed by a language tag) and closed by a line that holds
    only three backticks. Trailing whitespace on the closing line is ignored,
    which also covers the ``\\r`` left at the end of each line when CRLF text
    is split on ``\\n``; without that, an already-closed block would be
    treated as open and receive a second, dangling fence.

    Args:
        text: Markdown text that may have unclosed code blocks

    Returns:
        The text unchanged when every block is closed, otherwise the text
        with a closing fence appended on a new line
    """
    open_code_block = False
    for line in text.split("\n"):
        if not open_code_block:
            # Any fence line (bare or with a language tag) opens a block.
            open_code_block = line.startswith("```")
        elif line.rstrip() == "```":
            # Only a bare fence closes; "```python" inside a block is content.
            open_code_block = False
    return f"{text}\n```" if open_code_block else text
def replace_latex_delimiters(text: str) -> str:
    """Swap LaTeX bracket delimiters for dollar-sign delimiters.

    Args:
        text: Text with \\[ \\] \\( \\) delimiters

    Returns:
        Text where display delimiters became $$ and inline delimiters became $
    """
    # (pattern, replacement) pairs, applied one after another in this order.
    substitutions = (
        (r"\\\[", "$$"),
        (r"\\\]", "$$"),
        (r"\\\(", "$"),
        (r"\\\)", "$"),
    )
    for pattern, dollars in substitutions:
        text = re.sub(pattern, dollars, text)
    return text
def code_block(text: str, lang: str = "python") -> str:
    """Surround text with a fenced markdown code block.

    Args:
        text: The code to wrap
        lang: The language tag placed after the opening fence

    Returns:
        Markdown code block string
    """
    opening_fence = f"```{lang}"
    closing_fence = "```"
    return "\n".join((opening_fence, f"{text}", closing_fence))
def render_obsidian_callout(
    content: str,
    title: str,
    callout_type: str = "NOTE",
    collapsed: bool = True,
) -> str:
    """Render content as an Obsidian foldable callout.

    The emitted syntax is ``> [!TYPE]- Title`` (or ``+`` when expanded)
    followed by the content with every line quoted. This is Obsidian-specific;
    on GitHub/standard markdown it renders as a blockquote.

    Args:
        content: The content to wrap; surrounding whitespace is stripped
        title: The callout title
        callout_type: The callout type (NOTE, TIP, WARNING, etc.)
        collapsed: Whether to default to collapsed (-) or expanded (+)

    Returns:
        Markdown callout string
    """
    if collapsed:
        fold_marker = "-"
    else:
        fold_marker = "+"
    heading = f"> [!{callout_type}]{fold_marker} {title}"
    quoted_body = [f"> {row}" for row in content.strip().split("\n")]
    return "\n".join([heading, *quoted_body])
def render_message_header(role: str, headers: AuthorHeaders) -> str:
    """Look up the markdown header configured for a message author.

    Args:
        role: The author role (user, assistant, system, tool)
        headers: Configuration for author headers

    Returns:
        The configured header for one of the four known roles; any other
        role falls back to a level-3 header made from the title-cased role
    """
    configured = {
        known_role: getattr(headers, known_role)
        for known_role in ("system", "user", "assistant", "tool")
    }
    fallback = f"### {role.title()}"
    return configured.get(role, fallback)
def render_node_header(node: Node, headers: AuthorHeaders) -> str:
    """Build the header line for a node's message.

    Args:
        node: The node to render
        headers: Configuration for author headers

    Returns:
        The author header followed by a newline, or an empty string when
        the node carries no message
    """
    message = node.message
    if message is not None:
        return render_message_header(message.author.role, headers) + "\n"
    return ""
# Hidden content types that the Obsidian flavor renders as collapsible
# callouts, keyed by content_type; each value is (callout_type, title).
OBSIDIAN_COLLAPSIBLE_TYPES: dict[str, tuple[str, str]] = dict(
    reasoning_recap=("NOTE", "🧠 AI Reasoning"),
    thoughts=("NOTE", "💭 AI Thoughts"),
)
def render_node(
    node: Node,
    headers: AuthorHeaders,
    use_dollar_latex: bool = False,
    asset_resolver: Callable[[str], str | None] | None = None,
    flavor: str = "standard",
) -> str:
    """Render one conversation node as a markdown section.

    Args:
        node: The node to render
        headers: Configuration for author headers
        use_dollar_latex: Whether to convert LaTeX delimiters to dollars
        asset_resolver: Function to resolve asset IDs to paths
        flavor: Markdown flavor ("standard" or "obsidian")

    Returns:
        An empty string for nodes without a message and for hidden messages;
        a collapsible callout (no ``---`` separator) for the Obsidian-only
        collapsible content types; otherwise header, body and resolved
        images followed by a ``---`` separator
    """
    message = node.message
    if message is None:
        return ""

    def message_text() -> str:
        # Some message types only contain non-text parts (they may still
        # carry images); treat those as having empty text.
        try:
            return message.text
        except MessageContentError:
            return ""

    kind = message.content.content_type

    # Obsidian flavor: certain hidden types become collapsible callouts.
    # They get no separator because they are visually distinct and may
    # appear back to back.
    if flavor == "obsidian" and kind in OBSIDIAN_COLLAPSIBLE_TYPES:
        callout_source = message_text()
        if callout_source.strip():
            callout_type, title = OBSIDIAN_COLLAPSIBLE_TYPES[kind]
            callout = render_obsidian_callout(
                content=callout_source,
                title=title,
                callout_type=callout_type,
                collapsed=True,
            )
            return f"\n{callout}\n"

    if message.is_hidden:
        return ""

    header = render_node_header(node, headers)

    body = close_code_blocks(message_text())
    if body:
        body = f"\n{body}\n"
    else:
        body = ""
    if use_dollar_latex:
        body = replace_latex_delimiters(body)

    # Images are appended only when a resolver is given and it can map the
    # asset id to a path; standard markdown image syntax is used.
    if asset_resolver and message.images:
        resolved_paths = (asset_resolver(image_id) for image_id in message.images)
        body += "".join(
            f"\n![Image]({rel_path})\n" for rel_path in resolved_paths if rel_path
        )

    return f"\n{header}{body}\n---\n"
def _ordered_nodes(conversation: Conversation) -> list[Node]:
    """Return nodes in a deterministic depth-first (pre-order) traversal.

    ChatGPT exports store nodes in a mapping; dict iteration order is not a
    reliable semantic ordering. For markdown output, we traverse from the
    roots (nodes without a parent, sorted by id), visiting each node before
    its children and the children in their listed order. Nodes that are not
    reachable from any root are appended afterwards, in id order.

    The walk uses an explicit stack instead of recursion: a linear chat is a
    single parent-to-child chain as deep as it has messages, so a recursive
    walk would hit Python's recursion limit on long conversations.

    Args:
        conversation: The conversation whose ``node_mapping`` is traversed

    Returns:
        Every node of the mapping exactly once, in traversal order
    """
    nodes_by_id = sorted(conversation.node_mapping.values(), key=lambda n: n.id)
    roots = [n for n in nodes_by_id if n.parent is None]
    visited: set[str] = set()
    ordered: list[Node] = []

    # Roots first; then every node again so that disconnected/orphan nodes
    # (and members of cycles) are picked up deterministically at the end.
    for start in (*roots, *nodes_by_id):
        stack = [start]
        while stack:
            node = stack.pop()
            if node.id in visited:
                continue
            visited.add(node.id)
            ordered.append(node)
            # Reversed push => the first child is popped (visited) first,
            # reproducing the order of a recursive pre-order walk.
            stack.extend(reversed(list(node.children_nodes)))
    return ordered
def render_conversation(
    conversation: Conversation,
    config: ConversationConfig,
    headers: AuthorHeaders,
    asset_resolver: Callable[[str], str | None] | None = None,
) -> str:
    """Render a whole conversation as one markdown document.

    Args:
        conversation: The conversation to render
        config: Conversation rendering configuration
        headers: Configuration for author headers
        asset_resolver: Function to resolve asset IDs to paths

    Returns:
        The YAML header followed by every message-bearing node, rendered in
        deterministic traversal order
    """
    latex_as_dollars = config.markdown.latex_delimiters == "dollars"
    markdown_flavor = config.markdown.flavor

    # The document opens with the YAML header; rendered nodes follow.
    sections = [render_yaml_header(conversation, config.yaml)]
    sections.extend(
        render_node(
            node,
            headers,
            latex_as_dollars,
            asset_resolver=asset_resolver,
            flavor=markdown_flavor,
        )
        for node in _ordered_nodes(conversation)
        if node.message
    )
    return "".join(sections)

convoviz/renderers/yaml.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""YAML frontmatter rendering for conversations."""
+from __future__ import annotations
+import re
+from datetime import datetime
+from convoviz.config import YAMLConfig
+from convoviz.models import Conversation
# Matches each run of characters that may not appear in a normalized tag,
# i.e. anything other than lowercase ASCII letters, digits, "/", "_" and "-".
_TAG_SAFE_RE: re.Pattern[str] = re.compile(r"[^a-z0-9/_\-]+")
def _to_yaml_scalar(value: object) -> str:
    """Render a single non-container value as YAML text.

    Args:
        value: ``None``, a bool, number, datetime, string, or any other
            object (which is stringified and quoted)

    Returns:
        ``null`` / ``true`` / ``false`` / a plain number / a double-quoted
        string, or for strings containing a newline a literal block scalar
        (``|-`` header line followed by the lines indented by two spaces)
    """
    if value is None:
        return "null"
    if isinstance(value, bool):
        # bool is a subclass of int, so it has to be tested before numbers.
        return "true" if value else "false"
    if isinstance(value, float):
        # str() yields "nan"/"inf", which YAML would read back as strings.
        if value != value:
            return ".nan"
        if value in (float("inf"), float("-inf")):
            return ".inf" if value > 0 else "-.inf"
        return str(value)
    if isinstance(value, int):
        return str(value)
    if isinstance(value, datetime):
        # Frontmatter consumers generally expect ISO 8601 strings
        return f'"{value.isoformat()}"'
    if isinstance(value, str) and "\n" in value:
        # Multiline: use a block scalar
        lines = value.splitlines()
        first_line = next((line for line in lines if line), "")
        # YAML auto-detects block indentation from the first non-empty line;
        # if that line starts with whitespace the detection is wrong, so the
        # two-space indentation is then declared explicitly.
        header = "|2-" if first_line[:1].isspace() else "|-"
        indented = "\n".join(f"  {line}" for line in lines)
        return f"{header}\n{indented}"
    # Strings and the fallback for unknown types: stringify and quote
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _yaml_entry_lines(prefix: str, value: object, indent: int) -> list[str]:
    """Render one mapping value or sequence item introduced by ``prefix``.

    ``prefix`` is the already padded ``key:`` or ``-`` text; ``indent`` is
    the column of the enclosing mapping/sequence.
    """
    if isinstance(value, (dict, list)):
        if not value:
            # An empty container has no child lines; without flow syntax the
            # entry would be a bare "key:" and parse as null.
            return [f"{prefix} {'{}' if isinstance(value, dict) else '[]'}"]
        return [prefix, _to_yaml(value, indent=indent + 2)]
    scalar = _to_yaml_scalar(value)
    if not scalar.startswith("|"):
        return [f"{prefix} {scalar}"]
    # Block scalar: its lines carry a two-space indent of their own and still
    # need the padding of the enclosing mapping/sequence.
    pad = " " * indent
    head, *body = scalar.split("\n")
    return [f"{prefix} {head}", *(f"{pad}{line}" for line in body)]


def _to_yaml(value: object, indent: int = 0) -> str:
    """Serialize dicts, lists and scalars to block-style YAML text.

    Args:
        value: The value to serialize; dicts and lists are walked
            recursively, anything else is rendered as a scalar
        indent: Number of spaces every emitted line is padded with

    Returns:
        YAML text without a trailing newline. Nested containers are indented
        by two further spaces; empty containers are written as ``{}``/``[]``
    """
    pad = " " * indent
    if isinstance(value, dict):
        if not value:
            return f"{pad}{{}}"
        lines: list[str] = []
        for k, v in value.items():
            lines.extend(_yaml_entry_lines(f"{pad}{str(k)}:", v, indent))
        return "\n".join(lines)
    if isinstance(value, list):
        if not value:
            return f"{pad}[]"
        lines = []
        for item in value:
            lines.extend(_yaml_entry_lines(f"{pad}-", item, indent))
        return "\n".join(lines)
    return f"{pad}{_to_yaml_scalar(value)}"
def _default_tags(conversation: Conversation) -> list[str]:
    """Build the default tag list: "chatgpt" plus the conversation's plugins.

    Each tag is trimmed, lower-cased, has every run of characters matched by
    ``_TAG_SAFE_RE`` replaced with a dash, and loses leading/trailing dashes.
    Tags that end up empty are dropped, and duplicates keep only their first
    occurrence.
    """
    raw_tags = ["chatgpt", *conversation.plugins]
    cleaned = (
        _TAG_SAFE_RE.sub("-", raw.strip().lower()).strip("-") for raw in raw_tags
    )
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return [tag for tag in dict.fromkeys(cleaned) if tag]
def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
    """Build the YAML frontmatter block for a conversation.

    Args:
        conversation: The conversation to render
        config: YAML configuration specifying which fields to include

    Returns:
        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
    """
    # (field name, lazy reader) in output order. The field name doubles as
    # the name of the config flag that enables it; a reader only runs when
    # its flag is set.
    field_readers = (
        ("title", lambda: conversation.title),
        ("tags", lambda: _default_tags(conversation)),
        ("chat_link", lambda: conversation.url),
        ("create_time", lambda: conversation.create_time),
        ("update_time", lambda: conversation.update_time),
        ("model", lambda: conversation.model),
        ("used_plugins", lambda: conversation.plugins),
        ("message_count", lambda: conversation.message_count("user", "assistant")),
        ("content_types", lambda: conversation.content_types),
        ("custom_instructions", lambda: conversation.custom_instructions),
    )
    yaml_fields: dict[str, object] = {
        name: read() for name, read in field_readers if getattr(config, name)
    }
    if yaml_fields:
        return f"---\n{_to_yaml(yaml_fields)}\n---\n"
    return ""

convoviz/utils.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""Utility functions for convoviz."""
+import re
+from pathlib import Path
# Runs of characters that are replaced by a single underscore: the characters
# Windows forbids in file names plus every ASCII control character (which
# includes NUL, tab and all line breaks) and DEL.
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f\x7f]+')

# Windows reserved device names (compared case-insensitively).
_WINDOWS_RESERVED_NAMES = frozenset(
    {
        "CON",
        "PRN",
        "AUX",
        "NUL",
        *(f"COM{n}" for n in range(1, 10)),
        *(f"LPT{n}" for n in range(1, 10)),
    }
)

# 255 is a common file-name length limit for many filesystems.
_MAX_FILENAME_LENGTH = 255


def sanitize(filename: str) -> str:
    """Sanitize a string to be safe for use as a filename.

    Replaces invalid characters with underscores, handles reserved names,
    and prevents path traversal characters.

    Args:
        filename: The string to sanitize

    Returns:
        A filename-safe string, or "untitled" if empty or invalid
    """
    result = _INVALID_FILENAME_CHARS.sub("_", filename.strip())

    # Prevent path traversal
    result = result.replace("..", "_")

    # Windows treats a device name followed by an extension ("NUL.txt") like
    # the bare device name, so only the part before the first dot is compared.
    device_part = result.split(".", 1)[0].rstrip()
    if device_part.upper() in _WINDOWS_RESERVED_NAMES:
        result = f"_{result}_"

    result = result[:_MAX_FILENAME_LENGTH]

    # "." names the current directory and is therefore not a usable filename.
    if result in ("", "."):
        return "untitled"
    return result
def validate_header(text: str) -> bool:
    """Tell whether text looks like a markdown header.

    Args:
        text: The text to validate

    Returns:
        True if the text starts with a run of 1-6 ``#`` characters that is
        followed by whitespace and some content
    """
    max_header_level = 6
    tokens = text.split(maxsplit=1)
    # Needs both a leading "#" and something after the first whitespace gap.
    if not text.startswith("#") or len(tokens) < 2:
        return False
    marker = tokens[0]
    # The marker must consist solely of "#" and be at most six long.
    return marker.count("#") == len(marker) <= max_header_level
def root_dir() -> Path:
    """Locate the directory that contains this module.

    Returns:
        Path to the package root
    """
    this_module = Path(__file__)
    return this_module.parent
def get_asset_path(relative_path: str) -> Path:
    """Resolve an asset location against the package directory.

    Args:
        relative_path: Path relative to convoviz root (e.g., "assets/fonts/foo.ttf")

    Returns:
        The package root joined with ``relative_path``
    """
    package_root = root_dir()
    return package_root.joinpath(relative_path)
def font_dir() -> Path:
    """Locate the directory holding the bundled fonts.

    Returns:
        Path to the assets/fonts directory
    """
    return root_dir().joinpath("assets", "fonts")
def font_names() -> list[str]:
    """List the fonts found in the fonts directory.

    Returns:
        List of font names (without .ttf extension); empty when the fonts
        directory does not exist
    """
    fonts_path = font_dir()
    names: list[str] = []
    if fonts_path.exists():
        names.extend(font_file.stem for font_file in fonts_path.glob("*.ttf"))
    return names
def font_path(font_name: str) -> Path:
    """Build the path of a font file inside the fonts directory.

    Args:
        font_name: Name of the font (without extension)

    Returns:
        Path to the font file
    """
    file_name = f"{font_name}.ttf"
    return font_dir().joinpath(file_name)
def default_font_path() -> Path:
    """Return the location of the font used by default.

    Returns:
        Path to Kalam-Regular.ttf
    """
    default_font = "Kalam-Regular"
    return font_path(default_font)
def colormaps() -> list[str]:
    """Read the available colormap names.

    Returns:
        The lines of assets/colormaps.txt, or an empty list when that file
        does not exist
    """
    colormaps_path = root_dir().joinpath("assets", "colormaps.txt")
    if colormaps_path.exists():
        return colormaps_path.read_text(encoding="utf-8").splitlines()
    return []