PyPI - convoviz - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

convoviz 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

convoviz/analysis/graphs.py +349 -18
convoviz/analysis/wordcloud.py +20 -0
convoviz/assets/stopwords.txt +75 -0
convoviz/cli.py +18 -15
convoviz/config.py +12 -7
convoviz/interactive.py +22 -12
convoviz/io/assets.py +82 -0
convoviz/io/loaders.py +30 -2
convoviz/io/writers.py +17 -2
convoviz/models/__init__.py +0 -4
convoviz/models/collection.py +2 -0
convoviz/models/message.py +45 -3
convoviz/pipeline.py +42 -19
convoviz/renderers/markdown.py +46 -15
convoviz/utils.py +54 -4
{convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/METADATA +4 -24
{convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/RECORD +19 -17
{convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/WHEEL +0 -0
{convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/entry_points.txt +0 -0

convoviz/interactive.py CHANGED Viewed

@@ -7,7 +7,7 @@ from questionary import path as qst_path
 from questionary import text as qst_text
 from convoviz.config import ConvovizConfig, get_default_config
-from convoviz.io.loaders import find_latest_zip, validate_zip
+from convoviz.io.loaders import find_latest_zip
 from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
 CUSTOM_STYLE = Style(
@@ -38,26 +38,25 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     config = initial_config or get_default_config()
     # Set sensible defaults if not already set
-    if not config.zip_filepath:
+    if not config.input_path:
         latest = find_latest_zip()
         if latest:
-            config.zip_filepath = latest
+            config.input_path = latest
     if not config.wordcloud.font_path:
         config.wordcloud.font_path = default_font_path()
-    # Prompt for zip file path
-    zip_default = str(config.zip_filepath) if config.zip_filepath else ""
-    zip_result = qst_path(
-        "Enter the path to the zip file:",
-        default=zip_default,
-        validate=lambda p: validate_zip(Path(p))
-        or "Invalid zip file (must contain conversations.json)",
+    # Prompt for input path
+    input_default = str(config.input_path) if config.input_path else ""
+    input_result = qst_path(
+        "Enter the path to the zip file or extracted directory:",
+        default=input_default,
+        validate=lambda p: Path(p).exists() or "Path must exist",
         style=CUSTOM_STYLE,
     ).ask()
-    if zip_result:
-        config.zip_filepath = Path(zip_result)
+    if input_result:
+        config.input_path = Path(input_result)
     # Prompt for output folder
     output_result = qst_path(
@@ -94,6 +93,17 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     if latex_result:
         config.conversation.markdown.latex_delimiters = latex_result
+    # Prompt for markdown flavor
+    flavor_result = select(
+        "Select the markdown flavor:",
+        choices=["obsidian", "standard"],
+        default=config.conversation.markdown.flavor,
+        style=CUSTOM_STYLE,
+    ).ask()
+    if flavor_result:
+        config.conversation.markdown.flavor = flavor_result
     # Prompt for YAML headers
     yaml_config = config.conversation.yaml
     yaml_choices = [

convoviz/io/assets.py ADDED Viewed

@@ -0,0 +1,82 @@
+"Asset management functions."
+import shutil
+from pathlib import Path
+def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
+    """Find the actual file for a given asset ID in the source directory.
+    Args:
+        source_dir: Directory to search in
+        asset_id: The asset ID (e.g., "file-uuid")
+    Returns:
+        Path to the found file, or None
+    """
+    if not source_dir.exists():
+        return None
+    source_dir = source_dir.resolve()
+    # Safety check for asset_id
+    if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
+        return None
+    # 1. Try exact match
+    exact_path = (source_dir / asset_id).resolve()
+    if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
+        return exact_path
+    # 2. Try prefix match in root
+    try:
+        candidates = list(source_dir.glob(f"{asset_id}*"))
+        files = [
+            p.resolve()
+            for p in candidates
+            if p.is_file() and p.resolve().is_relative_to(source_dir)
+        ]
+        if files:
+            return files[0]
+    except Exception:
+        pass
+    # 3. Try prefix match in dalle-generations
+    dalle_dir = source_dir / "dalle-generations"
+    if dalle_dir.exists() and dalle_dir.is_dir():
+        dalle_dir = dalle_dir.resolve()
+        try:
+            candidates = list(dalle_dir.glob(f"{asset_id}*"))
+            files = [
+                p.resolve()
+                for p in candidates
+                if p.is_file() and p.resolve().is_relative_to(dalle_dir)
+            ]
+            if files:
+                return files[0]
+        except Exception:
+            pass
+    return None
+def copy_asset(source_path: Path, dest_dir: Path) -> str:
+    """Copy an asset to the destination directory.
+    Args:
+        source_path: The source file path
+        dest_dir: The root output directory (assets will be in dest_dir/assets)
+    Returns:
+        Relative path to the asset (e.g., "assets/image.png")
+    """
+    assets_dir = dest_dir / "assets"
+    assets_dir.mkdir(parents=True, exist_ok=True)
+    dest_path = assets_dir / source_path.name
+    if not dest_path.exists():
+        shutil.copy2(source_path, dest_path)
+    # Return forward-slash path for Markdown compatibility even on Windows
+    return f"assets/{source_path.name}"

convoviz/io/loaders.py CHANGED Viewed

@@ -12,14 +12,36 @@ from convoviz.models import Conversation, ConversationCollection
 def extract_archive(filepath: Path) -> Path:
     """Extract a ZIP file and return the extraction folder path.
+    Includes safety checks to prevent Path Traversal (Zip-Slip).
     Args:
         filepath: Path to the ZIP file
     Returns:
         Path to the extracted folder
+    Raises:
+        InvalidZipError: If extraction fails or a security risk is detected
     """
     folder = filepath.with_suffix("")
+    folder.mkdir(parents=True, exist_ok=True)
     with ZipFile(filepath) as zf:
+        for member in zf.infolist():
+            # Check for path traversal (Zip-Slip)
+            member_path = Path(member.filename)
+            if member_path.is_absolute() or ".." in member_path.parts:
+                raise InvalidZipError(
+                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
+                )
+            # Additional check using resolved paths
+            target_path = (folder / member.filename).resolve()
+            if not target_path.is_relative_to(folder.resolve()):
+                raise InvalidZipError(
+                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
+                )
         zf.extractall(folder)
     return folder
@@ -60,7 +82,8 @@ def load_conversation_from_json(filepath: Path | str) -> Conversation:
 def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
     """Load a conversation collection from a JSON file.
-    The JSON file should contain an array of conversation objects.
+    The JSON file should contain an array of conversation objects,
+    or an object with a "conversations" key.
     Args:
         filepath: Path to the JSON file
@@ -71,7 +94,12 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
     filepath = Path(filepath)
     with filepath.open(encoding="utf-8") as f:
         data = loads(f.read())
-    return ConversationCollection(conversations=data)
+    # Handle case where export is wrapped in a top-level object
+    if isinstance(data, dict) and "conversations" in data:
+        data = data["conversations"]
+    return ConversationCollection(conversations=data, source_path=filepath.parent)
 def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:

convoviz/io/writers.py CHANGED Viewed

@@ -7,6 +7,7 @@ from orjson import OPT_INDENT_2, dumps
 from tqdm import tqdm
 from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.io.assets import copy_asset, resolve_asset_path
 from convoviz.models import Conversation, ConversationCollection
 from convoviz.renderers import render_conversation
 from convoviz.utils import sanitize
@@ -17,6 +18,7 @@ def save_conversation(
     filepath: Path,
     config: ConversationConfig,
     headers: AuthorHeaders,
+    source_path: Path | None = None,
 ) -> Path:
     """Save a conversation to a markdown file.
@@ -28,6 +30,7 @@ def save_conversation(
         filepath: Target file path
         config: Conversation rendering configuration
         headers: Author header configuration
+        source_path: Path to the source directory containing assets
     Returns:
         The actual path the file was saved to (may differ if there was a conflict)
@@ -41,8 +44,20 @@ def save_conversation(
         counter += 1
         final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
+    # Define asset resolver
+    def asset_resolver(asset_id: str) -> str | None:
+        if not source_path:
+            return None
+        src_file = resolve_asset_path(source_path, asset_id)
+        if not src_file:
+            return None
+        # Copy to output directory (relative to the markdown file's directory)
+        return copy_asset(src_file, final_path.parent)
     # Render and write
-    markdown = render_conversation(conversation, config, headers)
+    markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
     with final_path.open("w", encoding="utf-8") as f:
         f.write(markdown)
@@ -78,7 +93,7 @@ def save_collection(
         disable=not progress_bar,
     ):
         filepath = directory / f"{sanitize(conv.title)}.md"
-        save_conversation(conv, filepath, config, headers)
+        save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
 def save_custom_instructions(

convoviz/models/__init__.py CHANGED Viewed

@@ -11,14 +11,10 @@ from convoviz.models.message import (
 )
 from convoviz.models.node import Node, build_node_tree
-# Backward compatibility alias
-ConversationSet = ConversationCollection
 __all__ = [
     "AuthorRole",
     "Conversation",
     "ConversationCollection",
-    "ConversationSet",
     "Message",
     "MessageAuthor",
     "MessageContent",

convoviz/models/collection.py CHANGED Viewed

@@ -4,6 +4,7 @@ This is a pure data model - I/O and visualization logic are in separate modules.
 """
 from datetime import datetime
+from pathlib import Path
 from typing import Any
 from pydantic import BaseModel, Field
@@ -19,6 +20,7 @@ class ConversationCollection(BaseModel):
     """
     conversations: list[Conversation] = Field(default_factory=list)
+    source_path: Path | None = None
     @property
     def index(self) -> dict[str, Conversation]:

convoviz/models/message.py CHANGED Viewed

@@ -10,7 +10,7 @@ from pydantic import BaseModel, ConfigDict
 from convoviz.exceptions import MessageContentError
-AuthorRole = Literal["user", "assistant", "system", "tool"]
+AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
 class MessageAuthor(BaseModel):
@@ -25,7 +25,7 @@ class MessageContent(BaseModel):
     """Content of a message."""
     content_type: str
-    parts: list[str] | None = None
+    parts: list[Any] | None = None
     text: str | None = None
     result: str | None = None
@@ -58,11 +58,53 @@ class Message(BaseModel):
     metadata: MessageMetadata
     recipient: str
+    @property
+    def images(self) -> list[str]:
+        """Extract image asset pointers from the message content."""
+        if not self.content.parts:
+            return []
+        image_ids = []
+        for part in self.content.parts:
+            if isinstance(part, dict) and part.get("content_type") == "image_asset_pointer":
+                pointer = part.get("asset_pointer", "")
+                # Strip prefixes like "file-service://" or "sediment://"
+                if pointer.startswith("file-service://"):
+                    pointer = pointer[len("file-service://") :]
+                elif pointer.startswith("sediment://"):
+                    pointer = pointer[len("sediment://") :]
+                if pointer:
+                    image_ids.append(pointer)
+        return image_ids
     @property
     def text(self) -> str:
         """Extract the text content of the message."""
         if self.content.parts is not None:
-            return str(self.content.parts[0]) if self.content.parts else ""
+            # Handle multimodal content where parts can be mixed strings and dicts
+            text_parts = []
+            for part in self.content.parts:
+                if isinstance(part, str):
+                    text_parts.append(part)
+                elif isinstance(part, dict) and "text" in part:
+                    # Some parts might be dicts wrapping text (e.g. code interpreter?)
+                    # But based on spec, usually text is just a string in the list.
+                    # We'll stick to string extraction for now.
+                    pass
+            # If we found string parts, join them.
+            # If parts existed but no strings (e.g. only images), return empty string?
+            # Or should we return a placeholder? For now, let's return joined text.
+            if text_parts:
+                return "".join(text_parts)
+            # If parts list is not empty but contains no strings, we might want to fall through
+            # or return empty string if we consider it "handled".
+            # The original code returned "" if parts was empty list.
+            if self.content.parts:
+                return ""
         if self.content.text is not None:
             return self.content.text
         if self.content.result is not None:

convoviz/pipeline.py CHANGED Viewed

@@ -5,7 +5,7 @@ from shutil import rmtree
 from rich.console import Console
-from convoviz.analysis.graphs import generate_week_barplots
+from convoviz.analysis.graphs import generate_graphs
 from convoviz.analysis.wordcloud import generate_wordclouds
 from convoviz.config import ConvovizConfig
 from convoviz.exceptions import InvalidZipError
@@ -26,20 +26,32 @@ def run_pipeline(config: ConvovizConfig) -> None:
         config: Complete configuration for the pipeline
     Raises:
-        InvalidZipError: If the zip file is invalid
+        InvalidZipError: If the input is invalid
         ConfigurationError: If configuration is incomplete
     """
-    if not config.zip_filepath:
-        raise InvalidZipError("", reason="No zip file specified")
-    zip_path = Path(config.zip_filepath)
-    if not zip_path.exists():
-        raise InvalidZipError(str(zip_path), reason="File does not exist")
-    console.print("Loading data [bold yellow]📂[/bold yellow] ...\n")
-    # Load main collection from zip
-    collection = load_collection_from_zip(zip_path)
+    if not config.input_path:
+        raise InvalidZipError("", reason="No input path specified")
+    input_path = Path(config.input_path)
+    if not input_path.exists():
+        raise InvalidZipError(str(input_path), reason="File does not exist")
+    console.print(f"Loading data from {input_path} [bold yellow]📂[/bold yellow] ...\n")
+    # Load collection based on input type
+    if input_path.is_dir():
+        # Check for conversations.json inside
+        json_path = input_path / "conversations.json"
+        if not json_path.exists():
+            raise InvalidZipError(
+                str(input_path), reason="Directory must contain conversations.json"
+            )
+        collection = load_collection_from_json(json_path)
+    elif input_path.suffix == ".json":
+        collection = load_collection_from_json(input_path)
+    else:
+        # Assume zip
+        collection = load_collection_from_zip(input_path)
     # Try to merge bookmarklet data if available
     bookmarklet_json = find_latest_bookmarklet_json()
@@ -54,12 +66,23 @@ def run_pipeline(config: ConvovizConfig) -> None:
             )
     output_folder = config.output_folder
-    # Clean and recreate output folder
-    if output_folder.exists() and output_folder.is_dir():
-        rmtree(output_folder)
     output_folder.mkdir(parents=True, exist_ok=True)
+    # Clean only specific sub-directories we manage
+    managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
+    for d in managed_dirs:
+        sub_dir = output_folder / d
+        if sub_dir.exists() and sub_dir.is_dir():
+            rmtree(sub_dir)
+        sub_dir.mkdir(exist_ok=True)
+    # Clean specific files we manage
+    managed_files = ["custom_instructions.json"]
+    for f in managed_files:
+        managed_file = output_folder / f
+        if managed_file.exists():
+            managed_file.unlink()
     # Save markdown files
     markdown_folder = output_folder / "Markdown"
     save_collection(
@@ -77,7 +100,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     # Generate graphs
     graph_folder = output_folder / "Graphs"
     graph_folder.mkdir(parents=True, exist_ok=True)
-    generate_week_barplots(
+    generate_graphs(
         collection,
         graph_folder,
         config.graph,
@@ -89,7 +112,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     # Generate word clouds
-    wordcloud_folder = output_folder / "Word Clouds"
+    wordcloud_folder = output_folder / "Word-Clouds"
     wordcloud_folder.mkdir(parents=True, exist_ok=True)
     generate_wordclouds(
         collection,

convoviz/renderers/markdown.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Markdown rendering for conversations."""
 import re
+from collections.abc import Callable
 from convoviz.config import AuthorHeaders, ConversationConfig
 from convoviz.models import Conversation, Node
@@ -79,7 +80,7 @@ def render_message_header(role: str, headers: AuthorHeaders) -> str:
     return header_map.get(role, f"### {role.title()}")
-def render_node_header(node: Node, headers: AuthorHeaders) -> str:
+def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "obsidian") -> str:
     """Render the header section of a node.
     Includes the node ID, parent link, and message author header.
@@ -87,6 +88,7 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
     Args:
         node: The node to render
         headers: Configuration for author headers
+        flavor: Markdown flavor (obsidian, standard)
     Returns:
         The header markdown string
@@ -94,45 +96,57 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
     if node.message is None:
         return ""
-    parts = [f"###### {node.id}"]
+    if flavor == "standard":
+        return render_message_header(node.message.author.role, headers) + "\n"
+    # Obsidian flavor
+    parts = []
     # Add parent link if parent has a message
     if node.parent_node and node.parent_node.message:
-        parts.append(f"[parent ⬆️](#{node.parent_node.id})")
+        parts.append(f"[⬆️](#^{node.parent_node.id})")
-    parts.append(render_message_header(node.message.author.role, headers))
+    author_header = render_message_header(node.message.author.role, headers)
+    parts.append(f"{author_header} ^{node.id}")
     return "\n".join(parts) + "\n"
-def render_node_footer(node: Node) -> str:
+def render_node_footer(node: Node, flavor: str = "obsidian") -> str:
     """Render the footer section of a node with child links.
     Args:
         node: The node to render
+        flavor: Markdown flavor (obsidian, standard)
     Returns:
         The footer markdown string with child navigation links
     """
-    if not node.children_nodes:
+    if flavor == "standard" or not node.children_nodes:
         return ""
     if len(node.children_nodes) == 1:
-        return f"\n[child ⬇️](#{node.children_nodes[0].id})\n"
+        return f"\n[⬇️](#^{node.children_nodes[0].id})\n"
-    links = " | ".join(
-        f"[child {i + 1} ⬇️](#{child.id})" for i, child in enumerate(node.children_nodes)
-    )
+    links = " | ".join(f"[{i + 1} ⬇️](#^{child.id})" for i, child in enumerate(node.children_nodes))
     return f"\n{links}\n"
-def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = False) -> str:
+def render_node(
+    node: Node,
+    headers: AuthorHeaders,
+    use_dollar_latex: bool = False,
+    asset_resolver: Callable[[str], str | None] | None = None,
+    flavor: str = "obsidian",
+) -> str:
     """Render a complete node as markdown.
     Args:
         node: The node to render
         headers: Configuration for author headers
         use_dollar_latex: Whether to convert LaTeX delimiters to dollars
+        asset_resolver: Function to resolve asset IDs to paths
+        flavor: Markdown flavor (obsidian, standard)
     Returns:
         Complete markdown string for the node
@@ -140,7 +154,7 @@ def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = Fal
     if node.message is None:
         return ""
-    header = render_node_header(node, headers)
+    header = render_node_header(node, headers, flavor=flavor)
     # Get and process content
     try:
@@ -148,16 +162,29 @@ def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = Fal
         content = f"\n{content}\n" if content else ""
         if use_dollar_latex:
             content = replace_latex_delimiters(content)
+        # Append images if resolver is provided and images exist
+        if asset_resolver and node.message.images:
+            for image_id in node.message.images:
+                rel_path = asset_resolver(image_id)
+                if rel_path:
+                    # Using standard markdown image syntax.
+                    # Obsidian handles this well.
+                    content += f"\n![Image]({rel_path})\n"
     except Exception:
         content = ""
-    footer = render_node_footer(node)
+    footer = render_node_footer(node, flavor=flavor)
     return f"\n{header}{content}{footer}\n---\n"
 def render_conversation(
-    conversation: Conversation, config: ConversationConfig, headers: AuthorHeaders
+    conversation: Conversation,
+    config: ConversationConfig,
+    headers: AuthorHeaders,
+    asset_resolver: Callable[[str], str | None] | None = None,
 ) -> str:
     """Render a complete conversation as markdown.
@@ -165,11 +192,13 @@ def render_conversation(
         conversation: The conversation to render
         config: Conversation rendering configuration
         headers: Configuration for author headers
+        asset_resolver: Function to resolve asset IDs to paths
     Returns:
         Complete markdown document string
     """
     use_dollar_latex = config.markdown.latex_delimiters == "dollars"
+    flavor = config.markdown.flavor
     # Start with YAML header
     markdown = render_yaml_header(conversation, config.yaml)
@@ -177,6 +206,8 @@ def render_conversation(
     # Render all message nodes
     for node in conversation.all_message_nodes:
         if node.message:
-            markdown += render_node(node, headers, use_dollar_latex)
+            markdown += render_node(
+                node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
+            )
     return markdown

convoviz 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

convoviz 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl