PyPI - convoviz - Versions diffs - 0.4.1__py3-none-any.whl - Mend

convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

convoviz/__init__.py +34 -0
convoviz/__main__.py +6 -0
convoviz/analysis/__init__.py +22 -0
convoviz/analysis/graphs.py +879 -0
convoviz/analysis/wordcloud.py +204 -0
convoviz/assets/colormaps.txt +15 -0
convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
convoviz/assets/fonts/Borel-Regular.ttf +0 -0
convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
convoviz/assets/stopwords.txt +1 -0
convoviz/cli.py +149 -0
convoviz/config.py +120 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +264 -0
convoviz/io/__init__.py +21 -0
convoviz/io/assets.py +109 -0
convoviz/io/loaders.py +191 -0
convoviz/io/writers.py +231 -0
convoviz/logging_config.py +69 -0
convoviz/models/__init__.py +24 -0
convoviz/models/collection.py +115 -0
convoviz/models/conversation.py +158 -0
convoviz/models/message.py +218 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +184 -0
convoviz/py.typed +0 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +269 -0
convoviz/renderers/yaml.py +119 -0
convoviz/utils.py +155 -0
convoviz-0.4.1.dist-info/METADATA +215 -0
convoviz-0.4.1.dist-info/RECORD +62 -0
convoviz-0.4.1.dist-info/WHEEL +4 -0
convoviz-0.4.1.dist-info/entry_points.txt +3 -0

convoviz/analysis/wordcloud.py ADDED Viewed

@@ -0,0 +1,204 @@
+"""Word cloud generation for conversation text."""
+import logging
+import os
+from concurrent.futures import ProcessPoolExecutor
+from functools import lru_cache
+from pathlib import Path
+from nltk import download as nltk_download
+from nltk.corpus import stopwords as nltk_stopwords
+from nltk.data import find as nltk_find
+from PIL.Image import Image
+from tqdm import tqdm
+from wordcloud import WordCloud
+from convoviz.config import WordCloudConfig
+from convoviz.models import ConversationCollection
+logger = logging.getLogger(__name__)  # module-level logger named after this module
+# Languages whose NLTK stopword lists are merged together by load_nltk_stopwords()
+STOPWORD_LANGUAGES = [
+    "arabic",
+    "english",
+    "french",
+    "german",
+    "spanish",
+    "portuguese",
+]
+@lru_cache(maxsize=1)
+def load_programming_stopwords() -> frozenset[str]:
+    """Load programming keywords and types from assets.
+    Returns:
+        Frozen set of programming stop words
+    """
+    stopwords_path = Path(__file__).parent.parent / "assets" / "stopwords.txt"
+    if not stopwords_path.exists():
+        return frozenset()
+    with open(stopwords_path, encoding="utf-8") as f:
+        return frozenset(
+            line.strip().lower() for line in f if line.strip() and not line.strip().startswith("#")
+        )
+@lru_cache(maxsize=1)
+def load_nltk_stopwords() -> frozenset[str]:
+    """Load and cache NLTK stopwords.
+    Downloads stopwords if not already present.
+    Returns:
+        Frozen set of stopwords from multiple languages
+    """
+    try:
+        nltk_find("corpora/stopwords")
+    except LookupError:
+        nltk_download("stopwords", quiet=True)
+    words: set[str] = set()
+    for lang in STOPWORD_LANGUAGES:
+        words.update(nltk_stopwords.words(fileids=lang))
+    return frozenset(words)
+def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
+    """Parse a comma-separated string of custom stopwords.
+    Args:
+        stopwords_str: Comma-separated stopwords
+    Returns:
+        Set of lowercase, stripped stopwords
+    """
+    if not stopwords_str:
+        return set()
+    return {word.strip().lower() for word in stopwords_str.split(",") if word.strip()}
+def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
+    """Generate a word cloud from text.
+    Args:
+        text: The text to create a word cloud from
+        config: Word cloud configuration
+    Returns:
+        PIL Image of the word cloud
+    """
+    # Combine NLTK and custom stopwords
+    stopwords = set(load_nltk_stopwords())
+    stopwords.update(parse_custom_stopwords(config.custom_stopwords))
+    if config.exclude_programming_keywords:
+        stopwords.update(load_programming_stopwords())
+    wc = WordCloud(
+        font_path=str(config.font_path) if config.font_path else None,
+        width=config.width,
+        height=config.height,
+        stopwords=stopwords,
+        background_color=config.background_color,
+        mode=config.mode,
+        colormap=config.colormap,
+        include_numbers=config.include_numbers,
+    )
+    wc.generate(text)
+    result: Image = wc.to_image()
+    return result
+def _generate_and_save_wordcloud(args: tuple[str, str, Path, WordCloudConfig]) -> bool:
+    """Worker function for parallel wordcloud generation.
+    Must be at module level for pickling by ProcessPoolExecutor.
+    Args:
+        args: Tuple of (text, filename, output_dir, config)
+    Returns:
+        True if wordcloud was generated, False if skipped (empty text)
+    """
+    text, filename, output_dir, config = args
+    if not text.strip():
+        return False
+    img = generate_wordcloud(text, config)
+    img.save(output_dir / filename, optimize=True)
+    return True
+def generate_wordclouds(
+    collection: ConversationCollection,
+    output_dir: Path,
+    config: WordCloudConfig,
+    *,
+    progress_bar: bool = False,
+) -> None:
+    """Generate word clouds for weekly, monthly, and yearly groupings.
+    Uses parallel processing to speed up generation on multi-core systems.
+    Args:
+        collection: Collection of conversations
+        output_dir: Directory to save the word clouds
+        config: Word cloud configuration
+        progress_bar: Whether to show progress bars
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    logger.info(f"Generating wordclouds to {output_dir}")
+    week_groups = collection.group_by_week()
+    month_groups = collection.group_by_month()
+    year_groups = collection.group_by_year()
+    # Pre-load/download NLTK stopwords in the main process to avoid race conditions in workers
+    load_nltk_stopwords()
+    # Build list of all tasks: (text, filename, output_dir, config)
+    tasks: list[tuple[str, str, Path, WordCloudConfig]] = []
+    for week, group in week_groups.items():
+        text = group.plaintext("user", "assistant")
+        # Format: 2024-W15.png (ISO week format)
+        filename = f"{week.strftime('%Y-W%W')}.png"
+        tasks.append((text, filename, output_dir, config))
+    for month, group in month_groups.items():
+        text = group.plaintext("user", "assistant")
+        # Format: 2024-03-March.png (consistent with folder naming)
+        filename = f"{month.strftime('%Y-%m-%B')}.png"
+        tasks.append((text, filename, output_dir, config))
+    for year, group in year_groups.items():
+        text = group.plaintext("user", "assistant")
+        # Format: 2024.png
+        filename = f"{year.strftime('%Y')}.png"
+        tasks.append((text, filename, output_dir, config))
+    if not tasks:
+        return
+    # Determine worker count: use config if set, otherwise half CPU count (min 1)
+    max_workers = config.max_workers
+    if max_workers is None:
+        cpu_count = os.cpu_count() or 2
+        max_workers = max(1, cpu_count // 2)
+    # Use parallel processing for speedup on multi-core systems
+    logger.debug(f"Starting wordcloud generation with {max_workers} workers for {len(tasks)} tasks")
+    with ProcessPoolExecutor(max_workers=max_workers) as executor:
+        list(
+            tqdm(
+                executor.map(_generate_and_save_wordcloud, tasks),
+                total=len(tasks),
+                desc="Creating wordclouds 🔡☁️",
+                disable=not progress_bar,
+            )
+        )

convoviz/assets/colormaps.txt ADDED Viewed

@@ -0,0 +1,15 @@
+viridis
+plasma
+inferno
+magma
+cividis
+Blues
+Greens
+YlGnBu
+YlOrRd
+RdYlBu
+Spectral
+coolwarm
+terrain
+ocean
+flag

convoviz/assets/fonts/AmaticSC-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/BebasNeue-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Borel-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Courgette-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/CroissantOne-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Handjet-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/IndieFlower-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Kalam-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Lobster-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/MartianMono-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/MartianMono-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Montserrat-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Mooli-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Pacifico-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/PlayfairDisplay-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Raleway-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoMono-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoMono-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoSlab-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoSlab-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Ruwudu-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Sacramento-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/TitilliumWeb-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Yellowtail-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauOffice-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauSC-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauSC-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Zeyada-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/stopwords.txt ADDED Viewed

@@ -0,0 +1 @@
+

convoviz/cli.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""Command-line interface for convoviz."""
+import logging
+from pathlib import Path
+import typer
+from rich.console import Console
+from rich.markup import escape
+from convoviz.config import FolderOrganization, OutputKind, get_default_config
+from convoviz.exceptions import ConfigurationError, InvalidZipError
+from convoviz.interactive import run_interactive_config
+from convoviz.io.loaders import find_latest_zip
+from convoviz.logging_config import setup_logging
+from convoviz.pipeline import run_pipeline
+from convoviz.utils import default_font_path
+app = typer.Typer(  # root Typer application; `run` below is registered as its callback
+    add_completion=False,  # do not expose the shell-completion install/show options
+    help="ChatGPT Data Visualizer 📊 - Convert and visualize your ChatGPT history",
+)
+console = Console()  # shared rich console used for all user-facing output in this module
+@app.callback(invoke_without_command=True)
+def run(
+    ctx: typer.Context,
+    input_path: Path | None = typer.Option(
+        None,
+        "--input",
+        "--zip",
+        "-z",
+        help="Path to the ChatGPT export zip file, JSON file, or extracted directory.",
+        exists=True,
+        file_okay=True,
+        dir_okay=True,
+    ),
+    output_dir: Path | None = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Path to the output directory.",
+    ),
+    outputs: list[OutputKind] | None = typer.Option(
+        None,
+        "--outputs",
+        help="Output types to generate (repeatable). Options: markdown, graphs, wordclouds. "
+        "If not specified, all outputs are generated.",
+    ),
+    flat: bool = typer.Option(
+        False,
+        "--flat",
+        "-f",
+        help="Put all markdown files in a single folder (disables date organization).",
+    ),
+    interactive: bool | None = typer.Option(
+        None,
+        "--interactive/--no-interactive",
+        "-i/-I",
+        help="Force interactive mode on or off.",
+    ),
+    verbose: int = typer.Option(
+        0,
+        "--verbose",
+        "-v",
+        help="Increase verbosity. Use -vv for debug.",
+        count=True,
+    ),
+    log_file: Path | None = typer.Option(
+        None,
+        "--log-file",
+        help="Path to log file. Defaults to a temporary file.",
+    ),
+) -> None:
+    """Convert ChatGPT export data to markdown and generate visualizations."""
+    # Setup logging immediately
+    log_path = setup_logging(verbose, log_file)
+    logger = logging.getLogger("convoviz.cli")
+    console.print(f"[dim]Logging to: {log_path}[/dim]")
+    logger.debug(f"Logging initialized. Output: {log_path}")
+    if ctx.invoked_subcommand is not None:
+        return
+    # Start with default config
+    config = get_default_config()
+    # Override with CLI args
+    if input_path:
+        config.input_path = input_path
+    if output_dir:
+        config.output_folder = output_dir
+    if outputs:
+        config.outputs = set(outputs)
+    if flat:
+        config.folder_organization = FolderOrganization.FLAT
+    # Determine mode: interactive if explicitly requested or no input provided
+    use_interactive = interactive if interactive is not None else (input_path is None)
+    if use_interactive:
+        console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
+        try:
+            config = run_interactive_config(config)
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Cancelled by user.[/yellow]")
+            raise typer.Exit(code=0) from None
+    else:
+        # Non-interactive mode: validate we have what we need
+        if not config.input_path:
+            # Try to find a default
+            latest = find_latest_zip()
+            if latest:
+                console.print(f"No input specified, using latest zip found: {latest}")
+                config.input_path = latest
+            else:
+                console.print(
+                    "[bold red]Error:[/bold red] No input file provided and none found in Downloads."
+                )
+                raise typer.Exit(code=1)
+        # Validate the input (basic check)
+        if not config.input_path.exists():
+            console.print(
+                f"[bold red]Error:[/bold red] Input path does not exist: {config.input_path}"
+            )
+            raise typer.Exit(code=1)
+        # Set default font if not set
+        if not config.wordcloud.font_path:
+            config.wordcloud.font_path = default_font_path()
+    # Run the pipeline
+    try:
+        run_pipeline(config)
+    except (InvalidZipError, ConfigurationError) as e:
+        logger.error(f"Known error: {e}")
+        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
+        raise typer.Exit(code=1) from None
+    except Exception as e:
+        logger.exception("Unexpected error occurred")
+        console.print(f"[bold red]Unexpected error:[/bold red] {escape(str(e))}")
+        console.print(f"[dim]See log file for details: {log_path}[/dim]")
+        raise typer.Exit(code=1) from None
+def main_entry() -> None:
+    """Entry point for the CLI."""
+    app()  # invoke the Typer application defined above; argument parsing happens inside it

convoviz/config.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Configuration models using Pydantic v2."""
+from enum import Enum
+from pathlib import Path
+from typing import Literal
+from pydantic import BaseModel, Field
+class FolderOrganization(str, Enum):  # str mixin: each member is also a plain string value
+    """How to organize markdown output files in folders."""
+    FLAT = "flat"  # All files in one directory
+    DATE = "date"  # Nested by year/month (default)
+class OutputKind(str, Enum):  # str mixin: each member is also a plain string value
+    """Types of outputs that can be generated."""
+    MARKDOWN = "markdown"  # Conversation markdown files
+    GRAPHS = "graphs"  # Usage analytics graphs
+    WORDCLOUDS = "wordclouds"  # Word cloud visualizations
+# Default: generate all outputs
+ALL_OUTPUTS: frozenset[OutputKind] = frozenset(OutputKind)  # iterating the Enum class yields every member
+class AuthorHeaders(BaseModel):
+    """Headers for different message authors in markdown output."""
+    system: str = "### System"  # level-3 markdown heading for the "system" author
+    user: str = "# Me"  # level-1 markdown heading for the "user" author
+    assistant: str = "# ChatGPT"  # level-1 markdown heading for the "assistant" author
+    tool: str = "### Tool output"  # level-3 markdown heading for the "tool" author
+class MarkdownConfig(BaseModel):
+    """Configuration for markdown output."""
+    latex_delimiters: Literal["default", "dollars"] = "dollars"  # NOTE(review): presumably selects how LaTeX math is delimited in the output; confirm in the markdown renderer
+    flavor: Literal["standard", "obsidian"] = "standard"  # NOTE(review): presumably the target markdown dialect; confirm in the markdown renderer
+class YAMLConfig(BaseModel):  # NOTE(review): each flag presumably toggles the same-named frontmatter field; verify against the YAML renderer
+    """Configuration for YAML frontmatter in markdown files."""
+    title: bool = True  # on by default
+    tags: bool = False  # off by default
+    chat_link: bool = True  # on by default
+    create_time: bool = True  # on by default
+    update_time: bool = True  # on by default
+    model: bool = True  # on by default
+    used_plugins: bool = False  # off by default
+    message_count: bool = True  # on by default
+    content_types: bool = False  # off by default
+    custom_instructions: bool = False  # off by default
+class ConversationConfig(BaseModel):
+    """Configuration for conversation rendering."""
+    markdown: MarkdownConfig = Field(default_factory=MarkdownConfig)  # new MarkdownConfig built for each instance
+    yaml: YAMLConfig = Field(default_factory=YAMLConfig)  # new YAMLConfig built for each instance
+class MessageConfig(BaseModel):
+    """Configuration for message rendering."""
+    author_headers: AuthorHeaders = Field(default_factory=AuthorHeaders)  # new AuthorHeaders built for each instance
+class WordCloudConfig(BaseModel):
+    """Configuration for word cloud generation."""
+    font_path: Path | None = None  # None = no font chosen; the CLI fills in default_font_path() in non-interactive mode
+    colormap: str = "RdYlBu"  # forwarded to WordCloud(colormap=...); the default is one of the names in assets/colormaps.txt
+    custom_stopwords: str = "use, file, "  # comma-separated list; split, stripped and lower-cased by parse_custom_stopwords()
+    exclude_programming_keywords: bool = True  # when True, words from assets/stopwords.txt are also excluded
+    background_color: str | None = None  # forwarded to WordCloud(background_color=...)
+    mode: Literal["RGB", "RGBA"] = "RGBA"  # forwarded to WordCloud(mode=...)
+    include_numbers: bool = False  # forwarded to WordCloud(include_numbers=...)
+    width: int = 600  # forwarded to WordCloud(width=...)
+    height: int = 600  # forwarded to WordCloud(height=...)
+    max_workers: int | None = None  # None = use half CPU count (min 1); NOTE(review): other values go to ProcessPoolExecutor unchecked, so values < 1 fail there
+class GraphConfig(BaseModel):  # NOTE(review): the consumer of these fields is not visible here; hedged notes below need confirming against the graph code
+    """Configuration for graph generation."""
+    color: str = "#4A90E2"  # hex RGB color string
+    grid: bool = True  # on by default
+    show_counts: bool = True  # on by default
+    font_name: str = "Montserrat-Regular.ttf"  # matches a font file shipped under convoviz/assets/fonts/
+    figsize: tuple[int, int] = (10, 6)  # presumably (width, height) of the figure; units to be confirmed
+    dpi: int = 300  # presumably the resolution of saved figures; confirm
+    timezone: Literal["utc", "local"] = "local"  # only these two values are accepted
+    generate_monthly_breakdowns: bool = False  # off by default
+    generate_yearly_breakdowns: bool = False  # off by default
+class ConvovizConfig(BaseModel):
+    """Main configuration for convoviz."""
+    input_path: Path | None = None  # None until set from the CLI option or the interactive prompts
+    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")  # default: ~/Documents/ChatGPT-Data, resolved when the instance is created
+    folder_organization: FolderOrganization = FolderOrganization.DATE  # the CLI --flat switch sets this to FLAT
+    outputs: set[OutputKind] = Field(default_factory=lambda: set(ALL_OUTPUTS))  # mutable per-instance copy of ALL_OUTPUTS
+    message: MessageConfig = Field(default_factory=MessageConfig)  # new sub-config built for each instance
+    conversation: ConversationConfig = Field(default_factory=ConversationConfig)  # new sub-config built for each instance
+    wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)  # new sub-config built for each instance
+    graph: GraphConfig = Field(default_factory=GraphConfig)  # new sub-config built for each instance
+    model_config = {"validate_default": True}  # pydantic setting: run validation on default values as well
+# Factory for the default configuration (builds a new instance on every call)
+def get_default_config() -> ConvovizConfig:
+    """Get a fresh default configuration instance."""
+    return ConvovizConfig()  # no arguments: every field takes its declared default

convoviz/exceptions.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Custom exceptions for convoviz."""
+class ConvovizError(Exception):  # common base class of the exceptions defined in this module
+    """Base exception for all convoviz errors."""
+class InvalidZipError(ConvovizError):
+    """Raised when a ZIP file is invalid or missing conversations.json."""
+    def __init__(self, path: str, reason: str = "missing conversations.json") -> None:
+        self.path = path
+        self.reason = reason
+        super().__init__(f"Invalid ZIP file '{path}': {reason}")
+class ConfigurationError(ConvovizError):
+    """Raised for configuration-related errors."""
+    def __init__(self, message: str, field: str | None = None) -> None:
+        self.field = field
+        super().__init__(message)
+class RenderingError(ConvovizError):
+    """Raised when rendering fails."""
+    def __init__(self, message: str, conversation_id: str | None = None) -> None:
+        self.conversation_id = conversation_id
+        super().__init__(message)
+class MessageContentError(ConvovizError):
+    """Raised when message content cannot be extracted."""
+    def __init__(self, message_id: str) -> None:
+        self.message_id = message_id
+        super().__init__(f"No valid content found in message: {message_id}")
+class FileNotFoundError(ConvovizError):  # NOTE(review): shadows the builtin FileNotFoundError wherever this name is imported; it derives from ConvovizError, not OSError, so `except OSError` will not catch it
+    """Raised when a required file is not found."""
+    def __init__(self, path: str, file_type: str = "file") -> None:
+        self.path = path  # path that could not be found, as given
+        self.file_type = file_type  # label used at the start of the message, e.g. "file"
+        super().__init__(f"{file_type.capitalize()} not found: {path}")  # e.g. "File not found: <path>"