PyPI - convoviz - Versions diffs - 0.2.12__py3-none-any.whl - Mend

convoviz 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

convoviz/__init__.py +25 -0
convoviz/__main__.py +6 -0
convoviz/analysis/__init__.py +9 -0
convoviz/analysis/graphs.py +855 -0
convoviz/analysis/wordcloud.py +165 -0
convoviz/assets/colormaps.txt +15 -0
convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
convoviz/assets/fonts/Borel-Regular.ttf +0 -0
convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
convoviz/assets/stopwords.txt +1 -0
convoviz/cli.py +117 -0
convoviz/config.py +106 -0
convoviz/exceptions.py +47 -0
convoviz/interactive.py +247 -0
convoviz/io/__init__.py +21 -0
convoviz/io/assets.py +98 -0
convoviz/io/loaders.py +186 -0
convoviz/io/writers.py +227 -0
convoviz/models/__init__.py +24 -0
convoviz/models/collection.py +115 -0
convoviz/models/conversation.py +158 -0
convoviz/models/message.py +218 -0
convoviz/models/node.py +66 -0
convoviz/pipeline.py +167 -0
convoviz/py.typed +0 -0
convoviz/renderers/__init__.py +10 -0
convoviz/renderers/markdown.py +269 -0
convoviz/renderers/yaml.py +119 -0
convoviz/utils.py +155 -0
convoviz-0.2.12.dist-info/METADATA +148 -0
convoviz-0.2.12.dist-info/RECORD +61 -0
convoviz-0.2.12.dist-info/WHEEL +4 -0
convoviz-0.2.12.dist-info/entry_points.txt +3 -0

convoviz/analysis/wordcloud.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""Word cloud generation for conversation text."""
+from functools import lru_cache
+from pathlib import Path
+from nltk import download as nltk_download
+from nltk.corpus import stopwords as nltk_stopwords
+from nltk.data import find as nltk_find
+from PIL.Image import Image
+from tqdm import tqdm
+from wordcloud import WordCloud
+from convoviz.config import WordCloudConfig
+from convoviz.models import ConversationCollection
+# Languages whose NLTK stopword lists are merged by load_nltk_stopwords();
+# each name is passed as a fileid to nltk.corpus.stopwords.words().
+STOPWORD_LANGUAGES = [
+    "arabic",
+    "english",
+    "french",
+    "german",
+    "spanish",
+    "portuguese",
+]
+@lru_cache(maxsize=1)
+def load_programming_stopwords() -> frozenset[str]:
+    """Read the bundled programming stop words (keywords and type names).
+    The list is resolved relative to this module as
+    ``../assets/stopwords.txt``; the result is cached after the first call.
+    Returns:
+        Frozen set of lowercased entries, skipping blank lines and lines
+        starting with ``#``. Empty when the asset file does not exist.
+    """
+    asset_file = Path(__file__).parent.parent / "assets" / "stopwords.txt"
+    if not asset_file.exists():
+        return frozenset()
+    entries: set[str] = set()
+    with open(asset_file, encoding="utf-8") as handle:
+        for raw_line in handle:
+            entry = raw_line.strip()
+            # Skip blank lines and "#" comment lines.
+            if entry and not entry.startswith("#"):
+                entries.add(entry.lower())
+    return frozenset(entries)
+@lru_cache(maxsize=1)
+def load_nltk_stopwords() -> frozenset[str]:
+    """Return the merged NLTK stopwords for every configured language.
+    If the ``corpora/stopwords`` resource cannot be located, it is
+    downloaded quietly first. The result is cached after the first call.
+    Returns:
+        Frozen set combining the stopword lists of all STOPWORD_LANGUAGES
+    """
+    try:
+        nltk_find("corpora/stopwords")
+    except LookupError:
+        # Corpus not found locally: fetch it before reading the word lists.
+        nltk_download("stopwords", quiet=True)
+    per_language = (nltk_stopwords.words(fileids=lang) for lang in STOPWORD_LANGUAGES)
+    return frozenset().union(*per_language)
+def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
+    """Split a comma-separated stopword string into a normalized set.
+    Args:
+        stopwords_str: Comma-separated stopwords; None or empty yields no words
+    Returns:
+        Set of stripped, lowercased entries; blank entries are dropped
+    """
+    parsed: set[str] = set()
+    if not stopwords_str:
+        return parsed
+    for piece in stopwords_str.split(","):
+        cleaned = piece.strip()
+        if cleaned:
+            parsed.add(cleaned.lower())
+    return parsed
+def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
+    """Render a single word cloud image for the given text.
+    Stopwords are the cached NLTK set plus the parsed
+    ``config.custom_stopwords``, plus the bundled programming stop words
+    when ``config.exclude_programming_keywords`` is set.
+    Args:
+        text: The text to create a word cloud from
+        config: Word cloud configuration
+    Returns:
+        PIL Image of the word cloud
+    """
+    # Copy the cached frozenset into a mutable set before extending it.
+    excluded = set(load_nltk_stopwords()) | parse_custom_stopwords(config.custom_stopwords)
+    if config.exclude_programming_keywords:
+        excluded |= load_programming_stopwords()
+    font: str | None
+    if config.font_path:
+        font = str(config.font_path)
+    else:
+        font = None
+    cloud = WordCloud(
+        font_path=font,
+        width=config.width,
+        height=config.height,
+        stopwords=excluded,
+        background_color=config.background_color,
+        mode=config.mode,
+        colormap=config.colormap,
+        include_numbers=config.include_numbers,
+    )
+    cloud.generate(text)
+    rendered: Image = cloud.to_image()
+    return rendered
+def generate_wordclouds(
+    collection: ConversationCollection,
+    output_dir: Path,
+    config: WordCloudConfig,
+    *,
+    progress_bar: bool = False,
+) -> None:
+    """Generate word clouds for weekly, monthly, and yearly groupings.
+    One PNG is written per period whose ``plaintext("user", "assistant")``
+    is non-blank. File names: ``2024-W15.png`` (ISO 8601 year and week),
+    ``2024-03-March.png`` and ``2024.png``.
+    Args:
+        collection: Collection of conversations
+        output_dir: Directory to save the word clouds (created if missing)
+        config: Word cloud configuration
+        progress_bar: Whether to show progress bars
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    # (groups, progress description, strftime pattern for the file stem).
+    # Weekly names use %G/%V, the ISO 8601 year and week number. %Y/%W is
+    # not ISO: it counts days before the first Monday as week 0, so it
+    # disagrees with the ISO week in many years and at year boundaries.
+    jobs = (
+        (collection.group_by_week(), "Creating weekly wordclouds 🔡☁️", "%G-W%V"),
+        (collection.group_by_month(), "Creating monthly wordclouds 🔡☁️", "%Y-%m-%B"),
+        (collection.group_by_year(), "Creating yearly wordclouds 🔡☁️", "%Y"),
+    )
+    for groups, desc, stem_format in jobs:
+        for period, group in tqdm(groups.items(), desc=desc, disable=not progress_bar):
+            text = group.plaintext("user", "assistant")
+            if not text.strip():
+                # Nothing to render for this period.
+                continue
+            img = generate_wordcloud(text, config)
+            img.save(output_dir / f"{period.strftime(stem_format)}.png", optimize=True)

convoviz/assets/colormaps.txt ADDED Viewed

@@ -0,0 +1,15 @@
+viridis
+plasma
+inferno
+magma
+cividis
+Blues
+Greens
+YlGnBu
+YlOrRd
+RdYlBu
+Spectral
+coolwarm
+terrain
+ocean
+flag

convoviz/assets/fonts/AmaticSC-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/BebasNeue-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Borel-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Courgette-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/CroissantOne-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Handjet-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/IndieFlower-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Kalam-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Lobster-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/MartianMono-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/MartianMono-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Montserrat-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Mooli-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Pacifico-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/PlayfairDisplay-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Raleway-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoMono-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoMono-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoSlab-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/RobotoSlab-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Ruwudu-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Sacramento-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/TitilliumWeb-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Yellowtail-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauOffice-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauSC-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/YsabeauSC-Thin.ttf ADDED Viewed

Binary file

convoviz/assets/fonts/Zeyada-Regular.ttf ADDED Viewed

Binary file

convoviz/assets/stopwords.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

convoviz/cli.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Command-line interface for convoviz."""
+from pathlib import Path
+import typer
+from rich.console import Console
+from convoviz.config import FolderOrganization, get_default_config
+from convoviz.exceptions import ConfigurationError, InvalidZipError
+from convoviz.interactive import run_interactive_config
+from convoviz.io.loaders import find_latest_zip
+from convoviz.pipeline import run_pipeline
+from convoviz.utils import default_font_path
+# Typer application object; shell-completion options are disabled.
+app = typer.Typer(
+    add_completion=False,
+    help="ChatGPT Data Visualizer 📊 - Convert and visualize your ChatGPT history",
+)
+# Shared rich console used for all CLI output in this module.
+console = Console()
+@app.callback(invoke_without_command=True)
+def run(
+    ctx: typer.Context,
+    input_path: Path | None = typer.Option(
+        None,
+        "--input",
+        "--zip",
+        "-z",
+        help="Path to the ChatGPT export zip file, JSON file, or extracted directory.",
+        exists=True,
+        file_okay=True,
+        dir_okay=True,
+    ),
+    output_dir: Path | None = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Path to the output directory.",
+    ),
+    flat: bool = typer.Option(
+        False,
+        "--flat",
+        "-f",
+        help="Put all markdown files in a single folder (disables date organization).",
+    ),
+    interactive: bool | None = typer.Option(
+        None,
+        "--interactive/--no-interactive",
+        "-i/-I",
+        help="Force interactive mode on or off.",
+    ),
+) -> None:
+    """Convert ChatGPT export data to markdown and generate visualizations."""
+    # A subcommand was invoked: nothing to do in the callback itself.
+    if ctx.invoked_subcommand is not None:
+        return
+    # Defaults first, then layer the CLI overrides on top.
+    config = get_default_config()
+    if input_path:
+        config.input_path = input_path
+    if output_dir:
+        config.output_folder = output_dir
+    if flat:
+        config.folder_organization = FolderOrganization.FLAT
+    # An explicit --interactive/--no-interactive wins; otherwise go
+    # interactive only when no input path was given.
+    if interactive is None:
+        use_interactive = input_path is None
+    else:
+        use_interactive = interactive
+    if use_interactive:
+        console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
+        try:
+            config = run_interactive_config(config)
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Cancelled by user.[/yellow]")
+            raise typer.Exit(code=0) from None
+    else:
+        # Non-interactive mode: make sure there is something to process.
+        if not config.input_path:
+            newest_zip = find_latest_zip()
+            if not newest_zip:
+                console.print(
+                    "[bold red]Error:[/bold red] No input file provided and none found in Downloads."
+                )
+                raise typer.Exit(code=1)
+            console.print(f"No input specified, using latest zip found: {newest_zip}")
+            config.input_path = newest_zip
+        # Basic validation of the chosen input.
+        if not config.input_path.exists():
+            console.print(
+                f"[bold red]Error:[/bold red] Input path does not exist: {config.input_path}"
+            )
+            raise typer.Exit(code=1)
+        # Fall back to the default font when none was configured.
+        if not config.wordcloud.font_path:
+            config.wordcloud.font_path = default_font_path()
+    # Run the pipeline; every failure is reported and mapped to exit code 1.
+    try:
+        run_pipeline(config)
+    except (InvalidZipError, ConfigurationError) as err:
+        console.print(f"[bold red]Error:[/bold red] {err}")
+        raise typer.Exit(code=1) from None
+    except Exception as err:
+        console.print(f"[bold red]Unexpected error:[/bold red] {err}")
+        raise typer.Exit(code=1) from None
+def main_entry() -> None:
+    """Entry point for the CLI.
+    Invokes the module-level Typer ``app``.
+    """
+    app()

convoviz/config.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Configuration models using Pydantic v2."""
+from enum import Enum
+from pathlib import Path
+from typing import Literal
+from pydantic import BaseModel, Field
+# String-valued enum: members also behave as plain str values.
+class FolderOrganization(str, Enum):
+    """How to organize markdown output files in folders."""
+    FLAT = "flat"  # All files in one directory
+    DATE = "date"  # Nested by year/month (default)
+class AuthorHeaders(BaseModel):
+    """Headers for different message authors in markdown output."""
+    # One markdown heading string per author role; the field name is the role.
+    system: str = "### System"
+    user: str = "# Me"
+    assistant: str = "# ChatGPT"
+    tool: str = "### Tool output"
+class MarkdownConfig(BaseModel):
+    """Configuration for markdown output."""
+    # NOTE(review): "dollars" presumably selects $...$ LaTeX delimiters -
+    # confirm against the markdown renderer.
+    latex_delimiters: Literal["default", "dollars"] = "default"
+    # Target markdown dialect; only these two values are accepted.
+    flavor: Literal["standard", "obsidian"] = "standard"
+class YAMLConfig(BaseModel):
+    """Configuration for YAML frontmatter in markdown files."""
+    # NOTE(review): each flag presumably toggles whether the same-named
+    # field is written to the frontmatter - confirm in renderers/yaml.py.
+    # Only `tags` is off by default.
+    title: bool = True
+    tags: bool = False
+    chat_link: bool = True
+    create_time: bool = True
+    update_time: bool = True
+    model: bool = True
+    used_plugins: bool = True
+    message_count: bool = True
+    content_types: bool = True
+    custom_instructions: bool = True
+class ConversationConfig(BaseModel):
+    """Configuration for conversation rendering."""
+    # default_factory builds a fresh sub-config for each instance.
+    markdown: MarkdownConfig = Field(default_factory=MarkdownConfig)
+    yaml: YAMLConfig = Field(default_factory=YAMLConfig)
+class MessageConfig(BaseModel):
+    """Configuration for message rendering."""
+    # Per-author heading strings; a fresh AuthorHeaders per instance.
+    author_headers: AuthorHeaders = Field(default_factory=AuthorHeaders)
+class WordCloudConfig(BaseModel):
+    """Configuration for word cloud generation."""
+    # generate_wordcloud() forwards font_path, colormap, background_color,
+    # mode, include_numbers, width and height to the WordCloud constructor.
+    # Font file to render with; None is forwarded as font_path=None.
+    font_path: Path | None = None
+    colormap: str = "RdYlBu"
+    # Comma-separated extra stopwords, parsed by parse_custom_stopwords().
+    custom_stopwords: str = "use, file, "
+    # When True, entries from assets/stopwords.txt are excluded as well.
+    exclude_programming_keywords: bool = True
+    background_color: str | None = None
+    mode: Literal["RGB", "RGBA"] = "RGBA"
+    include_numbers: bool = False
+    # Image size; NOTE(review): presumably pixels - confirm in wordcloud docs.
+    width: int = 600
+    height: int = 600
+class GraphConfig(BaseModel):
+    """Configuration for graph generation."""
+    # NOTE(review): the consumers of these fields (analysis/graphs.py) are
+    # not visible here; the notes below are assumptions to confirm.
+    color: str = "#4A90E2"
+    grid: bool = True
+    show_counts: bool = True
+    # Presumably a file name under assets/fonts (this font ships in the wheel).
+    font_name: str = "Montserrat-Regular.ttf"
+    # Presumably figure size and resolution for the plotting backend.
+    figsize: tuple[int, int] = (10, 6)
+    dpi: int = 300
+    timezone: Literal["utc", "local"] = "local"
+    # Optional extra per-month / per-year graphs; both off by default.
+    generate_monthly_breakdowns: bool = False
+    generate_yearly_breakdowns: bool = False
+class ConvovizConfig(BaseModel):
+    """Main configuration for convoviz."""
+    # Export to process; None means it still has to be chosen (the CLI then
+    # goes interactive or looks for the latest zip).
+    input_path: Path | None = None
+    # Defaults to ~/Documents/ChatGPT-Data, resolved when the instance is built.
+    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
+    folder_organization: FolderOrganization = FolderOrganization.DATE
+    # Nested sub-configs; default_factory gives every instance its own copy.
+    message: MessageConfig = Field(default_factory=MessageConfig)
+    conversation: ConversationConfig = Field(default_factory=ConversationConfig)
+    wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
+    graph: GraphConfig = Field(default_factory=GraphConfig)
+    # Pydantic v2 model settings: run validation on default values too.
+    model_config = {"validate_default": True}
+# Factory for the default configuration: each call builds a new instance,
+# so callers can mutate the result without affecting one another.
+def get_default_config() -> ConvovizConfig:
+    """Get a fresh default configuration instance."""
+    return ConvovizConfig()

convoviz/exceptions.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Custom exceptions for convoviz."""
+# Root of the package's exception hierarchy; the other exception classes
+# in this module derive from it.
+class ConvovizError(Exception):
+    """Base exception for all convoviz errors."""
+class InvalidZipError(ConvovizError):
+    """Signals a ZIP file that is invalid or lacks conversations.json.
+    Attributes:
+        path: Path of the offending ZIP file, as given
+        reason: Short explanation that is included in the message
+    """
+    def __init__(self, path: str, reason: str = "missing conversations.json") -> None:
+        message = f"Invalid ZIP file '{path}': {reason}"
+        super().__init__(message)
+        self.path = path
+        self.reason = reason
+class ConfigurationError(ConvovizError):
+    """Signals a configuration-related problem.
+    Attributes:
+        field: Name of the related configuration field, or None
+    """
+    def __init__(self, message: str, field: str | None = None) -> None:
+        super().__init__(message)
+        self.field = field
+class RenderingError(ConvovizError):
+    """Signals that rendering failed.
+    Attributes:
+        conversation_id: Identifier of the related conversation, or None
+    """
+    def __init__(self, message: str, conversation_id: str | None = None) -> None:
+        super().__init__(message)
+        self.conversation_id = conversation_id
+class MessageContentError(ConvovizError):
+    """Signals that no content could be extracted from a message.
+    Attributes:
+        message_id: Identifier of the message, also shown in the error text
+    """
+    def __init__(self, message_id: str) -> None:
+        text = f"No valid content found in message: {message_id}"
+        super().__init__(text)
+        self.message_id = message_id
+# NOTE(review): this name shadows the builtin FileNotFoundError in any module
+# that imports it, and it is not an OSError subclass, so
+# `except OSError` will not catch it. Kept as-is for compatibility.
+class FileNotFoundError(ConvovizError):
+    """Signals that a required file is missing.
+    Attributes:
+        path: Path that could not be found
+        file_type: Label for the kind of file; capitalized in the message
+    """
+    def __init__(self, path: str, file_type: str = "file") -> None:
+        label = file_type.capitalize()
+        super().__init__(f"{label} not found: {path}")
+        self.path = path
+        self.file_type = file_type