convoviz 0.1.6__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- convoviz/__init__.py +25 -5
- convoviz/__main__.py +6 -5
- convoviz/analysis/__init__.py +9 -0
- convoviz/analysis/graphs.py +98 -0
- convoviz/analysis/wordcloud.py +142 -0
- convoviz/assets/colormaps.txt +15 -16
- convoviz/cli.py +101 -94
- convoviz/config.py +88 -0
- convoviz/exceptions.py +47 -0
- convoviz/interactive.py +178 -0
- convoviz/io/__init__.py +21 -0
- convoviz/io/loaders.py +135 -0
- convoviz/io/writers.py +96 -0
- convoviz/models/__init__.py +26 -6
- convoviz/models/collection.py +107 -0
- convoviz/models/conversation.py +149 -0
- convoviz/models/message.py +77 -0
- convoviz/models/node.py +66 -0
- convoviz/pipeline.py +120 -0
- convoviz/renderers/__init__.py +10 -0
- convoviz/renderers/markdown.py +182 -0
- convoviz/renderers/yaml.py +42 -0
- convoviz/utils.py +68 -237
- {convoviz-0.1.6.dist-info → convoviz-0.2.0.dist-info}/METADATA +61 -42
- {convoviz-0.1.6.dist-info → convoviz-0.2.0.dist-info}/RECORD +27 -17
- convoviz-0.2.0.dist-info/WHEEL +4 -0
- convoviz-0.2.0.dist-info/entry_points.txt +3 -0
- convoviz/configuration.py +0 -125
- convoviz/data_analysis.py +0 -118
- convoviz/long_runs.py +0 -91
- convoviz/models/_conversation.py +0 -288
- convoviz/models/_conversation_set.py +0 -190
- convoviz/models/_message.py +0 -89
- convoviz/models/_node.py +0 -74
- convoviz-0.1.6.dist-info/LICENSE +0 -21
- convoviz-0.1.6.dist-info/WHEEL +0 -4
convoviz/__init__.py
CHANGED
@@ -1,5 +1,25 @@
-"""
-
-from
-
-
+"""Convoviz - ChatGPT data visualization and export tool."""
+
+from convoviz import analysis, config, io, models, renderers, utils
+from convoviz.config import ConvovizConfig, get_default_config
+from convoviz.models import Conversation, ConversationCollection, Message, Node
+from convoviz.pipeline import run_pipeline
+
+__all__ = [
+    # Submodules
+    "analysis",
+    "config",
+    "io",
+    "models",
+    "renderers",
+    "utils",
+    # Main classes
+    "Conversation",
+    "ConversationCollection",
+    "ConvovizConfig",
+    "Message",
+    "Node",
+    # Functions
+    "get_default_config",
+    "run_pipeline",
+]
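With these new top-level exports, the whole pipeline can be driven programmatically instead of through the CLI. A minimal sketch of that usage; the zip and output paths below are placeholders, and the call mirrors what cli.py does after resolving its options:

from pathlib import Path

from convoviz import get_default_config, run_pipeline

# Start from the packaged defaults, then point at a specific export.
config = get_default_config()
config.zip_filepath = Path("conversations.zip")  # placeholder path
config.output_folder = Path("chatgpt-output")    # placeholder path

# Same entry point the CLI invokes once its options are resolved.
run_pipeline(config)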
convoviz/__main__.py
CHANGED
@@ -1,5 +1,6 @@
-"""
-
-from .cli import
-
-
+"""Allow running convoviz as a module: python -m convoviz"""
+
+from convoviz.cli import main_entry
+
+if __name__ == "__main__":
+    main_entry()
convoviz/analysis/graphs.py
ADDED
@@ -0,0 +1,98 @@
+"""Graph generation for conversation analytics."""
+
+from collections import defaultdict
+from datetime import UTC, datetime
+from pathlib import Path
+
+from matplotlib.figure import Figure
+from tqdm import tqdm
+
+from convoviz.config import GraphConfig
+from convoviz.models import ConversationCollection
+
+WEEKDAYS = [
+    "Monday",
+    "Tuesday",
+    "Wednesday",
+    "Thursday",
+    "Friday",
+    "Saturday",
+    "Sunday",
+]
+
+
+def generate_week_barplot(
+    timestamps: list[float],
+    title: str,
+    _config: GraphConfig | None = None,
+) -> Figure:
+    """Create a bar graph showing message distribution across weekdays.
+
+    Args:
+        timestamps: List of Unix timestamps
+        title: Title for the graph
+        config: Optional graph configuration (for future extensions)
+
+    Returns:
+        Matplotlib Figure object
+    """
+    dates = [datetime.fromtimestamp(ts, UTC) for ts in timestamps]
+
+    weekday_counts: defaultdict[str, int] = defaultdict(int)
+    for date in dates:
+        weekday_counts[WEEKDAYS[date.weekday()]] += 1
+
+    x = WEEKDAYS
+    y = [weekday_counts[day] for day in WEEKDAYS]
+
+    fig = Figure(dpi=300)
+    ax = fig.add_subplot()
+
+    ax.bar(x, y)
+    ax.set_xlabel("Weekday")
+    ax.set_ylabel("Prompt Count")
+    ax.set_title(title)
+    ax.set_xticks(range(len(x)))
+    ax.set_xticklabels(x, rotation=45)
+    fig.tight_layout()
+
+    return fig
+
+
+def generate_week_barplots(
+    collection: ConversationCollection,
+    output_dir: Path,
+    config: GraphConfig | None = None,
+    *,
+    progress_bar: bool = False,
+) -> None:
+    """Generate weekly bar plots for monthly and yearly groupings.
+
+    Args:
+        collection: Collection of conversations
+        output_dir: Directory to save the graphs
+        config: Optional graph configuration
+        progress_bar: Whether to show progress bars
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    month_groups = collection.group_by_month()
+    year_groups = collection.group_by_year()
+
+    for month, group in tqdm(
+        month_groups.items(),
+        desc="Creating monthly weekwise graphs 📈",
+        disable=not progress_bar,
+    ):
+        title = month.strftime("%B '%y")
+        fig = generate_week_barplot(group.timestamps("user"), title, config)
+        fig.savefig(output_dir / f"{month.strftime('%Y %B')}.png")
+
+    for year, group in tqdm(
+        year_groups.items(),
+        desc="Creating yearly weekwise graphs 📈",
+        disable=not progress_bar,
+    ):
+        title = year.strftime("%Y")
+        fig = generate_week_barplot(group.timestamps("user"), title, config)
+        fig.savefig(output_dir / f"{year.strftime('%Y')}.png")
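For reference, generate_week_barplot can also be called on its own with raw Unix timestamps, outside the batch helper above. A small sketch; the two timestamps and the output filename are made up for illustration:

from datetime import UTC, datetime
from pathlib import Path

from convoviz.analysis.graphs import generate_week_barplot

# Two illustrative prompts, one on a Monday and one on a Wednesday (UTC).
timestamps = [
    datetime(2024, 1, 1, 9, 30, tzinfo=UTC).timestamp(),
    datetime(2024, 1, 3, 14, 0, tzinfo=UTC).timestamp(),
]

fig = generate_week_barplot(timestamps, "January '24")
fig.savefig(Path("january_weekdays.png"))  # placeholder filename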
convoviz/analysis/wordcloud.py
ADDED
@@ -0,0 +1,142 @@
+"""Word cloud generation for conversation text."""
+
+from functools import lru_cache
+from pathlib import Path
+
+from nltk import download as nltk_download
+from nltk.corpus import stopwords as nltk_stopwords
+from nltk.data import find as nltk_find
+from PIL.Image import Image
+from tqdm import tqdm
+from wordcloud import WordCloud
+
+from convoviz.config import WordCloudConfig
+from convoviz.models import ConversationCollection
+
+# Languages for stopwords
+STOPWORD_LANGUAGES = [
+    "arabic",
+    "english",
+    "french",
+    "german",
+    "spanish",
+    "portuguese",
+]
+
+
+@lru_cache(maxsize=1)
+def load_nltk_stopwords() -> frozenset[str]:
+    """Load and cache NLTK stopwords.
+
+    Downloads stopwords if not already present.
+
+    Returns:
+        Frozen set of stopwords from multiple languages
+    """
+    try:
+        nltk_find("corpora/stopwords")
+    except LookupError:
+        nltk_download("stopwords", quiet=True)
+
+    words: set[str] = set()
+    for lang in STOPWORD_LANGUAGES:
+        words.update(nltk_stopwords.words(fileids=lang))
+
+    return frozenset(words)
+
+
+def parse_custom_stopwords(stopwords_str: str) -> set[str]:
+    """Parse a comma-separated string of custom stopwords.
+
+    Args:
+        stopwords_str: Comma-separated stopwords
+
+    Returns:
+        Set of lowercase, stripped stopwords
+    """
+    if not stopwords_str:
+        return set()
+
+    return {word.strip().lower() for word in stopwords_str.split(",") if word.strip()}
+
+
+def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
+    """Generate a word cloud from text.
+
+    Args:
+        text: The text to create a word cloud from
+        config: Word cloud configuration
+
+    Returns:
+        PIL Image of the word cloud
+    """
+    # Combine NLTK and custom stopwords
+    stopwords = set(load_nltk_stopwords())
+    stopwords.update(parse_custom_stopwords(config.custom_stopwords))
+
+    wc = WordCloud(
+        font_path=str(config.font_path) if config.font_path else None,
+        width=config.width,
+        height=config.height,
+        stopwords=stopwords,
+        background_color=config.background_color,
+        mode=config.mode,
+        colormap=config.colormap,
+        include_numbers=config.include_numbers,
+    )
+
+    wc.generate(text)
+    result: Image = wc.to_image()
+    return result
+
+
+def generate_wordclouds(
+    collection: ConversationCollection,
+    output_dir: Path,
+    config: WordCloudConfig,
+    *,
+    progress_bar: bool = False,
+) -> None:
+    """Generate word clouds for weekly, monthly, and yearly groupings.
+
+    Args:
+        collection: Collection of conversations
+        output_dir: Directory to save the word clouds
+        config: Word cloud configuration
+        progress_bar: Whether to show progress bars
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    week_groups = collection.group_by_week()
+    month_groups = collection.group_by_month()
+    year_groups = collection.group_by_year()
+
+    for week, group in tqdm(
+        week_groups.items(),
+        desc="Creating weekly wordclouds 🔡☁️",
+        disable=not progress_bar,
+    ):
+        text = group.plaintext("user", "assistant")
+        if text.strip():
+            img = generate_wordcloud(text, config)
+            img.save(output_dir / f"{week.strftime('%Y week %W')}.png", optimize=True)
+
+    for month, group in tqdm(
+        month_groups.items(),
+        desc="Creating monthly wordclouds 🔡☁️",
+        disable=not progress_bar,
+    ):
+        text = group.plaintext("user", "assistant")
+        if text.strip():
+            img = generate_wordcloud(text, config)
+            img.save(output_dir / f"{month.strftime('%Y %B')}.png", optimize=True)
+
+    for year, group in tqdm(
+        year_groups.items(),
+        desc="Creating yearly wordclouds 🔡☁️",
+        disable=not progress_bar,
+    ):
+        text = group.plaintext("user", "assistant")
+        if text.strip():
+            img = generate_wordcloud(text, config)
+            img.save(output_dir / f"{year.strftime('%Y')}.png", optimize=True)
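generate_wordcloud is likewise usable outside the batch helper. A minimal sketch, assuming a near-default WordCloudConfig; the colormap choice, sample text, and output filename are illustrative only:

from convoviz.analysis.wordcloud import generate_wordcloud
from convoviz.config import WordCloudConfig

# Defaults are magma, 1000x1000, RGBA, plus the "use, file" custom stopwords;
# any name from assets/colormaps.txt works as the colormap.
config = WordCloudConfig(colormap="viridis")

text = "convoviz turns chatgpt exports into markdown and visualizations"  # placeholder text
img = generate_wordcloud(text, config)
img.save("sample_wordcloud.png", optimize=True)  # placeholder filename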
convoviz/assets/colormaps.txt
CHANGED
@@ -1,16 +1,15 @@
-viridis
-plasma
-inferno
-magma
-cividis
-Blues
-Greens
-YlGnBu
-YlOrRd
-RdYlBu
-Spectral
-coolwarm
-terrain
-ocean
-
-flag
+viridis
+plasma
+inferno
+magma
+cividis
+Blues
+Greens
+YlGnBu
+YlOrRd
+RdYlBu
+Spectral
+coolwarm
+terrain
+ocean
+flag
convoviz/cli.py
CHANGED
@@ -1,99 +1,106 @@
-"""
-
-from __future__ import annotations
+"""Command-line interface for convoviz."""
 
 from pathlib import Path
-from shutil import rmtree
-
-from .configuration import UserConfigs
-from .long_runs import (
-    generate_week_barplots,
-    generate_wordclouds,
-)
-from .models import ConversationSet
-from .utils import latest_bookmarklet_json
-
-
-def main() -> None:
-    """Run the program."""
-    print(
-        "Welcome to ChatGPT Data Visualizer ✨📊!\n\n"
-        "Follow the instructions in the command line.\n\n"
-        "Press 'ENTER' to select the default options.\n\n"
-        "If you encounter any issues 🐛, please report 🚨 them here:\n\n"
-        "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md/issues/new/choose"
-        " 🔗\n\n",
-    )
-
-    user = UserConfigs()
-
-    user.prompt()
-
-    print("\n\nAnd we're off! 🚀🚀🚀\n")
-
-    user.set_model_configs()
-
-    print("Loading data 📂 ...\n")
-
-    entire_collection = ConversationSet.from_zip(user.configs["zip_filepath"])
-
-    bkmrklet_json = latest_bookmarklet_json()
-    if bkmrklet_json:
-        print("Found bookmarklet download, loading 📂 ...\n")
-        bkmrklet_collection = ConversationSet.from_json(bkmrklet_json)
-        entire_collection.update(bkmrklet_collection)
-
-    output_folder = Path(user.configs["output_folder"])
-
-    # overwrite the output folder if it already exists (might change this in the future)
-    if output_folder.exists() and output_folder.is_dir():
-        rmtree(output_folder)
 
-
+import typer
+from rich.console import Console
 
-
+from convoviz.config import get_default_config
+from convoviz.exceptions import ConfigurationError, InvalidZipError
+from convoviz.interactive import run_interactive_config
+from convoviz.io.loaders import find_latest_zip, validate_zip
+from convoviz.pipeline import run_pipeline
+from convoviz.utils import default_font_path
 
[old lines 56-99 of the 0.1.6 cli.py were also removed; their content is not shown in this diff view]
+app = typer.Typer(
+    add_completion=False,
+    help="ChatGPT Data Visualizer 📊 - Convert and visualize your ChatGPT history",
+)
+console = Console()
+
+
+@app.callback(invoke_without_command=True)
+def run(
+    ctx: typer.Context,
+    zip_path: Path | None = typer.Option(
+        None,
+        "--zip",
+        "-z",
+        help="Path to the ChatGPT export zip file.",
+        exists=True,
+        file_okay=True,
+        dir_okay=False,
+    ),
+    output_dir: Path | None = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Path to the output directory.",
+    ),
+    interactive: bool | None = typer.Option(
+        None,
+        "--interactive/--no-interactive",
+        "-i/-I",
+        help="Force interactive mode on or off.",
+    ),
+) -> None:
+    """Convert ChatGPT export data to markdown and generate visualizations."""
+    if ctx.invoked_subcommand is not None:
+        return
+
+    # Start with default config
+    config = get_default_config()
+
+    # Override with CLI args
+    if zip_path:
+        config.zip_filepath = zip_path
+    if output_dir:
+        config.output_folder = output_dir
+
+    # Determine mode: interactive if explicitly requested or no zip provided
+    use_interactive = interactive if interactive is not None else (zip_path is None)
+
+    if use_interactive:
+        console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
+        try:
+            config = run_interactive_config(config)
+        except KeyboardInterrupt:
+            console.print("\n[yellow]Cancelled by user.[/yellow]")
+            raise typer.Exit(code=0) from None
+    else:
+        # Non-interactive mode: validate we have what we need
+        if not config.zip_filepath:
+            # Try to find a default
+            latest = find_latest_zip()
+            if latest:
+                console.print(f"No zip file specified, using latest found: {latest}")
+                config.zip_filepath = latest
+            else:
+                console.print(
+                    "[bold red]Error:[/bold red] No zip file provided and none found in Downloads."
+                )
+                raise typer.Exit(code=1)
+
+    # Validate the zip
+    if not validate_zip(config.zip_filepath):
+        console.print(f"[bold red]Error:[/bold red] Invalid zip file: {config.zip_filepath}")
+        raise typer.Exit(code=1)
+
+    # Set default font if not set
+    if not config.wordcloud.font_path:
+        config.wordcloud.font_path = default_font_path()
+
+    # Run the pipeline
+    try:
+        run_pipeline(config)
+    except (InvalidZipError, ConfigurationError) as e:
+        console.print(f"[bold red]Error:[/bold red] {e}")
+        raise typer.Exit(code=1) from None
+    except Exception as e:
+        console.print(f"[bold red]Unexpected error:[/bold red] {e}")
+        raise typer.Exit(code=1) from None
+
+
+def main_entry() -> None:
+    """Entry point for the CLI."""
+    app()
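The new --zip / --output / --interactive flags can be exercised end to end through Typer's test runner. A small sketch using typer.testing.CliRunner; the archive and output paths in the commented invocation are placeholders:

from typer.testing import CliRunner

from convoviz.cli import app

runner = CliRunner()

# Inspect the generated help text for the new options.
result = runner.invoke(app, ["--help"])
print(result.output)

# A non-interactive run would look like:
# runner.invoke(app, ["--zip", "conversations.zip", "--output", "out", "--no-interactive"])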
convoviz/config.py
ADDED
@@ -0,0 +1,88 @@
+"""Configuration models using Pydantic v2."""
+
+from pathlib import Path
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class AuthorHeaders(BaseModel):
+    """Headers for different message authors in markdown output."""
+
+    system: str = "### System"
+    user: str = "# Me"
+    assistant: str = "# ChatGPT"
+    tool: str = "### Tool output"
+
+
+class MarkdownConfig(BaseModel):
+    """Configuration for markdown output."""
+
+    latex_delimiters: Literal["default", "dollars"] = "default"
+
+
+class YAMLConfig(BaseModel):
+    """Configuration for YAML frontmatter in markdown files."""
+
+    title: bool = True
+    tags: bool = False
+    chat_link: bool = True
+    create_time: bool = True
+    update_time: bool = True
+    model: bool = True
+    used_plugins: bool = True
+    message_count: bool = True
+    content_types: bool = True
+    custom_instructions: bool = True
+
+
+class ConversationConfig(BaseModel):
+    """Configuration for conversation rendering."""
+
+    markdown: MarkdownConfig = Field(default_factory=MarkdownConfig)
+    yaml: YAMLConfig = Field(default_factory=YAMLConfig)
+
+
+class MessageConfig(BaseModel):
+    """Configuration for message rendering."""
+
+    author_headers: AuthorHeaders = Field(default_factory=AuthorHeaders)
+
+
+class WordCloudConfig(BaseModel):
+    """Configuration for word cloud generation."""
+
+    font_path: Path | None = None
+    colormap: str = "magma"
+    custom_stopwords: str = "use, file, "
+    background_color: str | None = None
+    mode: Literal["RGB", "RGBA"] = "RGBA"
+    include_numbers: bool = False
+    width: int = 1000
+    height: int = 1000
+
+
+class GraphConfig(BaseModel):
+    """Configuration for graph generation."""
+
+    # Extensible for future graph options
+    pass
+
+
+class ConvovizConfig(BaseModel):
+    """Main configuration for convoviz."""
+
+    zip_filepath: Path | None = None
+    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT Data")
+    message: MessageConfig = Field(default_factory=MessageConfig)
+    conversation: ConversationConfig = Field(default_factory=ConversationConfig)
+    wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
+    graph: GraphConfig = Field(default_factory=GraphConfig)
+
+    model_config = {"validate_default": True}
+
+
+# Default configuration instance
+def get_default_config() -> ConvovizConfig:
+    """Get a fresh default configuration instance."""
+    return ConvovizConfig()
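Because these are plain Pydantic v2 models, overrides nest naturally and the whole tree serializes with model_dump. A brief sketch; the paths and field values chosen here are illustrative:

from pathlib import Path

from convoviz.config import ConversationConfig, ConvovizConfig, WordCloudConfig, YAMLConfig

config = ConvovizConfig(
    zip_filepath=Path("conversations.zip"),  # placeholder path
    output_folder=Path("chatgpt-output"),    # placeholder path
    conversation=ConversationConfig(yaml=YAMLConfig(tags=True)),
    wordcloud=WordCloudConfig(colormap="coolwarm", width=1600, height=900),
)

print(config.model_dump())  # nested dict of every setting, defaults included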
convoviz/exceptions.py
ADDED
@@ -0,0 +1,47 @@
+"""Custom exceptions for convoviz."""
+
+
+class ConvovizError(Exception):
+    """Base exception for all convoviz errors."""
+
+
+class InvalidZipError(ConvovizError):
+    """Raised when a ZIP file is invalid or missing conversations.json."""
+
+    def __init__(self, path: str, reason: str = "missing conversations.json") -> None:
+        self.path = path
+        self.reason = reason
+        super().__init__(f"Invalid ZIP file '{path}': {reason}")
+
+
+class ConfigurationError(ConvovizError):
+    """Raised for configuration-related errors."""
+
+    def __init__(self, message: str, field: str | None = None) -> None:
+        self.field = field
+        super().__init__(message)
+
+
+class RenderingError(ConvovizError):
+    """Raised when rendering fails."""
+
+    def __init__(self, message: str, conversation_id: str | None = None) -> None:
+        self.conversation_id = conversation_id
+        super().__init__(message)
+
+
+class MessageContentError(ConvovizError):
+    """Raised when message content cannot be extracted."""
+
+    def __init__(self, message_id: str) -> None:
+        self.message_id = message_id
+        super().__init__(f"No valid content found in message: {message_id}")
+
+
+class FileNotFoundError(ConvovizError):
+    """Raised when a required file is not found."""
+
+    def __init__(self, path: str, file_type: str = "file") -> None:
+        self.path = path
+        self.file_type = file_type
+        super().__init__(f"{file_type.capitalize()} not found: {path}")
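The shared ConvovizError base lets callers catch everything from the package in one clause, or handle the specific cases the CLI distinguishes. A short sketch of programmatic error handling; the zip path is a placeholder:

from pathlib import Path

from convoviz import get_default_config, run_pipeline
from convoviz.exceptions import ConvovizError, InvalidZipError

config = get_default_config()
config.zip_filepath = Path("conversations.zip")  # placeholder path

try:
    run_pipeline(config)
except InvalidZipError as err:
    print(f"Bad export archive: {err.path} ({err.reason})")
except ConvovizError as err:
    # Any other convoviz-specific failure (configuration, rendering, ...).
    print(f"convoviz failed: {err}")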