convoviz 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
convoviz/__init__.py CHANGED
@@ -1,5 +1,25 @@
1
- """Main convoviz package."""
2
-
3
- from . import configuration, data_analysis, long_runs, models, utils
4
-
5
- __all__ = ["models", "utils", "data_analysis", "configuration", "long_runs"]
1
+ """Convoviz - ChatGPT data visualization and export tool."""
2
+
3
+ from convoviz import analysis, config, io, models, renderers, utils
4
+ from convoviz.config import ConvovizConfig, get_default_config
5
+ from convoviz.models import Conversation, ConversationCollection, Message, Node
6
+ from convoviz.pipeline import run_pipeline
7
+
8
# Names re-exported as the package's public API (what `from convoviz import *` exposes).
__all__ = [
    # -- submodules ---------------------------------------------------------
    "analysis",
    "config",
    "io",
    "models",
    "renderers",
    "utils",
    # -- core classes -------------------------------------------------------
    "Conversation",
    "ConversationCollection",
    "ConvovizConfig",
    "Message",
    "Node",
    # -- entry-point helpers ------------------------------------------------
    "get_default_config",
    "run_pipeline",
]
convoviz/__main__.py CHANGED
@@ -1,5 +1,6 @@
1
- """Run convoviz as a module."""
2
-
3
- from .cli import main
4
-
5
- main()
1
+ """Allow running convoviz as a module: python -m convoviz"""
2
+
3
+ from convoviz.cli import main_entry
4
+
5
# Start the CLI only when this module is executed (``python -m convoviz``),
# never as a side effect of importing it.
if __name__ == "__main__":
    main_entry()
@@ -0,0 +1,9 @@
1
+ """Data analysis and visualization for convoviz."""
2
+
3
+ from convoviz.analysis.graphs import generate_week_barplot
4
+ from convoviz.analysis.wordcloud import generate_wordcloud
5
+
6
# Public API of the analysis subpackage: the two single-item generators imported above.
__all__ = ["generate_week_barplot", "generate_wordcloud"]
@@ -0,0 +1,98 @@
1
+ """Graph generation for conversation analytics."""
2
+
3
+ from collections import defaultdict
4
+ from datetime import UTC, datetime
5
+ from pathlib import Path
6
+
7
+ from matplotlib.figure import Figure
8
+ from tqdm import tqdm
9
+
10
+ from convoviz.config import GraphConfig
11
+ from convoviz.models import ConversationCollection
12
+
13
# Day names indexed by ``datetime.weekday()`` (0 = Monday ... 6 = Sunday).
# The order also fixes the left-to-right order of the bars in the weekday plot.
WEEKDAYS = [
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday",
]  # fmt: skip
22
+
23
+
24
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    _config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall on each day of the week.

    Weekdays are determined in UTC. All seven days are always drawn, Monday
    first, so a day without any entries shows up as a zero-height bar.

    Args:
        timestamps: Unix timestamps (seconds since the epoch) to tally.
        title: Text shown above the chart.
        _config: Graph configuration. Accepted so callers can pass one, but
            this function does not read it.

    Returns:
        A standalone Matplotlib ``Figure`` holding the bar chart.
    """
    tally: defaultdict[str, int] = defaultdict(int)
    for ts in timestamps:
        day_name = WEEKDAYS[datetime.fromtimestamp(ts, UTC).weekday()]
        tally[day_name] += 1

    labels = WEEKDAYS
    heights = [tally[day] for day in labels]

    # Built directly from ``Figure`` rather than via pyplot, so no global
    # pyplot state is involved.
    figure = Figure(dpi=300)
    axes = figure.add_subplot()

    axes.bar(labels, heights)
    axes.set_xlabel("Weekday")
    axes.set_ylabel("Prompt Count")
    axes.set_title(title)
    axes.set_xticks(range(len(labels)))
    axes.set_xticklabels(labels, rotation=45)
    figure.tight_layout()

    return figure
60
+
61
+
62
def generate_week_barplots(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Write one weekday bar chart per month and per year of the collection.

    Each chart is saved as a PNG inside ``output_dir``: monthly charts are
    named like ``"2024 January.png"`` and yearly charts like ``"2024.png"``.

    Args:
        collection: Conversations to chart; grouped via ``group_by_month()``
            and ``group_by_year()``.
        output_dir: Destination directory; created (with parents) if missing.
        config: Optional graph configuration, forwarded to
            ``generate_week_barplot``.
        progress_bar: Show a tqdm progress bar for each batch when true.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # One row per batch: (groups, progress label, title format, file-name format).
    batches = (
        (collection.group_by_month(), "Creating monthly weekwise graphs 📈", "%B '%y", "%Y %B"),
        (collection.group_by_year(), "Creating yearly weekwise graphs 📈", "%Y", "%Y"),
    )

    for groups, label, title_format, file_format in batches:
        for period, group in tqdm(groups.items(), desc=label, disable=not progress_bar):
            # NOTE(review): "user" presumably restricts the tally to user-authored
            # messages -- confirm against ConversationCollection.timestamps.
            figure = generate_week_barplot(
                group.timestamps("user"),
                period.strftime(title_format),
                config,
            )
            figure.savefig(output_dir / f"{period.strftime(file_format)}.png")
@@ -0,0 +1,142 @@
1
+ """Word cloud generation for conversation text."""
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+
6
+ from nltk import download as nltk_download
7
+ from nltk.corpus import stopwords as nltk_stopwords
8
+ from nltk.data import find as nltk_find
9
+ from PIL.Image import Image
10
+ from tqdm import tqdm
11
+ from wordcloud import WordCloud
12
+
13
+ from convoviz.config import WordCloudConfig
14
+ from convoviz.models import ConversationCollection
15
+
16
# NLTK stopword file ids whose word lists are merged into the default stopword set.
STOPWORD_LANGUAGES = ["arabic", "english", "french", "german", "spanish", "portuguese"]
25
+
26
+
27
@lru_cache(maxsize=1)
def load_nltk_stopwords() -> frozenset[str]:
    """Return the merged NLTK stopwords for every language in ``STOPWORD_LANGUAGES``.

    If the stopwords corpus cannot be found locally, it is downloaded first.
    The result is cached, so the corpus is read at most once per process.

    Returns:
        An immutable set containing the stopwords of all configured languages.
    """
    try:
        nltk_find("corpora/stopwords")
    except LookupError:
        # Corpus not installed yet: fetch it without printing download progress.
        nltk_download("stopwords", quiet=True)

    return frozenset(
        word for language in STOPWORD_LANGUAGES for word in nltk_stopwords.words(fileids=language)
    )
46
+
47
+
48
def parse_custom_stopwords(stopwords_str: str) -> set[str]:
    """Turn a comma-separated string into a set of normalized stopwords.

    Each entry is stripped of surrounding whitespace and lower-cased; entries
    that are empty after stripping (e.g. from a trailing comma) are dropped.

    Args:
        stopwords_str: Stopwords separated by commas; may be empty.

    Returns:
        The distinct, cleaned stopwords (an empty set for empty input).
    """
    if not stopwords_str:
        return set()

    cleaned = (piece.strip() for piece in stopwords_str.split(","))
    return {token.lower() for token in cleaned if token}
61
+
62
+
63
def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
    """Render ``text`` as a word-cloud image.

    Words are filtered through the cached NLTK stopwords plus the custom
    stopwords listed in ``config.custom_stopwords``.

    Args:
        text: Source text for the cloud.
        config: Rendering options (font, size, colours, stopwords, ...).

    Returns:
        The rendered word cloud as a PIL image.
    """
    # Copy into a fresh set so the cached frozenset is never shared or mutated.
    excluded = {*load_nltk_stopwords(), *parse_custom_stopwords(config.custom_stopwords)}

    # WordCloud takes the font as a string path; ``None`` selects its default font.
    if config.font_path:
        font = str(config.font_path)
    else:
        font = None

    cloud = WordCloud(
        font_path=font,
        width=config.width,
        height=config.height,
        stopwords=excluded,
        background_color=config.background_color,
        mode=config.mode,
        colormap=config.colormap,
        include_numbers=config.include_numbers,
    )
    cloud.generate(text)

    rendered: Image = cloud.to_image()
    return rendered
91
+
92
+
93
def generate_wordclouds(
    collection: ConversationCollection,
    output_dir: Path,
    config: WordCloudConfig,
    *,
    progress_bar: bool = False,
) -> None:
    """Write one word cloud per week, month and year of the collection.

    Images are saved as optimized PNGs inside ``output_dir``, named like
    ``"2024 week 07.png"``, ``"2024 January.png"`` and ``"2024.png"``.
    Periods whose text is empty or whitespace-only are skipped.

    Args:
        collection: Conversations to visualise; grouped via ``group_by_week()``,
            ``group_by_month()`` and ``group_by_year()``.
        output_dir: Destination directory; created (with parents) if missing.
        config: Word cloud configuration, forwarded to ``generate_wordcloud``.
        progress_bar: Show a tqdm progress bar for each batch when true.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # One row per batch: (groups, progress label, file-name format).
    batches = (
        (collection.group_by_week(), "Creating weekly wordclouds 🔡☁️", "%Y week %W"),
        (collection.group_by_month(), "Creating monthly wordclouds 🔡☁️", "%Y %B"),
        (collection.group_by_year(), "Creating yearly wordclouds 🔡☁️", "%Y"),
    )

    for groups, label, file_format in batches:
        for period, group in tqdm(groups.items(), desc=label, disable=not progress_bar):
            text = group.plaintext("user", "assistant")
            if not text.strip():
                # Nothing to draw for this period.
                continue
            image = generate_wordcloud(text, config)
            image.save(output_dir / f"{period.strftime(file_format)}.png", optimize=True)
@@ -1,16 +1,15 @@
1
- viridis
2
- plasma
3
- inferno
4
- magma
5
- cividis
6
- Blues
7
- Greens
8
- YlGnBu
9
- YlOrRd
10
- RdYlBu
11
- Spectral
12
- coolwarm
13
- terrain
14
- ocean
15
- prism
16
- flag
1
+ viridis
2
+ plasma
3
+ inferno
4
+ magma
5
+ cividis
6
+ Blues
7
+ Greens
8
+ YlGnBu
9
+ YlOrRd
10
+ RdYlBu
11
+ Spectral
12
+ coolwarm
13
+ terrain
14
+ ocean
15
+ flag
convoviz/cli.py CHANGED
@@ -1,99 +1,106 @@
1
- """Main file for running the program from the command line."""
2
-
3
- from __future__ import annotations
1
+ """Command-line interface for convoviz."""
4
2
 
5
3
  from pathlib import Path
6
- from shutil import rmtree
7
-
8
- from .configuration import UserConfigs
9
- from .long_runs import (
10
- generate_week_barplots,
11
- generate_wordclouds,
12
- )
13
- from .models import ConversationSet
14
- from .utils import latest_bookmarklet_json
15
-
16
-
17
- def main() -> None:
18
- """Run the program."""
19
- print(
20
- "Welcome to ChatGPT Data Visualizer ✨📊!\n\n"
21
- "Follow the instructions in the command line.\n\n"
22
- "Press 'ENTER' to select the default options.\n\n"
23
- "If you encounter any issues 🐛, please report 🚨 them here:\n\n"
24
- "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md/issues/new/choose"
25
- " 🔗\n\n",
26
- )
27
-
28
- user = UserConfigs()
29
-
30
- user.prompt()
31
-
32
- print("\n\nAnd we're off! 🚀🚀🚀\n")
33
-
34
- user.set_model_configs()
35
-
36
- print("Loading data 📂 ...\n")
37
-
38
- entire_collection = ConversationSet.from_zip(user.configs["zip_filepath"])
39
-
40
- bkmrklet_json = latest_bookmarklet_json()
41
- if bkmrklet_json:
42
- print("Found bookmarklet download, loading 📂 ...\n")
43
- bkmrklet_collection = ConversationSet.from_json(bkmrklet_json)
44
- entire_collection.update(bkmrklet_collection)
45
-
46
- output_folder = Path(user.configs["output_folder"])
47
-
48
- # overwrite the output folder if it already exists (might change this in the future)
49
- if output_folder.exists() and output_folder.is_dir():
50
- rmtree(output_folder)
51
4
 
52
- output_folder.mkdir(parents=True, exist_ok=True)
5
+ import typer
6
+ from rich.console import Console
53
7
 
54
- markdown_folder = output_folder / "Markdown"
8
+ from convoviz.config import get_default_config
9
+ from convoviz.exceptions import ConfigurationError, InvalidZipError
10
+ from convoviz.interactive import run_interactive_config
11
+ from convoviz.io.loaders import find_latest_zip, validate_zip
12
+ from convoviz.pipeline import run_pipeline
13
+ from convoviz.utils import default_font_path
55
14
 
56
- entire_collection.save(markdown_folder, progress_bar=True)
57
-
58
- print(f"\nDone ! Check the output 📄 here : {markdown_folder.as_uri()} 🔗\n")
59
-
60
- graph_folder = output_folder / "Graphs"
61
- graph_folder.mkdir(parents=True, exist_ok=True)
62
-
63
- generate_week_barplots(
64
- entire_collection,
65
- graph_folder,
66
- **user.configs["graph"],
67
- progress_bar=True,
68
- )
69
-
70
- print(f"\nDone ! Check the output 📈 here : {graph_folder.as_uri()} 🔗\n")
71
- print("(more graphs 📈 will be added in the future ...)\n")
72
-
73
- wordcloud_folder = output_folder / "Word Clouds"
74
- wordcloud_folder.mkdir(parents=True, exist_ok=True)
75
-
76
- generate_wordclouds(
77
- entire_collection,
78
- wordcloud_folder,
79
- **user.configs["wordcloud"],
80
- progress_bar=True,
81
- )
82
-
83
- print(f"\nDone ✅ ! Check the output 🔡☁️ here : {wordcloud_folder.as_uri()} 🔗\n")
84
-
85
- print("Writing custom instructions 📝 ...\n")
86
-
87
- cstm_inst_filepath = output_folder / "custom_instructions.json"
88
-
89
- entire_collection.save_custom_instructions(cstm_inst_filepath)
90
-
91
- print(f"\nDone ✅ ! Check the output 📝 here : {cstm_inst_filepath.as_uri()} 🔗\n")
92
-
93
- print(
94
- "ALL DONE 🎉🎉🎉 !\n\n"
95
- f"Explore the full gallery 🖼️ at: {output_folder.as_uri()} 🔗\n\n"
96
- "I hope you enjoy the outcome 🤞.\n\n"
97
- "If you appreciate it, kindly give the project a star 🌟 on GitHub :\n\n"
98
- "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n",
99
- )
15
# Root Typer application; the shell-completion install options are switched off.
app = typer.Typer(
    help="ChatGPT Data Visualizer 📊 - Convert and visualize your ChatGPT history",
    add_completion=False,
)

# Shared Rich console through which the CLI prints all of its messages.
console = Console()
20
+
21
+
22
@app.callback(invoke_without_command=True)
def run(
    ctx: typer.Context,
    zip_path: Path | None = typer.Option(
        None,
        "--zip",
        "-z",
        help="Path to the ChatGPT export zip file.",
        exists=True,
        file_okay=True,
        dir_okay=False,
    ),
    output_dir: Path | None = typer.Option(
        None,
        "--output",
        "-o",
        help="Path to the output directory.",
    ),
    interactive: bool | None = typer.Option(
        None,
        "--interactive/--no-interactive",
        "-i/-I",
        help="Force interactive mode on or off.",
    ),
) -> None:
    """Convert ChatGPT export data to markdown and generate visualizations."""
    # Flow: build a default config, overlay the CLI options, then either prompt
    # the user (interactive) or resolve and validate the zip ourselves, and
    # finally hand the config to the pipeline. Every failure path exits with
    # status 1; cancelling the interactive prompts exits with status 0.

    # A subcommand was requested: let it run instead of the default action.
    if ctx.invoked_subcommand is not None:
        return

    config = get_default_config()
    if zip_path:
        config.zip_filepath = zip_path
    if output_dir:
        config.output_folder = output_dir

    # An explicit --interactive/--no-interactive wins; otherwise prompt only
    # when no zip file was given on the command line.
    if interactive is None:
        use_interactive = zip_path is None
    else:
        use_interactive = interactive

    if not use_interactive:
        if not config.zip_filepath:
            # No zip supplied: fall back to the most recent export we can find.
            latest = find_latest_zip()
            if not latest:
                console.print(
                    "[bold red]Error:[/bold red] No zip file provided and none found in Downloads."
                )
                raise typer.Exit(code=1)
            console.print(f"No zip file specified, using latest found: {latest}")
            config.zip_filepath = latest

        if not validate_zip(config.zip_filepath):
            console.print(f"[bold red]Error:[/bold red] Invalid zip file: {config.zip_filepath}")
            raise typer.Exit(code=1)
    else:
        console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
        try:
            config = run_interactive_config(config)
        except KeyboardInterrupt:
            console.print("\n[yellow]Cancelled by user.[/yellow]")
            raise typer.Exit(code=0) from None

    # The word cloud step needs a font; use the bundled default if none was chosen.
    if not config.wordcloud.font_path:
        config.wordcloud.font_path = default_font_path()

    try:
        run_pipeline(config)
    except (InvalidZipError, ConfigurationError) as exc:
        # Expected, user-correctable problems.
        console.print(f"[bold red]Error:[/bold red] {exc}")
        raise typer.Exit(code=1) from None
    except Exception as exc:
        # Top-level boundary: report anything else without a traceback.
        console.print(f"[bold red]Unexpected error:[/bold red] {exc}")
        raise typer.Exit(code=1) from None
102
+
103
+
104
def main_entry() -> None:
    """Invoke the Typer application.

    Thin wrapper around ``app`` so that callers (such as ``python -m convoviz``)
    have a plain function to call.
    """
    app()
convoviz/config.py ADDED
@@ -0,0 +1,88 @@
1
+ """Configuration models using Pydantic v2."""
2
+
3
+ from pathlib import Path
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class AuthorHeaders(BaseModel):
    """Headers for different message authors in markdown output."""

    # One markdown heading per author role; the attribute name is the role.
    system: str = "### System"
    user: str = "# Me"
    assistant: str = "# ChatGPT"
    tool: str = "### Tool output"
16
+
17
+
18
class MarkdownConfig(BaseModel):
    """Configuration for markdown output."""

    # Which LaTeX delimiter style to use; only these two values are accepted.
    # NOTE(review): "dollars" presumably means $...$ / $$...$$ -- confirm in the renderer.
    latex_delimiters: Literal["default", "dollars"] = "default"
22
+
23
+
24
class YAMLConfig(BaseModel):
    """Configuration for YAML frontmatter in markdown files."""

    # One boolean switch per frontmatter entry; everything except ``tags`` is on
    # by default.
    # NOTE(review): presumably each flag decides whether the same-named key is
    # written to the frontmatter -- confirm in the renderer.
    title: bool = True
    tags: bool = False
    chat_link: bool = True
    create_time: bool = True
    update_time: bool = True
    model: bool = True
    used_plugins: bool = True
    message_count: bool = True
    content_types: bool = True
    custom_instructions: bool = True
37
+
38
+
39
class ConversationConfig(BaseModel):
    """Configuration for conversation rendering."""

    # ``default_factory`` gives every instance its own nested settings objects.
    markdown: MarkdownConfig = Field(default_factory=MarkdownConfig)
    yaml: YAMLConfig = Field(default_factory=YAMLConfig)
44
+
45
+
46
class MessageConfig(BaseModel):
    """Configuration for message rendering."""

    # Per-role markdown headings; a fresh ``AuthorHeaders`` is built per instance.
    author_headers: AuthorHeaders = Field(default_factory=AuthorHeaders)
50
+
51
+
52
class WordCloudConfig(BaseModel):
    """Configuration for word cloud generation."""

    # Font file used for rendering; ``None`` means "not chosen yet".
    font_path: Path | None = None
    # Name of the Matplotlib colormap used to colour the words.
    colormap: str = "magma"
    # Extra stopwords as one comma-separated string (blank entries are allowed).
    custom_stopwords: str = "use, file, "
    # Background colour; ``None`` is passed to WordCloud unchanged.
    background_color: str | None = None
    # Image mode handed to WordCloud.
    mode: Literal["RGB", "RGBA"] = "RGBA"
    # Whether numbers may appear as words in the cloud.
    include_numbers: bool = False
    # Output image size in pixels.
    width: int = 1000
    height: int = 1000
63
+
64
+
65
class GraphConfig(BaseModel):
    """Configuration for graph generation."""

    # Intentionally has no fields yet: a placeholder so graph options can be
    # added later without changing function signatures.
70
+
71
+
72
class ConvovizConfig(BaseModel):
    """Main configuration for convoviz."""

    # Make Pydantic validate default values as well as supplied ones.
    model_config = {"validate_default": True}

    # Export archive to read; ``None`` until one is chosen or discovered.
    zip_filepath: Path | None = None
    # Where all output is written; evaluated per instance so the home
    # directory is resolved at construction time, not at import time.
    output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT Data")

    # Nested settings sections, each created fresh for every config instance.
    message: MessageConfig = Field(default_factory=MessageConfig)
    conversation: ConversationConfig = Field(default_factory=ConversationConfig)
    wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
    graph: GraphConfig = Field(default_factory=GraphConfig)
83
+
84
+
85
# Factory for default configuration (a new object per call, not a shared instance)
def get_default_config() -> ConvovizConfig:
    """Build and return a new ``ConvovizConfig`` populated with defaults.

    Each call creates an independent instance, so callers may mutate the
    result freely without affecting one another.
    """
    return ConvovizConfig()
convoviz/exceptions.py ADDED
@@ -0,0 +1,47 @@
1
+ """Custom exceptions for convoviz."""
2
+
3
+
4
class ConvovizError(Exception):
    """Root of the convoviz exception hierarchy.

    Catch this to handle any error raised by the package's own exception types.
    """
6
+
7
+
8
class InvalidZipError(ConvovizError):
    """Signal that a ZIP archive is unusable, e.g. it lacks conversations.json.

    Attributes:
        path: Location of the offending archive, as given by the caller.
        reason: Short explanation of what is wrong with it.
    """

    def __init__(self, path: str, reason: str = "missing conversations.json") -> None:
        super().__init__(f"Invalid ZIP file '{path}': {reason}")
        self.path = path
        self.reason = reason
15
+
16
+
17
class ConfigurationError(ConvovizError):
    """Signal a problem with the configuration.

    Attributes:
        field: Name of the offending configuration field, or ``None`` when the
            error is not tied to a single field.
    """

    def __init__(self, message: str, field: str | None = None) -> None:
        super().__init__(message)
        self.field = field
23
+
24
+
25
class RenderingError(ConvovizError):
    """Signal that rendering failed.

    Attributes:
        conversation_id: Identifier of the conversation being rendered, or
            ``None`` when it is not known.
    """

    def __init__(self, message: str, conversation_id: str | None = None) -> None:
        super().__init__(message)
        self.conversation_id = conversation_id
31
+
32
+
33
class MessageContentError(ConvovizError):
    """Signal that no usable content could be extracted from a message.

    Attributes:
        message_id: Identifier of the message whose content was missing.
    """

    def __init__(self, message_id: str) -> None:
        super().__init__(f"No valid content found in message: {message_id}")
        self.message_id = message_id
39
+
40
+
41
class FileNotFoundError(ConvovizError):
    """Signal that a required file is missing.

    NOTE: this class shares its name with the builtin ``FileNotFoundError`` and
    shadows it inside this module. It derives from ``ConvovizError``, not
    ``OSError``, so an ``except`` clause written for the builtin will not catch
    it (and vice versa). Import it under an explicit alias where both are needed.

    Attributes:
        path: Location that was looked up.
        file_type: Human-readable kind of file, used at the start of the message.
    """

    def __init__(self, path: str, file_type: str = "file") -> None:
        super().__init__(f"{file_type.capitalize()} not found: {path}")
        self.path = path
        self.file_type = file_type