convoviz 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. convoviz/__init__.py +25 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +9 -0
  4. convoviz/analysis/graphs.py +855 -0
  5. convoviz/analysis/wordcloud.py +165 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +117 -0
  40. convoviz/config.py +106 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +247 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +98 -0
  45. convoviz/io/loaders.py +186 -0
  46. convoviz/io/writers.py +227 -0
  47. convoviz/models/__init__.py +24 -0
  48. convoviz/models/collection.py +115 -0
  49. convoviz/models/conversation.py +158 -0
  50. convoviz/models/message.py +218 -0
  51. convoviz/models/node.py +66 -0
  52. convoviz/pipeline.py +167 -0
  53. convoviz/py.typed +0 -0
  54. convoviz/renderers/__init__.py +10 -0
  55. convoviz/renderers/markdown.py +269 -0
  56. convoviz/renderers/yaml.py +119 -0
  57. convoviz/utils.py +155 -0
  58. convoviz-0.2.12.dist-info/METADATA +148 -0
  59. convoviz-0.2.12.dist-info/RECORD +61 -0
  60. convoviz-0.2.12.dist-info/WHEEL +4 -0
  61. convoviz-0.2.12.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,165 @@
1
+ """Word cloud generation for conversation text."""
2
+
3
+ from functools import lru_cache
4
+ from pathlib import Path
5
+
6
+ from nltk import download as nltk_download
7
+ from nltk.corpus import stopwords as nltk_stopwords
8
+ from nltk.data import find as nltk_find
9
+ from PIL.Image import Image
10
+ from tqdm import tqdm
11
+ from wordcloud import WordCloud
12
+
13
+ from convoviz.config import WordCloudConfig
14
+ from convoviz.models import ConversationCollection
15
+
16
# NLTK stopword corpora (identified by language name) that are merged into the
# default stopword set used for word clouds.
STOPWORD_LANGUAGES = ["arabic", "english", "french", "german", "spanish", "portuguese"]
25
+
26
+
27
@lru_cache(maxsize=1)
def load_programming_stopwords() -> frozenset[str]:
    """Read the bundled programming stopword list.

    The list lives in ``assets/stopwords.txt`` one directory above this module's
    package. Blank lines and lines starting with ``#`` are skipped; every other
    line is stripped and lower-cased. The result is cached after the first call.

    Returns:
        Frozen set of programming stop words, or an empty frozen set when the
        asset file does not exist.
    """
    asset_file = Path(__file__).parent.parent / "assets" / "stopwords.txt"
    if not asset_file.exists():
        return frozenset()

    collected: set[str] = set()
    with open(asset_file, encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            # Skip empty lines and comment lines.
            if entry and not entry.startswith("#"):
                collected.add(entry.lower())

    return frozenset(collected)
42
+
43
+
44
@lru_cache(maxsize=1)
def load_nltk_stopwords() -> frozenset[str]:
    """Collect NLTK stopwords for every language in ``STOPWORD_LANGUAGES``.

    If the ``corpora/stopwords`` resource cannot be located, it is downloaded
    quietly first. The combined set is cached after the first call.

    Returns:
        Frozen set of stopwords from multiple languages
    """
    try:
        nltk_find("corpora/stopwords")
    except LookupError:
        # Corpus not installed locally yet: fetch it without console output.
        nltk_download("stopwords", quiet=True)

    return frozenset(
        word for language in STOPWORD_LANGUAGES for word in nltk_stopwords.words(fileids=language)
    )
63
+
64
+
65
+ def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
66
+ """Parse a comma-separated string of custom stopwords.
67
+
68
+ Args:
69
+ stopwords_str: Comma-separated stopwords
70
+
71
+ Returns:
72
+ Set of lowercase, stripped stopwords
73
+ """
74
+ if not stopwords_str:
75
+ return set()
76
+
77
+ return {word.strip().lower() for word in stopwords_str.split(",") if word.strip()}
78
+
79
+
80
def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
    """Render ``text`` as a word cloud image.

    Args:
        text: The text to create a word cloud from
        config: Word cloud configuration (font, size, colors, stopword options)

    Returns:
        PIL Image of the word cloud
    """
    # Start from a mutable copy of the cached NLTK set, then add the user's
    # custom stopwords and, if enabled, the programming keyword list.
    excluded = set(load_nltk_stopwords())
    excluded |= parse_custom_stopwords(config.custom_stopwords)
    if config.exclude_programming_keywords:
        excluded |= load_programming_stopwords()

    font = str(config.font_path) if config.font_path else None
    cloud = WordCloud(
        font_path=font,
        width=config.width,
        height=config.height,
        stopwords=excluded,
        background_color=config.background_color,
        mode=config.mode,
        colormap=config.colormap,
        include_numbers=config.include_numbers,
    )
    cloud.generate(text)

    image: Image = cloud.to_image()
    return image
111
+
112
+
113
def _iso_week_label(week) -> str:
    """Return an ISO 8601 week label such as ``2024-W15`` for a week key."""
    # strftime's %W is NOT the ISO week: it counts from the first Monday of the
    # calendar year (yielding week 00 and off-by-one labels in many years).
    # isocalendar() gives the real ISO year/week pair.
    # NOTE(review): assumes the group keys are date/datetime-like objects that
    # expose isocalendar() -- confirm against ConversationCollection.group_by_week.
    iso_year, iso_week, _ = week.isocalendar()
    return f"{iso_year}-W{iso_week:02d}"


def _save_group_wordclouds(
    groups,
    label_for,
    description: str,
    output_dir: Path,
    config: WordCloudConfig,
    progress_bar: bool,
) -> None:
    """Render one PNG per group with non-blank text, named via ``label_for(key)``."""
    for period, group in tqdm(groups.items(), desc=description, disable=not progress_bar):
        text = group.plaintext("user", "assistant")
        if not text.strip():
            # Nothing to draw for this period.
            continue
        image = generate_wordcloud(text, config)
        image.save(output_dir / f"{label_for(period)}.png", optimize=True)


def generate_wordclouds(
    collection: ConversationCollection,
    output_dir: Path,
    config: WordCloudConfig,
    *,
    progress_bar: bool = False,
) -> None:
    """Generate word clouds for weekly, monthly, and yearly groupings.

    Each group's user and assistant plaintext is rendered with
    ``generate_wordcloud`` and saved as a PNG in ``output_dir``. Groups whose
    text is blank are skipped.

    File names:
        * weekly: ``2024-W15.png`` (ISO 8601 year and week number)
        * monthly: ``2024-03-March.png``
        * yearly: ``2024.png``

    Args:
        collection: Collection of conversations
        output_dir: Directory to save the word clouds (created if missing)
        config: Word cloud configuration
        progress_bar: Whether to show progress bars
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    week_groups = collection.group_by_week()
    month_groups = collection.group_by_month()
    year_groups = collection.group_by_year()

    _save_group_wordclouds(
        week_groups,
        _iso_week_label,
        "Creating weekly wordclouds 🔡☁️",
        output_dir,
        config,
        progress_bar,
    )
    _save_group_wordclouds(
        month_groups,
        lambda month: month.strftime("%Y-%m-%B"),
        "Creating monthly wordclouds 🔡☁️",
        output_dir,
        config,
        progress_bar,
    )
    _save_group_wordclouds(
        year_groups,
        lambda year: year.strftime("%Y"),
        "Creating yearly wordclouds 🔡☁️",
        output_dir,
        config,
        progress_bar,
    )
@@ -0,0 +1,15 @@
1
+ viridis
2
+ plasma
3
+ inferno
4
+ magma
5
+ cividis
6
+ Blues
7
+ Greens
8
+ YlGnBu
9
+ YlOrRd
10
+ RdYlBu
11
+ Spectral
12
+ coolwarm
13
+ terrain
14
+ ocean
15
+ flag
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1 @@
1
+
convoviz/cli.py ADDED
@@ -0,0 +1,117 @@
1
+ """Command-line interface for convoviz."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+ from rich.console import Console
7
+
8
+ from convoviz.config import FolderOrganization, get_default_config
9
+ from convoviz.exceptions import ConfigurationError, InvalidZipError
10
+ from convoviz.interactive import run_interactive_config
11
+ from convoviz.io.loaders import find_latest_zip
12
+ from convoviz.pipeline import run_pipeline
13
+ from convoviz.utils import default_font_path
14
+
15
# Root Typer application; shell-completion installation options are disabled.
app = typer.Typer(
    help="ChatGPT Data Visualizer 📊 - Convert and visualize your ChatGPT history",
    add_completion=False,
)

# Shared Rich console used for all CLI output in this module.
console = Console()
20
+
21
+
22
@app.callback(invoke_without_command=True)
def run(
    ctx: typer.Context,
    input_path: Path | None = typer.Option(
        None,
        "--input",
        "--zip",
        "-z",
        help="Path to the ChatGPT export zip file, JSON file, or extracted directory.",
        exists=True,
        file_okay=True,
        dir_okay=True,
    ),
    output_dir: Path | None = typer.Option(
        None,
        "--output",
        "-o",
        help="Path to the output directory.",
    ),
    flat: bool = typer.Option(
        False,
        "--flat",
        "-f",
        help="Put all markdown files in a single folder (disables date organization).",
    ),
    interactive: bool | None = typer.Option(
        None,
        "--interactive/--no-interactive",
        "-i/-I",
        help="Force interactive mode on or off.",
    ),
) -> None:
    """Convert ChatGPT export data to markdown and generate visualizations."""
    # Flow: build the default config, apply CLI overrides, optionally run the
    # interactive prompts, validate the input path, then run the pipeline.
    # Every failure is reported on the console and exits with status 1
    # (a user cancel during the prompts exits with status 0).

    # Local import so the block stays self-contained. escape() keeps dynamic
    # text (paths, exception messages) from being parsed as Rich markup, which
    # would otherwise restyle the output or raise MarkupError on text that
    # looks like a closing tag.
    from rich.markup import escape

    if ctx.invoked_subcommand is not None:
        return

    # Start with default config
    config = get_default_config()

    # Override with CLI args
    if input_path:
        config.input_path = input_path
    if output_dir:
        config.output_folder = output_dir
    if flat:
        config.folder_organization = FolderOrganization.FLAT

    # Determine mode: interactive if explicitly requested or no input provided
    use_interactive = interactive if interactive is not None else (input_path is None)

    if use_interactive:
        console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
        try:
            config = run_interactive_config(config)
        except KeyboardInterrupt:
            console.print("\n[yellow]Cancelled by user.[/yellow]")
            raise typer.Exit(code=0) from None
    else:
        # Non-interactive mode: validate we have what we need
        if not config.input_path:
            # Try to find a default
            latest = find_latest_zip()
            if latest:
                console.print(
                    f"No input specified, using latest zip found: {escape(str(latest))}"
                )
                config.input_path = latest
            else:
                console.print(
                    "[bold red]Error:[/bold red] No input file provided and none found in Downloads."
                )
                raise typer.Exit(code=1)

    # Validate the input (basic check). The field is Optional, so guard against
    # None before calling .exists() instead of failing with AttributeError.
    if config.input_path is None:
        console.print("[bold red]Error:[/bold red] No input path was provided.")
        raise typer.Exit(code=1)
    if not config.input_path.exists():
        console.print(
            "[bold red]Error:[/bold red] Input path does not exist: "
            f"{escape(str(config.input_path))}"
        )
        raise typer.Exit(code=1)

    # Set default font if not set
    if not config.wordcloud.font_path:
        config.wordcloud.font_path = default_font_path()

    # Run the pipeline
    try:
        run_pipeline(config)
    except (InvalidZipError, ConfigurationError) as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(code=1) from None
    except Exception as e:
        # Top-level CLI boundary: report anything unexpected and exit non-zero.
        console.print(f"[bold red]Unexpected error:[/bold red] {escape(str(e))}")
        raise typer.Exit(code=1) from None
113
+
114
+
115
def main_entry() -> None:
    """Invoke the Typer ``app``; serves as the entry point for the CLI."""
    app()
convoviz/config.py ADDED
@@ -0,0 +1,106 @@
1
+ """Configuration models using Pydantic v2."""
2
+
3
+ from enum import Enum
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
class FolderOrganization(str, Enum):
    """Layout strategy for the markdown files written to the output folder."""

    # Every file goes into one directory.
    FLAT = "flat"
    # Files are nested by year/month (the default layout).
    DATE = "date"
15
+
16
+
17
class AuthorHeaders(BaseModel):
    """Markdown heading text emitted for each kind of message author."""

    # One heading string per author role; defaults are markdown headings.
    system: str = "### System"
    user: str = "# Me"
    assistant: str = "# ChatGPT"
    tool: str = "### Tool output"
24
+
25
+
26
class MarkdownConfig(BaseModel):
    """Options controlling the generated markdown."""

    # Which LaTeX delimiter style to emit: "default" or "dollars".
    latex_delimiters: Literal["default", "dollars"] = "default"
    # Target markdown dialect: "standard" or "obsidian".
    flavor: Literal["standard", "obsidian"] = "standard"
31
+
32
+
33
class YAMLConfig(BaseModel):
    """Switches for the YAML frontmatter of markdown files."""

    # Presumably each flag toggles the like-named frontmatter entry -- verify
    # against the YAML renderer. Everything is on by default except ``tags``.
    title: bool = True
    tags: bool = False
    chat_link: bool = True
    create_time: bool = True
    update_time: bool = True
    model: bool = True
    used_plugins: bool = True
    message_count: bool = True
    content_types: bool = True
    custom_instructions: bool = True
46
+
47
+
48
class ConversationConfig(BaseModel):
    """Rendering options that apply to a whole conversation."""

    # Each nested section is built fresh from its own defaults.
    markdown: MarkdownConfig = Field(default_factory=MarkdownConfig)
    yaml: YAMLConfig = Field(default_factory=YAMLConfig)
53
+
54
+
55
class MessageConfig(BaseModel):
    """Rendering options that apply to individual messages."""

    # Heading text per author role; built fresh from AuthorHeaders defaults.
    author_headers: AuthorHeaders = Field(default_factory=AuthorHeaders)
59
+
60
+
61
class WordCloudConfig(BaseModel):
    """Settings for word cloud generation."""

    # Font file for rendering; None means no font has been chosen yet.
    font_path: Path | None = None
    # Name of the colormap used to color the words.
    colormap: str = "RdYlBu"
    # Extra stopwords as a single comma-separated string.
    custom_stopwords: str = "use, file, "
    # When True, the bundled programming keyword list is also excluded.
    exclude_programming_keywords: bool = True
    background_color: str | None = None
    # Image mode of the rendered cloud.
    mode: Literal["RGB", "RGBA"] = "RGBA"
    include_numbers: bool = False
    # Output image dimensions.
    width: int = 600
    height: int = 600
73
+
74
+
75
class GraphConfig(BaseModel):
    """Settings for graph generation."""

    # Appearance options.
    color: str = "#4A90E2"
    grid: bool = True
    show_counts: bool = True
    font_name: str = "Montserrat-Regular.ttf"
    figsize: tuple[int, int] = (10, 6)
    dpi: int = 300
    # Either "utc" or "local"; defaults to "local".
    timezone: Literal["utc", "local"] = "local"
    # Optional per-month / per-year breakdown graphs, off by default.
    generate_monthly_breakdowns: bool = False
    generate_yearly_breakdowns: bool = False
87
+
88
+
89
class ConvovizConfig(BaseModel):
    """Top-level convoviz configuration that groups every sub-section."""

    # Location of the export to read; None until one is provided.
    input_path: Path | None = None
    # Defaults to ~/Documents/ChatGPT-Data, resolved when the model is created.
    output_folder: Path = Field(
        default_factory=lambda: Path.home().joinpath("Documents", "ChatGPT-Data")
    )
    folder_organization: FolderOrganization = FolderOrganization.DATE
    # Nested sections, each instantiated from its own defaults.
    message: MessageConfig = Field(default_factory=MessageConfig)
    conversation: ConversationConfig = Field(default_factory=ConversationConfig)
    wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
    graph: GraphConfig = Field(default_factory=GraphConfig)

    # Pydantic setting: run validation on default values as well.
    model_config = {"validate_default": True}
101
+
102
+
103
# Factory for the default configuration
def get_default_config() -> ConvovizConfig:
    """Build and return a new ``ConvovizConfig`` populated with defaults."""
    return ConvovizConfig()
convoviz/exceptions.py ADDED
@@ -0,0 +1,47 @@
1
+ """Custom exceptions for convoviz."""
2
+
3
+
4
class ConvovizError(Exception):
    """Root of the convoviz exception hierarchy."""
6
+
7
+
8
class InvalidZipError(ConvovizError):
    """Signals a ZIP file that is invalid or lacks conversations.json.

    Attributes:
        path: The offending ZIP path as given by the caller
        reason: Short explanation included in the error message
    """

    def __init__(self, path: str, reason: str = "missing conversations.json") -> None:
        message = f"Invalid ZIP file '{path}': {reason}"
        super().__init__(message)
        self.path = path
        self.reason = reason
15
+
16
+
17
+ class ConfigurationError(ConvovizError):
18
+ """Raised for configuration-related errors."""
19
+
20
+ def __init__(self, message: str, field: str | None = None) -> None:
21
+ self.field = field
22
+ super().__init__(message)
23
+
24
+
25
+ class RenderingError(ConvovizError):
26
+ """Raised when rendering fails."""
27
+
28
+ def __init__(self, message: str, conversation_id: str | None = None) -> None:
29
+ self.conversation_id = conversation_id
30
+ super().__init__(message)
31
+
32
+
33
class MessageContentError(ConvovizError):
    """Signals that no content could be extracted from a message.

    Attributes:
        message_id: Identifier of the message without usable content
    """

    def __init__(self, message_id: str) -> None:
        description = f"No valid content found in message: {message_id}"
        super().__init__(description)
        self.message_id = message_id
39
+
40
+
41
# NOTE(review): this class shadows the builtin ``FileNotFoundError`` wherever it
# is imported by name; it derives from ConvovizError, not OSError.
class FileNotFoundError(ConvovizError):
    """Signals that a required file is missing.

    Attributes:
        path: Path of the missing file as given by the caller
        file_type: Human-readable kind of file, used to prefix the message
    """

    def __init__(self, path: str, file_type: str = "file") -> None:
        description = f"{file_type.capitalize()} not found: {path}"
        super().__init__(description)
        self.path = path
        self.file_type = file_type