convoviz 0.2.12__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
convoviz/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Convoviz - ChatGPT data visualization and export tool."""
2
2
 
3
- from convoviz import analysis, config, io, models, renderers, utils
3
+ from convoviz import config, io, models, renderers, utils
4
4
  from convoviz.config import ConvovizConfig, get_default_config
5
5
  from convoviz.models import Conversation, ConversationCollection, Message, Node
6
6
  from convoviz.pipeline import run_pipeline
@@ -23,3 +23,12 @@ __all__ = [
23
23
  "get_default_config",
24
24
  "run_pipeline",
25
25
  ]
26
+
27
+
28
+ def __getattr__(name: str):
29
+ """Lazy import for optional submodules like analysis."""
30
+ if name == "analysis":
31
+ from convoviz import analysis
32
+
33
+ return analysis
34
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
convoviz/analysis/__init__.py CHANGED
@@ -1,9 +1,22 @@
1
- """Data analysis and visualization for convoviz."""
1
+ """Data analysis and visualization for convoviz.
2
2
 
3
- from convoviz.analysis.graphs import generate_week_barplot
4
- from convoviz.analysis.wordcloud import generate_wordcloud
3
+ Requires the [viz] extra: uv tool install "convoviz[viz]"
4
+ """
5
5
 
6
6
  __all__ = [
7
7
  "generate_week_barplot",
8
8
  "generate_wordcloud",
9
9
  ]
10
+
11
+
12
+ def __getattr__(name: str):
13
+ """Lazy import for visualization functions requiring optional dependencies."""
14
+ if name == "generate_week_barplot":
15
+ from convoviz.analysis.graphs import generate_week_barplot
16
+
17
+ return generate_week_barplot
18
+ if name == "generate_wordcloud":
19
+ from convoviz.analysis.wordcloud import generate_wordcloud
20
+
21
+ return generate_wordcloud
22
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
convoviz/analysis/graphs.py CHANGED
@@ -8,6 +8,7 @@ Goals:
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import logging
11
12
  from collections import defaultdict
12
13
  from collections.abc import Callable, Iterable
13
14
  from datetime import UTC, datetime
@@ -25,6 +26,8 @@ from convoviz.config import GraphConfig, get_default_config
25
26
  from convoviz.models import ConversationCollection
26
27
  from convoviz.utils import get_asset_path
27
28
 
29
+ logger = logging.getLogger(__name__)
30
+
28
31
  WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
29
32
 
30
33
 
@@ -347,7 +350,9 @@ def generate_length_histogram(
347
350
  color="#cf222e",
348
351
  )
349
352
 
350
- ax.set_title("Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14)
353
+ ax.set_title(
354
+ "Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14
355
+ )
351
356
  ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
352
357
  ax.set_ylabel("Conversations", fontproperties=font_prop)
353
358
  ax.set_xlim(left=0, right=cap)
@@ -619,7 +624,9 @@ def generate_summary_dashboard(
619
624
  locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
620
625
  ax_ts.xaxis.set_major_locator(locator)
621
626
  ax_ts.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
622
- ax_ts.set_title("Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10)
627
+ ax_ts.set_title(
628
+ "Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10
629
+ )
623
630
  ax_ts.set_ylabel("User prompts", fontproperties=font_prop)
624
631
  ax_ts.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
625
632
  _apply_tick_font(ax_ts, font_prop)
@@ -738,14 +745,31 @@ def generate_summary_graphs(
738
745
  cfg = config or get_default_config().graph
739
746
 
740
747
  user_ts = collection.timestamps("user")
748
+ logger.info(f"Generating summary graphs to {output_dir}")
741
749
 
742
750
  tasks: list[tuple[str, str, Callable[[], Figure]]] = [
743
751
  ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
744
- ("Activity heatmap", "activity_heatmap.png", lambda: generate_activity_heatmap(collection, cfg)),
745
- ("Daily activity", "daily_activity.png", lambda: generate_daily_activity_lineplot(collection, cfg)),
746
- ("Monthly activity", "monthly_activity.png", lambda: generate_monthly_activity_barplot(collection, cfg)),
752
+ (
753
+ "Activity heatmap",
754
+ "activity_heatmap.png",
755
+ lambda: generate_activity_heatmap(collection, cfg),
756
+ ),
757
+ (
758
+ "Daily activity",
759
+ "daily_activity.png",
760
+ lambda: generate_daily_activity_lineplot(collection, cfg),
761
+ ),
762
+ (
763
+ "Monthly activity",
764
+ "monthly_activity.png",
765
+ lambda: generate_monthly_activity_barplot(collection, cfg),
766
+ ),
747
767
  ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
748
- ("Conversation lengths", "conversation_lengths.png", lambda: generate_length_histogram(collection, cfg)),
768
+ (
769
+ "Conversation lengths",
770
+ "conversation_lengths.png",
771
+ lambda: generate_length_histogram(collection, cfg),
772
+ ),
749
773
  (
750
774
  "Conversation lifetimes",
751
775
  "conversation_lifetimes.png",
convoviz/analysis/wordcloud.py CHANGED
@@ -1,5 +1,8 @@
1
1
  """Word cloud generation for conversation text."""
2
2
 
3
+ import logging
4
+ import os
5
+ from concurrent.futures import ProcessPoolExecutor
3
6
  from functools import lru_cache
4
7
  from pathlib import Path
5
8
 
@@ -13,6 +16,8 @@ from wordcloud import WordCloud
13
16
  from convoviz.config import WordCloudConfig
14
17
  from convoviz.models import ConversationCollection
15
18
 
19
+ logger = logging.getLogger(__name__)
20
+
16
21
  # Languages for stopwords
17
22
  STOPWORD_LANGUAGES = [
18
23
  "arabic",
@@ -110,6 +115,25 @@ def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
110
115
  return result
111
116
 
112
117
 
118
+ def _generate_and_save_wordcloud(args: tuple[str, str, Path, WordCloudConfig]) -> bool:
119
+ """Worker function for parallel wordcloud generation.
120
+
121
+ Must be at module level for pickling by ProcessPoolExecutor.
122
+
123
+ Args:
124
+ args: Tuple of (text, filename, output_dir, config)
125
+
126
+ Returns:
127
+ True if wordcloud was generated, False if skipped (empty text)
128
+ """
129
+ text, filename, output_dir, config = args
130
+ if not text.strip():
131
+ return False
132
+ img = generate_wordcloud(text, config)
133
+ img.save(output_dir / filename, optimize=True)
134
+ return True
135
+
136
+
113
137
  def generate_wordclouds(
114
138
  collection: ConversationCollection,
115
139
  output_dir: Path,
@@ -119,6 +143,8 @@ def generate_wordclouds(
119
143
  ) -> None:
120
144
  """Generate word clouds for weekly, monthly, and yearly groupings.
121
145
 
146
+ Uses parallel processing to speed up generation on multi-core systems.
147
+
122
148
  Args:
123
149
  collection: Collection of conversations
124
150
  output_dir: Directory to save the word clouds
@@ -126,40 +152,53 @@ def generate_wordclouds(
126
152
  progress_bar: Whether to show progress bars
127
153
  """
128
154
  output_dir.mkdir(parents=True, exist_ok=True)
155
+ logger.info(f"Generating wordclouds to {output_dir}")
129
156
 
130
157
  week_groups = collection.group_by_week()
131
158
  month_groups = collection.group_by_month()
132
159
  year_groups = collection.group_by_year()
133
160
 
134
- for week, group in tqdm(
135
- week_groups.items(),
136
- desc="Creating weekly wordclouds 🔡☁️",
137
- disable=not progress_bar,
138
- ):
161
+ # Pre-load/download NLTK stopwords in the main process to avoid race conditions in workers
162
+ load_nltk_stopwords()
163
+
164
+ # Build list of all tasks: (text, filename, output_dir, config)
165
+ tasks: list[tuple[str, str, Path, WordCloudConfig]] = []
166
+
167
+ for week, group in week_groups.items():
139
168
  text = group.plaintext("user", "assistant")
140
- if text.strip():
141
- img = generate_wordcloud(text, config)
142
- # Format: 2024-W15.png (ISO week format)
143
- img.save(output_dir / f"{week.strftime('%Y-W%W')}.png", optimize=True)
144
-
145
- for month, group in tqdm(
146
- month_groups.items(),
147
- desc="Creating monthly wordclouds 🔡☁️",
148
- disable=not progress_bar,
149
- ):
169
+ # Format: 2024-W15.png (ISO week format)
170
+ filename = f"{week.strftime('%Y-W%W')}.png"
171
+ tasks.append((text, filename, output_dir, config))
172
+
173
+ for month, group in month_groups.items():
150
174
  text = group.plaintext("user", "assistant")
151
- if text.strip():
152
- img = generate_wordcloud(text, config)
153
- # Format: 2024-03-March.png (consistent with folder naming)
154
- img.save(output_dir / f"{month.strftime('%Y-%m-%B')}.png", optimize=True)
155
-
156
- for year, group in tqdm(
157
- year_groups.items(),
158
- desc="Creating yearly wordclouds 🔡☁️",
159
- disable=not progress_bar,
160
- ):
175
+ # Format: 2024-03-March.png (consistent with folder naming)
176
+ filename = f"{month.strftime('%Y-%m-%B')}.png"
177
+ tasks.append((text, filename, output_dir, config))
178
+
179
+ for year, group in year_groups.items():
161
180
  text = group.plaintext("user", "assistant")
162
- if text.strip():
163
- img = generate_wordcloud(text, config)
164
- # Format: 2024.png
165
- img.save(output_dir / f"{year.strftime('%Y')}.png", optimize=True)
181
+ # Format: 2024.png
182
+ filename = f"{year.strftime('%Y')}.png"
183
+ tasks.append((text, filename, output_dir, config))
184
+
185
+ if not tasks:
186
+ return
187
+
188
+ # Determine worker count: use config if set, otherwise half CPU count (min 1)
189
+ max_workers = config.max_workers
190
+ if max_workers is None:
191
+ cpu_count = os.cpu_count() or 2
192
+ max_workers = max(1, cpu_count // 2)
193
+
194
+ # Use parallel processing for speedup on multi-core systems
195
+ logger.debug(f"Starting wordcloud generation with {max_workers} workers for {len(tasks)} tasks")
196
+ with ProcessPoolExecutor(max_workers=max_workers) as executor:
197
+ list(
198
+ tqdm(
199
+ executor.map(_generate_and_save_wordcloud, tasks),
200
+ total=len(tasks),
201
+ desc="Creating wordclouds 🔡☁️",
202
+ disable=not progress_bar,
203
+ )
204
+ )
convoviz/cli.py CHANGED
@@ -1,14 +1,18 @@
1
1
  """Command-line interface for convoviz."""
2
2
 
3
+ import logging
4
+ from importlib.metadata import version as get_version
3
5
  from pathlib import Path
4
6
 
5
7
  import typer
6
8
  from rich.console import Console
9
+ from rich.markup import escape
7
10
 
8
- from convoviz.config import FolderOrganization, get_default_config
11
+ from convoviz.config import FolderOrganization, OutputKind, get_default_config
9
12
  from convoviz.exceptions import ConfigurationError, InvalidZipError
10
13
  from convoviz.interactive import run_interactive_config
11
14
  from convoviz.io.loaders import find_latest_zip
15
+ from convoviz.logging_config import setup_logging
12
16
  from convoviz.pipeline import run_pipeline
13
17
  from convoviz.utils import default_font_path
14
18
 
@@ -19,6 +23,13 @@ app = typer.Typer(
19
23
  console = Console()
20
24
 
21
25
 
26
+ def _version_callback(value: bool) -> None:
27
+ """Print version and exit."""
28
+ if value:
29
+ console.print(f"convoviz {get_version('convoviz')}")
30
+ raise typer.Exit()
31
+
32
+
22
33
  @app.callback(invoke_without_command=True)
23
34
  def run(
24
35
  ctx: typer.Context,
@@ -38,6 +49,12 @@ def run(
38
49
  "-o",
39
50
  help="Path to the output directory.",
40
51
  ),
52
+ outputs: list[OutputKind] | None = typer.Option(
53
+ None,
54
+ "--outputs",
55
+ help="Output types to generate (repeatable). Options: markdown, graphs, wordclouds. "
56
+ "If not specified, all outputs are generated.",
57
+ ),
41
58
  flat: bool = typer.Option(
42
59
  False,
43
60
  "--flat",
@@ -50,8 +67,34 @@ def run(
50
67
  "-i/-I",
51
68
  help="Force interactive mode on or off.",
52
69
  ),
70
+ verbose: int = typer.Option(
71
+ 0,
72
+ "--verbose",
73
+ "-v",
74
+ help="Increase verbosity. Use -vv for debug.",
75
+ count=True,
76
+ ),
77
+ log_file: Path | None = typer.Option(
78
+ None,
79
+ "--log-file",
80
+ help="Path to log file. Defaults to a temporary file.",
81
+ ),
82
+ _version: bool = typer.Option(
83
+ False,
84
+ "--version",
85
+ "-V",
86
+ help="Show version and exit.",
87
+ callback=_version_callback,
88
+ is_eager=True,
89
+ ),
53
90
  ) -> None:
54
91
  """Convert ChatGPT export data to markdown and generate visualizations."""
92
+ # Setup logging immediately
93
+ log_path = setup_logging(verbose, log_file)
94
+ logger = logging.getLogger("convoviz.cli")
95
+ console.print(f"[dim]Logging to: {log_path}[/dim]")
96
+ logger.debug(f"Logging initialized. Output: {log_path}")
97
+
55
98
  if ctx.invoked_subcommand is not None:
56
99
  return
57
100
 
@@ -63,6 +106,8 @@ def run(
63
106
  config.input_path = input_path
64
107
  if output_dir:
65
108
  config.output_folder = output_dir
109
+ if outputs:
110
+ config.outputs = set(outputs)
66
111
  if flat:
67
112
  config.folder_organization = FolderOrganization.FLAT
68
113
 
@@ -105,10 +150,13 @@ def run(
105
150
  try:
106
151
  run_pipeline(config)
107
152
  except (InvalidZipError, ConfigurationError) as e:
108
- console.print(f"[bold red]Error:[/bold red] {e}")
153
+ logger.error(f"Known error: {e}")
154
+ console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
109
155
  raise typer.Exit(code=1) from None
110
156
  except Exception as e:
111
- console.print(f"[bold red]Unexpected error:[/bold red] {e}")
157
+ logger.exception("Unexpected error occurred")
158
+ console.print(f"[bold red]Unexpected error:[/bold red] {escape(str(e))}")
159
+ console.print(f"[dim]See log file for details: {log_path}[/dim]")
112
160
  raise typer.Exit(code=1) from None
113
161
 
114
162
 
convoviz/config.py CHANGED
@@ -14,6 +14,18 @@ class FolderOrganization(str, Enum):
14
14
  DATE = "date" # Nested by year/month (default)
15
15
 
16
16
 
17
+ class OutputKind(str, Enum):
18
+ """Types of outputs that can be generated."""
19
+
20
+ MARKDOWN = "markdown" # Conversation markdown files
21
+ GRAPHS = "graphs" # Usage analytics graphs
22
+ WORDCLOUDS = "wordclouds" # Word cloud visualizations
23
+
24
+
25
+ # Default: generate all outputs
26
+ ALL_OUTPUTS: frozenset[OutputKind] = frozenset(OutputKind)
27
+
28
+
17
29
  class AuthorHeaders(BaseModel):
18
30
  """Headers for different message authors in markdown output."""
19
31
 
@@ -26,7 +38,7 @@ class AuthorHeaders(BaseModel):
26
38
  class MarkdownConfig(BaseModel):
27
39
  """Configuration for markdown output."""
28
40
 
29
- latex_delimiters: Literal["default", "dollars"] = "default"
41
+ latex_delimiters: Literal["default", "dollars"] = "dollars"
30
42
  flavor: Literal["standard", "obsidian"] = "standard"
31
43
 
32
44
 
@@ -39,10 +51,13 @@ class YAMLConfig(BaseModel):
39
51
  create_time: bool = True
40
52
  update_time: bool = True
41
53
  model: bool = True
42
- used_plugins: bool = True
54
+ used_plugins: bool = False
43
55
  message_count: bool = True
44
- content_types: bool = True
45
- custom_instructions: bool = True
56
+ content_types: bool = False
57
+ content_types: bool = False
58
+ custom_instructions: bool = False
59
+ is_starred: bool = False
60
+ voice: bool = False
46
61
 
47
62
 
48
63
  class ConversationConfig(BaseModel):
@@ -70,6 +85,7 @@ class WordCloudConfig(BaseModel):
70
85
  include_numbers: bool = False
71
86
  width: int = 600
72
87
  height: int = 600
88
+ max_workers: int | None = None # None = use half CPU count
73
89
 
74
90
 
75
91
  class GraphConfig(BaseModel):
@@ -92,6 +108,7 @@ class ConvovizConfig(BaseModel):
92
108
  input_path: Path | None = None
93
109
  output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
94
110
  folder_organization: FolderOrganization = FolderOrganization.DATE
111
+ outputs: set[OutputKind] = Field(default_factory=lambda: set(ALL_OUTPUTS))
95
112
  message: MessageConfig = Field(default_factory=MessageConfig)
96
113
  conversation: ConversationConfig = Field(default_factory=ConversationConfig)
97
114
  wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)