convoviz 0.2.12__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/__init__.py +10 -1
- convoviz/analysis/__init__.py +16 -3
- convoviz/analysis/graphs.py +30 -6
- convoviz/analysis/wordcloud.py +68 -29
- convoviz/cli.py +51 -3
- convoviz/config.py +21 -4
- convoviz/interactive.py +123 -119
- convoviz/io/assets.py +12 -1
- convoviz/io/loaders.py +5 -0
- convoviz/io/writers.py +7 -3
- convoviz/logging_config.py +69 -0
- convoviz/models/conversation.py +18 -0
- convoviz/models/message.py +81 -5
- convoviz/pipeline.py +90 -73
- convoviz/renderers/markdown.py +96 -3
- convoviz/renderers/yaml.py +4 -0
- convoviz-0.4.7.dist-info/METADATA +233 -0
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/RECORD +20 -19
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/WHEEL +2 -2
- convoviz-0.2.12.dist-info/METADATA +0 -148
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/entry_points.txt +0 -0
convoviz/__init__.py
CHANGED
```diff
@@ -1,6 +1,6 @@
 """Convoviz - ChatGPT data visualization and export tool."""
 
-from convoviz import
+from convoviz import config, io, models, renderers, utils
 from convoviz.config import ConvovizConfig, get_default_config
 from convoviz.models import Conversation, ConversationCollection, Message, Node
 from convoviz.pipeline import run_pipeline
@@ -23,3 +23,12 @@ __all__ = [
     "get_default_config",
     "run_pipeline",
 ]
+
+
+def __getattr__(name: str):
+    """Lazy import for optional submodules like analysis."""
+    if name == "analysis":
+        from convoviz import analysis
+
+        return analysis
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```
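The new `__getattr__` hook is the module-level lazy-import mechanism from PEP 562: `import convoviz` no longer drags in the optional analysis stack, which is only imported on first attribute access. A minimal sketch of the same pattern, using hypothetical `mypkg`/`heavy` names rather than convoviz's real modules:

```python
# mypkg/__init__.py -- illustrative sketch of a PEP 562 lazy submodule import
"""Package whose optional submodule is imported on first attribute access."""

__all__ = ["cheap"]


def cheap() -> str:
    """Always available; needs no optional dependencies."""
    return "cheap"


def __getattr__(name: str):
    """Called only when normal attribute lookup fails, e.g. `mypkg.heavy`."""
    if name == "heavy":
        from mypkg import heavy  # deferred until first access

        return heavy
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

With this in place, `import mypkg` stays cheap, and any `ImportError` from missing optional dependencies only surfaces when `mypkg.heavy` is actually touched.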
convoviz/analysis/__init__.py
CHANGED
```diff
@@ -1,9 +1,22 @@
-"""Data analysis and visualization for convoviz.
+"""Data analysis and visualization for convoviz.
 
-
-
+Requires the [viz] extra: uv tool install "convoviz[viz]"
+"""
 
 __all__ = [
     "generate_week_barplot",
     "generate_wordcloud",
 ]
+
+
+def __getattr__(name: str):
+    """Lazy import for visualization functions requiring optional dependencies."""
+    if name == "generate_week_barplot":
+        from convoviz.analysis.graphs import generate_week_barplot
+
+        return generate_week_barplot
+    if name == "generate_wordcloud":
+        from convoviz.analysis.wordcloud import generate_wordcloud
+
+        return generate_wordcloud
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```
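Since the heavy plotting imports are now deferred, code running without the `[viz]` extra only fails when it first touches a plotting function. A usage sketch, assuming the deferred import raises `ImportError` when matplotlib/wordcloud are missing (the normal behaviour of an import executed on first access):

```python
import convoviz

try:
    # First access triggers the lazy import chain: convoviz.analysis,
    # then convoviz.analysis.graphs (which needs the [viz] extra).
    barplot = convoviz.analysis.generate_week_barplot
except ImportError as exc:
    print(f"Plotting disabled; install convoviz[viz] to enable it: {exc}")
```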
convoviz/analysis/graphs.py
CHANGED
```diff
@@ -8,6 +8,7 @@ Goals:
 
 from __future__ import annotations
 
+import logging
 from collections import defaultdict
 from collections.abc import Callable, Iterable
 from datetime import UTC, datetime
@@ -25,6 +26,8 @@ from convoviz.config import GraphConfig, get_default_config
 from convoviz.models import ConversationCollection
 from convoviz.utils import get_asset_path
 
+logger = logging.getLogger(__name__)
+
 WEEKDAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
 
 
@@ -347,7 +350,9 @@ def generate_length_histogram(
         color="#cf222e",
     )
 
-    ax.set_title(
+    ax.set_title(
+        "Conversation length (user prompts)", fontproperties=font_prop, fontsize=14, pad=14
+    )
     ax.set_xlabel("User prompts per conversation", fontproperties=font_prop)
     ax.set_ylabel("Conversations", fontproperties=font_prop)
     ax.set_xlim(left=0, right=cap)
@@ -619,7 +624,9 @@ def generate_summary_dashboard(
     locator = mdates.AutoDateLocator(minticks=4, maxticks=10)
     ax_ts.xaxis.set_major_locator(locator)
     ax_ts.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
-    ax_ts.set_title(
+    ax_ts.set_title(
+        "Monthly activity (user prompts)", fontproperties=font_prop, fontsize=13, pad=10
+    )
     ax_ts.set_ylabel("User prompts", fontproperties=font_prop)
     ax_ts.set_xlabel(f"Month ({_tz_label(cfg)})", fontproperties=font_prop)
     _apply_tick_font(ax_ts, font_prop)
@@ -738,14 +745,31 @@ def generate_summary_graphs(
     cfg = config or get_default_config().graph
 
     user_ts = collection.timestamps("user")
+    logger.info(f"Generating summary graphs to {output_dir}")
 
     tasks: list[tuple[str, str, Callable[[], Figure]]] = [
         ("Overview", "overview.png", lambda: generate_summary_dashboard(collection, cfg)),
-        (
-
-
+        (
+            "Activity heatmap",
+            "activity_heatmap.png",
+            lambda: generate_activity_heatmap(collection, cfg),
+        ),
+        (
+            "Daily activity",
+            "daily_activity.png",
+            lambda: generate_daily_activity_lineplot(collection, cfg),
+        ),
+        (
+            "Monthly activity",
+            "monthly_activity.png",
+            lambda: generate_monthly_activity_barplot(collection, cfg),
+        ),
         ("Model usage", "model_usage.png", lambda: generate_model_piechart(collection, cfg)),
-        (
+        (
+            "Conversation lengths",
+            "conversation_lengths.png",
+            lambda: generate_length_histogram(collection, cfg),
+        ),
         (
             "Conversation lifetimes",
             "conversation_lifetimes.png",
```
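The rework keeps `generate_summary_graphs` driven by a flat list of `(label, filename, factory)` tasks, which makes adding or reordering charts a one-tuple change and gives a single place to log progress. A self-contained sketch of that pattern with plain matplotlib and hypothetical chart factories (not convoviz's real generators):

```python
"""Drive figure generation from a (label, filename, factory) task list."""

import logging
from collections.abc import Callable
from pathlib import Path

import matplotlib

matplotlib.use("Agg")  # headless backend so the sketch runs without a display
import matplotlib.pyplot as plt
from matplotlib.figure import Figure

logger = logging.getLogger(__name__)


def _histogram() -> Figure:
    fig, ax = plt.subplots()
    ax.hist([1, 2, 2, 3, 3, 3])
    return fig


def _lineplot() -> Figure:
    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [3, 1, 4])
    return fig


def generate_all(output_dir: Path) -> None:
    output_dir.mkdir(parents=True, exist_ok=True)
    tasks: list[tuple[str, str, Callable[[], Figure]]] = [
        ("Histogram", "histogram.png", _histogram),
        ("Line plot", "lineplot.png", _lineplot),
    ]
    for label, filename, factory in tasks:
        logger.info("Rendering %s", label)
        fig = factory()
        fig.savefig(output_dir / filename, dpi=150)
        plt.close(fig)  # release memory between charts


if __name__ == "__main__":
    generate_all(Path("charts"))
```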
convoviz/analysis/wordcloud.py
CHANGED
```diff
@@ -1,5 +1,8 @@
 """Word cloud generation for conversation text."""
 
+import logging
+import os
+from concurrent.futures import ProcessPoolExecutor
 from functools import lru_cache
 from pathlib import Path
 
@@ -13,6 +16,8 @@ from wordcloud import WordCloud
 from convoviz.config import WordCloudConfig
 from convoviz.models import ConversationCollection
 
+logger = logging.getLogger(__name__)
+
 # Languages for stopwords
 STOPWORD_LANGUAGES = [
     "arabic",
@@ -110,6 +115,25 @@ def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
     return result
 
 
+def _generate_and_save_wordcloud(args: tuple[str, str, Path, WordCloudConfig]) -> bool:
+    """Worker function for parallel wordcloud generation.
+
+    Must be at module level for pickling by ProcessPoolExecutor.
+
+    Args:
+        args: Tuple of (text, filename, output_dir, config)
+
+    Returns:
+        True if wordcloud was generated, False if skipped (empty text)
+    """
+    text, filename, output_dir, config = args
+    if not text.strip():
+        return False
+    img = generate_wordcloud(text, config)
+    img.save(output_dir / filename, optimize=True)
+    return True
+
+
 def generate_wordclouds(
     collection: ConversationCollection,
     output_dir: Path,
@@ -119,6 +143,8 @@
 ) -> None:
     """Generate word clouds for weekly, monthly, and yearly groupings.
 
+    Uses parallel processing to speed up generation on multi-core systems.
+
     Args:
         collection: Collection of conversations
         output_dir: Directory to save the word clouds
@@ -126,40 +152,53 @@
         progress_bar: Whether to show progress bars
     """
     output_dir.mkdir(parents=True, exist_ok=True)
+    logger.info(f"Generating wordclouds to {output_dir}")
 
     week_groups = collection.group_by_week()
     month_groups = collection.group_by_month()
     year_groups = collection.group_by_year()
 
-
-
-
-
-
+    # Pre-load/download NLTK stopwords in the main process to avoid race conditions in workers
+    load_nltk_stopwords()
+
+    # Build list of all tasks: (text, filename, output_dir, config)
+    tasks: list[tuple[str, str, Path, WordCloudConfig]] = []
+
+    for week, group in week_groups.items():
         text = group.plaintext("user", "assistant")
-
-
-
-
-
-    for month, group in tqdm(
-        month_groups.items(),
-        desc="Creating monthly wordclouds 🔡☁️",
-        disable=not progress_bar,
-    ):
+        # Format: 2024-W15.png (ISO week format)
+        filename = f"{week.strftime('%Y-W%W')}.png"
+        tasks.append((text, filename, output_dir, config))
+
+    for month, group in month_groups.items():
         text = group.plaintext("user", "assistant")
-
-
-
-
-
-    for year, group in tqdm(
-        year_groups.items(),
-        desc="Creating yearly wordclouds 🔡☁️",
-        disable=not progress_bar,
-    ):
+        # Format: 2024-03-March.png (consistent with folder naming)
+        filename = f"{month.strftime('%Y-%m-%B')}.png"
+        tasks.append((text, filename, output_dir, config))
+
+    for year, group in year_groups.items():
         text = group.plaintext("user", "assistant")
-
-
-
-
+        # Format: 2024.png
+        filename = f"{year.strftime('%Y')}.png"
+        tasks.append((text, filename, output_dir, config))
+
+    if not tasks:
+        return
+
+    # Determine worker count: use config if set, otherwise half CPU count (min 1)
+    max_workers = config.max_workers
+    if max_workers is None:
+        cpu_count = os.cpu_count() or 2
+        max_workers = max(1, cpu_count // 2)
+
+    # Use parallel processing for speedup on multi-core systems
+    logger.debug(f"Starting wordcloud generation with {max_workers} workers for {len(tasks)} tasks")
+    with ProcessPoolExecutor(max_workers=max_workers) as executor:
+        list(
+            tqdm(
+                executor.map(_generate_and_save_wordcloud, tasks),
+                total=len(tasks),
+                desc="Creating wordclouds 🔡☁️",
+                disable=not progress_bar,
+            )
+        )
```
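The wordcloud change replaces three sequential tqdm loops with one task list fanned out over a `ProcessPoolExecutor`; the worker must be a module-level function so it can be pickled by the pool. A minimal sketch of that shape, with a toy CPU-bound worker standing in for the real renderer (tqdm assumed installed, as in the diff above):

```python
"""Fan picklable tasks out over a process pool with a tqdm progress bar."""

import os
from concurrent.futures import ProcessPoolExecutor

from tqdm import tqdm


def _render_one(args: tuple[str, str]) -> bool:
    """Module-level worker so ProcessPoolExecutor can pickle it."""
    text, filename = args
    if not text.strip():
        return False  # skip empty inputs, mirroring the empty-text guard
    # Stand-in for the expensive rendering step.
    _ = sum(ord(c) for c in text * 1000)
    return True


def render_all(tasks: list[tuple[str, str]], max_workers: int | None = None) -> int:
    if not tasks:
        return 0
    if max_workers is None:
        max_workers = max(1, (os.cpu_count() or 2) // 2)  # half the cores, at least one
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(_render_one, tasks), total=len(tasks)))
    return sum(results)


if __name__ == "__main__":  # guard required where the spawn start method is used
    done = render_all([("hello world", "a.png"), ("", "b.png"), ("more text", "c.png")])
    print(f"rendered {done} items")
```

Building the full task list up front is what lets `executor.map` and the progress bar cover all groupings in a single pass.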
convoviz/cli.py
CHANGED
```diff
@@ -1,14 +1,18 @@
 """Command-line interface for convoviz."""
 
+import logging
+from importlib.metadata import version as get_version
 from pathlib import Path
 
 import typer
 from rich.console import Console
+from rich.markup import escape
 
-from convoviz.config import FolderOrganization, get_default_config
+from convoviz.config import FolderOrganization, OutputKind, get_default_config
 from convoviz.exceptions import ConfigurationError, InvalidZipError
 from convoviz.interactive import run_interactive_config
 from convoviz.io.loaders import find_latest_zip
+from convoviz.logging_config import setup_logging
 from convoviz.pipeline import run_pipeline
 from convoviz.utils import default_font_path
 
@@ -19,6 +23,13 @@ app = typer.Typer(
 console = Console()
 
 
+def _version_callback(value: bool) -> None:
+    """Print version and exit."""
+    if value:
+        console.print(f"convoviz {get_version('convoviz')}")
+        raise typer.Exit()
+
+
 @app.callback(invoke_without_command=True)
 def run(
     ctx: typer.Context,
@@ -38,6 +49,12 @@ def run(
         "-o",
         help="Path to the output directory.",
     ),
+    outputs: list[OutputKind] | None = typer.Option(
+        None,
+        "--outputs",
+        help="Output types to generate (repeatable). Options: markdown, graphs, wordclouds. "
+        "If not specified, all outputs are generated.",
+    ),
     flat: bool = typer.Option(
         False,
         "--flat",
@@ -50,8 +67,34 @@
         "-i/-I",
         help="Force interactive mode on or off.",
     ),
+    verbose: int = typer.Option(
+        0,
+        "--verbose",
+        "-v",
+        help="Increase verbosity. Use -vv for debug.",
+        count=True,
+    ),
+    log_file: Path | None = typer.Option(
+        None,
+        "--log-file",
+        help="Path to log file. Defaults to a temporary file.",
+    ),
+    _version: bool = typer.Option(
+        False,
+        "--version",
+        "-V",
+        help="Show version and exit.",
+        callback=_version_callback,
+        is_eager=True,
+    ),
 ) -> None:
     """Convert ChatGPT export data to markdown and generate visualizations."""
+    # Setup logging immediately
+    log_path = setup_logging(verbose, log_file)
+    logger = logging.getLogger("convoviz.cli")
+    console.print(f"[dim]Logging to: {log_path}[/dim]")
+    logger.debug(f"Logging initialized. Output: {log_path}")
+
     if ctx.invoked_subcommand is not None:
         return
 
@@ -63,6 +106,8 @@
         config.input_path = input_path
     if output_dir:
         config.output_folder = output_dir
+    if outputs:
+        config.outputs = set(outputs)
     if flat:
         config.folder_organization = FolderOrganization.FLAT
 
@@ -105,10 +150,13 @@
     try:
         run_pipeline(config)
     except (InvalidZipError, ConfigurationError) as e:
-
+        logger.error(f"Known error: {e}")
+        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        raise typer.Exit(code=1) from None
     except Exception as e:
-
+        logger.exception("Unexpected error occurred")
+        console.print(f"[bold red]Unexpected error:[/bold red] {escape(str(e))}")
+        console.print(f"[dim]See log file for details: {log_path}[/dim]")
        raise typer.Exit(code=1) from None
 
 
```
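The new CLI flags lean on standard Typer features: an eager `--version` callback that prints and exits before the command body runs, and a counted `-v/--verbose` flag. A stripped-down sketch of those two idioms in a hypothetical app (not the convoviz CLI itself), with rich's `Console` as in the diff above:

```python
"""Typer app with an eager --version callback and a counted -v flag."""

import typer
from rich.console import Console

app = typer.Typer()
console = Console()


def _version_callback(value: bool) -> None:
    if value:
        console.print("demo 1.0.0")
        raise typer.Exit()  # stop before the command body runs


@app.command()
def main(
    verbose: int = typer.Option(0, "--verbose", "-v", count=True, help="Repeat for more detail."),
    _version: bool = typer.Option(
        False, "--version", "-V", callback=_version_callback, is_eager=True
    ),
) -> None:
    levels = {0: "WARNING", 1: "INFO"}
    console.print(f"log level: {levels.get(verbose, 'DEBUG')}")


if __name__ == "__main__":
    app()
```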
convoviz/config.py
CHANGED
```diff
@@ -14,6 +14,18 @@ class FolderOrganization(str, Enum):
     DATE = "date"  # Nested by year/month (default)
 
 
+class OutputKind(str, Enum):
+    """Types of outputs that can be generated."""
+
+    MARKDOWN = "markdown"  # Conversation markdown files
+    GRAPHS = "graphs"  # Usage analytics graphs
+    WORDCLOUDS = "wordclouds"  # Word cloud visualizations
+
+
+# Default: generate all outputs
+ALL_OUTPUTS: frozenset[OutputKind] = frozenset(OutputKind)
+
+
 class AuthorHeaders(BaseModel):
     """Headers for different message authors in markdown output."""
 
@@ -26,7 +38,7 @@ class AuthorHeaders(BaseModel):
 class MarkdownConfig(BaseModel):
     """Configuration for markdown output."""
 
-    latex_delimiters: Literal["default", "dollars"] = "
+    latex_delimiters: Literal["default", "dollars"] = "dollars"
     flavor: Literal["standard", "obsidian"] = "standard"
 
 
@@ -39,10 +51,13 @@ class YAMLConfig(BaseModel):
     create_time: bool = True
     update_time: bool = True
     model: bool = True
-    used_plugins: bool =
+    used_plugins: bool = False
     message_count: bool = True
-    content_types: bool =
-
+    content_types: bool = False
+    custom_instructions: bool = False
+    is_starred: bool = False
+    voice: bool = False
 
 
 class ConversationConfig(BaseModel):
@@ -70,6 +85,7 @@ class WordCloudConfig(BaseModel):
     include_numbers: bool = False
     width: int = 600
     height: int = 600
+    max_workers: int | None = None  # None = use half CPU count
 
 
 class GraphConfig(BaseModel):
@@ -92,6 +108,7 @@ class ConvovizConfig(BaseModel):
     input_path: Path | None = None
     output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
     folder_organization: FolderOrganization = FolderOrganization.DATE
+    outputs: set[OutputKind] = Field(default_factory=lambda: set(ALL_OUTPUTS))
     message: MessageConfig = Field(default_factory=MessageConfig)
     conversation: ConversationConfig = Field(default_factory=ConversationConfig)
     wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
```
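The `outputs: set[OutputKind]` field gives the pipeline a switch for skipping whole stages. A sketch of how such a set-of-enum field can gate work, with hypothetical `Config` and stage names alongside the same pydantic `Field(default_factory=...)` idiom:

```python
"""Gate pipeline stages on a set-of-enum config field."""

from enum import Enum

from pydantic import BaseModel, Field


class OutputKind(str, Enum):
    MARKDOWN = "markdown"
    GRAPHS = "graphs"
    WORDCLOUDS = "wordclouds"


ALL_OUTPUTS: frozenset[OutputKind] = frozenset(OutputKind)


class Config(BaseModel):
    outputs: set[OutputKind] = Field(default_factory=lambda: set(ALL_OUTPUTS))


def run(config: Config) -> None:
    if OutputKind.MARKDOWN in config.outputs:
        print("writing markdown...")
    if OutputKind.GRAPHS in config.outputs:
        print("rendering graphs...")
    if OutputKind.WORDCLOUDS in config.outputs:
        print("building wordclouds...")


if __name__ == "__main__":
    run(Config())                               # all outputs (the default)
    run(Config(outputs={OutputKind.MARKDOWN}))  # markdown only
```

Because `OutputKind` is a `str` enum, values like `"markdown"` parse directly from CLI options or config files into set members.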