convoviz 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {convoviz-0.2.1 → convoviz-0.2.3}/PKG-INFO +4 -24
- {convoviz-0.2.1 → convoviz-0.2.3}/README.md +1 -1
- convoviz-0.2.3/convoviz/analysis/graphs.py +429 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/analysis/wordcloud.py +20 -0
- convoviz-0.2.3/convoviz/assets/stopwords.txt +75 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/cli.py +18 -15
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/config.py +12 -7
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/interactive.py +22 -12
- convoviz-0.2.3/convoviz/io/assets.py +82 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/io/loaders.py +30 -2
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/io/writers.py +17 -2
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/models/__init__.py +0 -4
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/models/collection.py +2 -0
- convoviz-0.2.3/convoviz/models/message.py +119 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/pipeline.py +42 -19
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/renderers/markdown.py +46 -15
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/utils.py +54 -4
- {convoviz-0.2.1 → convoviz-0.2.3}/pyproject.toml +2 -2
- convoviz-0.2.1/LICENSE +0 -21
- convoviz-0.2.1/convoviz/analysis/graphs.py +0 -98
- convoviz-0.2.1/convoviz/models/message.py +0 -77
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/__init__.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/__main__.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/analysis/__init__.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/colormaps.txt +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/exceptions.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/io/__init__.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/models/conversation.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/models/node.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/py.typed +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/renderers/__init__.py +0 -0
- {convoviz-0.2.1 → convoviz-0.2.3}/convoviz/renderers/yaml.py +0 -0
|
@@ -1,31 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: convoviz
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Get analytics and visualizations on your ChatGPT data!
|
|
5
5
|
Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
|
|
6
6
|
Author: Mohamed Cheikh Sidiya
|
|
7
7
|
Author-email: Mohamed Cheikh Sidiya <mohamedcheikhsidiya77@gmail.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
|
|
10
|
-
Copyright (c) 2023 Mohamed Cheikh Sidiya
|
|
11
|
-
|
|
12
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
-
in the Software without restriction, including without limitation the rights
|
|
15
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
-
furnished to do so, subject to the following conditions:
|
|
18
|
-
|
|
19
|
-
The above copyright notice and this permission notice shall be included in all
|
|
20
|
-
copies or substantial portions of the Software.
|
|
21
|
-
|
|
22
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
-
SOFTWARE.
|
|
8
|
+
License-Expression: MIT
|
|
29
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
30
10
|
Classifier: Programming Language :: Python :: 3
|
|
31
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -136,6 +116,6 @@ I wasn't a fan of the clunky, and sometimes paid, browser extensions.
|
|
|
136
116
|
|
|
137
117
|
It was also a great opportunity to learn more about Python and type annotations. I had mypy, pyright, and ruff all on strict mode, 'twas fun.
|
|
138
118
|
|
|
139
|
-
It also
|
|
119
|
+
It should(?) also work as a library, so you can import and use the models and functions. I still need to add more documentation for that, though. Feel free to reach out if you need help.
|
|
140
120
|
|
|
141
121
|
I'm working on automating it to add new conversations and update old ones. Had some luck with a JavaScript bookmarklet; still ironing it out, though.
|
|
@@ -90,6 +90,6 @@ I wasn't a fan of the clunky, and sometimes paid, browser extensions.
|
|
|
90
90
|
|
|
91
91
|
It was also a great opportunity to learn more about Python and type annotations. I had mypy, pyright, and ruff all on strict mode, 'twas fun.
|
|
92
92
|
|
|
93
|
-
It also
|
|
93
|
+
It should(?) also work as a library, so you can import and use the models and functions. I still need to add more documentation for that, though. Feel free to reach out if you need help.
|
|
94
94
|
|
|
95
95
|
I'm working on automating it to add new conversations and update old ones. Had some luck with a JavaScript bookmarklet; still ironing it out, though.
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
"""Graph generation for conversation analytics."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import matplotlib.font_manager as fm
|
|
8
|
+
from matplotlib.figure import Figure
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
|
|
11
|
+
from convoviz.config import GraphConfig, get_default_config
|
|
12
|
+
from convoviz.models import ConversationCollection
|
|
13
|
+
from convoviz.utils import get_asset_path
|
|
14
|
+
|
|
15
|
+
# Weekday names indexed by datetime.weekday(), which numbers Monday as 0.
WEEKDAYS = [
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday",
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
    """Build a single-axes figure with the shared styling and resolve its font.

    Args:
        config: Graph settings; ``figsize``, ``font_name`` and ``grid`` are read.

    Returns:
        The new figure and the font properties callers should apply to text.
    """
    figure = Figure(figsize=config.figsize, dpi=300)
    axes = figure.add_subplot()

    # Use the font file resolved by get_asset_path when it exists on disk;
    # otherwise fall back to matplotlib's default font properties.
    font_file = get_asset_path(f"fonts/{config.font_name}")
    if font_file.exists():
        font = fm.FontProperties(fname=str(font_file))
    else:
        font = fm.FontProperties()

    # Drop the top/right frame lines; optionally add dashed horizontal guides.
    for side in ("top", "right"):
        axes.spines[side].set_visible(False)
    if config.grid:
        axes.grid(axis="y", linestyle="--", alpha=0.7)

    return figure, font
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def generate_week_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall on each weekday.

    Args:
        timestamps: Unix timestamps; each one is interpreted in UTC.
        title: Text used as the figure title.
        config: Graph settings; the default config's graph section is used
            when this is omitted.

    Returns:
        Matplotlib Figure holding the bar chart.
    """
    cfg = config or get_default_config().graph

    # Tally by weekday name; days with no entries read back as 0.
    tally: defaultdict[str, int] = defaultdict(int)
    for ts in timestamps:
        moment = datetime.fromtimestamp(ts, UTC)
        tally[WEEKDAYS[moment.weekday()]] += 1
    counts = [tally[name] for name in WEEKDAYS]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(WEEKDAYS, counts, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        # Write each bar's value just above it.
        for rect in rects:
            top = rect.get_height()
            centre = rect.get_x() + rect.get_width() / 2.0
            ax.text(
                centre,
                top,
                f"{int(top)}",
                ha="center",
                va="bottom",
                fontproperties=font_prop,
            )

    ax.set_xlabel("Weekday", fontproperties=font_prop)
    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
    ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(range(len(WEEKDAYS)))
    ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def generate_hour_barplot(
    timestamps: list[float],
    title: str,
    config: GraphConfig | None = None,
) -> Figure:
    """Plot how many timestamps fall in each hour of the day (0-23, UTC).

    Args:
        timestamps: Unix timestamps; each one is interpreted in UTC.
        title: Prefix of the figure title (" - Hourly Distribution" is appended).
        config: Graph settings; the default config's graph section is used
            when this is omitted.

    Returns:
        Matplotlib Figure holding the bar chart.
    """
    cfg = config or get_default_config().graph

    # One slot per hour, indexed directly by the UTC hour of each timestamp.
    per_hour = [0] * 24
    for ts in timestamps:
        per_hour[datetime.fromtimestamp(ts, UTC).hour] += 1

    hour_labels = [f"{hour:02d}:00" for hour in range(24)]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(range(24), per_hour, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        for rect in rects:
            top = rect.get_height()
            if top <= 0:
                # Empty hours get no label.
                continue
            ax.text(
                rect.get_x() + rect.get_width() / 2.0,
                top,
                f"{int(top)}",
                ha="center",
                va="bottom",
                fontproperties=font_prop,
                fontsize=8,
            )

    ax.set_xlabel("Hour of Day (UTC)", fontproperties=font_prop)
    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
    ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(range(24))
    ax.set_xticklabels(hour_labels, rotation=90, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def generate_model_piechart(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw a pie chart of how many conversations used each model.

    Conversations without a model are counted as "Unknown". Any model whose
    share is below 5% is merged into a single "Other" slice.

    Args:
        collection: Collection of conversations.
        config: Graph settings; the default config's graph section is used
            when this is omitted.

    Returns:
        Matplotlib Figure; it only shows "No Data" when nothing was counted.
    """
    cfg = config or get_default_config().graph

    usage: defaultdict[str, int] = defaultdict(int)
    for conv in collection.conversations:
        usage[conv.model or "Unknown"] += 1
    total = sum(usage.values())

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    if total == 0:
        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
        return fig

    # Keep models at or above the threshold; everything else is pooled.
    threshold = 0.05
    slices: dict[str, int] = {
        name: count for name, count in usage.items() if count / total >= threshold
    }
    pooled = sum(count for count in usage.values() if count / total < threshold)
    if pooled > 0:
        slices["Other"] = pooled

    # Largest slice first so the layout is consistent between runs.
    ranked = sorted(slices.items(), key=lambda pair: pair[1], reverse=True)
    labels = [name for name, _ in ranked]
    sizes = [count for _, count in ranked]

    palette = [
        "#4A90E2",
        "#50E3C2",
        "#F5A623",
        "#D0021B",
        "#8B572A",
        "#417505",
        "#9013FE",
        "#BD10E0",
        "#7F7F7F",
    ]
    ax.pie(
        sizes,
        labels=labels,
        autopct="%1.1f%%",
        startangle=140,
        colors=palette[: len(labels)],
        textprops={"fontproperties": font_prop},
    )
    ax.set_title("Model Usage Distribution", fontproperties=font_prop, fontsize=16, pad=20)

    fig.tight_layout()
    return fig
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def generate_length_histogram(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Create a histogram showing distribution of conversation lengths.

    A conversation's length is whatever ``message_count("user")`` reports for
    it. Lengths above the 95th percentile (but never below 5) are clipped to
    that cap, so outliers pile up in the last bar instead of stretching the
    X-axis.

    Args:
        collection: Collection of conversations.
        config: Graph settings; the default config's graph section is used
            when this is omitted.

    Returns:
        Matplotlib Figure; it only shows "No Data" for an empty collection.
    """
    cfg = config or get_default_config().graph
    lengths = [conv.message_count("user") for conv in collection.conversations]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    if not lengths:
        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
        return fig

    import numpy as np

    # Cap at 95th percentile to focus on most conversations, with a floor of 5
    # so there is always some range to plot.
    cap = max(int(np.percentile(lengths, 95)), 5)

    # Clip only the plotted copy; `lengths` itself is left untouched.
    plot_lengths = [min(length, cap) for length in lengths]

    step = max(1, cap // 10)
    bin_edges = list(range(0, cap + 2, step))
    if bin_edges[-1] < cap:
        # With a stride > 1 the edges can stop short of the cap (e.g. cap=45,
        # step=4 ends at 44). Values outside the outermost edges are ignored
        # by the histogram, which would silently drop every clipped
        # conversation, so add one more edge to cover them.
        bin_edges.append(bin_edges[-1] + step)
    ax.hist(plot_lengths, bins=bin_edges, color=cfg.color, alpha=0.8, rwidth=0.8)

    ax.set_xlabel("Number of User Prompts", fontproperties=font_prop)
    ax.set_ylabel("Number of Conversations", fontproperties=font_prop)
    ax.set_title(
        f"Conversation Length Distribution (Capped at {cap})",
        fontproperties=font_prop,
        fontsize=16,
        pad=20,
    )

    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def generate_monthly_activity_barplot(
    collection: ConversationCollection,
    config: GraphConfig | None = None,
) -> Figure:
    """Draw one bar per month showing that month's user-timestamp count.

    Args:
        collection: Collection of conversations.
        config: Graph settings; the default config's graph section is used
            when this is omitted.

    Returns:
        Matplotlib Figure holding the bar chart.
    """
    cfg = config or get_default_config().graph

    by_month = collection.group_by_month()
    months = sorted(by_month)

    # Tick text looks like "Feb '23".
    tick_text = [month.strftime("%b '%y") for month in months]
    totals = [len(by_month[month].timestamps("user")) for month in months]

    fig, font_prop = _setup_figure(cfg)
    ax = fig.gca()

    rects = ax.bar(tick_text, totals, color=cfg.color, alpha=0.8)

    if cfg.show_counts:
        for rect in rects:
            top = rect.get_height()
            if top <= 0:
                # Months with no prompts get no label.
                continue
            ax.text(
                rect.get_x() + rect.get_width() / 2.0,
                top,
                f"{int(top)}",
                ha="center",
                va="bottom",
                fontproperties=font_prop,
                fontsize=8,
            )

    ax.set_xlabel("Month", fontproperties=font_prop)
    ax.set_ylabel("Total Prompt Count", fontproperties=font_prop)
    ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
    ax.set_xticks(range(len(tick_text)))
    ax.set_xticklabels(tick_text, rotation=45, fontproperties=font_prop)

    for tick_label in ax.get_yticklabels():
        tick_label.set_fontproperties(font_prop)

    fig.tight_layout()
    return fig
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def generate_summary_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
) -> None:
    """Render the collection-wide graphs into ``<output_dir>/Summary``.

    The directory is created even when the collection is empty; in that case
    no image is written.

    Args:
        collection: Collection of conversations.
        output_dir: Parent directory; images go into its "Summary" subfolder.
        config: Optional graph configuration, passed through to each builder.
    """
    target = output_dir / "Summary"
    target.mkdir(parents=True, exist_ok=True)

    if not collection.conversations:
        return

    # Builder / file-name pairs, rendered and saved one after another:
    # model usage, length distribution, monthly activity.
    outputs = (
        (generate_model_piechart, "model_usage.png"),
        (generate_length_histogram, "conversation_lengths.png"),
        (generate_monthly_activity_barplot, "monthly_activity.png"),
    )
    for build, file_name in outputs:
        figure = build(collection, config)
        figure.savefig(target / file_name)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def generate_graphs(
    collection: ConversationCollection,
    output_dir: Path,
    config: GraphConfig | None = None,
    *,
    progress_bar: bool = False,
) -> None:
    """Write summary graphs plus weekday/hourly graphs per month and per year.

    Args:
        collection: Collection of conversations.
        output_dir: Directory to save the graphs (created if missing).
        config: Optional graph configuration, passed through to each builder.
        progress_bar: Whether to show tqdm progress bars.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    generate_summary_graphs(collection, output_dir, config)

    def _save_distributions(stem, heading, group) -> None:
        """Save the weekday and hourly plots for one group under ``stem``."""
        prompts = group.timestamps("user")
        weekly = generate_week_barplot(prompts, heading, config)
        weekly.savefig(output_dir / f"{stem}_weekly.png")
        hourly = generate_hour_barplot(prompts, heading, config)
        hourly.savefig(output_dir / f"{stem}_hourly.png")

    month_groups = collection.group_by_month()
    year_groups = collection.group_by_year()

    # Files are named like "2023 February_weekly.png", titled "February '23".
    monthly = tqdm(
        month_groups.items(),
        desc="Creating monthly graphs 📈",
        disable=not progress_bar,
    )
    for month, group in monthly:
        _save_distributions(month.strftime("%Y %B"), month.strftime("%B '%y"), group)

    # Yearly files and titles both use the bare four-digit year.
    yearly = tqdm(
        year_groups.items(),
        desc="Creating yearly graphs 📈",
        disable=not progress_bar,
    )
    for year, group in yearly:
        _save_distributions(year.strftime("%Y"), year.strftime("%Y"), group)
|
|
@@ -24,6 +24,23 @@ STOPWORD_LANGUAGES = [
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
@lru_cache(maxsize=1)
def load_programming_stopwords() -> frozenset[str]:
    """Read the programming stop words from ``assets/stopwords.txt``.

    Blank lines and lines starting with ``#`` are skipped; every other line is
    stripped and lower-cased. The result is cached after the first call.

    Returns:
        Frozen set of stop words, empty when the asset file does not exist.
    """
    asset_file = Path(__file__).parent.parent / "assets" / "stopwords.txt"
    if not asset_file.exists():
        return frozenset()

    words: set[str] = set()
    with open(asset_file, encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            words.add(entry.lower())
    return frozenset(words)
|
|
42
|
+
|
|
43
|
+
|
|
27
44
|
@lru_cache(maxsize=1)
|
|
28
45
|
def load_nltk_stopwords() -> frozenset[str]:
|
|
29
46
|
"""Load and cache NLTK stopwords.
|
|
@@ -74,6 +91,9 @@ def generate_wordcloud(text: str, config: WordCloudConfig) -> Image:
|
|
|
74
91
|
stopwords = set(load_nltk_stopwords())
|
|
75
92
|
stopwords.update(parse_custom_stopwords(config.custom_stopwords))
|
|
76
93
|
|
|
94
|
+
if config.exclude_programming_keywords:
|
|
95
|
+
stopwords.update(load_programming_stopwords())
|
|
96
|
+
|
|
77
97
|
wc = WordCloud(
|
|
78
98
|
font_path=str(config.font_path) if config.font_path else None,
|
|
79
99
|
width=config.width,
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
def
|
|
3
|
+
class
|
|
4
|
+
import
|
|
5
|
+
from
|
|
6
|
+
as
|
|
7
|
+
elif
|
|
8
|
+
finally
|
|
9
|
+
yield
|
|
10
|
+
pass
|
|
11
|
+
lambda
|
|
12
|
+
async
|
|
13
|
+
await
|
|
14
|
+
nonlocal
|
|
15
|
+
assert
|
|
16
|
+
self
|
|
17
|
+
cls
|
|
18
|
+
# JavaScript / TypeScript
|
|
19
|
+
const
|
|
20
|
+
let
|
|
21
|
+
var
|
|
22
|
+
function
|
|
23
|
+
export
|
|
24
|
+
default
|
|
25
|
+
extends
|
|
26
|
+
implements
|
|
27
|
+
static
|
|
28
|
+
# Java / C#
|
|
29
|
+
final
|
|
30
|
+
abstract
|
|
31
|
+
new
|
|
32
|
+
super
|
|
33
|
+
package
|
|
34
|
+
throws
|
|
35
|
+
synchronized
|
|
36
|
+
volatile
|
|
37
|
+
transient
|
|
38
|
+
native
|
|
39
|
+
strictfp
|
|
40
|
+
override
|
|
41
|
+
# C / C++
|
|
42
|
+
unsigned
|
|
43
|
+
signed
|
|
44
|
+
typedef
|
|
45
|
+
sizeof
|
|
46
|
+
extern
|
|
47
|
+
register
|
|
48
|
+
restrict
|
|
49
|
+
inline
|
|
50
|
+
template
|
|
51
|
+
typename
|
|
52
|
+
virtual
|
|
53
|
+
friend
|
|
54
|
+
mutable
|
|
55
|
+
explicit
|
|
56
|
+
operator
|
|
57
|
+
typeid
|
|
58
|
+
# Rust
|
|
59
|
+
mut
|
|
60
|
+
fn
|
|
61
|
+
pub
|
|
62
|
+
mod
|
|
63
|
+
trait
|
|
64
|
+
impl
|
|
65
|
+
where
|
|
66
|
+
loop
|
|
67
|
+
unsafe
|
|
68
|
+
crate
|
|
69
|
+
dyn
|
|
70
|
+
# Go
|
|
71
|
+
func
|
|
72
|
+
chan
|
|
73
|
+
defer
|
|
74
|
+
fallthrough
|
|
75
|
+
goto
|
|
@@ -8,7 +8,7 @@ from rich.console import Console
|
|
|
8
8
|
from convoviz.config import get_default_config
|
|
9
9
|
from convoviz.exceptions import ConfigurationError, InvalidZipError
|
|
10
10
|
from convoviz.interactive import run_interactive_config
|
|
11
|
-
from convoviz.io.loaders import find_latest_zip
|
|
11
|
+
from convoviz.io.loaders import find_latest_zip
|
|
12
12
|
from convoviz.pipeline import run_pipeline
|
|
13
13
|
from convoviz.utils import default_font_path
|
|
14
14
|
|
|
@@ -22,14 +22,15 @@ console = Console()
|
|
|
22
22
|
@app.callback(invoke_without_command=True)
|
|
23
23
|
def run(
|
|
24
24
|
ctx: typer.Context,
|
|
25
|
-
|
|
25
|
+
input_path: Path | None = typer.Option(
|
|
26
26
|
None,
|
|
27
|
+
"--input",
|
|
27
28
|
"--zip",
|
|
28
29
|
"-z",
|
|
29
|
-
help="Path to the ChatGPT export zip file.",
|
|
30
|
+
help="Path to the ChatGPT export zip file, JSON file, or extracted directory.",
|
|
30
31
|
exists=True,
|
|
31
32
|
file_okay=True,
|
|
32
|
-
dir_okay=
|
|
33
|
+
dir_okay=True,
|
|
33
34
|
),
|
|
34
35
|
output_dir: Path | None = typer.Option(
|
|
35
36
|
None,
|
|
@@ -52,13 +53,13 @@ def run(
|
|
|
52
53
|
config = get_default_config()
|
|
53
54
|
|
|
54
55
|
# Override with CLI args
|
|
55
|
-
if
|
|
56
|
-
config.
|
|
56
|
+
if input_path:
|
|
57
|
+
config.input_path = input_path
|
|
57
58
|
if output_dir:
|
|
58
59
|
config.output_folder = output_dir
|
|
59
60
|
|
|
60
|
-
# Determine mode: interactive if explicitly requested or no
|
|
61
|
-
use_interactive = interactive if interactive is not None else (
|
|
61
|
+
# Determine mode: interactive if explicitly requested or no input provided
|
|
62
|
+
use_interactive = interactive if interactive is not None else (input_path is None)
|
|
62
63
|
|
|
63
64
|
if use_interactive:
|
|
64
65
|
console.print("Welcome to ChatGPT Data Visualizer ✨📊!\n")
|
|
@@ -69,21 +70,23 @@ def run(
|
|
|
69
70
|
raise typer.Exit(code=0) from None
|
|
70
71
|
else:
|
|
71
72
|
# Non-interactive mode: validate we have what we need
|
|
72
|
-
if not config.
|
|
73
|
+
if not config.input_path:
|
|
73
74
|
# Try to find a default
|
|
74
75
|
latest = find_latest_zip()
|
|
75
76
|
if latest:
|
|
76
|
-
console.print(f"No
|
|
77
|
-
config.
|
|
77
|
+
console.print(f"No input specified, using latest zip found: {latest}")
|
|
78
|
+
config.input_path = latest
|
|
78
79
|
else:
|
|
79
80
|
console.print(
|
|
80
|
-
"[bold red]Error:[/bold red] No
|
|
81
|
+
"[bold red]Error:[/bold red] No input file provided and none found in Downloads."
|
|
81
82
|
)
|
|
82
83
|
raise typer.Exit(code=1)
|
|
83
84
|
|
|
84
|
-
# Validate the
|
|
85
|
-
if not
|
|
86
|
-
console.print(
|
|
85
|
+
# Validate the input (basic check)
|
|
86
|
+
if not config.input_path.exists():
|
|
87
|
+
console.print(
|
|
88
|
+
f"[bold red]Error:[/bold red] Input path does not exist: {config.input_path}"
|
|
89
|
+
)
|
|
87
90
|
raise typer.Exit(code=1)
|
|
88
91
|
|
|
89
92
|
# Set default font if not set
|