convoviz 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {convoviz-0.2.3 → convoviz-0.2.4}/PKG-INFO +30 -5
- {convoviz-0.2.3 → convoviz-0.2.4}/README.md +29 -4
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/graphs.py +98 -40
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/wordcloud.py +1 -1
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/config.py +2 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/interactive.py +22 -3
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/loaders.py +28 -5
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/collection.py +12 -6
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/conversation.py +4 -6
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/message.py +42 -4
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/pipeline.py +31 -8
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/markdown.py +54 -18
- convoviz-0.2.4/convoviz/renderers/yaml.py +119 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/pyproject.toml +2 -3
- convoviz-0.2.3/convoviz/renderers/yaml.py +0 -42
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/__main__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/colormaps.txt +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/stopwords.txt +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/cli.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/exceptions.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/assets.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/writers.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/node.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/py.typed +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/utils.py +0 -0
{convoviz-0.2.3 → convoviz-0.2.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: convoviz
-Version: 0.2.3
+Version: 0.2.4
 Summary: Get analytics and visualizations on your ChatGPT data!
 Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
 Author: Mohamed Cheikh Sidiya

@@ -24,7 +24,7 @@ Requires-Python: >=3.12
 Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
 Description-Content-Type: text/markdown
 
-# Convoviz 📊: Visualize your entire ChatGPT data
+# Convoviz 📊: Visualize your entire ChatGPT data
 
 Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.
 

@@ -68,7 +68,7 @@ or pipx:
 pipx install convoviz
 ```
 
-### 3. Run the
+### 3. Run the tool 🏃♂️
 
 Simply run the command and follow the prompts:
 

@@ -81,9 +81,18 @@ convoviz
 You can provide arguments directly to skip the prompts:
 
 ```bash
-convoviz --
+convoviz --input path/to/your/export.zip --output path/to/output/folder
 ```
 
+Inputs can be any of:
+- A ChatGPT export ZIP (downloaded from OpenAI)
+- An extracted export directory containing `conversations.json`
+- A `conversations.json` file directly
+
+Notes:
+- `--zip` / `-z` is kept as an alias for `--input` for convenience.
+- You can force non-interactive mode with `--no-interactive`.
+
 For more options, run:
 
 ```bash

@@ -118,4 +127,20 @@ It was also a great opportunity to learn more about Python and type annotations.
 
 It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.
 
-
+### Offline / reproducible runs
+
+Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+```bash
+python -c "import nltk; nltk.download('stopwords')"
+```
+
+If you’re using `uv` without a global install, you can run:
+
+```bash
+uv run python -c "import nltk; nltk.download('stopwords')"
+```
+
+### Bookmarklet
+
+There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
{convoviz-0.2.3 → convoviz-0.2.4}/README.md

@@ -1,4 +1,4 @@
-# Convoviz 📊: Visualize your entire ChatGPT data
+# Convoviz 📊: Visualize your entire ChatGPT data
 
 Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.
 

@@ -42,7 +42,7 @@ or pipx:
 pipx install convoviz
 ```
 
-### 3. Run the
+### 3. Run the tool 🏃♂️
 
 Simply run the command and follow the prompts:
 

@@ -55,9 +55,18 @@ convoviz
 You can provide arguments directly to skip the prompts:
 
 ```bash
-convoviz --
+convoviz --input path/to/your/export.zip --output path/to/output/folder
 ```
 
+Inputs can be any of:
+- A ChatGPT export ZIP (downloaded from OpenAI)
+- An extracted export directory containing `conversations.json`
+- A `conversations.json` file directly
+
+Notes:
+- `--zip` / `-z` is kept as an alias for `--input` for convenience.
+- You can force non-interactive mode with `--no-interactive`.
+
 For more options, run:
 
 ```bash

@@ -92,4 +101,20 @@ It was also a great opportunity to learn more about Python and type annotations.
 
 It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.
 
-
+### Offline / reproducible runs
+
+Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+```bash
+python -c "import nltk; nltk.download('stopwords')"
+```
+
+If you’re using `uv` without a global install, you can run:
+
+```bash
+uv run python -c "import nltk; nltk.download('stopwords')"
+```
+
+### Bookmarklet
+
+There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
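The README shows the one-time `nltk.download` command; as an optional helper (not part of convoviz itself, and the `ensure_stopwords` name below is hypothetical), the same check can be done programmatically so repeated runs skip the download once the corpus is present:

```python
# Optional helper sketch (not convoviz API): fetch the NLTK stopwords corpus
# only when it is missing, so offline/repeat runs never hit the network.
import nltk


def ensure_stopwords() -> None:
    try:
        nltk.data.find("corpora/stopwords")  # raises LookupError when not installed
    except LookupError:
        nltk.download("stopwords")           # one-time download while online


if __name__ == "__main__":
    ensure_stopwords()
```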
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/graphs.py

@@ -4,7 +4,9 @@ from collections import defaultdict
 from datetime import UTC, datetime
 from pathlib import Path
 
+import matplotlib.dates as mdates
 import matplotlib.font_manager as fm
+from matplotlib.axes import Axes
 from matplotlib.figure import Figure
 from tqdm import tqdm
 

@@ -23,10 +25,10 @@ WEEKDAYS = [
 ]
 
 
-def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
+def _setup_figure(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
     """Internal helper to setup a figure with common styling."""
-    fig = Figure(figsize=config.figsize, dpi=
-    ax = fig.add_subplot()
+    fig = Figure(figsize=config.figsize, dpi=config.dpi)
+    ax: Axes = fig.add_subplot()
 
     # Load custom font if possible
     font_path = get_asset_path(f"fonts/{config.font_name}")

@@ -35,12 +37,27 @@ def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
     )
 
     # Styling
+    fig.set_facecolor("white")
+    ax.set_facecolor("white")
     ax.spines["top"].set_visible(False)
     ax.spines["right"].set_visible(False)
     if config.grid:
         ax.grid(axis="y", linestyle="--", alpha=0.7)
+        ax.set_axisbelow(True)
 
-    return fig, font_prop
+    return fig, ax, font_prop
+
+
+def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
+    """Convert epoch timestamps into aware datetimes based on config."""
+    dt_utc = datetime.fromtimestamp(ts, UTC)
+    if config.timezone == "utc":
+        return dt_utc
+    return dt_utc.astimezone()
+
+
+def _tz_label(config: GraphConfig) -> str:
+    return "UTC" if config.timezone == "utc" else "Local"
 
 
 def generate_week_barplot(

@@ -59,37 +76,37 @@ def generate_week_barplot(
         Matplotlib Figure object
     """
     cfg = config or get_default_config().graph
-    dates = [
+    dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
 
     weekday_counts: defaultdict[str, int] = defaultdict(int)
     for date in dates:
         weekday_counts[WEEKDAYS[date.weekday()]] += 1
 
-    x = WEEKDAYS
+    x = list(range(len(WEEKDAYS)))
     y = [weekday_counts[day] for day in WEEKDAYS]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
-    bars = ax.bar(x, y, color=cfg.color, alpha=0.
+    bars = ax.bar(x, y, color=cfg.color, alpha=0.85)
 
     if cfg.show_counts:
         for bar in bars:
             height = bar.get_height()
- [8 removed lines not captured in the source view]
+            if height > 0:
+                ax.text(
+                    bar.get_x() + bar.get_width() / 2.0,
+                    height,
+                    f"{int(height)}",
+                    ha="center",
+                    va="bottom",
+                    fontproperties=font_prop,
+                )
 
     ax.set_xlabel("Weekday", fontproperties=font_prop)
-    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
-    ax.set_xticks(
-    ax.set_xticklabels(
+    ax.set_xticks(x)
+    ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)
 
     for label in ax.get_yticklabels():
         label.set_fontproperties(font_prop)

@@ -114,7 +131,7 @@ def generate_hour_barplot(
         Matplotlib Figure object
     """
     cfg = config or get_default_config().graph
-    dates = [
+    dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
 
     hour_counts: dict[int, int] = dict.fromkeys(range(24), 0)
     for date in dates:

@@ -123,8 +140,7 @@ def generate_hour_barplot(
     x = [f"{i:02d}:00" for i in range(24)]
     y = [hour_counts[i] for i in range(24)]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     bars = ax.bar(range(24), y, color=cfg.color, alpha=0.8)
 

@@ -142,8 +158,8 @@
         fontsize=8,
     )
 
-    ax.set_xlabel("Hour of Day (
-    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+    ax.set_xlabel(f"Hour of Day ({_tz_label(cfg)})", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
     ax.set_xticks(range(24))
     ax.set_xticklabels(x, rotation=90, fontproperties=font_prop)

@@ -180,8 +196,7 @@ def generate_model_piechart(
     total = sum(model_counts.values())
     if total == 0:
         # Return empty figure or figure with "No Data"
-        fig, font_prop = _setup_figure(cfg)
-        ax = fig.gca()
+        fig, ax, font_prop = _setup_figure(cfg)
         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
         return fig
 

@@ -204,8 +219,7 @@ def generate_model_piechart(
     labels = [item[0] for item in sorted_items]
     sizes = [item[1] for item in sorted_items]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     colors = [
         "#4A90E2",

@@ -250,17 +264,16 @@ def generate_length_histogram(
     cfg = config or get_default_config().graph
     lengths = [conv.message_count("user") for conv in collection.conversations]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     if not lengths:
         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
         return fig
 
-    import numpy as np
-
     # Cap at 95th percentile to focus on most conversations
- [1 removed line not captured in the source view]
+    sorted_lengths = sorted(lengths)
+    idx = int(0.95 * (len(sorted_lengths) - 1))
+    cap = int(sorted_lengths[idx])
     cap = max(cap, 5)  # Ensure at least some range
 
     # Filter lengths for the histogram plot, but keep the data correct

@@ -306,10 +319,10 @@ def generate_monthly_activity_barplot(
     x = [m.strftime("%b '%y") for m in sorted_months]
     y = [len(month_groups[m].timestamps("user")) for m in sorted_months]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
- [1 removed line not captured in the source view]
+    positions = list(range(len(x)))
+    bars = ax.bar(positions, y, color=cfg.color, alpha=0.85)
 
     if cfg.show_counts:
         for bar in bars:

@@ -326,10 +339,12 @@ def generate_monthly_activity_barplot(
     )
 
     ax.set_xlabel("Month", fontproperties=font_prop)
-    ax.set_ylabel("
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
- [2 removed lines not captured in the source view]
+    tick_step = max(1, len(positions) // 12)  # show ~12 labels max
+    shown = positions[::tick_step] if positions else []
+    ax.set_xticks(shown)
+    ax.set_xticklabels([x[i] for i in shown], rotation=45, fontproperties=font_prop)
 
     for label in ax.get_yticklabels():
         label.set_fontproperties(font_prop)

@@ -338,6 +353,45 @@ def generate_monthly_activity_barplot(
     return fig
 
 
+def generate_daily_activity_lineplot(
+    collection: ConversationCollection,
+    config: GraphConfig | None = None,
+) -> Figure:
+    """Create a line chart showing user prompt count per day."""
+    cfg = config or get_default_config().graph
+    timestamps = collection.timestamps("user")
+
+    fig, ax, font_prop = _setup_figure(cfg)
+    if not timestamps:
+        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+        return fig
+
+    counts: defaultdict[datetime, int] = defaultdict(int)
+    for ts in timestamps:
+        dt = _ts_to_dt(ts, cfg)
+        day = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+        counts[day] += 1
+
+    days = sorted(counts.keys())
+    values = [counts[d] for d in days]
+
+    x = mdates.date2num(days)
+    ax.plot(x, values, color=cfg.color, linewidth=2.0)
+    ax.fill_between(x, values, color=cfg.color, alpha=0.15)
+    locator = mdates.AutoDateLocator()
+    ax.xaxis.set_major_locator(locator)
+    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
+    ax.set_title("Daily Activity History", fontproperties=font_prop, fontsize=16, pad=20)
+    ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+
+    for label in ax.get_xticklabels() + ax.get_yticklabels():
+        label.set_fontproperties(font_prop)
+
+    fig.tight_layout()
+    return fig
+
+
 def generate_summary_graphs(
     collection: ConversationCollection,
     output_dir: Path,

@@ -368,6 +422,10 @@ def generate_summary_graphs(
     fig_activity = generate_monthly_activity_barplot(collection, config)
     fig_activity.savefig(summary_dir / "monthly_activity.png")
 
+    # Daily activity
+    fig_daily = generate_daily_activity_lineplot(collection, config)
+    fig_daily.savefig(summary_dir / "daily_activity.png")
+
 
 def generate_graphs(
     collection: ConversationCollection,
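The timezone handling added in `_ts_to_dt` boils down to parsing epoch seconds as a UTC-aware datetime and only converting to the local zone when requested. A standalone illustration, stdlib only (the plain `timezone` string argument here stands in for the `GraphConfig.timezone` field):

```python
# Sketch of the timezone conversion used by the new graph helpers.
from datetime import UTC, datetime


def ts_to_dt(ts: float, timezone: str = "local") -> datetime:
    dt_utc = datetime.fromtimestamp(ts, UTC)  # always start from an aware UTC datetime
    if timezone == "utc":
        return dt_utc
    return dt_utc.astimezone()                # convert to the system's local zone


print(ts_to_dt(1_700_000_000.0, "utc").isoformat())  # e.g. 2023-11-14T22:13:20+00:00
print(ts_to_dt(1_700_000_000.0).isoformat())          # same instant, local offset
```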
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/wordcloud.py

@@ -62,7 +62,7 @@ def load_nltk_stopwords() -> frozenset[str]:
     return frozenset(words)
 
 
-def parse_custom_stopwords(stopwords_str: str) -> set[str]:
+def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
     """Parse a comma-separated string of custom stopwords.
 
     Args:
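The only change here is the widened signature: the function now tolerates `None`. The diff does not show the body, so the implementation below is an assumption, just to illustrate what a None-tolerant parse typically looks like:

```python
# Hypothetical body for a None-tolerant comma-separated stopword parser.
def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
    if not stopwords_str:          # handles both None and ""
        return set()
    return {w.strip().lower() for w in stopwords_str.split(",") if w.strip()}


assert parse_custom_stopwords(None) == set()
assert parse_custom_stopwords("The, quick,  Fox") == {"the", "quick", "fox"}
```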
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/interactive.py

@@ -7,7 +7,7 @@ from questionary import path as qst_path
 from questionary import text as qst_text
 
 from convoviz.config import ConvovizConfig, get_default_config
-from convoviz.io.loaders import find_latest_zip
+from convoviz.io.loaders import find_latest_zip, validate_zip
 from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
 
 CUSTOM_STYLE = Style(

@@ -26,6 +26,25 @@ CUSTOM_STYLE = Style(
 )
 
 
+def _validate_input_path(raw: str) -> bool | str:
+    path = Path(raw)
+    if not path.exists():
+        return "Path must exist"
+
+    if path.is_dir():
+        if (path / "conversations.json").exists():
+            return True
+        return "Directory must contain conversations.json"
+
+    if path.suffix.lower() == ".json":
+        return True
+
+    if path.suffix.lower() == ".zip":
+        return True if validate_zip(path) else "ZIP must contain conversations.json"
+
+    return "Input must be a .zip, a .json, or a directory containing conversations.json"
+
+
 def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
     """Run interactive prompts to configure convoviz.
 

@@ -49,9 +68,9 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     # Prompt for input path
     input_default = str(config.input_path) if config.input_path else ""
     input_result = qst_path(
-        "Enter the path to the
+        "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
         default=input_default,
-        validate=
+        validate=_validate_input_path,
         style=CUSTOM_STYLE,
     ).ask()
 
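The validator follows questionary's contract: returning `True` accepts the answer, returning a string rejects it and shows that string as the error message. A simplified standalone demonstration of that contract (the ZIP branch is omitted here, since it depends on convoviz's `validate_zip`; paths are illustrative):

```python
# Demonstrates the bool | str validator contract used with questionary prompts.
import tempfile
from pathlib import Path


def validate_input_path(raw: str) -> bool | str:
    path = Path(raw)
    if not path.exists():
        return "Path must exist"
    if path.is_dir():
        if (path / "conversations.json").exists():
            return True
        return "Directory must contain conversations.json"
    return True if path.suffix.lower() in {".json", ".zip"} else "Unsupported input"


with tempfile.TemporaryDirectory() as tmp:
    print(validate_input_path(tmp))                     # error string: no conversations.json yet
    (Path(tmp) / "conversations.json").write_text("[]")
    print(validate_input_path(tmp))                     # True
```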
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/loaders.py

@@ -1,6 +1,6 @@
 """Loading functions for conversations and collections."""
 
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from zipfile import ZipFile
 
 from orjson import loads

@@ -9,6 +9,27 @@ from convoviz.exceptions import InvalidZipError
 from convoviz.models import Conversation, ConversationCollection
 
 
+def _is_safe_zip_member_name(name: str) -> bool:
+    """Return True if a ZIP entry name is safe to extract.
+
+    This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
+    separators and rejects absolute paths, drive-letter paths, and ``..`` parts.
+    """
+    normalized = name.replace("\\", "/")
+    member_path = PurePosixPath(normalized)
+
+    # Absolute paths (e.g. "/etc/passwd") or empty names
+    if not normalized or member_path.is_absolute():
+        return False
+
+    # Windows drive letters / UNC-style prefixes stored in the archive
+    first = member_path.parts[0] if member_path.parts else ""
+    if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
+        return False
+
+    return ".." not in member_path.parts
+
+
 def extract_archive(filepath: Path) -> Path:
     """Extract a ZIP file and return the extraction folder path.
 

@@ -28,15 +49,17 @@ def extract_archive(filepath: Path) -> Path:
 
     with ZipFile(filepath) as zf:
         for member in zf.infolist():
-            # Check for path traversal (Zip-Slip)
- [2 removed lines not captured in the source view]
+            # Check for path traversal (Zip-Slip) in an OS-agnostic way.
+            # ZIP files are typically POSIX-path-like, but malicious archives can
+            # embed backslashes or drive-letter tricks.
+            if not _is_safe_zip_member_name(member.filename):
                 raise InvalidZipError(
                     str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
                 )
 
             # Additional check using resolved paths
- [1 removed line not captured in the source view]
+            normalized = member.filename.replace("\\", "/")
+            target_path = (folder / normalized).resolve()
             if not target_path.is_relative_to(folder.resolve()):
                 raise InvalidZipError(
                     str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
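A standalone sketch of the hardened Zip-Slip check, mirroring the logic above; the entry names are made-up attack strings, not data from a real export:

```python
# OS-agnostic check for dangerous ZIP member names (Zip-Slip defence).
from pathlib import PurePosixPath


def is_safe_zip_member_name(name: str) -> bool:
    normalized = name.replace("\\", "/")          # treat backslashes as separators too
    member_path = PurePosixPath(normalized)
    if not normalized or member_path.is_absolute():
        return False                              # empty or absolute path
    first = member_path.parts[0] if member_path.parts else ""
    if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
        return False                              # drive letter / UNC-style prefix
    return ".." not in member_path.parts          # no parent-directory escapes


for name in ["conversations.json", "../../etc/passwd", "C:\\Windows\\evil.dll", "/etc/passwd"]:
    print(f"{name!r}: {'ok' if is_safe_zip_member_name(name) else 'rejected'}")
```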
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/collection.py

@@ -37,14 +37,20 @@ class ConversationCollection(BaseModel):
     def update(self, other: "ConversationCollection") -> None:
         """Merge another collection into this one.
 
- [1 removed line not captured in the source view]
+        Merges per-conversation, keeping the newest version when IDs collide.
+
+        Note: We intentionally do *not* gate on ``other.last_updated`` because
+        "new" conversations can still have older timestamps than the most recent
+        conversation in this collection (e.g. bookmarklet downloads).
         """
- [2 removed lines not captured in the source view]
+        merged: dict[str, Conversation] = dict(self.index)
+
+        for conv_id, incoming in other.index.items():
+            existing = merged.get(conv_id)
+            if existing is None or incoming.update_time > existing.update_time:
+                merged[conv_id] = incoming
 
- [1 removed line not captured in the source view]
-        merged_index.update(other.index)
-        self.conversations = list(merged_index.values())
+        self.conversations = list(merged.values())
 
     def add(self, conversation: Conversation) -> None:
         """Add a conversation to the collection."""
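The new merge keeps, for each conversation id, whichever copy has the larger `update_time`, instead of blindly letting the incoming collection overwrite existing entries. A toy walk-through of that rule (`SimpleNamespace` stands in for the real `Conversation` model):

```python
# Keep-the-newest-per-id merge, illustrated on throwaway objects.
from types import SimpleNamespace

existing = {"a": SimpleNamespace(update_time=100), "b": SimpleNamespace(update_time=200)}
incoming = {"a": SimpleNamespace(update_time=150), "c": SimpleNamespace(update_time=50)}

merged = dict(existing)
for conv_id, conv in incoming.items():
    current = merged.get(conv_id)
    if current is None or conv.update_time > current.update_time:
        merged[conv_id] = conv

print({k: v.update_time for k, v in merged.items()})  # {'a': 150, 'b': 200, 'c': 50}
```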
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/conversation.py

@@ -98,12 +98,10 @@ class Conversation(BaseModel):
     def custom_instructions(self) -> dict[str, str]:
         """Get custom instructions used for this conversation."""
         system_nodes = self.nodes_by_author("system")
- [4 removed lines not captured in the source view]
-        if context_message and context_message.metadata.is_user_system_message:
-            return context_message.metadata.user_context_message_data or {}
+        for node in system_nodes:
+            context_message = node.message
+            if context_message and context_message.metadata.is_user_system_message:
+                return context_message.metadata.user_context_message_data or {}
         return {}
 
     def timestamps(self, *authors: AuthorRole) -> list[float]:
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/message.py

@@ -6,7 +6,7 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
 from datetime import datetime
 from typing import Any, Literal
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 from convoviz.exceptions import MessageContentError
 

@@ -18,7 +18,7 @@ class MessageAuthor(BaseModel):
 
     role: AuthorRole
     name: str | None = None
-    metadata: dict[str, Any] =
+    metadata: dict[str, Any] = Field(default_factory=dict)
 
 
 class MessageContent(BaseModel):

@@ -55,8 +55,8 @@ class Message(BaseModel):
     status: str
     end_turn: bool | None = None
     weight: float
-    metadata: MessageMetadata
-    recipient: str
+    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
+    recipient: str | None = None
 
     @property
     def images(self) -> list[str]:

@@ -117,3 +117,41 @@
         return bool(
             self.content.parts or self.content.text is not None or self.content.result is not None
         )
+
+    @property
+    def is_empty(self) -> bool:
+        """Check if the message is effectively empty (no text, no images)."""
+        try:
+            return not self.text.strip() and not self.images
+        except MessageContentError:
+            return True
+
+    @property
+    def is_hidden(self) -> bool:
+        """Check if message should be hidden in export.
+
+        Hidden if:
+        1. It is empty (no text, no images).
+        2. It is an internal system message (not custom instructions).
+        3. It is a browser tool output (intermediate search steps).
+        """
+        if self.is_empty:
+            return True
+
+        # Hide internal system messages
+        if self.author.role == "system":
+            # Only show if explicitly marked as user system message (Custom Instructions)
+            return not self.metadata.is_user_system_message
+
+        # Hide browser tool outputs (usually intermediate search steps)
+        if self.author.role == "tool" and self.author.name == "browser":
+            return True
+
+        # Hide assistant calls to browser tool (e.g. "search(...)") or code interpreter
+        if self.author.role == "assistant" and (
+            self.recipient == "browser" or self.content.content_type == "code"
+        ):
+            return True
+
+        # Hide browsing status messages
+        return self.content.content_type == "tether_browsing_display"
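The `is_hidden` rules can be restated compactly as a standalone predicate. The sketch below uses plain dicts instead of the pydantic `Message` model, with field names mirroring the diff, just to show how the rules combine:

```python
# Compact restatement of the is_hidden rules on plain dicts (illustrative only).
def is_hidden(msg: dict) -> bool:
    if not msg.get("text", "").strip() and not msg.get("images"):
        return True                                            # empty message
    if msg["role"] == "system":
        return not msg.get("is_user_system_message", False)    # keep only Custom Instructions
    if msg["role"] == "tool" and msg.get("name") == "browser":
        return True                                            # intermediate browsing output
    if msg["role"] == "assistant" and (
        msg.get("recipient") == "browser" or msg.get("content_type") == "code"
    ):
        return True                                            # tool call, not user-facing text
    return msg.get("content_type") == "tether_browsing_display"


print(is_hidden({"role": "system", "text": "You are...", "is_user_system_message": True}))  # False
print(is_hidden({"role": "tool", "name": "browser", "text": "search results..."}))           # True
```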
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/pipeline.py

@@ -19,6 +19,18 @@ from convoviz.io.writers import save_collection, save_custom_instructions
 console = Console()
 
 
+def _safe_uri(path: Path) -> str:
+    """Best-effort URI for printing.
+
+    ``Path.as_uri()`` requires an absolute path; users often provide relative
+    output paths, so we resolve first and fall back to string form.
+    """
+    try:
+        return path.resolve().as_uri()
+    except Exception:
+        return str(path)
+
+
 def run_pipeline(config: ConvovizConfig) -> None:
     """Run the main processing pipeline.
 

@@ -72,8 +84,14 @@ def run_pipeline(config: ConvovizConfig) -> None:
     managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
     for d in managed_dirs:
         sub_dir = output_folder / d
-        if sub_dir.exists()
- [1 removed line not captured in the source view]
+        if sub_dir.exists():
+            # Never follow symlinks; just unlink them.
+            if sub_dir.is_symlink():
+                sub_dir.unlink()
+            elif sub_dir.is_dir():
+                rmtree(sub_dir)
+            else:
+                sub_dir.unlink()
         sub_dir.mkdir(exist_ok=True)
 
     # Clean specific files we manage

@@ -81,7 +99,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
     for f in managed_files:
         managed_file = output_folder / f
         if managed_file.exists():
-            managed_file.
+            if managed_file.is_symlink() or managed_file.is_file():
+                managed_file.unlink()
+            elif managed_file.is_dir():
+                rmtree(managed_file)
+            else:
+                managed_file.unlink()
 
     # Save markdown files
     markdown_folder = output_folder / "Markdown"

@@ -94,7 +117,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📄[/bold blue] here: {markdown_folder
+        f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
     )
 
     # Generate graphs

@@ -108,7 +131,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📈[/bold blue] here: {graph_folder
+        f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
     )
 
     # Generate word clouds

@@ -122,7 +145,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]🔡☁️[/bold blue] here: {wordcloud_folder
+        f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
     )
 
     # Save custom instructions

@@ -131,12 +154,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
     save_custom_instructions(collection, instructions_path)
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📝[/bold blue] here: {instructions_path
+        f"Check the output [bold blue]📝[/bold blue] here: {_safe_uri(instructions_path)} 🔗\n"
     )
 
     console.print(
         "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
-        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder
+        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
         "I hope you enjoy the outcome 🤞.\n\n"
         "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
        "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
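The `_safe_uri` helper exists because `Path.as_uri()` raises `ValueError` on relative paths; resolving first (and falling back to plain string form) keeps the console output working no matter what the user passed as `--output`. Standalone sketch:

```python
# Best-effort file:// URI for printing, with a plain-string fallback.
from pathlib import Path


def safe_uri(path: Path) -> str:
    try:
        return path.resolve().as_uri()   # resolve() makes relative paths absolute
    except Exception:
        return str(path)                 # last resort: just show the path as given


print(safe_uri(Path("output/Markdown")))  # e.g. file:///home/user/project/output/Markdown
```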
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/markdown.py

@@ -4,6 +4,7 @@ import re
 from collections.abc import Callable
 
 from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.exceptions import MessageContentError
 from convoviz.models import Conversation, Node
 from convoviz.renderers.yaml import render_yaml_header
 

@@ -154,32 +155,67 @@ def render_node(
     if node.message is None:
         return ""
 
+    if node.message.is_hidden:
+        return ""
+
     header = render_node_header(node, headers, flavor=flavor)
 
     # Get and process content
     try:
- [16 removed lines not captured in the source view]
+        text = node.message.text
+    except MessageContentError:
+        # Some message types only contain non-text parts; those still may have images.
+        text = ""
+
+    content = close_code_blocks(text)
+    content = f"\n{content}\n" if content else ""
+    if use_dollar_latex:
+        content = replace_latex_delimiters(content)
+
+    # Append images if resolver is provided and images exist
+    if asset_resolver and node.message.images:
+        for image_id in node.message.images:
+            rel_path = asset_resolver(image_id)
+            if rel_path:
+                # Using standard markdown image syntax.
+                # Obsidian handles this well.
+                content += f"\n\n"
 
     footer = render_node_footer(node, flavor=flavor)
 
     return f"\n{header}{content}{footer}\n---\n"
 
 
+def _ordered_nodes(conversation: Conversation) -> list[Node]:
+    """Return nodes in a deterministic depth-first traversal order.
+
+    ChatGPT exports store nodes in a mapping; dict iteration order is not a
+    reliable semantic ordering. For markdown output, we traverse from roots.
+    """
+    mapping = conversation.node_mapping
+    roots = sorted((n for n in mapping.values() if n.parent is None), key=lambda n: n.id)
+
+    visited: set[str] = set()
+    ordered: list[Node] = []
+
+    def dfs(node: Node) -> None:
+        if node.id in visited:
+            return
+        visited.add(node.id)
+        ordered.append(node)
+        for child in node.children_nodes:
+            dfs(child)
+
+    for root in roots:
+        dfs(root)
+
+    # Include any disconnected/orphan nodes deterministically at the end.
+    for node in sorted(mapping.values(), key=lambda n: n.id):
+        dfs(node)
+
+    return ordered
+
+
 def render_conversation(
     conversation: Conversation,
     config: ConversationConfig,

@@ -203,8 +239,8 @@ def render_conversation(
     # Start with YAML header
     markdown = render_yaml_header(conversation, config.yaml)
 
-    # Render
-    for node in conversation
+    # Render message nodes in a deterministic traversal order.
+    for node in _ordered_nodes(conversation):
         if node.message:
             markdown += render_node(
                 node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
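The point of `_ordered_nodes` is that the export's node mapping is a dict keyed by id, so iteration order is not a meaningful conversation order; traversing depth-first from parentless roots (sorted by id) makes the Markdown output deterministic. A toy walk-through of that idea on a hand-made mapping:

```python
# Deterministic depth-first ordering over a tiny node mapping (illustrative data).
mapping = {
    "b": {"parent": None, "children": ["d"]},
    "a": {"parent": None, "children": ["c"]},
    "c": {"parent": "a", "children": []},
    "d": {"parent": "b", "children": []},
}

visited: set[str] = set()
ordered: list[str] = []


def dfs(node_id: str) -> None:
    if node_id in visited:
        return
    visited.add(node_id)
    ordered.append(node_id)
    for child in mapping[node_id]["children"]:
        dfs(child)


for root in sorted(nid for nid, node in mapping.items() if node["parent"] is None):
    dfs(root)

print(ordered)  # ['a', 'c', 'b', 'd'] regardless of dict insertion order
```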
convoviz-0.2.4/convoviz/renderers/yaml.py (new file)

@@ -0,0 +1,119 @@
+"""YAML frontmatter rendering for conversations."""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime
+
+from convoviz.config import YAMLConfig
+from convoviz.models import Conversation
+
+_TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")
+
+
+def _to_yaml_scalar(value: object) -> str:
+    if value is None:
+        return "null"
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return str(value)
+    if isinstance(value, datetime):
+        # Frontmatter consumers generally expect ISO 8601 strings
+        return f'"{value.isoformat()}"'
+    if isinstance(value, str):
+        if "\n" in value:
+            # Multiline: use a block scalar
+            indented = "\n".join(f"  {line}" for line in value.splitlines())
+            return f"|-\n{indented}"
+        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+        return f'"{escaped}"'
+
+    # Fallback: stringify and quote
+    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
+    return f'"{escaped}"'
+
+
+def _to_yaml(value: object, indent: int = 0) -> str:
+    pad = " " * indent
+
+    if isinstance(value, dict):
+        lines: list[str] = []
+        for k, v in value.items():
+            key = str(k)
+            if isinstance(v, (dict, list)):
+                lines.append(f"{pad}{key}:")
+                lines.append(_to_yaml(v, indent=indent + 2))
+            else:
+                scalar = _to_yaml_scalar(v)
+                # Block scalars already include newline + indentation
+                if scalar.startswith("|-"):
+                    lines.append(f"{pad}{key}: {scalar.splitlines()[0]}")
+                    lines.extend(f"{pad}{line}" for line in scalar.splitlines()[1:])
+                else:
+                    lines.append(f"{pad}{key}: {scalar}")
+        return "\n".join(lines)
+
+    if isinstance(value, list):
+        lines = []
+        for item in value:
+            if isinstance(item, (dict, list)):
+                lines.append(f"{pad}-")
+                lines.append(_to_yaml(item, indent=indent + 2))
+            else:
+                lines.append(f"{pad}- {_to_yaml_scalar(item)}")
+        return "\n".join(lines)
+
+    return f"{pad}{_to_yaml_scalar(value)}"
+
+
+def _default_tags(conversation: Conversation) -> list[str]:
+    tags: list[str] = ["chatgpt"]
+    tags.extend(conversation.plugins)
+    # Normalize to a tag-friendly form
+    normalized: list[str] = []
+    for t in tags:
+        t2 = _TAG_SAFE_RE.sub("-", t.strip().lower()).strip("-")
+        if t2 and t2 not in normalized:
+            normalized.append(t2)
+    return normalized
+
+
+def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
+    """Render the YAML frontmatter for a conversation.
+
+    Args:
+        conversation: The conversation to render
+        config: YAML configuration specifying which fields to include
+
+    Returns:
+        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
+    """
+    yaml_fields: dict[str, object] = {}
+
+    if config.title:
+        yaml_fields["title"] = conversation.title
+    if config.tags:
+        yaml_fields["tags"] = _default_tags(conversation)
+    if config.chat_link:
+        yaml_fields["chat_link"] = conversation.url
+    if config.create_time:
+        yaml_fields["create_time"] = conversation.create_time
+    if config.update_time:
+        yaml_fields["update_time"] = conversation.update_time
+    if config.model:
+        yaml_fields["model"] = conversation.model
+    if config.used_plugins:
+        yaml_fields["used_plugins"] = conversation.plugins
+    if config.message_count:
+        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
+    if config.content_types:
+        yaml_fields["content_types"] = conversation.content_types
+    if config.custom_instructions:
+        yaml_fields["custom_instructions"] = conversation.custom_instructions
+
+    if not yaml_fields:
+        return ""
+
+    body = _to_yaml(yaml_fields)
+    return f"---\n{body}\n---\n"
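Why this rewrite matters: the old renderer (removed further down) emitted `key: value` verbatim, so a conversation title containing a colon, quotes, or a newline produced invalid frontmatter. The new scalar quoting avoids that; a minimal standalone sketch of the same quoting idea, restricted to strings:

```python
# Quoting/escaping a string for YAML frontmatter, in the spirit of _to_yaml_scalar.
def to_yaml_scalar(value: str) -> str:
    if "\n" in value:
        indented = "\n".join(f"  {line}" for line in value.splitlines())
        return f"|-\n{indented}"                                   # block scalar for multiline text
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'                                          # double-quoted single-line scalar


print("title: " + to_yaml_scalar('Fix: "quotes" & colons'))
print("notes: " + to_yaml_scalar("line one\nline two"))
```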
{convoviz-0.2.3 → convoviz-0.2.4}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "convoviz"
-version = "0.2.3"
+version = "0.2.4"
 description = "Get analytics and visualizations on your ChatGPT data!"
 license = "MIT"
 keywords = [

@@ -48,8 +48,7 @@ source-exclude = [
     ".vscode",
     ".gitattributes",
     ".gitignore",
-    "
-    "NEXT_STEPS.md",
+    "dev",
     "playground.ipynb",
     "pyproject.toml.bak",
     "uv.lock",
convoviz-0.2.3/convoviz/renderers/yaml.py (removed)

@@ -1,42 +0,0 @@
-"""YAML frontmatter rendering for conversations."""
-
-from convoviz.config import YAMLConfig
-from convoviz.models import Conversation
-
-
-def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
-    """Render the YAML frontmatter for a conversation.
-
-    Args:
-        conversation: The conversation to render
-        config: YAML configuration specifying which fields to include
-
-    Returns:
-        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
-    """
-    yaml_fields: dict[str, object] = {}
-
-    if config.title:
-        yaml_fields["title"] = conversation.title
-    if config.chat_link:
-        yaml_fields["chat_link"] = conversation.url
-    if config.create_time:
-        yaml_fields["create_time"] = conversation.create_time
-    if config.update_time:
-        yaml_fields["update_time"] = conversation.update_time
-    if config.model:
-        yaml_fields["model"] = conversation.model
-    if config.used_plugins:
-        yaml_fields["used_plugins"] = conversation.plugins
-    if config.message_count:
-        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
-    if config.content_types:
-        yaml_fields["content_types"] = conversation.content_types
-    if config.custom_instructions:
-        yaml_fields["custom_instructions"] = conversation.custom_instructions
-
-    if not yaml_fields:
-        return ""
-
-    lines = [f"{key}: {value}" for key, value in yaml_fields.items()]
-    return f"---\n{chr(10).join(lines)}\n---\n"