convoviz 0.2.12__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/__init__.py +10 -1
- convoviz/analysis/__init__.py +16 -3
- convoviz/analysis/graphs.py +30 -6
- convoviz/analysis/wordcloud.py +68 -29
- convoviz/cli.py +51 -3
- convoviz/config.py +21 -4
- convoviz/interactive.py +123 -119
- convoviz/io/assets.py +12 -1
- convoviz/io/loaders.py +5 -0
- convoviz/io/writers.py +7 -3
- convoviz/logging_config.py +69 -0
- convoviz/models/conversation.py +18 -0
- convoviz/models/message.py +81 -5
- convoviz/pipeline.py +90 -73
- convoviz/renderers/markdown.py +96 -3
- convoviz/renderers/yaml.py +4 -0
- convoviz-0.4.7.dist-info/METADATA +233 -0
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/RECORD +20 -19
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/WHEEL +2 -2
- convoviz-0.2.12.dist-info/METADATA +0 -148
- {convoviz-0.2.12.dist-info → convoviz-0.4.7.dist-info}/entry_points.txt +0 -0
convoviz/interactive.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Interactive configuration prompts using questionary."""
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import Literal, Protocol, cast
|
|
5
6
|
|
|
@@ -7,10 +8,16 @@ from questionary import Choice, Style, checkbox, select
|
|
|
7
8
|
from questionary import path as qst_path
|
|
8
9
|
from questionary import text as qst_text
|
|
9
10
|
|
|
10
|
-
from convoviz.config import ConvovizConfig, get_default_config
|
|
11
|
+
from convoviz.config import ConvovizConfig, OutputKind, YAMLConfig, get_default_config
|
|
11
12
|
from convoviz.io.loaders import find_latest_zip, validate_zip
|
|
12
13
|
from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
|
|
13
14
|
|
|
15
|
+
OUTPUT_TITLES = {
|
|
16
|
+
OutputKind.MARKDOWN: "Markdown conversations",
|
|
17
|
+
OutputKind.GRAPHS: "Graphs (usage analytics)",
|
|
18
|
+
OutputKind.WORDCLOUDS: "Word clouds",
|
|
19
|
+
}
|
|
20
|
+
|
|
14
21
|
CUSTOM_STYLE = Style(
|
|
15
22
|
[
|
|
16
23
|
("qmark", "fg:#34eb9b bold"),
|
|
@@ -26,6 +33,9 @@ CUSTOM_STYLE = Style(
|
|
|
26
33
|
]
|
|
27
34
|
)
|
|
28
35
|
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
29
39
|
class _QuestionaryPrompt[T](Protocol):
|
|
30
40
|
def ask(self) -> T | None: ...
|
|
31
41
|
|
|
@@ -73,6 +83,7 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
73
83
|
Updated configuration based on user input
|
|
74
84
|
"""
|
|
75
85
|
config = initial_config or get_default_config()
|
|
86
|
+
logger.info("Starting interactive configuration")
|
|
76
87
|
|
|
77
88
|
# Set sensible defaults if not already set
|
|
78
89
|
if not config.input_path:
|
|
@@ -87,7 +98,7 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
87
98
|
input_default = str(config.input_path) if config.input_path else ""
|
|
88
99
|
input_result: str = _ask_or_cancel(
|
|
89
100
|
qst_path(
|
|
90
|
-
"Enter the path to the export ZIP, conversations JSON, or extracted directory:",
|
|
101
|
+
"Enter the path to the export ZIP:", # , conversations JSON, or extracted directory:",
|
|
91
102
|
default=input_default,
|
|
92
103
|
validate=_validate_input_path,
|
|
93
104
|
style=CUSTOM_STYLE,
|
|
@@ -96,6 +107,7 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
96
107
|
|
|
97
108
|
if input_result:
|
|
98
109
|
config.input_path = Path(input_result)
|
|
110
|
+
logger.debug(f"User selected input: {config.input_path}")
|
|
99
111
|
|
|
100
112
|
# Prompt for output folder
|
|
101
113
|
output_result: str = _ask_or_cancel(
|
|
@@ -108,140 +120,132 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
108
120
|
|
|
109
121
|
if output_result:
|
|
110
122
|
config.output_folder = Path(output_result)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
default=current,
|
|
120
|
-
validate=lambda t: validate_header(t)
|
|
121
|
-
or "Must be a valid markdown header (e.g., # Title)",
|
|
122
|
-
style=CUSTOM_STYLE,
|
|
123
|
-
)
|
|
123
|
+
logger.debug(f"User selected output: {config.output_folder}")
|
|
124
|
+
|
|
125
|
+
# Prompt for outputs to generate
|
|
126
|
+
output_choices = [
|
|
127
|
+
Choice(
|
|
128
|
+
title=OUTPUT_TITLES.get(kind, kind.value.title()),
|
|
129
|
+
value=kind,
|
|
130
|
+
checked=kind in config.outputs,
|
|
124
131
|
)
|
|
125
|
-
|
|
126
|
-
setattr(headers, role, result)
|
|
127
|
-
|
|
128
|
-
# Prompt for LaTeX delimiters
|
|
129
|
-
latex_result = cast(
|
|
130
|
-
Literal["default", "dollars"],
|
|
131
|
-
_ask_or_cancel(
|
|
132
|
-
select(
|
|
133
|
-
"Select the LaTeX math delimiters:",
|
|
134
|
-
choices=["default", "dollars"],
|
|
135
|
-
default=config.conversation.markdown.latex_delimiters,
|
|
136
|
-
style=CUSTOM_STYLE,
|
|
137
|
-
)
|
|
138
|
-
),
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
if latex_result:
|
|
142
|
-
config.conversation.markdown.latex_delimiters = latex_result
|
|
143
|
-
|
|
144
|
-
# Prompt for markdown flavor
|
|
145
|
-
flavor_result = cast(
|
|
146
|
-
Literal["standard", "obsidian"],
|
|
147
|
-
_ask_or_cancel(
|
|
148
|
-
select(
|
|
149
|
-
"Select the markdown flavor:",
|
|
150
|
-
choices=["standard", "obsidian"],
|
|
151
|
-
default=config.conversation.markdown.flavor,
|
|
152
|
-
style=CUSTOM_STYLE,
|
|
153
|
-
)
|
|
154
|
-
),
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
if flavor_result:
|
|
158
|
-
config.conversation.markdown.flavor = flavor_result
|
|
159
|
-
|
|
160
|
-
# Prompt for YAML headers
|
|
161
|
-
yaml_config = config.conversation.yaml
|
|
162
|
-
yaml_choices = [
|
|
163
|
-
Choice(title=field, checked=getattr(yaml_config, field))
|
|
164
|
-
for field in [
|
|
165
|
-
"title",
|
|
166
|
-
"tags",
|
|
167
|
-
"chat_link",
|
|
168
|
-
"create_time",
|
|
169
|
-
"update_time",
|
|
170
|
-
"model",
|
|
171
|
-
"used_plugins",
|
|
172
|
-
"message_count",
|
|
173
|
-
"content_types",
|
|
174
|
-
"custom_instructions",
|
|
175
|
-
]
|
|
132
|
+
for kind in OutputKind
|
|
176
133
|
]
|
|
177
134
|
|
|
178
|
-
|
|
135
|
+
selected_outputs: list[OutputKind] = _ask_or_cancel(
|
|
179
136
|
checkbox(
|
|
180
|
-
"Select
|
|
181
|
-
choices=
|
|
137
|
+
"Select outputs to generate:",
|
|
138
|
+
choices=output_choices,
|
|
182
139
|
style=CUSTOM_STYLE,
|
|
183
140
|
)
|
|
184
141
|
)
|
|
185
142
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
"
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
if available_fonts:
|
|
204
|
-
current_font = (
|
|
205
|
-
config.wordcloud.font_path.stem if config.wordcloud.font_path else available_fonts[0]
|
|
206
|
-
)
|
|
207
|
-
font_result: str = _ask_or_cancel(
|
|
208
|
-
select(
|
|
209
|
-
"Select the font for word clouds:",
|
|
210
|
-
choices=available_fonts,
|
|
211
|
-
default=current_font if current_font in available_fonts else available_fonts[0],
|
|
212
|
-
style=CUSTOM_STYLE,
|
|
143
|
+
config.outputs = set(selected_outputs) if selected_outputs else set()
|
|
144
|
+
logger.debug(f"User selected outputs: {config.outputs}")
|
|
145
|
+
|
|
146
|
+
# Prompt for markdown settings (only if markdown output is selected)
|
|
147
|
+
if OutputKind.MARKDOWN in config.outputs:
|
|
148
|
+
# Prompt for author headers
|
|
149
|
+
headers = config.message.author_headers
|
|
150
|
+
for role in ["user", "assistant"]:
|
|
151
|
+
current = getattr(headers, role)
|
|
152
|
+
result: str = _ask_or_cancel(
|
|
153
|
+
qst_text(
|
|
154
|
+
f"Enter the message header for '{role}':",
|
|
155
|
+
default=current,
|
|
156
|
+
validate=lambda t: validate_header(t)
|
|
157
|
+
or "Must be a valid markdown header (e.g., # Title)",
|
|
158
|
+
style=CUSTOM_STYLE,
|
|
159
|
+
)
|
|
213
160
|
)
|
|
161
|
+
if result:
|
|
162
|
+
setattr(headers, role, result)
|
|
163
|
+
logger.debug(f"User selected headers: {headers}")
|
|
164
|
+
|
|
165
|
+
# Prompt for markdown flavor
|
|
166
|
+
flavor_result = cast(
|
|
167
|
+
Literal["standard", "obsidian"],
|
|
168
|
+
_ask_or_cancel(
|
|
169
|
+
select(
|
|
170
|
+
"Select the markdown flavor:",
|
|
171
|
+
choices=["standard", "obsidian"],
|
|
172
|
+
default=config.conversation.markdown.flavor,
|
|
173
|
+
style=CUSTOM_STYLE,
|
|
174
|
+
)
|
|
175
|
+
),
|
|
214
176
|
)
|
|
215
177
|
|
|
216
|
-
if
|
|
217
|
-
config.
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
178
|
+
if flavor_result:
|
|
179
|
+
config.conversation.markdown.flavor = flavor_result
|
|
180
|
+
logger.debug(f"User selected flavor: {config.conversation.markdown.flavor}")
|
|
181
|
+
|
|
182
|
+
# Prompt for YAML headers
|
|
183
|
+
yaml_config = config.conversation.yaml
|
|
184
|
+
yaml_fields = list(YAMLConfig.model_fields.keys())
|
|
185
|
+
yaml_choices = [
|
|
186
|
+
Choice(title=field, checked=getattr(yaml_config, field)) for field in yaml_fields
|
|
187
|
+
]
|
|
188
|
+
|
|
189
|
+
selected: list[str] = _ask_or_cancel(
|
|
190
|
+
checkbox(
|
|
191
|
+
"Select YAML metadata headers to include:",
|
|
192
|
+
choices=yaml_choices,
|
|
229
193
|
style=CUSTOM_STYLE,
|
|
230
194
|
)
|
|
231
195
|
)
|
|
232
196
|
|
|
233
|
-
|
|
234
|
-
|
|
197
|
+
selected_set = set(selected)
|
|
198
|
+
for field_name in yaml_fields:
|
|
199
|
+
setattr(yaml_config, field_name, field_name in selected_set)
|
|
200
|
+
|
|
201
|
+
# Prompt for wordcloud settings (only if wordclouds output is selected)
|
|
202
|
+
if OutputKind.WORDCLOUDS in config.outputs:
|
|
203
|
+
# Prompt for font
|
|
204
|
+
available_fonts = font_names()
|
|
205
|
+
if available_fonts:
|
|
206
|
+
current_font = (
|
|
207
|
+
config.wordcloud.font_path.stem
|
|
208
|
+
if config.wordcloud.font_path
|
|
209
|
+
else available_fonts[0]
|
|
210
|
+
)
|
|
211
|
+
font_result: str = _ask_or_cancel(
|
|
212
|
+
select(
|
|
213
|
+
"Select the font for word clouds:",
|
|
214
|
+
choices=available_fonts,
|
|
215
|
+
default=current_font if current_font in available_fonts else available_fonts[0],
|
|
216
|
+
style=CUSTOM_STYLE,
|
|
217
|
+
)
|
|
218
|
+
)
|
|
235
219
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
220
|
+
if font_result:
|
|
221
|
+
config.wordcloud.font_path = font_path(font_result)
|
|
222
|
+
|
|
223
|
+
# Prompt for colormap
|
|
224
|
+
available_colormaps = colormaps()
|
|
225
|
+
if available_colormaps:
|
|
226
|
+
colormap_result: str = _ask_or_cancel(
|
|
227
|
+
select(
|
|
228
|
+
"Select the color theme for word clouds:",
|
|
229
|
+
choices=available_colormaps,
|
|
230
|
+
default=config.wordcloud.colormap
|
|
231
|
+
if config.wordcloud.colormap in available_colormaps
|
|
232
|
+
else available_colormaps[0],
|
|
233
|
+
style=CUSTOM_STYLE,
|
|
234
|
+
)
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
if colormap_result:
|
|
238
|
+
config.wordcloud.colormap = colormap_result
|
|
239
|
+
|
|
240
|
+
# Prompt for custom stopwords
|
|
241
|
+
stopwords_result: str = _ask_or_cancel(
|
|
242
|
+
qst_text(
|
|
243
|
+
"Enter custom stopwords (comma-separated):",
|
|
244
|
+
default=config.wordcloud.custom_stopwords,
|
|
245
|
+
style=CUSTOM_STYLE,
|
|
246
|
+
)
|
|
242
247
|
)
|
|
243
|
-
)
|
|
244
248
|
|
|
245
|
-
|
|
249
|
+
config.wordcloud.custom_stopwords = stopwords_result
|
|
246
250
|
|
|
247
251
|
return config
|
convoviz/io/assets.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
"Asset management functions."
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import shutil
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
6
9
|
|
|
7
10
|
def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
8
11
|
"""Find the actual file for a given asset ID in the source directory.
|
|
@@ -26,6 +29,7 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
|
26
29
|
# 1. Try exact match
|
|
27
30
|
exact_path = (source_dir / asset_id).resolve()
|
|
28
31
|
if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
|
|
32
|
+
logger.debug(f"Resolved asset (exact): {asset_id} -> {exact_path}")
|
|
29
33
|
return exact_path
|
|
30
34
|
|
|
31
35
|
# 2. Try prefix match in root
|
|
@@ -37,6 +41,7 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
|
37
41
|
if p.is_file() and p.resolve().is_relative_to(source_dir)
|
|
38
42
|
]
|
|
39
43
|
if files:
|
|
44
|
+
logger.debug(f"Resolved asset (prefix root): {asset_id} -> {files[0]}")
|
|
40
45
|
return files[0]
|
|
41
46
|
except Exception:
|
|
42
47
|
pass
|
|
@@ -53,6 +58,7 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
|
53
58
|
if p.is_file() and p.resolve().is_relative_to(dalle_dir)
|
|
54
59
|
]
|
|
55
60
|
if files:
|
|
61
|
+
logger.debug(f"Resolved asset (dalle): {asset_id} -> {files[0]}")
|
|
56
62
|
return files[0]
|
|
57
63
|
except Exception:
|
|
58
64
|
pass
|
|
@@ -69,6 +75,7 @@ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
|
69
75
|
if p.is_file() and p.resolve().is_relative_to(user_dir)
|
|
70
76
|
]
|
|
71
77
|
if files:
|
|
78
|
+
logger.debug(f"Resolved asset (user dir): {asset_id} -> {files[0]}")
|
|
72
79
|
return files[0]
|
|
73
80
|
except Exception:
|
|
74
81
|
pass
|
|
@@ -92,7 +99,11 @@ def copy_asset(source_path: Path, dest_dir: Path) -> str:
|
|
|
92
99
|
dest_path = assets_dir / source_path.name
|
|
93
100
|
|
|
94
101
|
if not dest_path.exists():
|
|
95
|
-
|
|
102
|
+
try:
|
|
103
|
+
shutil.copy2(source_path, dest_path)
|
|
104
|
+
logger.debug(f"Copied asset: {source_path.name}")
|
|
105
|
+
except Exception as e:
|
|
106
|
+
logger.warning(f"Failed to copy asset {source_path}: {e}")
|
|
96
107
|
|
|
97
108
|
# Return forward-slash path for Markdown compatibility even on Windows
|
|
98
109
|
return f"assets/{source_path.name}"
|
convoviz/io/loaders.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Loading functions for conversations and collections."""
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from pathlib import Path, PurePosixPath
|
|
4
5
|
from zipfile import ZipFile
|
|
5
6
|
|
|
@@ -8,6 +9,8 @@ from orjson import loads
|
|
|
8
9
|
from convoviz.exceptions import InvalidZipError
|
|
9
10
|
from convoviz.models import Conversation, ConversationCollection
|
|
10
11
|
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
11
14
|
|
|
12
15
|
def _is_safe_zip_member_name(name: str) -> bool:
|
|
13
16
|
"""Return True if a ZIP entry name is safe to extract.
|
|
@@ -46,6 +49,7 @@ def extract_archive(filepath: Path) -> Path:
|
|
|
46
49
|
"""
|
|
47
50
|
folder = filepath.with_suffix("")
|
|
48
51
|
folder.mkdir(parents=True, exist_ok=True)
|
|
52
|
+
logger.info(f"Extracting archive: {filepath} to {folder}")
|
|
49
53
|
|
|
50
54
|
with ZipFile(filepath) as zf:
|
|
51
55
|
for member in zf.infolist():
|
|
@@ -115,6 +119,7 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
|
|
|
115
119
|
Loaded ConversationCollection object
|
|
116
120
|
"""
|
|
117
121
|
filepath = Path(filepath)
|
|
122
|
+
logger.debug(f"Loading collection from JSON: {filepath}")
|
|
118
123
|
with filepath.open(encoding="utf-8") as f:
|
|
119
124
|
data = loads(f.read())
|
|
120
125
|
|
convoviz/io/writers.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Writing functions for conversations and collections."""
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from os import utime as os_utime
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from urllib.parse import quote
|
|
@@ -13,6 +14,8 @@ from convoviz.models import Conversation, ConversationCollection
|
|
|
13
14
|
from convoviz.renderers import render_conversation
|
|
14
15
|
from convoviz.utils import sanitize
|
|
15
16
|
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
16
19
|
# Month names for folder naming
|
|
17
20
|
_MONTH_NAMES = [
|
|
18
21
|
"January",
|
|
@@ -102,6 +105,7 @@ def save_conversation(
|
|
|
102
105
|
markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
|
|
103
106
|
with final_path.open("w", encoding="utf-8") as f:
|
|
104
107
|
f.write(markdown)
|
|
108
|
+
logger.debug(f"Saved conversation: {final_path}")
|
|
105
109
|
|
|
106
110
|
# Set modification time
|
|
107
111
|
timestamp = conversation.update_time.timestamp()
|
|
@@ -135,6 +139,7 @@ def _generate_year_index(year_dir: Path, year: str) -> None:
|
|
|
135
139
|
|
|
136
140
|
index_path = year_dir / "_index.md"
|
|
137
141
|
index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
142
|
+
logger.debug(f"Generated year index: {index_path}")
|
|
138
143
|
|
|
139
144
|
|
|
140
145
|
def _generate_month_index(month_dir: Path, year: str, month: str) -> None:
|
|
@@ -146,9 +151,7 @@ def _generate_month_index(month_dir: Path, year: str, month: str) -> None:
|
|
|
146
151
|
month: The month folder name (e.g., "03-March")
|
|
147
152
|
"""
|
|
148
153
|
month_name = month.split("-", 1)[1] if "-" in month else month
|
|
149
|
-
files = sorted(
|
|
150
|
-
[f.name for f in month_dir.glob("*.md") if f.name != "_index.md"]
|
|
151
|
-
)
|
|
154
|
+
files = sorted([f.name for f in month_dir.glob("*.md") if f.name != "_index.md"])
|
|
152
155
|
|
|
153
156
|
lines = [
|
|
154
157
|
f"# {month_name} {year}",
|
|
@@ -164,6 +167,7 @@ def _generate_month_index(month_dir: Path, year: str, month: str) -> None:
|
|
|
164
167
|
|
|
165
168
|
index_path = month_dir / "_index.md"
|
|
166
169
|
index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
170
|
+
logger.debug(f"Generated month index: {index_path}")
|
|
167
171
|
|
|
168
172
|
|
|
169
173
|
def save_collection(
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Logging configuration for convoviz."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from rich.logging import RichHandler
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def setup_logging(
    verbosity: int = 0,
    log_file: Path | None = None,
) -> Path:
    """Set up logging configuration.

    Replaces whatever handlers are attached to the root logger with a Rich
    console handler (filtered by ``verbosity``) and a UTF-8 file handler that
    always records DEBUG and above.

    Args:
        verbosity: Level of verbosity (0=WARNING, 1=INFO, 2=DEBUG)
        log_file: Path to log file. If None, a temporary file is created.

    Returns:
        Path to the log file used.
    """
    root_logger = logging.getLogger()

    # Detach AND close the existing handlers. Clearing the list alone would
    # leave the file handler from an earlier call holding its log file open.
    for stale_handler in list(root_logger.handlers):
        root_logger.removeHandler(stale_handler)
        stale_handler.close()

    # Determine log level for console
    if verbosity >= 2:
        console_level = logging.DEBUG
    elif verbosity >= 1:
        console_level = logging.INFO
    else:
        console_level = logging.WARNING

    # Console handler (Rich)
    console_handler = RichHandler(
        rich_tracebacks=True,
        markup=True,
        show_time=False,
        show_path=False,
    )
    console_handler.setLevel(console_level)

    # File handler
    if log_file is None:
        # delete=False keeps the file on disk after the context manager closes
        # it, so the FileHandler below can reopen it by name.
        with tempfile.NamedTemporaryFile(prefix="convoviz_", suffix=".log", delete=False) as tf:
            log_file = Path(tf.name)

    # Ensure parent dir exists (no-op when it is already there)
    log_file.parent.mkdir(parents=True, exist_ok=True)

    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)  # Always log DEBUG to file
    file_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )

    # Root level is DEBUG so that each handler applies its own threshold
    root_logger.setLevel(logging.DEBUG)
    root_logger.addHandler(console_handler)
    root_logger.addHandler(file_handler)

    # Keep chatty third-party loggers at WARNING
    logging.getLogger("matplotlib").setLevel(logging.WARNING)
    logging.getLogger("PIL").setLevel(logging.WARNING)

    return log_file
|
convoviz/models/conversation.py
CHANGED
|
@@ -24,6 +24,8 @@ class Conversation(BaseModel):
|
|
|
24
24
|
mapping: dict[str, Node]
|
|
25
25
|
moderation_results: list[Any] = Field(default_factory=list)
|
|
26
26
|
current_node: str
|
|
27
|
+
is_starred: bool | None = None
|
|
28
|
+
voice: str | dict[str, Any] | None = None
|
|
27
29
|
plugin_ids: list[str] | None = None
|
|
28
30
|
conversation_id: str
|
|
29
31
|
conversation_template_id: str | None = None
|
|
@@ -156,3 +158,19 @@ class Conversation(BaseModel):
|
|
|
156
158
|
def year_start(self) -> datetime:
|
|
157
159
|
"""Get January 1st of the year this conversation was created."""
|
|
158
160
|
return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
164
|
+
"""Aggregate citation metadata from all messages in the conversation.
|
|
165
|
+
|
|
166
|
+
Traverses all nodes (including hidden ones) to collect embedded citation definitions
|
|
167
|
+
from tool outputs (e.g. search results).
|
|
168
|
+
"""
|
|
169
|
+
aggregated_map = {}
|
|
170
|
+
for node in self.all_message_nodes:
|
|
171
|
+
if not node.message:
|
|
172
|
+
continue
|
|
173
|
+
# Extract citations from message parts
|
|
174
|
+
if hasattr(node.message, "internal_citation_map"):
|
|
175
|
+
aggregated_map.update(node.message.internal_citation_map)
|
|
176
|
+
return aggregated_map
|
convoviz/models/message.py
CHANGED
|
@@ -46,6 +46,9 @@ class MessageMetadata(BaseModel):
|
|
|
46
46
|
is_user_system_message: bool | None = None
|
|
47
47
|
is_visually_hidden_from_conversation: bool | None = None
|
|
48
48
|
user_context_message_data: dict[str, Any] | None = None
|
|
49
|
+
citations: list[dict[str, Any]] | None = None
|
|
50
|
+
search_result_groups: list[dict[str, Any]] | None = None
|
|
51
|
+
content_references: list[dict[str, Any]] | None = None
|
|
49
52
|
|
|
50
53
|
model_config = ConfigDict(protected_namespaces=())
|
|
51
54
|
|
|
@@ -179,11 +182,12 @@ class Message(BaseModel):
|
|
|
179
182
|
1. It is empty (no text, no images).
|
|
180
183
|
2. Explicitly marked as visually hidden.
|
|
181
184
|
3. It is an internal system message (not custom instructions).
|
|
182
|
-
4. It is a browser tool output (intermediate search steps).
|
|
185
|
+
4. It is a browser tool output (intermediate search steps) UNLESS it is a tether_quote.
|
|
183
186
|
5. It is an assistant message targeting a tool (internal call).
|
|
184
187
|
6. It is code interpreter input (content_type="code").
|
|
185
|
-
7. It is browsing status (
|
|
186
|
-
8. It is
|
|
188
|
+
7. It is browsing status, internal reasoning (o1/o3), or massive web scraps (sonic_webpage).
|
|
189
|
+
8. It is a redundant DALL-E textual status update.
|
|
190
|
+
9. It is from internal bio (memory) or web.run orchestration tools.
|
|
187
191
|
"""
|
|
188
192
|
if self.is_empty:
|
|
189
193
|
return True
|
|
@@ -197,10 +201,29 @@ class Message(BaseModel):
|
|
|
197
201
|
# Only show if explicitly marked as user system message (Custom Instructions)
|
|
198
202
|
return not self.metadata.is_user_system_message
|
|
199
203
|
|
|
200
|
-
# Hide
|
|
201
|
-
if self.
|
|
204
|
+
# Hide sonic_webpage (massive scraped text) and system_error
|
|
205
|
+
if self.content.content_type in ("sonic_webpage", "system_error"):
|
|
202
206
|
return True
|
|
203
207
|
|
|
208
|
+
if self.author.role == "tool":
|
|
209
|
+
# Hide memory updates (bio) and internal search orchestration (web.run)
|
|
210
|
+
if self.author.name in ("bio", "web.run"):
|
|
211
|
+
return True
|
|
212
|
+
|
|
213
|
+
# Hide browser tool outputs (intermediate search steps)
|
|
214
|
+
# EXCEPTION: tether_quote (citations) should remain visible
|
|
215
|
+
if self.author.name == "browser":
|
|
216
|
+
return self.content.content_type != "tether_quote"
|
|
217
|
+
|
|
218
|
+
# Hide DALL-E textual status ("DALL·E displayed 1 images...")
|
|
219
|
+
if (
|
|
220
|
+
self.author.name == "dalle.text2im"
|
|
221
|
+
and self.content.content_type == "text"
|
|
222
|
+
# Check if it doesn't have images (just in case they attach images to text logic)
|
|
223
|
+
and not self.images
|
|
224
|
+
):
|
|
225
|
+
return True
|
|
226
|
+
|
|
204
227
|
# Hide assistant messages targeting tools (e.g., search(...), code input)
|
|
205
228
|
# recipient="all" or None means it's for the user; anything else is internal
|
|
206
229
|
if self.author.role == "assistant" and self.recipient not in ("all", None):
|
|
@@ -216,3 +239,56 @@ class Message(BaseModel):
|
|
|
216
239
|
"thoughts",
|
|
217
240
|
"reasoning_recap",
|
|
218
241
|
)
|
|
242
|
+
|
|
243
|
+
@property
|
|
244
|
+
def internal_citation_map(self) -> dict[str, dict[str, str | None]]:
|
|
245
|
+
"""Extract a map of citation IDs to metadata from content parts.
|
|
246
|
+
|
|
247
|
+
Used for resolving embedded citations (e.g. citeturn0search18).
|
|
248
|
+
Key format: "turn{turn_index}search{ref_index}"
|
|
249
|
+
"""
|
|
250
|
+
if not self.content.parts:
|
|
251
|
+
return {}
|
|
252
|
+
|
|
253
|
+
citation_mapping = {}
|
|
254
|
+
|
|
255
|
+
# Helper to process a single search result entry
|
|
256
|
+
def process_entry(entry: dict[str, Any]) -> None:
|
|
257
|
+
ref_id = entry.get("ref_id")
|
|
258
|
+
if not ref_id:
|
|
259
|
+
return
|
|
260
|
+
# Only care about search results for now
|
|
261
|
+
if ref_id.get("ref_type") != "search":
|
|
262
|
+
return
|
|
263
|
+
|
|
264
|
+
turn_idx = ref_id.get("turn_index")
|
|
265
|
+
ref_idx = ref_id.get("ref_index")
|
|
266
|
+
|
|
267
|
+
if turn_idx is not None and ref_idx is not None:
|
|
268
|
+
# turn_idx is int, ref_idx is int
|
|
269
|
+
key = f"turn{turn_idx}search{ref_idx}"
|
|
270
|
+
citation_mapping[key] = {
|
|
271
|
+
"title": entry.get("title"),
|
|
272
|
+
"url": entry.get("url"),
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
# 1. Extract from self.content.parts
|
|
276
|
+
if self.content and self.content.parts:
|
|
277
|
+
for part in self.content.parts:
|
|
278
|
+
if isinstance(part, dict):
|
|
279
|
+
if part.get("type") == "search_result":
|
|
280
|
+
process_entry(part)
|
|
281
|
+
elif part.get("type") == "search_result_group":
|
|
282
|
+
for entry in part.get("entries", []):
|
|
283
|
+
process_entry(entry)
|
|
284
|
+
|
|
285
|
+
# 2. Extract from metadata.search_result_groups (if present)
|
|
286
|
+
if self.metadata and self.metadata.search_result_groups:
|
|
287
|
+
for group in self.metadata.search_result_groups:
|
|
288
|
+
if isinstance(group, dict):
|
|
289
|
+
# Groups might have 'entries' or be flat?
|
|
290
|
+
# Based on name 'groups', likely similar to part structure
|
|
291
|
+
for entry in group.get("entries", []):
|
|
292
|
+
process_entry(entry)
|
|
293
|
+
|
|
294
|
+
return citation_mapping
|