convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/analysis/graphs.py +410 -21
- convoviz/analysis/wordcloud.py +21 -1
- convoviz/assets/stopwords.txt +75 -0
- convoviz/cli.py +18 -15
- convoviz/config.py +14 -7
- convoviz/interactive.py +40 -11
- convoviz/io/assets.py +82 -0
- convoviz/io/loaders.py +54 -3
- convoviz/io/writers.py +17 -2
- convoviz/models/__init__.py +0 -4
- convoviz/models/collection.py +14 -6
- convoviz/models/conversation.py +4 -6
- convoviz/models/message.py +87 -7
- convoviz/pipeline.py +70 -24
- convoviz/renderers/markdown.py +91 -24
- convoviz/renderers/yaml.py +79 -2
- convoviz/utils.py +54 -4
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/METADATA +30 -5
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/RECORD +21 -19
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/WHEEL +1 -1
- {convoviz-0.2.2.dist-info → convoviz-0.2.4.dist-info}/entry_points.txt +0 -0
convoviz/config.py
CHANGED
|
@@ -19,6 +19,7 @@ class MarkdownConfig(BaseModel):
|
|
|
19
19
|
"""Configuration for markdown output."""
|
|
20
20
|
|
|
21
21
|
latex_delimiters: Literal["default", "dollars"] = "default"
|
|
22
|
+
flavor: Literal["obsidian", "standard"] = "obsidian"
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
class YAMLConfig(BaseModel):
|
|
@@ -53,27 +54,33 @@ class WordCloudConfig(BaseModel):
|
|
|
53
54
|
"""Configuration for word cloud generation."""
|
|
54
55
|
|
|
55
56
|
font_path: Path | None = None
|
|
56
|
-
colormap: str = "
|
|
57
|
+
colormap: str = "RdYlBu"
|
|
57
58
|
custom_stopwords: str = "use, file, "
|
|
59
|
+
exclude_programming_keywords: bool = True
|
|
58
60
|
background_color: str | None = None
|
|
59
61
|
mode: Literal["RGB", "RGBA"] = "RGBA"
|
|
60
62
|
include_numbers: bool = False
|
|
61
|
-
width: int =
|
|
62
|
-
height: int =
|
|
63
|
+
width: int = 600
|
|
64
|
+
height: int = 600
|
|
63
65
|
|
|
64
66
|
|
|
65
67
|
class GraphConfig(BaseModel):
    """Configuration for graph generation."""

    # Default colour as a hex RGB string.
    color: str = "#4A90E2"
    # Grid toggle; enabled by default.
    grid: bool = True
    # Count-label toggle; enabled by default.
    show_counts: bool = True
    # Font file name (not a full path) — presumably resolved against the
    # bundled font assets; verify against the graph code.
    font_name: str = "Montserrat-Regular.ttf"
    # Figure size as a pair of ints — presumably (width, height) in inches as
    # matplotlib expects; TODO confirm.
    figsize: tuple[int, int] = (10, 6)
    # Output resolution setting; defaults to 300.
    dpi: int = 300
    # Either "utc" or "local"; defaults to "local". NOTE(review): looks like
    # this selects the timezone used when bucketing timestamps — confirm.
    timezone: Literal["utc", "local"] = "local"
|
|
70
77
|
|
|
71
78
|
|
|
72
79
|
class ConvovizConfig(BaseModel):
|
|
73
80
|
"""Main configuration for convoviz."""
|
|
74
81
|
|
|
75
|
-
|
|
76
|
-
output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT
|
|
82
|
+
input_path: Path | None = None
|
|
83
|
+
output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
|
|
77
84
|
message: MessageConfig = Field(default_factory=MessageConfig)
|
|
78
85
|
conversation: ConversationConfig = Field(default_factory=ConversationConfig)
|
|
79
86
|
wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
|
convoviz/interactive.py
CHANGED
|
@@ -26,6 +26,25 @@ CUSTOM_STYLE = Style(
|
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
def _validate_input_path(raw: str) -> bool | str:
|
|
30
|
+
path = Path(raw)
|
|
31
|
+
if not path.exists():
|
|
32
|
+
return "Path must exist"
|
|
33
|
+
|
|
34
|
+
if path.is_dir():
|
|
35
|
+
if (path / "conversations.json").exists():
|
|
36
|
+
return True
|
|
37
|
+
return "Directory must contain conversations.json"
|
|
38
|
+
|
|
39
|
+
if path.suffix.lower() == ".json":
|
|
40
|
+
return True
|
|
41
|
+
|
|
42
|
+
if path.suffix.lower() == ".zip":
|
|
43
|
+
return True if validate_zip(path) else "ZIP must contain conversations.json"
|
|
44
|
+
|
|
45
|
+
return "Input must be a .zip, a .json, or a directory containing conversations.json"
|
|
46
|
+
|
|
47
|
+
|
|
29
48
|
def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
|
|
30
49
|
"""Run interactive prompts to configure convoviz.
|
|
31
50
|
|
|
@@ -38,26 +57,25 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
38
57
|
config = initial_config or get_default_config()
|
|
39
58
|
|
|
40
59
|
# Set sensible defaults if not already set
|
|
41
|
-
if not config.
|
|
60
|
+
if not config.input_path:
|
|
42
61
|
latest = find_latest_zip()
|
|
43
62
|
if latest:
|
|
44
|
-
config.
|
|
63
|
+
config.input_path = latest
|
|
45
64
|
|
|
46
65
|
if not config.wordcloud.font_path:
|
|
47
66
|
config.wordcloud.font_path = default_font_path()
|
|
48
67
|
|
|
49
|
-
# Prompt for
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
"Enter the path to the
|
|
53
|
-
default=
|
|
54
|
-
validate=
|
|
55
|
-
or "Invalid zip file (must contain conversations.json)",
|
|
68
|
+
# Prompt for input path
|
|
69
|
+
input_default = str(config.input_path) if config.input_path else ""
|
|
70
|
+
input_result = qst_path(
|
|
71
|
+
"Enter the path to the export ZIP, conversations JSON, or extracted directory:",
|
|
72
|
+
default=input_default,
|
|
73
|
+
validate=_validate_input_path,
|
|
56
74
|
style=CUSTOM_STYLE,
|
|
57
75
|
).ask()
|
|
58
76
|
|
|
59
|
-
if
|
|
60
|
-
config.
|
|
77
|
+
if input_result:
|
|
78
|
+
config.input_path = Path(input_result)
|
|
61
79
|
|
|
62
80
|
# Prompt for output folder
|
|
63
81
|
output_result = qst_path(
|
|
@@ -94,6 +112,17 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
94
112
|
if latex_result:
|
|
95
113
|
config.conversation.markdown.latex_delimiters = latex_result
|
|
96
114
|
|
|
115
|
+
# Prompt for markdown flavor
|
|
116
|
+
flavor_result = select(
|
|
117
|
+
"Select the markdown flavor:",
|
|
118
|
+
choices=["obsidian", "standard"],
|
|
119
|
+
default=config.conversation.markdown.flavor,
|
|
120
|
+
style=CUSTOM_STYLE,
|
|
121
|
+
).ask()
|
|
122
|
+
|
|
123
|
+
if flavor_result:
|
|
124
|
+
config.conversation.markdown.flavor = flavor_result
|
|
125
|
+
|
|
97
126
|
# Prompt for YAML headers
|
|
98
127
|
yaml_config = config.conversation.yaml
|
|
99
128
|
yaml_choices = [
|
convoviz/io/assets.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"Asset management functions."
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
8
|
+
"""Find the actual file for a given asset ID in the source directory.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
source_dir: Directory to search in
|
|
12
|
+
asset_id: The asset ID (e.g., "file-uuid")
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
Path to the found file, or None
|
|
16
|
+
"""
|
|
17
|
+
if not source_dir.exists():
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
source_dir = source_dir.resolve()
|
|
21
|
+
|
|
22
|
+
# Safety check for asset_id
|
|
23
|
+
if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
# 1. Try exact match
|
|
27
|
+
exact_path = (source_dir / asset_id).resolve()
|
|
28
|
+
if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
|
|
29
|
+
return exact_path
|
|
30
|
+
|
|
31
|
+
# 2. Try prefix match in root
|
|
32
|
+
try:
|
|
33
|
+
candidates = list(source_dir.glob(f"{asset_id}*"))
|
|
34
|
+
files = [
|
|
35
|
+
p.resolve()
|
|
36
|
+
for p in candidates
|
|
37
|
+
if p.is_file() and p.resolve().is_relative_to(source_dir)
|
|
38
|
+
]
|
|
39
|
+
if files:
|
|
40
|
+
return files[0]
|
|
41
|
+
except Exception:
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
# 3. Try prefix match in dalle-generations
|
|
45
|
+
dalle_dir = source_dir / "dalle-generations"
|
|
46
|
+
if dalle_dir.exists() and dalle_dir.is_dir():
|
|
47
|
+
dalle_dir = dalle_dir.resolve()
|
|
48
|
+
try:
|
|
49
|
+
candidates = list(dalle_dir.glob(f"{asset_id}*"))
|
|
50
|
+
files = [
|
|
51
|
+
p.resolve()
|
|
52
|
+
for p in candidates
|
|
53
|
+
if p.is_file() and p.resolve().is_relative_to(dalle_dir)
|
|
54
|
+
]
|
|
55
|
+
if files:
|
|
56
|
+
return files[0]
|
|
57
|
+
except Exception:
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def copy_asset(source_path: Path, dest_dir: Path) -> str:
    """Place a copy of an asset under ``dest_dir/assets``.

    The ``assets`` folder is created on demand. A file that already exists
    there under the same name is left untouched rather than overwritten.

    Args:
        source_path: The source file path
        dest_dir: The root output directory (assets will be in dest_dir/assets)

    Returns:
        Relative path to the asset (e.g., "assets/image.png")
    """
    filename = source_path.name
    target = dest_dir / "assets" / filename
    target.parent.mkdir(parents=True, exist_ok=True)

    if not target.exists():
        shutil.copy2(source_path, target)

    # Always forward slashes so the link works in Markdown on Windows too.
    return f"assets/{filename}"
|
convoviz/io/loaders.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Loading functions for conversations and collections."""
|
|
2
2
|
|
|
3
|
-
from pathlib import Path
|
|
3
|
+
from pathlib import Path, PurePosixPath
|
|
4
4
|
from zipfile import ZipFile
|
|
5
5
|
|
|
6
6
|
from orjson import loads
|
|
@@ -9,17 +9,62 @@ from convoviz.exceptions import InvalidZipError
|
|
|
9
9
|
from convoviz.models import Conversation, ConversationCollection
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
def _is_safe_zip_member_name(name: str) -> bool:
    """Return True if a ZIP entry name is safe to extract.

    This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
    separators and rejects empty names, absolute paths (including UNC-style
    ``//server/share`` prefixes), drive-letter paths, and ``..`` parts.

    Args:
        name: The raw entry name as stored in the archive.

    Returns:
        True when the name stays inside the extraction folder on every
        platform, False otherwise.
    """
    normalized = name.replace("\\", "/")
    member_path = PurePosixPath(normalized)

    # Absolute paths (e.g. "/etc/passwd", "//server/share/x") or empty names
    if not normalized or member_path.is_absolute():
        return False

    parts = member_path.parts
    first = parts[0] if parts else ""

    # Windows drive prefixes stored in the archive. Besides "C:/x" (first part
    # "C:") this also catches the drive-relative form "C:x", which Windows
    # resolves against the drive rather than the extraction folder.
    has_drive_prefix = (
        len(first) >= 2 and first[1] == ":" and first[0].isascii() and first[0].isalpha()
    )
    if first.endswith(":") or has_drive_prefix:
        return False

    return ".." not in parts
|
|
31
|
+
|
|
32
|
+
|
|
12
33
|
def extract_archive(filepath: Path) -> Path:
    """Unpack a ZIP export next to the archive and return the folder.

    The destination is the archive path with its suffix removed. Every entry
    is vetted before anything is written, so an archive containing a path
    that would escape the destination (Zip-Slip) is rejected as a whole.

    Args:
        filepath: Path to the ZIP file

    Returns:
        Path to the extracted folder

    Raises:
        InvalidZipError: If an entry name is unsafe or resolves outside the
            extraction folder
    """
    folder = filepath.with_suffix("")
    folder.mkdir(parents=True, exist_ok=True)
    root = folder.resolve()

    with ZipFile(filepath) as archive:
        for info in archive.infolist():
            entry = info.filename
            # Two layers of defence: an OS-agnostic check on the raw name
            # (backslashes, drive letters, "..") followed by a check on where
            # the entry would actually land once resolved.
            escapes = not _is_safe_zip_member_name(entry) or not (
                (folder / entry.replace("\\", "/")).resolve().is_relative_to(root)
            )
            if escapes:
                raise InvalidZipError(str(filepath), reason=f"Malicious path in ZIP: {entry}")

        archive.extractall(folder)
    return folder
|
|
25
70
|
|
|
@@ -60,7 +105,8 @@ def load_conversation_from_json(filepath: Path | str) -> Conversation:
|
|
|
60
105
|
def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
|
|
61
106
|
"""Load a conversation collection from a JSON file.
|
|
62
107
|
|
|
63
|
-
The JSON file should contain an array of conversation objects
|
|
108
|
+
The JSON file should contain an array of conversation objects,
|
|
109
|
+
or an object with a "conversations" key.
|
|
64
110
|
|
|
65
111
|
Args:
|
|
66
112
|
filepath: Path to the JSON file
|
|
@@ -71,7 +117,12 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
|
|
|
71
117
|
filepath = Path(filepath)
|
|
72
118
|
with filepath.open(encoding="utf-8") as f:
|
|
73
119
|
data = loads(f.read())
|
|
74
|
-
|
|
120
|
+
|
|
121
|
+
# Handle case where export is wrapped in a top-level object
|
|
122
|
+
if isinstance(data, dict) and "conversations" in data:
|
|
123
|
+
data = data["conversations"]
|
|
124
|
+
|
|
125
|
+
return ConversationCollection(conversations=data, source_path=filepath.parent)
|
|
75
126
|
|
|
76
127
|
|
|
77
128
|
def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:
|
convoviz/io/writers.py
CHANGED
|
@@ -7,6 +7,7 @@ from orjson import OPT_INDENT_2, dumps
|
|
|
7
7
|
from tqdm import tqdm
|
|
8
8
|
|
|
9
9
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
10
|
+
from convoviz.io.assets import copy_asset, resolve_asset_path
|
|
10
11
|
from convoviz.models import Conversation, ConversationCollection
|
|
11
12
|
from convoviz.renderers import render_conversation
|
|
12
13
|
from convoviz.utils import sanitize
|
|
@@ -17,6 +18,7 @@ def save_conversation(
|
|
|
17
18
|
filepath: Path,
|
|
18
19
|
config: ConversationConfig,
|
|
19
20
|
headers: AuthorHeaders,
|
|
21
|
+
source_path: Path | None = None,
|
|
20
22
|
) -> Path:
|
|
21
23
|
"""Save a conversation to a markdown file.
|
|
22
24
|
|
|
@@ -28,6 +30,7 @@ def save_conversation(
|
|
|
28
30
|
filepath: Target file path
|
|
29
31
|
config: Conversation rendering configuration
|
|
30
32
|
headers: Author header configuration
|
|
33
|
+
source_path: Path to the source directory containing assets
|
|
31
34
|
|
|
32
35
|
Returns:
|
|
33
36
|
The actual path the file was saved to (may differ if there was a conflict)
|
|
@@ -41,8 +44,20 @@ def save_conversation(
|
|
|
41
44
|
counter += 1
|
|
42
45
|
final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
|
|
43
46
|
|
|
47
|
+
# Define asset resolver
|
|
48
|
+
def asset_resolver(asset_id: str) -> str | None:
|
|
49
|
+
if not source_path:
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
src_file = resolve_asset_path(source_path, asset_id)
|
|
53
|
+
if not src_file:
|
|
54
|
+
return None
|
|
55
|
+
|
|
56
|
+
# Copy to output directory (relative to the markdown file's directory)
|
|
57
|
+
return copy_asset(src_file, final_path.parent)
|
|
58
|
+
|
|
44
59
|
# Render and write
|
|
45
|
-
markdown = render_conversation(conversation, config, headers)
|
|
60
|
+
markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
|
|
46
61
|
with final_path.open("w", encoding="utf-8") as f:
|
|
47
62
|
f.write(markdown)
|
|
48
63
|
|
|
@@ -78,7 +93,7 @@ def save_collection(
|
|
|
78
93
|
disable=not progress_bar,
|
|
79
94
|
):
|
|
80
95
|
filepath = directory / f"{sanitize(conv.title)}.md"
|
|
81
|
-
save_conversation(conv, filepath, config, headers)
|
|
96
|
+
save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
|
|
82
97
|
|
|
83
98
|
|
|
84
99
|
def save_custom_instructions(
|
convoviz/models/__init__.py
CHANGED
|
@@ -11,14 +11,10 @@ from convoviz.models.message import (
|
|
|
11
11
|
)
|
|
12
12
|
from convoviz.models.node import Node, build_node_tree
|
|
13
13
|
|
|
14
|
-
# Backward compatibility alias
|
|
15
|
-
ConversationSet = ConversationCollection
|
|
16
|
-
|
|
17
14
|
__all__ = [
|
|
18
15
|
"AuthorRole",
|
|
19
16
|
"Conversation",
|
|
20
17
|
"ConversationCollection",
|
|
21
|
-
"ConversationSet",
|
|
22
18
|
"Message",
|
|
23
19
|
"MessageAuthor",
|
|
24
20
|
"MessageContent",
|
convoviz/models/collection.py
CHANGED
|
@@ -4,6 +4,7 @@ This is a pure data model - I/O and visualization logic are in separate modules.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
from pydantic import BaseModel, Field
|
|
@@ -19,6 +20,7 @@ class ConversationCollection(BaseModel):
|
|
|
19
20
|
"""
|
|
20
21
|
|
|
21
22
|
conversations: list[Conversation] = Field(default_factory=list)
|
|
23
|
+
source_path: Path | None = None
|
|
22
24
|
|
|
23
25
|
@property
|
|
24
26
|
def index(self) -> dict[str, Conversation]:
|
|
@@ -35,14 +37,20 @@ class ConversationCollection(BaseModel):
|
|
|
35
37
|
def update(self, other: "ConversationCollection") -> None:
    """Fold the conversations of another collection into this one.

    Conversations are matched by ID. An incoming conversation is adopted when
    its ID is unknown here, or when its ``update_time`` is strictly newer than
    the copy already held.

    Note: There is deliberately no shortcut based on ``other.last_updated``:
    a conversation that is new to this collection can still carry an older
    timestamp than the most recent conversation here (e.g. bookmarklet
    downloads).
    """
    newest = dict(self.index)

    for key, candidate in other.index.items():
        current = newest.get(key)
        if current is not None and not candidate.update_time > current.update_time:
            # The copy we already hold is at least as recent; keep it.
            continue
        newest[key] = candidate

    self.conversations = list(newest.values())
|
|
46
54
|
|
|
47
55
|
def add(self, conversation: Conversation) -> None:
|
|
48
56
|
"""Add a conversation to the collection."""
|
convoviz/models/conversation.py
CHANGED
|
@@ -98,12 +98,10 @@ class Conversation(BaseModel):
|
|
|
98
98
|
def custom_instructions(self) -> dict[str, str]:
    """Return the custom instructions attached to this conversation.

    Scans the system-authored nodes in order and returns the context data of
    the first message flagged as a user system message. Returns an empty dict
    when there is no such message or when its context data is missing.
    """
    for system_node in self.nodes_by_author("system"):
        candidate = system_node.message
        if not candidate:
            # Node carries no message at all.
            continue
        if candidate.metadata.is_user_system_message:
            return candidate.metadata.user_context_message_data or {}
    return {}
|
|
108
106
|
|
|
109
107
|
def timestamps(self, *authors: AuthorRole) -> list[float]:
|
convoviz/models/message.py
CHANGED
|
@@ -6,11 +6,11 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from typing import Any, Literal
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel, ConfigDict
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
10
10
|
|
|
11
11
|
from convoviz.exceptions import MessageContentError
|
|
12
12
|
|
|
13
|
-
AuthorRole = Literal["user", "assistant", "system", "tool"]
|
|
13
|
+
AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class MessageAuthor(BaseModel):
|
|
@@ -18,14 +18,14 @@ class MessageAuthor(BaseModel):
|
|
|
18
18
|
|
|
19
19
|
role: AuthorRole
|
|
20
20
|
name: str | None = None
|
|
21
|
-
metadata: dict[str, Any] =
|
|
21
|
+
metadata: dict[str, Any] = Field(default_factory=dict)
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class MessageContent(BaseModel):
|
|
25
25
|
"""Content of a message."""
|
|
26
26
|
|
|
27
27
|
content_type: str
|
|
28
|
-
parts: list[
|
|
28
|
+
parts: list[Any] | None = None
|
|
29
29
|
text: str | None = None
|
|
30
30
|
result: str | None = None
|
|
31
31
|
|
|
@@ -55,14 +55,56 @@ class Message(BaseModel):
|
|
|
55
55
|
status: str
|
|
56
56
|
end_turn: bool | None = None
|
|
57
57
|
weight: float
|
|
58
|
-
metadata: MessageMetadata
|
|
59
|
-
recipient: str
|
|
58
|
+
metadata: MessageMetadata = Field(default_factory=MessageMetadata)
|
|
59
|
+
recipient: str | None = None
|
|
60
|
+
|
|
61
|
+
@property
def images(self) -> list[str]:
    """Extract image asset pointers from the message content.

    Only dict parts whose ``content_type`` is ``"image_asset_pointer"`` are
    considered. A leading ``file-service://`` or ``sediment://`` scheme is
    stripped from the pointer.

    Returns:
        The asset IDs in the order they appear. Parts with a missing, null,
        non-string, or empty pointer are skipped.
    """
    parts = self.content.parts
    if not parts:
        return []

    image_ids: list[str] = []
    for part in parts:
        if not isinstance(part, dict) or part.get("content_type") != "image_asset_pointer":
            continue

        pointer = part.get("asset_pointer")
        # The key can be present with a null value; ignore it rather than
        # crash on ``None.startswith``.
        if not isinstance(pointer, str):
            continue

        # Strip prefixes like "file-service://" or "sediment://"
        for scheme in ("file-service://", "sediment://"):
            if pointer.startswith(scheme):
                pointer = pointer[len(scheme):]
                break

        if pointer:
            image_ids.append(pointer)
    return image_ids
|
|
60
80
|
|
|
61
81
|
@property
|
|
62
82
|
def text(self) -> str:
|
|
63
83
|
"""Extract the text content of the message."""
|
|
64
84
|
if self.content.parts is not None:
|
|
65
|
-
|
|
85
|
+
# Handle multimodal content where parts can be mixed strings and dicts
|
|
86
|
+
text_parts = []
|
|
87
|
+
for part in self.content.parts:
|
|
88
|
+
if isinstance(part, str):
|
|
89
|
+
text_parts.append(part)
|
|
90
|
+
elif isinstance(part, dict) and "text" in part:
|
|
91
|
+
# Some parts might be dicts wrapping text (e.g. code interpreter?)
|
|
92
|
+
# But based on spec, usually text is just a string in the list.
|
|
93
|
+
# We'll stick to string extraction for now.
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
# If we found string parts, join them.
|
|
97
|
+
# If parts existed but no strings (e.g. only images), return empty string?
|
|
98
|
+
# Or should we return a placeholder? For now, let's return joined text.
|
|
99
|
+
if text_parts:
|
|
100
|
+
return "".join(text_parts)
|
|
101
|
+
|
|
102
|
+
# If parts list is not empty but contains no strings, we might want to fall through
|
|
103
|
+
# or return empty string if we consider it "handled".
|
|
104
|
+
# The original code returned "" if parts was empty list.
|
|
105
|
+
if self.content.parts:
|
|
106
|
+
return ""
|
|
107
|
+
|
|
66
108
|
if self.content.text is not None:
|
|
67
109
|
return self.content.text
|
|
68
110
|
if self.content.result is not None:
|
|
@@ -75,3 +117,41 @@ class Message(BaseModel):
|
|
|
75
117
|
return bool(
|
|
76
118
|
self.content.parts or self.content.text is not None or self.content.result is not None
|
|
77
119
|
)
|
|
120
|
+
|
|
121
|
+
@property
def is_empty(self) -> bool:
    """Tell whether the message carries nothing worth showing.

    A message is empty when its text is blank (whitespace only) and it has no
    images. A message whose text cannot be extracted is treated as empty too.
    """
    try:
        if self.text.strip():
            return False
        return not self.images
    except MessageContentError:
        return True
|
|
128
|
+
|
|
129
|
+
@property
def is_hidden(self) -> bool:
    """Decide whether this message is left out of the export.

    A message is hidden when any of the following holds:
    1. It is empty (no text, no images).
    2. It is a system message that is not a user system message
       (i.e. not custom instructions).
    3. It is output of the ``browser`` tool.
    4. It is an assistant message addressed to ``browser`` or whose content
       type is ``code``.
    5. Its content type is ``tether_browsing_display``.
    """
    if self.is_empty:
        return True

    role = self.author.role

    # System messages are internal unless flagged as custom instructions.
    if role == "system":
        return not self.metadata.is_user_system_message

    # Browser tool output is usually an intermediate search step.
    if role == "tool" and self.author.name == "browser":
        return True

    kind = self.content.content_type

    # Assistant-side tool invocations (e.g. "search(...)") and code cells.
    if role == "assistant" and (self.recipient == "browser" or kind == "code"):
        return True

    # Browsing status messages
    return kind == "tether_browsing_display"
|