convoviz 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/__init__.py +25 -5
- convoviz/__main__.py +6 -5
- convoviz/analysis/__init__.py +9 -0
- convoviz/analysis/graphs.py +98 -0
- convoviz/analysis/wordcloud.py +142 -0
- convoviz/assets/colormaps.txt +15 -16
- convoviz/cli.py +101 -94
- convoviz/config.py +88 -0
- convoviz/exceptions.py +47 -0
- convoviz/interactive.py +178 -0
- convoviz/io/__init__.py +21 -0
- convoviz/io/loaders.py +135 -0
- convoviz/io/writers.py +96 -0
- convoviz/models/__init__.py +26 -6
- convoviz/models/collection.py +107 -0
- convoviz/models/conversation.py +149 -0
- convoviz/models/message.py +77 -0
- convoviz/models/node.py +66 -0
- convoviz/pipeline.py +120 -0
- convoviz/renderers/__init__.py +10 -0
- convoviz/renderers/markdown.py +182 -0
- convoviz/renderers/yaml.py +42 -0
- convoviz/utils.py +68 -237
- {convoviz-0.1.7.dist-info → convoviz-0.2.0.dist-info}/METADATA +61 -42
- {convoviz-0.1.7.dist-info → convoviz-0.2.0.dist-info}/RECORD +27 -17
- convoviz-0.2.0.dist-info/WHEEL +4 -0
- convoviz-0.2.0.dist-info/entry_points.txt +3 -0
- convoviz/configuration.py +0 -125
- convoviz/data_analysis.py +0 -119
- convoviz/long_runs.py +0 -93
- convoviz/models/_conversation.py +0 -289
- convoviz/models/_conversation_set.py +0 -191
- convoviz/models/_message.py +0 -89
- convoviz/models/_node.py +0 -74
- convoviz-0.1.7.dist-info/LICENSE +0 -21
- convoviz-0.1.7.dist-info/WHEEL +0 -4
convoviz/interactive.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Interactive configuration prompts using questionary."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from questionary import Choice, Style, checkbox, select
|
|
6
|
+
from questionary import path as qst_path
|
|
7
|
+
from questionary import text as qst_text
|
|
8
|
+
|
|
9
|
+
from convoviz.config import ConvovizConfig, get_default_config
|
|
10
|
+
from convoviz.io.loaders import find_latest_zip, validate_zip
|
|
11
|
+
from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
|
|
12
|
+
|
|
13
|
+
# Colour theme handed as ``style=`` to every questionary prompt in this module.
# Each rule is a (prompt element, style string) pair.
_PROMPT_STYLE_RULES = [
    ("qmark", "fg:#34eb9b bold"),
    ("question", "bold fg:#e0e0e0"),
    ("answer", "fg:#34ebeb bold"),
    ("pointer", "fg:#e834eb bold"),
    ("highlighted", "fg:#349ceb bold"),
    ("selected", "fg:#34ebeb"),
    ("separator", "fg:#eb3434"),
    ("instruction", "fg:#eb9434"),
    ("text", "fg:#b2eb34"),
    ("disabled", "fg:#858585 italic"),
]

CUSTOM_STYLE = Style(_PROMPT_STYLE_RULES)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
    """Run interactive prompts to configure convoviz.

    Every prompt is pre-filled from the current configuration. An answer is
    written back onto the same config object; a prompt that yields ``None``
    or an empty value leaves the setting untouched (the stopwords prompt is
    the exception: an empty string is stored, only ``None`` is ignored).

    Args:
        initial_config: Optional starting configuration (uses defaults if None).
            The object is modified in place and returned.

    Returns:
        Updated configuration based on user input
    """
    config = initial_config or get_default_config()

    # Set sensible defaults if not already set
    if not config.zip_filepath:
        latest = find_latest_zip()
        if latest:
            config.zip_filepath = latest

    if not config.wordcloud.font_path:
        config.wordcloud.font_path = default_font_path()

    # Prompt for zip file path
    zip_default = str(config.zip_filepath) if config.zip_filepath else ""
    zip_result = qst_path(
        "Enter the path to the zip file:",
        default=zip_default,
        # pathlib never expands "~" by itself, so "~/Downloads/export.zip"
        # would otherwise be rejected as a non-existent file.
        validate=lambda p: validate_zip(Path(p).expanduser())
        or "Invalid zip file (must contain conversations.json)",
        style=CUSTOM_STYLE,
    ).ask()

    if zip_result:
        # Store the same expanded path that was just validated.
        config.zip_filepath = Path(zip_result).expanduser()

    # Prompt for output folder
    output_result = qst_path(
        "Enter the path to the output folder:",
        default=str(config.output_folder),
        style=CUSTOM_STYLE,
    ).ask()

    if output_result:
        config.output_folder = Path(output_result).expanduser()

    # Prompt for author headers
    headers = config.message.author_headers
    for role in ["system", "user", "assistant", "tool"]:
        current = getattr(headers, role)
        result = qst_text(
            f"Enter the message header for '{role}':",
            default=current,
            validate=lambda t: validate_header(t)
            or "Must be a valid markdown header (e.g., # Title)",
            style=CUSTOM_STYLE,
        ).ask()
        if result:
            setattr(headers, role, result)

    # Prompt for LaTeX delimiters
    latex_choices = ["default", "dollars"]
    current_latex = config.conversation.markdown.latex_delimiters
    latex_result = select(
        "Select the LaTeX math delimiters:",
        choices=latex_choices,
        # Same guard as the font/colormap prompts below: only offer the
        # configured value as default when it is actually one of the choices.
        default=current_latex if current_latex in latex_choices else latex_choices[0],
        style=CUSTOM_STYLE,
    ).ask()

    if latex_result:
        config.conversation.markdown.latex_delimiters = latex_result

    # Prompt for YAML headers
    # Single source of truth for the toggleable fields: used both to build the
    # checkbox and to write the answers back.
    yaml_fields = (
        "title",
        "tags",
        "chat_link",
        "create_time",
        "update_time",
        "model",
        "used_plugins",
        "message_count",
        "content_types",
        "custom_instructions",
    )
    yaml_config = config.conversation.yaml
    yaml_choices = [
        Choice(title=field, checked=getattr(yaml_config, field)) for field in yaml_fields
    ]

    selected = checkbox(
        "Select YAML metadata headers to include:",
        choices=yaml_choices,
        style=CUSTOM_STYLE,
    ).ask()

    # ``None`` means no answer was given; an empty list is a valid answer that
    # switches every field off.
    if selected is not None:
        selected_set = set(selected)
        for field_name in yaml_fields:
            setattr(yaml_config, field_name, field_name in selected_set)

    # Prompt for font
    available_fonts = font_names()
    if available_fonts:
        current_font = (
            config.wordcloud.font_path.stem if config.wordcloud.font_path else available_fonts[0]
        )
        font_result = select(
            "Select the font for word clouds:",
            choices=available_fonts,
            default=current_font if current_font in available_fonts else available_fonts[0],
            style=CUSTOM_STYLE,
        ).ask()

        if font_result:
            config.wordcloud.font_path = font_path(font_result)

    # Prompt for colormap
    available_colormaps = colormaps()
    if available_colormaps:
        colormap_result = select(
            "Select the color theme for word clouds:",
            choices=available_colormaps,
            default=config.wordcloud.colormap
            if config.wordcloud.colormap in available_colormaps
            else available_colormaps[0],
            style=CUSTOM_STYLE,
        ).ask()

        if colormap_result:
            config.wordcloud.colormap = colormap_result

    # Prompt for custom stopwords
    stopwords_result = qst_text(
        "Enter custom stopwords (comma-separated):",
        default=config.wordcloud.custom_stopwords,
        style=CUSTOM_STYLE,
    ).ask()

    # An empty string is meaningful here (clears the stopwords), so only skip None.
    if stopwords_result is not None:
        config.wordcloud.custom_stopwords = stopwords_result

    return config
|
convoviz/io/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""I/O operations for convoviz."""
|
|
2
|
+
|
|
3
|
+
from convoviz.io.loaders import (
|
|
4
|
+
load_collection_from_json,
|
|
5
|
+
load_collection_from_zip,
|
|
6
|
+
load_conversation_from_json,
|
|
7
|
+
)
|
|
8
|
+
from convoviz.io.writers import (
|
|
9
|
+
save_collection,
|
|
10
|
+
save_conversation,
|
|
11
|
+
save_custom_instructions,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"load_collection_from_json",
|
|
16
|
+
"load_collection_from_zip",
|
|
17
|
+
"load_conversation_from_json",
|
|
18
|
+
"save_collection",
|
|
19
|
+
"save_conversation",
|
|
20
|
+
"save_custom_instructions",
|
|
21
|
+
]
|
convoviz/io/loaders.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Loading functions for conversations and collections."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from zipfile import ZipFile
|
|
5
|
+
|
|
6
|
+
from orjson import loads
|
|
7
|
+
|
|
8
|
+
from convoviz.exceptions import InvalidZipError
|
|
9
|
+
from convoviz.models import Conversation, ConversationCollection
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def extract_archive(filepath: Path) -> Path:
    """Unpack a ZIP archive into a folder named after it.

    The destination is ``filepath`` with its last suffix stripped, e.g.
    ``exports/data.zip`` is extracted into ``exports/data``.

    Args:
        filepath: Path to the ZIP file

    Returns:
        Path to the folder the archive was extracted into
    """
    destination = filepath.with_suffix("")
    with ZipFile(filepath) as archive:
        archive.extractall(destination)
    return destination
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def validate_zip(filepath: Path) -> bool:
    """Check if a ZIP file contains conversations.json.

    The path must be an existing regular file with a ``.zip`` extension
    (compared case-insensitively) and the archive must list an entry named
    exactly ``conversations.json``.

    Args:
        filepath: Path to the ZIP file

    Returns:
        True if valid, False otherwise (including when the file cannot be
        opened or read as a ZIP archive)
    """
    # Lower-case the suffix so "export.ZIP" is treated like "export.zip".
    if not filepath.is_file() or filepath.suffix.lower() != ".zip":
        return False
    try:
        with ZipFile(filepath) as zf:
            return "conversations.json" in zf.namelist()
    except Exception:
        # Deliberately best-effort: this is used as a prompt validator, so any
        # failure to open or read the archive simply means "not a valid export".
        return False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_conversation_from_json(filepath: Path | str) -> Conversation:
    """Build a single ``Conversation`` from a JSON file.

    The file is read as UTF-8 text, parsed, and the resulting mapping is
    passed to ``Conversation`` as keyword arguments.

    Args:
        filepath: Path to the JSON file

    Returns:
        Loaded Conversation object
    """
    raw_json = Path(filepath).read_text(encoding="utf-8")
    return Conversation(**loads(raw_json))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
    """Build a ``ConversationCollection`` from a JSON file.

    The file is read as UTF-8 text and the parsed value is handed to the
    collection as its ``conversations``; the file should therefore contain
    an array of conversation objects.

    Args:
        filepath: Path to the JSON file

    Returns:
        Loaded ConversationCollection object
    """
    raw_json = Path(filepath).read_text(encoding="utf-8")
    return ConversationCollection(conversations=loads(raw_json))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:
    """Load a conversation collection from a ChatGPT export ZIP file.

    The archive is validated, extracted with ``extract_archive`` and the
    ``conversations.json`` inside the extracted folder is loaded.

    Args:
        filepath: Path to the ZIP file

    Returns:
        Loaded ConversationCollection object

    Raises:
        InvalidZipError: If ``validate_zip`` rejects the file
    """
    archive_path = Path(filepath)

    if validate_zip(archive_path):
        conversations_json = extract_archive(archive_path) / "conversations.json"
        return load_collection_from_json(conversations_json)

    raise InvalidZipError(str(archive_path))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def find_latest_zip(directory: Path | None = None) -> Path | None:
|
|
101
|
+
"""Find the most recently created ZIP file in a directory.
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
directory: Directory to search (defaults to ~/Downloads)
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
Path to the most recent ZIP, or None if none found
|
|
108
|
+
"""
|
|
109
|
+
if directory is None:
|
|
110
|
+
directory = Path.home() / "Downloads"
|
|
111
|
+
|
|
112
|
+
zip_files = list(directory.glob("*.zip"))
|
|
113
|
+
if not zip_files:
|
|
114
|
+
return None
|
|
115
|
+
|
|
116
|
+
return max(zip_files, key=lambda p: p.stat().st_ctime)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def find_latest_bookmarklet_json(directory: Path | None = None) -> Path | None:
|
|
120
|
+
"""Find the most recent bookmarklet JSON file in a directory.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
directory: Directory to search (defaults to ~/Downloads)
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
Path to the most recent bookmarklet JSON, or None if none found
|
|
127
|
+
"""
|
|
128
|
+
if directory is None:
|
|
129
|
+
directory = Path.home() / "Downloads"
|
|
130
|
+
|
|
131
|
+
bookmarklet_files = [f for f in directory.glob("*.json") if "bookmarklet" in f.name.lower()]
|
|
132
|
+
if not bookmarklet_files:
|
|
133
|
+
return None
|
|
134
|
+
|
|
135
|
+
return max(bookmarklet_files, key=lambda p: p.stat().st_ctime)
|
convoviz/io/writers.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Writing functions for conversations and collections."""
|
|
2
|
+
|
|
3
|
+
from os import utime as os_utime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from orjson import OPT_INDENT_2, dumps
|
|
7
|
+
from tqdm import tqdm
|
|
8
|
+
|
|
9
|
+
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
10
|
+
from convoviz.models import Conversation, ConversationCollection
|
|
11
|
+
from convoviz.renderers import render_conversation
|
|
12
|
+
from convoviz.utils import sanitize
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def save_conversation(
    conversation: Conversation,
    filepath: Path,
    config: ConversationConfig,
    headers: AuthorHeaders,
) -> Path:
    """Render a conversation to markdown and write it to disk.

    If ``filepath`` already exists, ``"<sanitized stem> (N)<suffix>"`` is tried
    in the same directory for N = 1, 2, ... until a free name is found. After
    writing, the file's access and modification times are both set to the
    conversation's update time.

    Args:
        conversation: The conversation to save
        filepath: Target file path
        config: Conversation rendering configuration
        headers: Author header configuration

    Returns:
        The actual path the file was saved to (may differ if there was a conflict)
    """
    # Find the first path that does not exist yet
    stem = sanitize(filepath.stem)
    target = filepath
    attempt = 0
    while target.exists():
        attempt += 1
        target = filepath.with_name(f"{stem} ({attempt}){filepath.suffix}")

    # Render first, then write in one go
    target.write_text(render_conversation(conversation, config, headers), encoding="utf-8")

    # Stamp the file with the conversation's update time
    stamp = conversation.update_time.timestamp()
    os_utime(target, (stamp, stamp))

    return target
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def save_collection(
    collection: ConversationCollection,
    directory: Path,
    config: ConversationConfig,
    headers: AuthorHeaders,
    *,
    progress_bar: bool = False,
) -> None:
    """Write every conversation of a collection as a markdown file.

    The target directory (including missing parents) is created first. Each
    conversation is saved as ``<sanitized title>.md`` via ``save_conversation``.

    Args:
        collection: The collection to save
        directory: Target directory
        config: Conversation rendering configuration
        headers: Author header configuration
        progress_bar: Whether to show a progress bar
    """
    directory.mkdir(parents=True, exist_ok=True)

    conversations = tqdm(
        collection.conversations,
        desc="Writing Markdown 📄 files",
        disable=not progress_bar,
    )
    for conversation in conversations:
        target = directory / f"{sanitize(conversation.title)}.md"
        save_conversation(conversation, target, config, headers)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def save_custom_instructions(
    collection: ConversationCollection,
    filepath: Path,
) -> None:
    """Dump the collection's custom instructions to a JSON file.

    The value of ``collection.custom_instructions`` is serialized with
    two-space indentation and written as UTF-8 text, replacing any existing
    file at ``filepath``.

    Args:
        collection: The collection to extract instructions from
        filepath: Target JSON file path
    """
    payload = collection.custom_instructions
    with filepath.open("w", encoding="utf-8") as out:
        serialized = dumps(payload, option=OPT_INDENT_2)
        out.write(serialized.decode())
|
convoviz/models/__init__.py
CHANGED
|
@@ -1,8 +1,28 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Data models for convoviz."""
|
|
2
2
|
|
|
3
|
-
from .
|
|
4
|
-
from .
|
|
5
|
-
from .
|
|
6
|
-
|
|
3
|
+
from convoviz.models.collection import ConversationCollection
|
|
4
|
+
from convoviz.models.conversation import Conversation
|
|
5
|
+
from convoviz.models.message import (
|
|
6
|
+
AuthorRole,
|
|
7
|
+
Message,
|
|
8
|
+
MessageAuthor,
|
|
9
|
+
MessageContent,
|
|
10
|
+
MessageMetadata,
|
|
11
|
+
)
|
|
12
|
+
from convoviz.models.node import Node, build_node_tree
|
|
7
13
|
|
|
8
|
-
|
|
14
|
+
# Backward compatibility alias
|
|
15
|
+
ConversationSet = ConversationCollection
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"AuthorRole",
|
|
19
|
+
"Conversation",
|
|
20
|
+
"ConversationCollection",
|
|
21
|
+
"ConversationSet",
|
|
22
|
+
"Message",
|
|
23
|
+
"MessageAuthor",
|
|
24
|
+
"MessageContent",
|
|
25
|
+
"MessageMetadata",
|
|
26
|
+
"Node",
|
|
27
|
+
"build_node_tree",
|
|
28
|
+
]
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""ConversationCollection model - manages a set of conversations.
|
|
2
|
+
|
|
3
|
+
This is a pure data model - I/O and visualization logic are in separate modules.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from convoviz.models.conversation import Conversation
|
|
12
|
+
from convoviz.models.message import AuthorRole
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ConversationCollection(BaseModel):
    """A collection of ChatGPT conversations.

    Provides grouping and aggregation operations over conversations.
    """

    # Conversations in insertion order.
    conversations: list[Conversation] = Field(default_factory=list)

    @property
    def index(self) -> dict[str, Conversation]:
        """Get conversations indexed by conversation_id.

        A new dict is built on each access. If several conversations share
        an id, the one appearing last in ``conversations`` is kept.
        """
        return {conv.conversation_id: conv for conv in self.conversations}

    @property
    def last_updated(self) -> datetime:
        """Get the most recent update time across all conversations.

        Returns ``datetime.min`` (a naive datetime) when the collection is empty.
        """
        if not self.conversations:
            return datetime.min
        return max(conv.update_time for conv in self.conversations)

    def update(self, other: "ConversationCollection") -> None:
        """Merge another collection into this one.

        Only updates if the other collection has newer content. On merge,
        conversations from ``other`` replace conversations with the same
        ``conversation_id`` and new ones are appended.
        """
        if not other.conversations:
            # Nothing to merge.
            return

        # Compare timestamps only when this collection has some. For an empty
        # collection ``last_updated`` is the naive ``datetime.min`` sentinel,
        # and comparing it with timezone-aware update times raises TypeError.
        if self.conversations and other.last_updated <= self.last_updated:
            return

        merged_index = self.index
        merged_index.update(other.index)
        self.conversations = list(merged_index.values())

    def add(self, conversation: Conversation) -> None:
        """Add a conversation to the collection."""
        self.conversations.append(conversation)

    @property
    def custom_instructions(self) -> list[dict[str, Any]]:
        """Get all custom instructions from all conversations.

        Conversations whose ``custom_instructions`` is falsy are skipped. Each
        entry holds the chat title, link, creation time and the instructions.
        """
        return [
            {
                "chat_title": conv.title,
                "chat_link": conv.url,
                "time": conv.create_time,
                "custom_instructions": conv.custom_instructions,
            }
            for conv in self.conversations
            if conv.custom_instructions
        ]

    def timestamps(self, *authors: AuthorRole) -> list[float]:
        """Get all message timestamps from specified authors."""
        return [
            stamp for conv in self.conversations for stamp in conv.timestamps(*authors)
        ]

    def plaintext(self, *authors: AuthorRole) -> str:
        """Get concatenated plain text from all conversations, one per line block."""
        return "\n".join(conv.plaintext(*authors) for conv in self.conversations)

    def _group_by(self, attribute: str) -> dict[datetime, "ConversationCollection"]:
        """Bucket conversations by the value of one of their datetime attributes.

        Args:
            attribute: Name of the ``Conversation`` attribute used as the key
                (e.g. ``"week_start"``).

        Returns:
            Mapping from key to a new collection holding the matching
            conversations, in order of first appearance.
        """
        groups: dict[datetime, ConversationCollection] = {}
        for conv in self.conversations:
            key = getattr(conv, attribute)
            if key not in groups:
                groups[key] = ConversationCollection()
            groups[key].add(conv)
        return groups

    def group_by_week(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by the week they were created."""
        return self._group_by("week_start")

    def group_by_month(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by the month they were created."""
        return self._group_by("month_start")

    def group_by_year(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by the year they were created."""
        return self._group_by("year_start")
|