convoviz 0.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. convoviz/__init__.py +25 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +9 -0
  4. convoviz/analysis/graphs.py +855 -0
  5. convoviz/analysis/wordcloud.py +165 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +117 -0
  40. convoviz/config.py +106 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +247 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +98 -0
  45. convoviz/io/loaders.py +186 -0
  46. convoviz/io/writers.py +227 -0
  47. convoviz/models/__init__.py +24 -0
  48. convoviz/models/collection.py +115 -0
  49. convoviz/models/conversation.py +158 -0
  50. convoviz/models/message.py +218 -0
  51. convoviz/models/node.py +66 -0
  52. convoviz/pipeline.py +167 -0
  53. convoviz/py.typed +0 -0
  54. convoviz/renderers/__init__.py +10 -0
  55. convoviz/renderers/markdown.py +269 -0
  56. convoviz/renderers/yaml.py +119 -0
  57. convoviz/utils.py +155 -0
  58. convoviz-0.2.12.dist-info/METADATA +148 -0
  59. convoviz-0.2.12.dist-info/RECORD +61 -0
  60. convoviz-0.2.12.dist-info/WHEEL +4 -0
  61. convoviz-0.2.12.dist-info/entry_points.txt +3 -0
convoviz/io/writers.py ADDED
@@ -0,0 +1,227 @@
1
+ """Writing functions for conversations and collections."""
2
+
3
+ from os import utime as os_utime
4
+ from pathlib import Path
5
+ from urllib.parse import quote
6
+
7
+ from orjson import OPT_INDENT_2, dumps
8
+ from tqdm import tqdm
9
+
10
+ from convoviz.config import AuthorHeaders, ConversationConfig, FolderOrganization
11
+ from convoviz.io.assets import copy_asset, resolve_asset_path
12
+ from convoviz.models import Conversation, ConversationCollection
13
+ from convoviz.renderers import render_conversation
14
+ from convoviz.utils import sanitize
15
+
16
+ # Month names for folder naming
17
# English month names in calendar order; index with ``month - 1``.
# Spelled out literally so folder names do not depend on the locale.
_MONTH_NAMES = (
    "January February March April May June "
    "July August September October November December"
).split()
31
+
32
+
33
def get_date_folder_path(conversation: Conversation) -> Path:
    """Build the relative ``<year>/<MM>-<MonthName>`` folder for a conversation.

    The folder is derived from the conversation's creation time, for example
    ``2024/03-March``.

    Args:
        conversation: Conversation whose ``create_time`` selects the folder.

    Returns:
        Relative path made of the year folder followed by the month folder.
    """
    created = conversation.create_time
    month_number = created.month

    # Zero-padded number first, so month folders sort chronologically by name.
    month_folder = f"{month_number:02d}-{_MONTH_NAMES[month_number - 1]}"

    return Path(str(created.year)) / month_folder
56
+
57
+
58
def save_conversation(
    conversation: Conversation,
    filepath: Path,
    config: ConversationConfig,
    headers: AuthorHeaders,
    source_path: Path | None = None,
) -> Path:
    """Render a conversation to markdown and write it to disk.

    If ``filepath`` already exists, a numbered variant
    (``"<stem> (1)<suffix>"``, ``"<stem> (2)<suffix>"``, ...) is used instead.
    After writing, the file's access and modification times are set to the
    conversation's update time.

    Args:
        conversation: The conversation to save
        filepath: Preferred target file path
        config: Conversation rendering configuration
        headers: Author header configuration
        source_path: Directory containing the source assets; when omitted,
            asset references are not resolved

    Returns:
        The path actually written (differs from ``filepath`` on a name conflict)
    """
    # The sanitized stem is only used for the numbered fallback names.
    stem = sanitize(filepath.stem)
    extension = filepath.suffix

    target = filepath
    attempt = 1
    while target.exists():
        target = filepath.with_name(f"{stem} ({attempt}){extension}")
        attempt += 1

    # Assets are copied next to the markdown file that references them.
    output_dir = target.parent

    def asset_resolver(asset_id: str) -> str | None:
        if source_path:
            located = resolve_asset_path(source_path, asset_id)
            if located:
                return copy_asset(located, output_dir)
        return None

    # Render first, then write, so a rendering error never creates the file.
    rendered = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
    target.write_text(rendered, encoding="utf-8")

    # Stamp the file with the conversation's update time (atime and mtime).
    updated_at = conversation.update_time.timestamp()
    os_utime(target, (updated_at, updated_at))

    return target
111
+
112
+
113
def _generate_year_index(year_dir: Path, year: str) -> None:
    """Generate a _index.md file for a year folder.

    Every sub-directory of ``year_dir`` is listed as a link to its own
    ``_index.md``. Folders named ``<number>-<name>`` (e.g. ``03-March``) come
    first, ordered by number. Any other folder is listed after them in
    alphabetical order, rather than aborting with ``ValueError`` as a strict
    numeric sort key would.

    Args:
        year_dir: Path to the year directory
        year: The year string (e.g., "2024")
    """
    from urllib.parse import quote

    def month_order(folder: str) -> tuple[int, int, str]:
        # Numeric-prefixed folders sort by their number; everything else
        # falls to the end instead of breaking the sort.
        try:
            return (0, int(folder.split("-")[0]), folder)
        except ValueError:
            return (1, 0, folder)

    months = sorted(
        (entry.name for entry in year_dir.iterdir() if entry.is_dir()),
        key=month_order,
    )

    lines = [
        f"# {year}",
        "",
        "## Months",
        "",
    ]

    for month in months:
        # Show only the part after the numeric prefix ("03-March" -> "March").
        month_name = month.split("-", 1)[1] if "-" in month else month
        # Quote the folder name so unusual names still give a valid link,
        # matching how file links are encoded in the month index.
        lines.append(f"- [{month_name}]({quote(month)}/_index.md)")

    index_path = year_dir / "_index.md"
    index_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
138
+
139
+
140
def _generate_month_index(month_dir: Path, year: str, month: str) -> None:
    """Write ``_index.md`` for a month folder, linking every markdown file in it.

    Args:
        month_dir: Path to the month directory
        year: The year string (e.g., "2024")
        month: The month folder name (e.g., "03-March")
    """
    # Heading uses the text after the first dash ("03-March" -> "March");
    # a name without a dash is used unchanged.
    _, dash, remainder = month.partition("-")
    heading = remainder if dash else month

    # All markdown files except the index itself, in name order.
    names = sorted(
        entry.name for entry in month_dir.glob("*.md") if entry.name != "_index.md"
    )

    # Link text is the file name minus its 3-character ".md" extension;
    # the link target is URL-quoted.
    entries = [f"- [{name[:-3]}]({quote(name)})" for name in names]

    content = "\n".join(
        [
            f"# {heading} {year}",
            "",
            "## Conversations",
            "",
            *entries,
        ]
    )
    (month_dir / "_index.md").write_text(content + "\n", encoding="utf-8")
167
+
168
+
169
def save_collection(
    collection: ConversationCollection,
    directory: Path,
    config: ConversationConfig,
    headers: AuthorHeaders,
    *,
    folder_organization: FolderOrganization = FolderOrganization.FLAT,
    progress_bar: bool = False,
) -> None:
    """Write every conversation of a collection as a markdown file.

    With ``FolderOrganization.DATE`` each conversation goes into a
    ``<year>/<MM>-<MonthName>`` sub-folder of ``directory``, and ``_index.md``
    files are then generated for the month and year folders. Otherwise all
    files are written directly into ``directory``.

    Args:
        collection: The collection to save
        directory: Target directory (created if missing)
        config: Conversation rendering configuration
        headers: Author header configuration
        folder_organization: How to organize files in folders (flat or by date)
        progress_bar: Whether to show a progress bar
    """
    directory.mkdir(parents=True, exist_ok=True)
    by_date = folder_organization == FolderOrganization.DATE

    progress = tqdm(
        collection.conversations,
        desc="Writing Markdown 📄 files",
        disable=not progress_bar,
    )
    for conv in progress:
        target_dir = directory
        if by_date:
            target_dir = directory / get_date_folder_path(conv)
            target_dir.mkdir(parents=True, exist_ok=True)

        markdown_path = target_dir / f"{sanitize(conv.title)}.md"
        save_conversation(conv, markdown_path, config, headers, source_path=collection.source_path)

    if not by_date:
        return

    # Index every all-digit (year) folder under the output directory; month
    # indexes are written before the year index that links to them.
    for year_dir in directory.iterdir():
        if not (year_dir.is_dir() and year_dir.name.isdigit()):
            continue
        for month_dir in year_dir.iterdir():
            if month_dir.is_dir():
                _generate_month_index(month_dir, year_dir.name, month_dir.name)
        _generate_year_index(year_dir, year_dir.name)
213
+
214
+
215
def save_custom_instructions(
    collection: ConversationCollection,
    filepath: Path,
) -> None:
    """Dump the collection's custom instructions to a JSON file.

    The data is serialized with two-space indentation and written as UTF-8.

    Args:
        collection: The collection to extract instructions from
        filepath: Target JSON file path
    """
    payload = collection.custom_instructions
    with filepath.open("w", encoding="utf-8") as out:
        # ``dumps`` returns bytes; decode before writing to the text-mode file.
        encoded = dumps(payload, option=OPT_INDENT_2)
        out.write(encoded.decode())
convoviz/models/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ """Data models for convoviz."""
2
+
3
+ from convoviz.models.collection import ConversationCollection
4
+ from convoviz.models.conversation import Conversation
5
+ from convoviz.models.message import (
6
+ AuthorRole,
7
+ Message,
8
+ MessageAuthor,
9
+ MessageContent,
10
+ MessageMetadata,
11
+ )
12
+ from convoviz.models.node import Node, build_node_tree
13
+
14
+ __all__ = [
15
+ "AuthorRole",
16
+ "Conversation",
17
+ "ConversationCollection",
18
+ "Message",
19
+ "MessageAuthor",
20
+ "MessageContent",
21
+ "MessageMetadata",
22
+ "Node",
23
+ "build_node_tree",
24
+ ]
convoviz/models/collection.py ADDED
@@ -0,0 +1,115 @@
1
+ """ConversationCollection model - manages a set of conversations.
2
+
3
+ This is a pure data model - I/O and visualization logic are in separate modules.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+ from convoviz.models.conversation import Conversation
13
+ from convoviz.models.message import AuthorRole
14
+
15
+
16
class ConversationCollection(BaseModel):
    """A set of ChatGPT conversations with grouping and aggregation helpers.

    Pure data model: file I/O and visualization live in other modules.
    """

    conversations: list[Conversation] = Field(default_factory=list)
    source_path: Path | None = None

    @property
    def index(self) -> dict[str, Conversation]:
        """Map each ``conversation_id`` to its conversation.

        A new dict is built on every access; if an ID occurs more than once,
        the later conversation in the list is the one kept.
        """
        by_id: dict[str, Conversation] = {}
        for conv in self.conversations:
            by_id[conv.conversation_id] = conv
        return by_id

    @property
    def last_updated(self) -> datetime:
        """Most recent ``update_time`` in the collection (``datetime.min`` if empty)."""
        if self.conversations:
            return max(conv.update_time for conv in self.conversations)
        return datetime.min

    def update(self, other: "ConversationCollection") -> None:
        """Merge another collection into this one, conversation by conversation.

        An incoming conversation is taken when its ID is new, or when its
        ``update_time`` is strictly later than the one already held.

        Note: There is deliberately no shortcut based on ``other.last_updated``:
        a conversation that is new to this collection may still carry an older
        timestamp than the newest one already here (e.g. bookmarklet downloads).
        """
        # ``index`` returns a fresh dict, so it can be mutated directly.
        merged: dict[str, Conversation] = self.index

        for conv_id, incoming in other.index.items():
            current = merged.get(conv_id)
            if current is not None and not (incoming.update_time > current.update_time):
                continue
            merged[conv_id] = incoming

        self.conversations = list(merged.values())

    def add(self, conversation: Conversation) -> None:
        """Append a conversation to the collection (no de-duplication)."""
        self.conversations.append(conversation)

    @property
    def custom_instructions(self) -> list[dict[str, Any]]:
        """Collect custom instructions from every conversation that has any.

        Each entry records the chat title, chat link, creation time and the
        instructions themselves.
        """
        collected: list[dict[str, Any]] = []
        for conv in self.conversations:
            instructions = conv.custom_instructions
            if instructions:
                collected.append(
                    {
                        "chat_title": conv.title,
                        "chat_link": conv.url,
                        "time": conv.create_time,
                        "custom_instructions": instructions,
                    }
                )
        return collected

    def timestamps(self, *authors: AuthorRole) -> list[float]:
        """Gather message timestamps for the given authors across all conversations."""
        return [stamp for conv in self.conversations for stamp in conv.timestamps(*authors)]

    def plaintext(self, *authors: AuthorRole) -> str:
        """Join the plain text of all conversations, one conversation per line block."""
        texts = [conv.plaintext(*authors) for conv in self.conversations]
        return "\n".join(texts)

    def _group_by(self, period_attr: str) -> dict[datetime, "ConversationCollection"]:
        """Bucket conversations by the datetime found in ``period_attr`` of each one."""
        buckets: dict[datetime, ConversationCollection] = {}
        for conv in self.conversations:
            period_start = getattr(conv, period_attr)
            bucket = buckets.get(period_start)
            if bucket is None:
                bucket = buckets[period_start] = ConversationCollection()
            bucket.add(conv)
        return buckets

    def group_by_week(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by their ``week_start``."""
        return self._group_by("week_start")

    def group_by_month(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by their ``month_start``."""
        return self._group_by("month_start")

    def group_by_year(self) -> dict[datetime, "ConversationCollection"]:
        """Group conversations by their ``year_start``."""
        return self._group_by("year_start")
convoviz/models/conversation.py ADDED
@@ -0,0 +1,158 @@
1
+ """Conversation model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation (one of the list items)
4
+ """
5
+
6
+ from datetime import datetime, timedelta
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from convoviz.models.message import AuthorRole
12
+ from convoviz.models.node import Node, build_node_tree
13
+
14
+
15
class Conversation(BaseModel):
    """One ChatGPT conversation as found in ``conversations.json``.

    Pure data model: rendering and I/O are implemented in other modules.
    """

    title: str
    create_time: datetime
    update_time: datetime
    mapping: dict[str, Node]
    moderation_results: list[Any] = Field(default_factory=list)
    current_node: str
    plugin_ids: list[str] | None = None
    conversation_id: str
    conversation_template_id: str | None = None
    id: str | None = None

    @property
    def node_mapping(self) -> dict[str, Node]:
        """Node tree built from ``mapping`` (rebuilt on every access)."""
        return build_node_tree(self.mapping)

    @property
    def all_message_nodes(self) -> list[Node]:
        """Every node that carries a message, hidden ones included."""
        return [candidate for candidate in self.node_mapping.values() if candidate.has_message]

    @property
    def visible_message_nodes(self) -> list[Node]:
        """Every node whose message exists and is not flagged as hidden."""
        visible: list[Node] = []
        for candidate in self.node_mapping.values():
            if not candidate.has_message:
                continue
            message = candidate.message
            if message is None or message.is_hidden:
                continue
            visible.append(candidate)
        return visible

    def nodes_by_author(self, *authors: AuthorRole, include_hidden: bool = False) -> list[Node]:
        """Select message nodes written by the given author roles.

        Args:
            *authors: Author roles to filter by. Defaults to ("user",) if empty.
            include_hidden: Whether to include hidden/internal messages.
        """
        roles = authors or ("user",)
        if include_hidden:
            candidates = self.all_message_nodes
        else:
            candidates = self.visible_message_nodes
        return [
            candidate
            for candidate in candidates
            if candidate.message and candidate.message.author.role in roles
        ]

    @property
    def leaf_count(self) -> int:
        """Number of message nodes that are leaves (conversation endpoints)."""
        return len([node for node in self.all_message_nodes if node.is_leaf])

    @property
    def url(self) -> str:
        """ChatGPT URL of this conversation."""
        return f"https://chat.openai.com/c/{self.conversation_id}"

    @property
    def content_types(self) -> list[str]:
        """Distinct content types among the visible messages (order not defined)."""
        seen = set()
        for node in self.visible_message_nodes:
            if node.message:
                seen.add(node.message.content.content_type)
        return list(seen)

    def message_count(self, *authors: AuthorRole) -> int:
        """Number of visible messages from the given authors."""
        matching = self.nodes_by_author(*authors)
        return len(matching)

    @property
    def model(self) -> str | None:
        """Model slug of the first assistant message, or ``None`` if there is none."""
        assistant_nodes = self.nodes_by_author("assistant")
        first_message = assistant_nodes[0].message if assistant_nodes else None
        if not first_message:
            return None
        return first_message.metadata.model_slug

    @property
    def plugins(self) -> list[str]:
        """Distinct plugin namespaces invoked by tool messages (order not defined)."""
        namespaces = set()
        for node in self.nodes_by_author("tool"):
            if node.message and node.message.metadata.invoked_plugin:
                namespaces.add(node.message.metadata.invoked_plugin["namespace"])
        return list(namespaces)

    @property
    def custom_instructions(self) -> dict[str, str]:
        """Custom instructions from the first user system message, else ``{}``."""
        for node in self.nodes_by_author("system"):
            message = node.message
            if not message or not message.metadata.is_user_system_message:
                continue
            return message.metadata.user_context_message_data or {}
        return {}

    def timestamps(self, *authors: AuthorRole) -> list[float]:
        """Timestamps of messages from the given authors.

        Messages without a creation time are skipped. Useful for generating
        time-based visualizations.
        """
        roles = authors or ("user",)
        stamps: list[float] = []
        for node in self.nodes_by_author(*roles):
            if node.message and node.message.create_time:
                stamps.append(node.message.create_time.timestamp())
        return stamps

    def plaintext(self, *authors: AuthorRole) -> str:
        """Newline-joined text of messages from the given authors.

        Only messages that report content are included. Useful for word cloud
        generation.
        """
        roles = authors or ("user",)
        pieces = [
            node.message.text
            for node in self.nodes_by_author(*roles)
            if node.message and node.message.has_content
        ]
        return "\n".join(pieces)

    @property
    def week_start(self) -> datetime:
        """Midnight on the Monday of the week this conversation was created."""
        created = self.create_time
        monday = created - timedelta(days=created.weekday())
        return monday.replace(hour=0, minute=0, second=0, microsecond=0)

    @property
    def month_start(self) -> datetime:
        """Midnight on the first day of the month this conversation was created."""
        created = self.create_time
        return created.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    @property
    def year_start(self) -> datetime:
        """Midnight on January 1st of the year this conversation was created."""
        created = self.create_time
        return created.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)