convoviz 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62):
  1. convoviz/__init__.py +34 -0
  2. convoviz/__main__.py +6 -0
  3. convoviz/analysis/__init__.py +22 -0
  4. convoviz/analysis/graphs.py +879 -0
  5. convoviz/analysis/wordcloud.py +204 -0
  6. convoviz/assets/colormaps.txt +15 -0
  7. convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
  8. convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
  9. convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
  10. convoviz/assets/fonts/Borel-Regular.ttf +0 -0
  11. convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
  12. convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
  13. convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
  14. convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
  15. convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
  16. convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
  17. convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
  18. convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
  19. convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
  20. convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
  21. convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
  22. convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
  23. convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
  24. convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
  25. convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
  26. convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
  27. convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
  28. convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
  29. convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
  30. convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
  31. convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
  32. convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
  33. convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
  34. convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
  35. convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
  36. convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
  37. convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
  38. convoviz/assets/stopwords.txt +1 -0
  39. convoviz/cli.py +149 -0
  40. convoviz/config.py +120 -0
  41. convoviz/exceptions.py +47 -0
  42. convoviz/interactive.py +264 -0
  43. convoviz/io/__init__.py +21 -0
  44. convoviz/io/assets.py +109 -0
  45. convoviz/io/loaders.py +191 -0
  46. convoviz/io/writers.py +231 -0
  47. convoviz/logging_config.py +69 -0
  48. convoviz/models/__init__.py +24 -0
  49. convoviz/models/collection.py +115 -0
  50. convoviz/models/conversation.py +158 -0
  51. convoviz/models/message.py +218 -0
  52. convoviz/models/node.py +66 -0
  53. convoviz/pipeline.py +184 -0
  54. convoviz/py.typed +0 -0
  55. convoviz/renderers/__init__.py +10 -0
  56. convoviz/renderers/markdown.py +269 -0
  57. convoviz/renderers/yaml.py +119 -0
  58. convoviz/utils.py +155 -0
  59. convoviz-0.4.1.dist-info/METADATA +215 -0
  60. convoviz-0.4.1.dist-info/RECORD +62 -0
  61. convoviz-0.4.1.dist-info/WHEEL +4 -0
  62. convoviz-0.4.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,218 @@
1
+ """Message model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation -> mapping -> mapping node -> message
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field
10
+
11
+ from convoviz.exceptions import MessageContentError
12
+
13
# Closed set of role strings accepted for a message author.
AuthorRole = Literal[
    "user",
    "assistant",
    "system",
    "tool",
    "function",
]
14
+
15
+
16
class MessageAuthor(BaseModel):
    """Who authored a message: a role, an optional name and free-form metadata."""

    # Must be one of the literals enumerated by ``AuthorRole``.
    role: AuthorRole
    # Optional author name; defaults to None when not provided.
    name: str | None = None
    # Arbitrary extra author data; each instance gets its own empty dict.
    metadata: dict[str, Any] = Field(default_factory=dict)
22
+
23
+
24
class MessageContent(BaseModel):
    """Payload of a message.

    Only ``content_type`` is required. Every other field defaults to ``None``
    and is presumably populated only for the content types that use it.
    """

    content_type: str
    parts: list[Any] | None = None
    text: str | None = None
    result: str | None = None
    # Used by the "reasoning_recap" content type.
    content: str | None = None
    # Used by the "thoughts" content type: a list of thought objects
    # (summary / content / finished).
    thoughts: list[Any] | None = None
    # Used by the "tether_quote" content type.
    url: str | None = None
    domain: str | None = None
    title: str | None = None
39
+
40
+
41
class MessageMetadata(BaseModel):
    """Optional metadata attached to a message; every field defaults to ``None``."""

    # No protected namespaces - presumably so the ``model_slug`` field does not
    # trip pydantic's check on names starting with ``model_``.
    model_config = ConfigDict(protected_namespaces=())

    model_slug: str | None = None
    invoked_plugin: dict[str, Any] | None = None
    is_user_system_message: bool | None = None
    is_visually_hidden_from_conversation: bool | None = None
    user_context_message_data: dict[str, Any] | None = None
51
+
52
+
53
class Message(BaseModel):
    """A single message in a conversation.

    This is a pure data model - rendering logic is in the renderers module.
    """

    id: str
    author: MessageAuthor
    create_time: datetime | None = None
    update_time: datetime | None = None
    content: MessageContent
    status: str
    end_turn: bool | None = None
    weight: float
    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
    recipient: str | None = None

    @property
    def images(self) -> list[str]:
        """Extract image asset pointers from the message content.

        Only dict parts whose ``content_type`` is ``"image_asset_pointer"`` are
        considered. One leading ``file-service://`` or ``sediment://`` prefix is
        stripped. Parts whose pointer is missing, empty or not a string are
        skipped.
        """
        if not self.content.parts:
            return []

        image_ids: list[str] = []
        for part in self.content.parts:
            if not isinstance(part, dict) or part.get("content_type") != "image_asset_pointer":
                continue

            pointer = part.get("asset_pointer")
            # Parts are untyped JSON: the pointer may be null or a non-string,
            # in which case there is nothing usable to extract.
            if not isinstance(pointer, str):
                continue

            # Strip at most one known prefix ("file-service://" or "sediment://").
            for prefix in ("file-service://", "sediment://"):
                if pointer.startswith(prefix):
                    pointer = pointer[len(prefix) :]
                    break

            if pointer:
                image_ids.append(pointer)
        return image_ids

    @property
    def text(self) -> str:
        """Extract the text content of the message.

        Resolution order:
          1. Non-empty ``parts``: the string parts joined together (an empty
             string when the parts hold no strings, e.g. only images).
          2. ``tether_quote`` content: rendered as a blockquote.
          3. The first of ``text``, ``result``, ``content`` that is not None.
          4. ``thoughts``: the thought summaries, one per line.

        Raises:
            MessageContentError: If none of the above yields any content.
        """
        parts = self.content.parts
        if parts:
            # Multimodal content mixes strings and dicts; only plain strings
            # count as text. Dict parts are intentionally ignored here.
            return "".join(part for part in parts if isinstance(part, str))

        # An empty (or missing) parts list falls through to the other fields.
        # tether_quote: render as a blockquote with attribution (check before .text)
        if self.content.content_type == "tether_quote":
            return self._render_tether_quote()
        if self.content.text is not None:
            return self.content.text
        if self.content.result is not None:
            return self.content.result
        # reasoning_recap content type uses 'content' field
        if self.content.content is not None:
            return self.content.content
        # thoughts content type uses 'thoughts' field (list of thought objects)
        if self.content.thoughts is not None:
            return self._render_thoughts()
        raise MessageContentError(self.id)

    def _render_thoughts(self) -> str:
        """Render thoughts content as the thought summaries, one per line.

        Thoughts that are not dicts, or whose ``summary`` is missing, empty or
        not a string, are skipped. Returns an empty string when nothing remains.
        """
        summaries: list[str] = []
        for thought in self.content.thoughts or []:
            if not isinstance(thought, dict):
                continue
            summary = thought.get("summary")
            # Guard the join below against non-string summaries in untyped JSON.
            if isinstance(summary, str) and summary:
                summaries.append(summary)
        return "\n".join(summaries)

    def _render_tether_quote(self) -> str:
        """Render tether_quote content as a Markdown blockquote.

        Returns an empty string when the quote text is missing or blank. An
        attribution line is appended when a URL is available, labelled with the
        title, else the domain, else the bare URL.
        """
        quote_text = self.content.text or ""
        if not quote_text.strip():
            return ""
        # Format as blockquote with source
        lines = [f"> {line}" for line in quote_text.strip().split("\n")]
        blockquote = "\n".join(lines)
        # Add attribution if we have title/domain/url
        if self.content.title and self.content.url:
            blockquote += f"\n> — [{self.content.title}]({self.content.url})"
        elif self.content.domain and self.content.url:
            blockquote += f"\n> — [{self.content.domain}]({self.content.url})"
        elif self.content.url:
            blockquote += f"\n> — <{self.content.url}>"
        return blockquote

    @property
    def has_content(self) -> bool:
        """Check if the message has extractable content.

        True when ``parts`` is non-empty or ``text``/``result`` is set. The
        ``content`` and ``thoughts`` fields are not considered here.
        """
        return bool(
            self.content.parts or self.content.text is not None or self.content.result is not None
        )

    @property
    def is_empty(self) -> bool:
        """Check if the message is effectively empty (no text, no images).

        A message whose text cannot be extracted at all counts as empty.
        """
        try:
            return not self.text.strip() and not self.images
        except MessageContentError:
            return True

    @property
    def is_hidden(self) -> bool:
        """Check if message should be hidden in export.

        Hidden if:
        1. It is empty (no text, no images).
        2. Explicitly marked as visually hidden.
        3. It is an internal system message (not custom instructions).
        4. It is a browser tool output (intermediate search steps).
        5. It is an assistant message targeting a tool (internal call).
        6. It is code interpreter input (content_type="code").
        7. It is browsing status (tether_browsing_display).
        8. It is internal reasoning (thoughts, reasoning_recap from o1/o3).
        """
        if self.is_empty:
            return True

        # Explicitly marked as hidden by OpenAI
        if self.metadata.is_visually_hidden_from_conversation:
            return True

        # Hide internal system messages
        if self.author.role == "system":
            # Only show if explicitly marked as user system message (Custom Instructions)
            return not self.metadata.is_user_system_message

        # Hide browser tool outputs (intermediate search steps)
        if self.author.role == "tool" and self.author.name == "browser":
            return True

        # Hide assistant messages targeting tools (e.g., search(...), code input)
        # recipient="all" or None means it's for the user; anything else is internal
        if self.author.role == "assistant" and self.recipient not in ("all", None):
            return True

        # Hide code interpreter input (content_type="code")
        if self.author.role == "assistant" and self.content.content_type == "code":
            return True

        # Hide browsing status and internal reasoning steps (o1/o3 models)
        return self.content.content_type in (
            "tether_browsing_display",
            "thoughts",
            "reasoning_recap",
        )
@@ -0,0 +1,66 @@
1
+ """Node model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation -> mapping -> mapping node
4
+
5
+ Nodes form a tree structure representing conversation branches.
6
+ """
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ from convoviz.models.message import Message
11
+
12
+
13
class Node(BaseModel):
    """One node of the conversation tree.

    A node optionally carries a message and refers to its parent and children
    by id. This is a pure data model - rendering logic is in the renderers module.
    """

    id: str
    message: Message | None = None
    parent: str | None = None
    children: list[str] = Field(default_factory=list)

    # Object references filled in at runtime (they do not come from the JSON).
    parent_node: "Node | None" = None
    children_nodes: list["Node"] = Field(default_factory=list)

    def add_child(self, node: "Node") -> None:
        """Attach *node* as a child and point its ``parent_node`` back at this node."""
        node.parent_node = self
        self.children_nodes.append(node)

    @property
    def has_message(self) -> bool:
        """Whether a message is attached to this node."""
        return self.message is not None

    @property
    def is_leaf(self) -> bool:
        """Whether this node has no connected child nodes."""
        return not self.children_nodes
43
+
44
+
45
def build_node_tree(mapping: dict[str, Node]) -> dict[str, Node]:
    """Wire up ``parent_node``/``children_nodes`` for every node in *mapping*.

    Args:
        mapping: Dictionary of node_id -> Node

    Returns:
        The same dictionary object, with its nodes connected in place. Child
        ids that are not keys of *mapping* are ignored.
    """
    # Start from a clean slate so calling this twice does not duplicate links.
    for node in mapping.values():
        node.children_nodes = []
        node.parent_node = None

    # Connect each parent to the children it lists by id.
    for parent in mapping.values():
        for child_id in parent.children:
            child = mapping.get(child_id)
            if child is not None:
                parent.add_child(child)

    return mapping
convoviz/pipeline.py ADDED
@@ -0,0 +1,184 @@
1
+ """Main processing pipeline for convoviz."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from shutil import rmtree
6
+
7
+ from rich.console import Console
8
+
9
+ from convoviz.config import ConvovizConfig, OutputKind
10
+ from convoviz.exceptions import ConfigurationError, InvalidZipError
11
+ from convoviz.io.loaders import (
12
+ find_latest_bookmarklet_json,
13
+ load_collection_from_json,
14
+ load_collection_from_zip,
15
+ )
16
+ from convoviz.io.writers import save_collection
17
+
18
+ console = Console()
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ def _safe_uri(path: Path) -> str:
23
+ """Best-effort URI for printing.
24
+
25
+ ``Path.as_uri()`` requires an absolute path; users often provide relative
26
+ output paths, so we resolve first and fall back to string form.
27
+ """
28
+ try:
29
+ return path.resolve().as_uri()
30
+ except Exception:
31
+ return str(path)
32
+
33
+
34
def run_pipeline(config: ConvovizConfig) -> None:
    """Run the main processing pipeline.

    Loads the collection from ``config.input_path`` (a directory containing
    ``conversations.json``, a ``.json`` file, or otherwise assumed to be a zip),
    merges the latest bookmarklet download when one is found, resets the output
    sub-directories of the selected outputs, then writes each selected output.

    Args:
        config: Complete configuration for the pipeline

    Raises:
        InvalidZipError: If the input path is unset, does not exist, or is a
            directory without ``conversations.json``
        ConfigurationError: If a selected output needs an optional dependency
            that is not installed
    """
    if not config.input_path:
        raise InvalidZipError("", reason="No input path specified")

    input_path = Path(config.input_path)
    if not input_path.exists():
        raise InvalidZipError(str(input_path), reason="File does not exist")

    logger.info(f"Starting pipeline with input: {input_path}")
    console.print(f"Loading data from {input_path} [bold yellow]📂[/bold yellow] ...\n")

    # Load collection based on input type
    if input_path.is_dir():
        # Check for conversations.json inside
        json_path = input_path / "conversations.json"
        if not json_path.exists():
            raise InvalidZipError(
                str(input_path), reason="Directory must contain conversations.json"
            )
        collection = load_collection_from_json(json_path)
    elif input_path.suffix == ".json":
        collection = load_collection_from_json(input_path)
    else:
        # Assume zip
        collection = load_collection_from_zip(input_path)
    logger.info(f"Loaded collection with {len(collection.conversations)} conversations")

    # Try to merge bookmarklet data if available
    bookmarklet_json = find_latest_bookmarklet_json()
    if bookmarklet_json:
        console.print("Found bookmarklet download, loading [bold yellow]📂[/bold yellow] ...\n")
        try:
            bookmarklet_collection = load_collection_from_json(bookmarklet_json)
            collection.update(bookmarklet_collection)
            logger.info("Merged bookmarklet data")
        except Exception as e:
            # Best effort: the bookmarklet data is optional, so warn and carry on.
            # Log with the traceback so the failure is not lost once the
            # console output scrolls away.
            logger.warning("Failed to load bookmarklet data", exc_info=True)
            console.print(
                f"[bold yellow]Warning:[/bold yellow] Failed to load bookmarklet data: {e}"
            )

    output_folder = config.output_folder
    output_folder.mkdir(parents=True, exist_ok=True)

    # Determine which outputs are selected
    selected_outputs = config.outputs

    # Build mapping of output kind -> directory name
    output_dir_map: dict[OutputKind, str] = {
        OutputKind.MARKDOWN: "Markdown",
        OutputKind.GRAPHS: "Graphs",
        OutputKind.WORDCLOUDS: "Word-Clouds",
    }

    # Clean only specific sub-directories we manage (only for selected outputs)
    for output_kind, dir_name in output_dir_map.items():
        if output_kind not in selected_outputs:
            continue
        sub_dir = output_folder / dir_name
        # Never follow symlinks; just unlink them. is_symlink() must be checked
        # before exists(): exists() follows links and is False for a dangling
        # symlink, which would otherwise survive and make mkdir() below fail.
        if sub_dir.is_symlink():
            sub_dir.unlink()
        elif sub_dir.is_dir():
            rmtree(sub_dir)
        elif sub_dir.exists():
            sub_dir.unlink()
        sub_dir.mkdir(exist_ok=True)

    # Save markdown files (if selected)
    if OutputKind.MARKDOWN in selected_outputs:
        markdown_folder = output_folder / "Markdown"
        save_collection(
            collection,
            markdown_folder,
            config.conversation,
            config.message.author_headers,
            folder_organization=config.folder_organization,
            progress_bar=True,
        )
        logger.info("Markdown generation complete")
        console.print(
            f"\nDone [bold green]✅[/bold green] ! "
            f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
        )

    # Generate graphs (if selected)
    if OutputKind.GRAPHS in selected_outputs:
        # Lazy import to allow markdown-only usage without matplotlib
        try:
            from convoviz.analysis.graphs import generate_graphs
        except ModuleNotFoundError as e:
            raise ConfigurationError(
                "Graph generation requires matplotlib. "
                'Reinstall with the [viz] extra: uv tool install "convoviz[viz]"'
            ) from e

        graph_folder = output_folder / "Graphs"
        graph_folder.mkdir(parents=True, exist_ok=True)
        generate_graphs(
            collection,
            graph_folder,
            config.graph,
            progress_bar=True,
        )
        logger.info("Graph generation complete")
        console.print(
            f"\nDone [bold green]✅[/bold green] ! "
            f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
        )

    # Generate word clouds (if selected)
    if OutputKind.WORDCLOUDS in selected_outputs:
        # Lazy import to allow markdown-only usage without wordcloud/nltk
        try:
            from convoviz.analysis.wordcloud import generate_wordclouds
        except ModuleNotFoundError as e:
            raise ConfigurationError(
                "Word cloud generation requires wordcloud and nltk. "
                'Reinstall with the [viz] extra: uv tool install "convoviz[viz]"'
            ) from e

        wordcloud_folder = output_folder / "Word-Clouds"
        wordcloud_folder.mkdir(parents=True, exist_ok=True)
        generate_wordclouds(
            collection,
            wordcloud_folder,
            config.wordcloud,
            progress_bar=True,
        )
        logger.info("Wordcloud generation complete")
        console.print(
            f"\nDone [bold green]✅[/bold green] ! "
            f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
        )

    console.print(
        "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
        "I hope you enjoy the outcome 🤞.\n\n"
        "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
        "➡️ https://github.com/mohamed-chs/convoviz 🔗\n\n"
    )
convoviz/py.typed ADDED
File without changes
@@ -0,0 +1,10 @@
1
+ """Rendering utilities for conversations."""
2
+
3
+ from convoviz.renderers.markdown import render_conversation, render_node
4
+ from convoviz.renderers.yaml import render_yaml_header
5
+
6
# Names re-exported as the public API of the renderers package.
__all__ = ["render_conversation", "render_node", "render_yaml_header"]