convoviz 0.1.7__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,149 @@
1
+ """Conversation model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation (one of the list items)
4
+ """
5
+
6
+ from datetime import datetime, timedelta
7
+ from typing import Any
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+ from convoviz.models.message import AuthorRole
12
+ from convoviz.models.node import Node, build_node_tree
13
+
14
+
15
class Conversation(BaseModel):
    """A single ChatGPT conversation.

    This is a pure data model - rendering and I/O logic are in separate modules.
    All derived values below are computed from ``mapping`` on access.
    """

    title: str
    create_time: datetime
    update_time: datetime
    mapping: dict[str, Node]
    moderation_results: list[Any] = Field(default_factory=list)
    current_node: str
    plugin_ids: list[str] | None = None
    conversation_id: str
    conversation_template_id: str | None = None
    id: str | None = None

    @property
    def node_mapping(self) -> dict[str, Node]:
        """Get the connected node tree.

        Delegates to ``build_node_tree`` on every access, passing ``mapping``.
        """
        return build_node_tree(self.mapping)

    @property
    def all_message_nodes(self) -> list[Node]:
        """Get all nodes that have messages (including all branches)."""
        return [node for node in self.node_mapping.values() if node.has_message]

    def nodes_by_author(self, *authors: AuthorRole) -> list[Node]:
        """Get nodes with messages from specified authors.

        Args:
            *authors: Author roles to filter by. Defaults to ("user",) if empty.

        Returns:
            Matching nodes, in the iteration order of ``mapping``.
        """
        if not authors:
            authors = ("user",)
        return [
            node
            for node in self.all_message_nodes
            if node.message and node.message.author.role in authors
        ]

    @property
    def leaf_count(self) -> int:
        """Count the number of leaf nodes (conversation endpoints)."""
        return sum(1 for node in self.all_message_nodes if node.is_leaf)

    @property
    def url(self) -> str:
        """Get the ChatGPT URL for this conversation."""
        return f"https://chat.openai.com/c/{self.conversation_id}"

    @property
    def content_types(self) -> list[str]:
        """Get all unique content types in the conversation.

        Returns:
            Unique content types in order of first appearance. A dict is used
            instead of a set so the order is stable from one run to the next.
        """
        return list(
            dict.fromkeys(
                node.message.content.content_type
                for node in self.all_message_nodes
                if node.message
            )
        )

    def message_count(self, *authors: AuthorRole) -> int:
        """Count messages from specified authors (defaults to "user")."""
        return len(self.nodes_by_author(*authors))

    @property
    def model(self) -> str | None:
        """Get the ChatGPT model used for this conversation.

        Returns:
            The ``model_slug`` of the first assistant message, or None when
            the conversation has no assistant message.
        """
        assistant_nodes = self.nodes_by_author("assistant")
        if not assistant_nodes:
            return None
        message = assistant_nodes[0].message
        return message.metadata.model_slug if message else None

    @property
    def plugins(self) -> list[str]:
        """Get all plugins used in this conversation.

        Returns:
            Unique plugin namespaces of tool messages, in order of first
            appearance. Plugin records without a "namespace" key are skipped
            rather than raising ``KeyError``.
        """
        namespaces: dict[str, None] = {}
        for node in self.nodes_by_author("tool"):
            plugin = node.message.metadata.invoked_plugin if node.message else None
            if plugin and "namespace" in plugin:
                namespaces.setdefault(plugin["namespace"], None)
        return list(namespaces)

    @property
    def custom_instructions(self) -> dict[str, Any]:
        """Get custom instructions used for this conversation.

        Returns:
            ``user_context_message_data`` of the second system message when it
            is flagged ``is_user_system_message``; otherwise an empty dict.
        """
        system_nodes = self.nodes_by_author("system")
        if len(system_nodes) < 2:
            return {}

        context_message = system_nodes[1].message
        if context_message and context_message.metadata.is_user_system_message:
            return context_message.metadata.user_context_message_data or {}
        return {}

    def timestamps(self, *authors: AuthorRole) -> list[float]:
        """Get message timestamps from specified authors.

        Useful for generating time-based visualizations. Messages without a
        ``create_time`` are skipped. Defaults to "user" when no author is
        given (handled by ``nodes_by_author``).
        """
        return [
            node.message.create_time.timestamp()
            for node in self.nodes_by_author(*authors)
            if node.message and node.message.create_time
        ]

    def plaintext(self, *authors: AuthorRole) -> str:
        """Get concatenated plain text from specified authors.

        Useful for word cloud generation. Messages are joined with newlines;
        messages without extractable content are skipped. Defaults to "user"
        when no author is given (handled by ``nodes_by_author``).
        """
        return "\n".join(
            node.message.text
            for node in self.nodes_by_author(*authors)
            if node.message and node.message.has_content
        )

    @property
    def week_start(self) -> datetime:
        """Get the Monday (at 00:00) of the week this conversation was created."""
        start_of_week = self.create_time - timedelta(days=self.create_time.weekday())
        return start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)

    @property
    def month_start(self) -> datetime:
        """Get the first day (at 00:00) of the month this conversation was created."""
        return self.create_time.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    @property
    def year_start(self) -> datetime:
        """Get January 1st (at 00:00) of the year this conversation was created."""
        return self.create_time.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
@@ -0,0 +1,77 @@
1
+ """Message model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation -> mapping -> mapping node -> message
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, ConfigDict
10
+
11
+ from convoviz.exceptions import MessageContentError
12
+
13
+ AuthorRole = Literal["user", "assistant", "system", "tool"]  # closed set of values accepted by MessageAuthor.role
14
+
15
+
16
class MessageAuthor(BaseModel):
    """Who wrote a message.

    Only ``role`` is required; ``name`` and ``metadata`` fall back to
    ``None`` and an empty dict respectively when absent from the input.
    """

    # One of the AuthorRole literals.
    role: AuthorRole
    # Optional author name.
    name: str | None = None
    # Free-form author metadata; no schema is enforced on its values.
    metadata: dict[str, Any] = {}
22
+
23
+
24
class MessageContent(BaseModel):
    """Content of a message.

    The payload may arrive in ``parts``, ``text`` or ``result``; all three are
    optional and default to ``None``.
    """

    content_type: str
    # Parts are not guaranteed to be plain strings, which is why Message.text
    # casts the part with str(); accepting Any keeps one non-string part from
    # failing validation of the whole conversation.
    parts: list[Any] | None = None
    text: str | None = None
    result: str | None = None
31
+
32
+
33
class MessageMetadata(BaseModel):
    """Optional metadata attached to a message; every field defaults to None."""

    # NOTE(review): presumably clears pydantic's protected "model_" prefix so
    # the `model_slug` field below is accepted without a warning - confirm.
    model_config = ConfigDict(protected_namespaces=())

    # Model identifier for the message, when present.
    model_slug: str | None = None
    # Plugin invocation record; Conversation.plugins reads its "namespace" key.
    invoked_plugin: dict[str, Any] | None = None
    # Flag checked by Conversation.custom_instructions.
    is_user_system_message: bool | None = None
    # Payload returned by Conversation.custom_instructions when the flag is set.
    user_context_message_data: dict[str, Any] | None = None
42
+
43
+
44
class Message(BaseModel):
    """A single message in a conversation.

    This is a pure data model - rendering logic is in the renderers module.
    """

    id: str
    author: MessageAuthor
    create_time: datetime | None = None
    update_time: datetime | None = None
    content: MessageContent
    status: str
    end_turn: bool | None = None
    weight: float
    metadata: MessageMetadata
    recipient: str

    @property
    def text(self) -> str:
        """Extract the text content of the message.

        Sources are tried in order: the first entry of ``content.parts``,
        then ``content.text``, then ``content.result``. An empty ``parts``
        list no longer hides a populated ``text``/``result`` field, which
        keeps this property consistent with ``has_content``.

        Returns:
            The extracted text, or ``""`` when ``parts`` is an empty list and
            neither ``text`` nor ``result`` is set.

        Raises:
            MessageContentError: If ``parts``, ``text`` and ``result`` are all None.
        """
        content = self.content
        if content.parts:
            # NOTE(review): only the first part is used; later parts are ignored.
            return str(content.parts[0])
        if content.text is not None:
            return content.text
        if content.result is not None:
            return content.result
        if content.parts is not None:
            # An explicitly empty parts list is valid, just empty.
            return ""
        raise MessageContentError(self.id)

    @property
    def has_content(self) -> bool:
        """Check if the message has extractable content.

        True when ``parts`` is non-empty or ``text``/``result`` is not None.
        """
        return bool(
            self.content.parts or self.content.text is not None or self.content.result is not None
        )
@@ -0,0 +1,66 @@
1
+ """Node model - pure data class.
2
+
3
+ Object path: conversations.json -> conversation -> mapping -> mapping node
4
+
5
+ Nodes form a tree structure representing conversation branches.
6
+ """
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+ from convoviz.models.message import Message
11
+
12
+
13
class Node(BaseModel):
    """One entry of a conversation's ``mapping``.

    A node optionally carries a message and refers to its parent and children
    by id. The ``*_node(s)`` fields hold the resolved object references and are
    filled in at runtime. Pure data model - rendering lives in the renderers
    module.
    """

    # Fields read from the JSON export.
    id: str
    message: Message | None = None
    parent: str | None = None
    children: list[str] = Field(default_factory=list)

    # Object links populated at runtime (not from JSON).
    parent_node: "Node | None" = None
    children_nodes: list["Node"] = Field(default_factory=list)

    def add_child(self, node: "Node") -> None:
        """Link *node* under this node, in both directions."""
        node.parent_node = self
        self.children_nodes.append(node)

    @property
    def has_message(self) -> bool:
        """Whether a message is attached to this node."""
        return self.message is not None

    @property
    def is_leaf(self) -> bool:
        """Whether this node has no linked children."""
        return not self.children_nodes
43
+
44
+
45
def build_node_tree(mapping: dict[str, Node]) -> dict[str, Node]:
    """Wire up parent/child object references between the nodes of *mapping*.

    Args:
        mapping: Dictionary of node_id -> Node

    Returns:
        The very same dictionary; its nodes now have ``parent_node`` and
        ``children_nodes`` populated. Child ids missing from *mapping* are
        ignored.
    """
    nodes = mapping.values()

    # Start from a clean slate so repeated calls do not accumulate duplicates.
    for entry in nodes:
        entry.parent_node = None
        entry.children_nodes = []

    # Link every listed child that is actually present in the mapping.
    for parent in nodes:
        for child_id in parent.children:
            child = mapping.get(child_id)
            if child is not None:
                parent.add_child(child)

    return mapping
convoviz/pipeline.py ADDED
@@ -0,0 +1,120 @@
1
+ """Main processing pipeline for convoviz."""
2
+
3
+ from pathlib import Path
4
+ from shutil import rmtree
5
+
6
+ from rich.console import Console
7
+
8
+ from convoviz.analysis.graphs import generate_week_barplots
9
+ from convoviz.analysis.wordcloud import generate_wordclouds
10
+ from convoviz.config import ConvovizConfig
11
+ from convoviz.exceptions import InvalidZipError
12
+ from convoviz.io.loaders import (
13
+ find_latest_bookmarklet_json,
14
+ load_collection_from_json,
15
+ load_collection_from_zip,
16
+ )
17
+ from convoviz.io.writers import save_collection, save_custom_instructions
18
+
19
+ console = Console()  # module-level rich console; run_pipeline prints all of its progress messages through it
20
+
21
+
22
def run_pipeline(config: ConvovizConfig) -> None:
    """Execute the full export: load, render markdown, plot, word clouds, instructions.

    The output folder is deleted (when it is an existing directory) and
    recreated before anything is written.

    Args:
        config: Complete configuration for the pipeline

    Raises:
        InvalidZipError: If no zip file is configured or the path does not exist.
        Errors raised by the loaders, writers and generators called here are
        not caught, except for failures while loading bookmarklet data, which
        are reported as a warning.
    """

    def announce(icon: str, target: Path) -> None:
        # Shared "step finished" message; only the icon and the link differ.
        console.print(
            f"\nDone [bold green]✅[/bold green] ! "
            f"Check the output [bold blue]{icon}[/bold blue] here: {target.as_uri()} 🔗\n"
        )

    if not config.zip_filepath:
        raise InvalidZipError("", reason="No zip file specified")

    zip_path = Path(config.zip_filepath)
    if not zip_path.exists():
        raise InvalidZipError(str(zip_path), reason="File does not exist")

    console.print("Loading data [bold yellow]📂[/bold yellow] ...\n")

    # Primary data source: the export zip.
    collection = load_collection_from_zip(zip_path)

    # Optional secondary source: the most recent bookmarklet download, if any.
    if bookmarklet_json := find_latest_bookmarklet_json():
        console.print("Found bookmarklet download, loading [bold yellow]📂[/bold yellow] ...\n")
        try:
            collection.update(load_collection_from_json(bookmarklet_json))
        except Exception as e:
            console.print(
                f"[bold yellow]Warning:[/bold yellow] Failed to load bookmarklet data: {e}"
            )

    # Start from an empty output folder.
    output_folder = config.output_folder
    if output_folder.is_dir():
        rmtree(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # Markdown files
    markdown_folder = output_folder / "Markdown"
    save_collection(
        collection,
        markdown_folder,
        config.conversation,
        config.message.author_headers,
        progress_bar=True,
    )
    announce("📄", markdown_folder)

    # Graphs
    graph_folder = output_folder / "Graphs"
    graph_folder.mkdir(parents=True, exist_ok=True)
    generate_week_barplots(collection, graph_folder, config.graph, progress_bar=True)
    announce("📈", graph_folder)

    # Word clouds
    wordcloud_folder = output_folder / "Word Clouds"
    wordcloud_folder.mkdir(parents=True, exist_ok=True)
    generate_wordclouds(collection, wordcloud_folder, config.wordcloud, progress_bar=True)
    announce("🔡☁️", wordcloud_folder)

    # Custom instructions
    console.print("Writing custom instructions [bold blue]📝[/bold blue] ...\n")
    instructions_path = output_folder / "custom_instructions.json"
    save_custom_instructions(collection, instructions_path)
    announce("📝", instructions_path)

    console.print(
        "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder.as_uri()} 🔗\n\n"
        "I hope you enjoy the outcome 🤞.\n\n"
        "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
        "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
    )
@@ -0,0 +1,10 @@
1
+ """Rendering utilities for conversations."""
2
+
3
+ from convoviz.renderers.markdown import render_conversation, render_node
4
+ from convoviz.renderers.yaml import render_yaml_header
5
+
6
+ __all__ = [  # public API of the renderers package: the names imported above from the markdown and yaml modules
7
+ "render_conversation",
8
+ "render_node",
9
+ "render_yaml_header",
10
+ ]
@@ -0,0 +1,182 @@
1
+ """Markdown rendering for conversations."""
2
+
3
+ import re
4
+
5
+ from convoviz.config import AuthorHeaders, ConversationConfig
6
+ from convoviz.models import Conversation, Node
7
+ from convoviz.renderers.yaml import render_yaml_header
8
+
9
+
10
def close_code_blocks(text: str) -> str:
    """Ensure all code blocks in the text are properly closed.

    Fence lines are recognised after stripping surrounding whitespace, so an
    indented fence (e.g. inside a list item) or a closing fence followed by
    spaces or a carriage return is handled as well.

    Args:
        text: Markdown text that may have unclosed code blocks

    Returns:
        The text unchanged when every fence is closed; otherwise the text with
        a closing fence appended on a new line.
    """
    inside_block = False

    for line in text.split("\n"):
        stripped = line.strip()
        if not inside_block:
            # Any line starting with a fence (with or without a language) opens a block.
            inside_block = stripped.startswith("```")
        elif stripped == "```":
            # Only a bare fence closes; "```lang" inside a block is content.
            inside_block = False

    return f"{text}\n```" if inside_block else text
33
+
34
+
35
def replace_latex_delimiters(text: str) -> str:
    """Swap backslash-bracket LaTeX delimiters for dollar-sign ones.

    Args:
        text: Text with \\[ \\] \\( \\) delimiters

    Returns:
        The text with \\[ and \\] turned into $$, and \\( and \\) turned into $
    """
    substitutions = (
        (r"\\\[", "$$"),
        (r"\\\]", "$$"),
        (r"\\\(", "$"),
        (r"\\\)", "$"),
    )
    for pattern, dollars in substitutions:
        text = re.sub(pattern, dollars, text)
    return text
48
+
49
+
50
def code_block(text: str, lang: str = "python") -> str:
    """Fence *text* as a markdown code block.

    Args:
        text: The code to wrap
        lang: Language tag placed after the opening fence

    Returns:
        The opening fence with the language tag, the text, and the closing
        fence, each on its own line
    """
    fence = "```"
    return "\n".join((fence + lang, text, fence))
61
+
62
+
63
def render_message_header(role: str, headers: AuthorHeaders) -> str:
    """Look up the markdown header configured for an author role.

    Args:
        role: The author role (user, assistant, system, tool)
        headers: Configuration for author headers

    Returns:
        The configured header for a known role; for any other role, a level-3
        heading built from the title-cased role name
    """
    configured = {
        name: getattr(headers, name) for name in ("system", "user", "assistant", "tool")
    }
    fallback = f"### {role.title()}"
    return configured.get(role, fallback)
80
+
81
+
82
def render_node_header(node: Node, headers: AuthorHeaders) -> str:
    """Build the heading lines shown above a node's message.

    The lines are, in order: the node id as a level-6 heading, a link to the
    parent (only when the parent carries a message), and the author header.

    Args:
        node: The node to render
        headers: Configuration for author headers

    Returns:
        The newline-terminated header, or an empty string for a node without
        a message
    """
    message = node.message
    if message is None:
        return ""

    parent = node.parent_node
    parent_link = [f"[parent ⬆️](#{parent.id})"] if parent and parent.message else []

    lines = [
        f"###### {node.id}",
        *parent_link,
        render_message_header(message.author.role, headers),
    ]
    return "\n".join(lines) + "\n"
106
+
107
+
108
def render_node_footer(node: Node) -> str:
    """Build the navigation links to a node's children.

    Args:
        node: The node to render

    Returns:
        An empty string for a node without children; a single "child" link for
        one child; otherwise numbered links separated by " | ". Non-empty
        results are wrapped in newlines.
    """
    children = node.children_nodes
    if not children:
        return ""

    if len(children) == 1:
        body = f"[child ⬇️](#{children[0].id})"
    else:
        body = " | ".join(
            f"[child {position} ⬇️](#{child.id})"
            for position, child in enumerate(children, start=1)
        )
    return f"\n{body}\n"
127
+
128
+
129
def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = False) -> str:
    """Render one node (header, message text, child links) as markdown.

    Args:
        node: The node to render
        headers: Configuration for author headers
        use_dollar_latex: Whether to convert LaTeX delimiters to dollars

    Returns:
        The node's markdown followed by a ``---`` separator, or an empty
        string for a node without a message. If extracting or processing the
        text raises, the node is rendered with an empty body.
    """
    message = node.message
    if message is None:
        return ""

    heading = render_node_header(node, headers)

    try:
        body = close_code_blocks(message.text)
        if body:
            # Pad non-empty text with blank lines on both sides.
            body = f"\n{body}\n"
        if use_dollar_latex:
            body = replace_latex_delimiters(body)
    except Exception:
        # Best effort: a message whose text cannot be produced renders empty.
        body = ""

    trailer = render_node_footer(node)

    return "".join(("\n", heading, body, trailer, "\n---\n"))
157
+
158
+
159
def render_conversation(
    conversation: Conversation, config: ConversationConfig, headers: AuthorHeaders
) -> str:
    """Render a whole conversation as one markdown document.

    Args:
        conversation: The conversation to render
        config: Conversation rendering configuration
        headers: Configuration for author headers

    Returns:
        The YAML header followed by every message node rendered in the order
        given by ``conversation.all_message_nodes``
    """
    dollars = config.markdown.latex_delimiters == "dollars"

    # Front matter first, then one chunk per node that carries a message.
    chunks = [render_yaml_header(conversation, config.yaml)]
    chunks.extend(
        render_node(node, headers, dollars)
        for node in conversation.all_message_nodes
        if node.message
    )
    return "".join(chunks)
@@ -0,0 +1,42 @@
1
+ """YAML frontmatter rendering for conversations."""
2
+
3
+ from convoviz.config import YAMLConfig
4
+ from convoviz.models import Conversation
5
+
6
+
7
# Plain (unquoted) words that YAML parsers may read as null/boolean instead of text.
_YAML_RESERVED_WORDS = frozenset({"null", "true", "false", "yes", "no", "on", "off", "y", "n"})


def _is_plain_yaml_string(text: str) -> bool:
    """Return True when *text* can be written unquoted without changing its meaning.

    Deliberately conservative: anything not starting with a letter is quoted.
    """
    if not text or not text[0].isalpha():
        return False
    if text != text.rstrip() or text.endswith(":"):
        return False
    if ": " in text or " #" in text:
        # ": " would start a nested mapping, " #" would start a comment.
        return False
    if text.lower() in _YAML_RESERVED_WORDS:
        return False
    # Rejects newlines, tabs and other control/format characters.
    return text.isprintable()


def _format_yaml_value(value: object) -> str:
    """Serialize one front-matter value as a single-line YAML value."""
    import json  # local import keeps this block self-contained

    if value is None:
        return "null"
    if isinstance(value, str):
        if _is_plain_yaml_string(value):
            return value
        # A JSON string is a valid YAML double-quoted scalar.
        return json.dumps(value, ensure_ascii=False)
    if isinstance(value, (bool, list, tuple, dict)):
        # JSON collections are valid YAML flow collections.
        return json.dumps(value, ensure_ascii=False, default=str)
    # Numbers, datetimes and anything else keep their plain string form.
    return str(value)


def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
    """Render the YAML frontmatter for a conversation.

    Each enabled field becomes one ``key: value`` line. Strings that would be
    ambiguous as plain YAML (e.g. a title containing ": ") are double-quoted,
    lists and dicts are written as JSON-style flow collections, and ``None``
    is written as ``null``; simple strings are left unquoted.

    Args:
        conversation: The conversation to render
        config: YAML configuration specifying which fields to include

    Returns:
        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
    """
    yaml_fields: dict[str, object] = {}

    # Conversation attributes are only read for fields that are enabled.
    if config.title:
        yaml_fields["title"] = conversation.title
    if config.chat_link:
        yaml_fields["chat_link"] = conversation.url
    if config.create_time:
        yaml_fields["create_time"] = conversation.create_time
    if config.update_time:
        yaml_fields["update_time"] = conversation.update_time
    if config.model:
        yaml_fields["model"] = conversation.model
    if config.used_plugins:
        yaml_fields["used_plugins"] = conversation.plugins
    if config.message_count:
        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
    if config.content_types:
        yaml_fields["content_types"] = conversation.content_types
    if config.custom_instructions:
        yaml_fields["custom_instructions"] = conversation.custom_instructions

    if not yaml_fields:
        return ""

    body = "\n".join(f"{key}: {_format_yaml_value(value)}" for key, value in yaml_fields.items())
    return f"---\n{body}\n---\n"