convoviz 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
convoviz/config.py CHANGED
@@ -19,6 +19,7 @@ class MarkdownConfig(BaseModel):
19
19
  """Configuration for markdown output."""
20
20
 
21
21
  latex_delimiters: Literal["default", "dollars"] = "default"
22
+ flavor: Literal["obsidian", "standard"] = "obsidian"
22
23
 
23
24
 
24
25
  class YAMLConfig(BaseModel):
@@ -53,27 +54,33 @@ class WordCloudConfig(BaseModel):
53
54
  """Configuration for word cloud generation."""
54
55
 
55
56
  font_path: Path | None = None
56
- colormap: str = "magma"
57
+ colormap: str = "RdYlBu"
57
58
  custom_stopwords: str = "use, file, "
59
+ exclude_programming_keywords: bool = True
58
60
  background_color: str | None = None
59
61
  mode: Literal["RGB", "RGBA"] = "RGBA"
60
62
  include_numbers: bool = False
61
- width: int = 1000
62
- height: int = 1000
63
+ width: int = 600
64
+ height: int = 600
63
65
 
64
66
 
65
67
  class GraphConfig(BaseModel):
66
68
  """Configuration for graph generation."""
67
69
 
68
- # Extensible for future graph options
69
- pass
70
+ color: str = "#4A90E2"
71
+ grid: bool = True
72
+ show_counts: bool = True
73
+ font_name: str = "Montserrat-Regular.ttf"
74
+ figsize: tuple[int, int] = (10, 6)
75
+ dpi: int = 300
76
+ timezone: Literal["utc", "local"] = "local"
70
77
 
71
78
 
72
79
  class ConvovizConfig(BaseModel):
73
80
  """Main configuration for convoviz."""
74
81
 
75
- zip_filepath: Path | None = None
76
- output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT Data")
82
+ input_path: Path | None = None
83
+ output_folder: Path = Field(default_factory=lambda: Path.home() / "Documents" / "ChatGPT-Data")
77
84
  message: MessageConfig = Field(default_factory=MessageConfig)
78
85
  conversation: ConversationConfig = Field(default_factory=ConversationConfig)
79
86
  wordcloud: WordCloudConfig = Field(default_factory=WordCloudConfig)
convoviz/interactive.py CHANGED
@@ -26,6 +26,25 @@ CUSTOM_STYLE = Style(
26
26
  )
27
27
 
28
28
 
29
+ def _validate_input_path(raw: str) -> bool | str:
30
+ path = Path(raw)
31
+ if not path.exists():
32
+ return "Path must exist"
33
+
34
+ if path.is_dir():
35
+ if (path / "conversations.json").exists():
36
+ return True
37
+ return "Directory must contain conversations.json"
38
+
39
+ if path.suffix.lower() == ".json":
40
+ return True
41
+
42
+ if path.suffix.lower() == ".zip":
43
+ return True if validate_zip(path) else "ZIP must contain conversations.json"
44
+
45
+ return "Input must be a .zip, a .json, or a directory containing conversations.json"
46
+
47
+
29
48
  def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
30
49
  """Run interactive prompts to configure convoviz.
31
50
 
@@ -38,26 +57,25 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
38
57
  config = initial_config or get_default_config()
39
58
 
40
59
  # Set sensible defaults if not already set
41
- if not config.zip_filepath:
60
+ if not config.input_path:
42
61
  latest = find_latest_zip()
43
62
  if latest:
44
- config.zip_filepath = latest
63
+ config.input_path = latest
45
64
 
46
65
  if not config.wordcloud.font_path:
47
66
  config.wordcloud.font_path = default_font_path()
48
67
 
49
- # Prompt for zip file path
50
- zip_default = str(config.zip_filepath) if config.zip_filepath else ""
51
- zip_result = qst_path(
52
- "Enter the path to the zip file:",
53
- default=zip_default,
54
- validate=lambda p: validate_zip(Path(p))
55
- or "Invalid zip file (must contain conversations.json)",
68
+ # Prompt for input path
69
+ input_default = str(config.input_path) if config.input_path else ""
70
+ input_result = qst_path(
71
+ "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
72
+ default=input_default,
73
+ validate=_validate_input_path,
56
74
  style=CUSTOM_STYLE,
57
75
  ).ask()
58
76
 
59
- if zip_result:
60
- config.zip_filepath = Path(zip_result)
77
+ if input_result:
78
+ config.input_path = Path(input_result)
61
79
 
62
80
  # Prompt for output folder
63
81
  output_result = qst_path(
@@ -94,6 +112,17 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
94
112
  if latex_result:
95
113
  config.conversation.markdown.latex_delimiters = latex_result
96
114
 
115
+ # Prompt for markdown flavor
116
+ flavor_result = select(
117
+ "Select the markdown flavor:",
118
+ choices=["obsidian", "standard"],
119
+ default=config.conversation.markdown.flavor,
120
+ style=CUSTOM_STYLE,
121
+ ).ask()
122
+
123
+ if flavor_result:
124
+ config.conversation.markdown.flavor = flavor_result
125
+
97
126
  # Prompt for YAML headers
98
127
  yaml_config = config.conversation.yaml
99
128
  yaml_choices = [
convoviz/io/assets.py ADDED
@@ -0,0 +1,82 @@
1
+ "Asset management functions."
2
+
3
+ import shutil
4
+ from pathlib import Path
5
+
6
+
7
+ def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
8
+ """Find the actual file for a given asset ID in the source directory.
9
+
10
+ Args:
11
+ source_dir: Directory to search in
12
+ asset_id: The asset ID (e.g., "file-uuid")
13
+
14
+ Returns:
15
+ Path to the found file, or None
16
+ """
17
+ if not source_dir.exists():
18
+ return None
19
+
20
+ source_dir = source_dir.resolve()
21
+
22
+ # Safety check for asset_id
23
+ if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
24
+ return None
25
+
26
+ # 1. Try exact match
27
+ exact_path = (source_dir / asset_id).resolve()
28
+ if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
29
+ return exact_path
30
+
31
+ # 2. Try prefix match in root
32
+ try:
33
+ candidates = list(source_dir.glob(f"{asset_id}*"))
34
+ files = [
35
+ p.resolve()
36
+ for p in candidates
37
+ if p.is_file() and p.resolve().is_relative_to(source_dir)
38
+ ]
39
+ if files:
40
+ return files[0]
41
+ except Exception:
42
+ pass
43
+
44
+ # 3. Try prefix match in dalle-generations
45
+ dalle_dir = source_dir / "dalle-generations"
46
+ if dalle_dir.exists() and dalle_dir.is_dir():
47
+ dalle_dir = dalle_dir.resolve()
48
+ try:
49
+ candidates = list(dalle_dir.glob(f"{asset_id}*"))
50
+ files = [
51
+ p.resolve()
52
+ for p in candidates
53
+ if p.is_file() and p.resolve().is_relative_to(dalle_dir)
54
+ ]
55
+ if files:
56
+ return files[0]
57
+ except Exception:
58
+ pass
59
+
60
+ return None
61
+
62
+
63
def copy_asset(source_path: Path, dest_dir: Path) -> str:
    """Copy an asset to the destination directory.

    The file is copied to ``dest_dir/assets`` under its own name. If a file
    with that name is already there and is the same file (per
    ``filecmp.cmp``), it is reused without copying. If a *different* file
    already occupies the name, a numeric suffix (``name-1.ext``,
    ``name-2.ext``, ...) is tried until a free or identical slot is found, so
    one asset never silently stands in for another.

    Args:
        source_path: The source file path
        dest_dir: The root output directory (assets will be in dest_dir/assets)

    Returns:
        Relative path to the asset (e.g., "assets/image.png")
    """
    # Local import: the module-level imports are limited to shutil and Path.
    import filecmp

    assets_dir = dest_dir / "assets"
    assets_dir.mkdir(parents=True, exist_ok=True)

    stem, suffix = source_path.stem, source_path.suffix
    name = source_path.name
    counter = 0
    while True:
        dest_path = assets_dir / name
        if not dest_path.exists():
            shutil.copy2(source_path, dest_path)
            break
        # copy2 preserves size and mtime, so the default (shallow) comparison
        # recognises an earlier copy of this same file without re-reading it.
        if filecmp.cmp(source_path, dest_path):
            break
        counter += 1
        name = f"{stem}-{counter}{suffix}"

    # Return forward-slash path for Markdown compatibility even on Windows
    return f"assets/{name}"
convoviz/io/loaders.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """Loading functions for conversations and collections."""
2
2
 
3
- from pathlib import Path
3
+ from pathlib import Path, PurePosixPath
4
4
  from zipfile import ZipFile
5
5
 
6
6
  from orjson import loads
@@ -9,17 +9,62 @@ from convoviz.exceptions import InvalidZipError
9
9
  from convoviz.models import Conversation, ConversationCollection
10
10
 
11
11
 
12
+ def _is_safe_zip_member_name(name: str) -> bool:
13
+ """Return True if a ZIP entry name is safe to extract.
14
+
15
+ This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
16
+ separators and rejects absolute paths, drive-letter paths, and ``..`` parts.
17
+ """
18
+ normalized = name.replace("\\", "/")
19
+ member_path = PurePosixPath(normalized)
20
+
21
+ # Absolute paths (e.g. "/etc/passwd") or empty names
22
+ if not normalized or member_path.is_absolute():
23
+ return False
24
+
25
+ # Windows drive letters / UNC-style prefixes stored in the archive
26
+ first = member_path.parts[0] if member_path.parts else ""
27
+ if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
28
+ return False
29
+
30
+ return ".." not in member_path.parts
31
+
32
+
12
33
def extract_archive(filepath: Path) -> Path:
    """Extract a ZIP file and return the extraction folder path.

    The archive is extracted into a folder next to it, named like the archive
    without its suffix. Every member name is validated before anything is
    extracted, to prevent Path Traversal (Zip-Slip).

    Args:
        filepath: Path to the ZIP file

    Returns:
        Path to the extracted folder

    Raises:
        InvalidZipError: If a member name is unsafe or would resolve outside
            the extraction folder. Errors raised by ``ZipFile`` itself are
            not converted and propagate unchanged.
    """
    folder = filepath.with_suffix("")
    folder.mkdir(parents=True, exist_ok=True)

    # Resolve the extraction root once; it does not change per member.
    root = folder.resolve()

    with ZipFile(filepath) as zf:
        for member in zf.infolist():
            # ZIP files are typically POSIX-path-like, but malicious archives
            # can embed backslashes or drive-letter tricks, so check the name
            # in an OS-agnostic way first and then confirm with the resolved
            # target path (short-circuit keeps that order).
            normalized = member.filename.replace("\\", "/")
            if not _is_safe_zip_member_name(member.filename) or not (
                folder / normalized
            ).resolve().is_relative_to(root):
                raise InvalidZipError(
                    str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
                )

        zf.extractall(folder)
    return folder
25
70
 
@@ -60,7 +105,8 @@ def load_conversation_from_json(filepath: Path | str) -> Conversation:
60
105
  def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
61
106
  """Load a conversation collection from a JSON file.
62
107
 
63
- The JSON file should contain an array of conversation objects.
108
+ The JSON file should contain an array of conversation objects,
109
+ or an object with a "conversations" key.
64
110
 
65
111
  Args:
66
112
  filepath: Path to the JSON file
@@ -71,7 +117,12 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
71
117
  filepath = Path(filepath)
72
118
  with filepath.open(encoding="utf-8") as f:
73
119
  data = loads(f.read())
74
- return ConversationCollection(conversations=data)
120
+
121
+ # Handle case where export is wrapped in a top-level object
122
+ if isinstance(data, dict) and "conversations" in data:
123
+ data = data["conversations"]
124
+
125
+ return ConversationCollection(conversations=data, source_path=filepath.parent)
75
126
 
76
127
 
77
128
  def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:
convoviz/io/writers.py CHANGED
@@ -7,6 +7,7 @@ from orjson import OPT_INDENT_2, dumps
7
7
  from tqdm import tqdm
8
8
 
9
9
  from convoviz.config import AuthorHeaders, ConversationConfig
10
+ from convoviz.io.assets import copy_asset, resolve_asset_path
10
11
  from convoviz.models import Conversation, ConversationCollection
11
12
  from convoviz.renderers import render_conversation
12
13
  from convoviz.utils import sanitize
@@ -17,6 +18,7 @@ def save_conversation(
17
18
  filepath: Path,
18
19
  config: ConversationConfig,
19
20
  headers: AuthorHeaders,
21
+ source_path: Path | None = None,
20
22
  ) -> Path:
21
23
  """Save a conversation to a markdown file.
22
24
 
@@ -28,6 +30,7 @@ def save_conversation(
28
30
  filepath: Target file path
29
31
  config: Conversation rendering configuration
30
32
  headers: Author header configuration
33
+ source_path: Path to the source directory containing assets
31
34
 
32
35
  Returns:
33
36
  The actual path the file was saved to (may differ if there was a conflict)
@@ -41,8 +44,20 @@ def save_conversation(
41
44
  counter += 1
42
45
  final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
43
46
 
47
+ # Define asset resolver
48
+ def asset_resolver(asset_id: str) -> str | None:
49
+ if not source_path:
50
+ return None
51
+
52
+ src_file = resolve_asset_path(source_path, asset_id)
53
+ if not src_file:
54
+ return None
55
+
56
+ # Copy to output directory (relative to the markdown file's directory)
57
+ return copy_asset(src_file, final_path.parent)
58
+
44
59
  # Render and write
45
- markdown = render_conversation(conversation, config, headers)
60
+ markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
46
61
  with final_path.open("w", encoding="utf-8") as f:
47
62
  f.write(markdown)
48
63
 
@@ -78,7 +93,7 @@ def save_collection(
78
93
  disable=not progress_bar,
79
94
  ):
80
95
  filepath = directory / f"{sanitize(conv.title)}.md"
81
- save_conversation(conv, filepath, config, headers)
96
+ save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
82
97
 
83
98
 
84
99
  def save_custom_instructions(
@@ -11,14 +11,10 @@ from convoviz.models.message import (
11
11
  )
12
12
  from convoviz.models.node import Node, build_node_tree
13
13
 
14
- # Backward compatibility alias
15
- ConversationSet = ConversationCollection
16
-
17
14
  __all__ = [
18
15
  "AuthorRole",
19
16
  "Conversation",
20
17
  "ConversationCollection",
21
- "ConversationSet",
22
18
  "Message",
23
19
  "MessageAuthor",
24
20
  "MessageContent",
@@ -4,6 +4,7 @@ This is a pure data model - I/O and visualization logic are in separate modules.
4
4
  """
5
5
 
6
6
  from datetime import datetime
7
+ from pathlib import Path
7
8
  from typing import Any
8
9
 
9
10
  from pydantic import BaseModel, Field
@@ -19,6 +20,7 @@ class ConversationCollection(BaseModel):
19
20
  """
20
21
 
21
22
  conversations: list[Conversation] = Field(default_factory=list)
23
+ source_path: Path | None = None
22
24
 
23
25
  @property
24
26
  def index(self) -> dict[str, Conversation]:
@@ -35,14 +37,20 @@ class ConversationCollection(BaseModel):
35
37
  def update(self, other: "ConversationCollection") -> None:
36
38
  """Merge another collection into this one.
37
39
 
38
- Only updates if the other collection has newer content.
40
+ Merges per-conversation, keeping the newest version when IDs collide.
41
+
42
+ Note: We intentionally do *not* gate on ``other.last_updated`` because
43
+ "new" conversations can still have older timestamps than the most recent
44
+ conversation in this collection (e.g. bookmarklet downloads).
39
45
  """
40
- if other.last_updated <= self.last_updated:
41
- return
46
+ merged: dict[str, Conversation] = dict(self.index)
47
+
48
+ for conv_id, incoming in other.index.items():
49
+ existing = merged.get(conv_id)
50
+ if existing is None or incoming.update_time > existing.update_time:
51
+ merged[conv_id] = incoming
42
52
 
43
- merged_index = self.index
44
- merged_index.update(other.index)
45
- self.conversations = list(merged_index.values())
53
+ self.conversations = list(merged.values())
46
54
 
47
55
  def add(self, conversation: Conversation) -> None:
48
56
  """Add a conversation to the collection."""
@@ -98,12 +98,10 @@ class Conversation(BaseModel):
98
98
  def custom_instructions(self) -> dict[str, str]:
99
99
  """Get custom instructions used for this conversation."""
100
100
  system_nodes = self.nodes_by_author("system")
101
- if len(system_nodes) < 2:
102
- return {}
103
-
104
- context_message = system_nodes[1].message
105
- if context_message and context_message.metadata.is_user_system_message:
106
- return context_message.metadata.user_context_message_data or {}
101
+ for node in system_nodes:
102
+ context_message = node.message
103
+ if context_message and context_message.metadata.is_user_system_message:
104
+ return context_message.metadata.user_context_message_data or {}
107
105
  return {}
108
106
 
109
107
  def timestamps(self, *authors: AuthorRole) -> list[float]:
@@ -6,11 +6,11 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
6
6
  from datetime import datetime
7
7
  from typing import Any, Literal
8
8
 
9
- from pydantic import BaseModel, ConfigDict
9
+ from pydantic import BaseModel, ConfigDict, Field
10
10
 
11
11
  from convoviz.exceptions import MessageContentError
12
12
 
13
- AuthorRole = Literal["user", "assistant", "system", "tool"]
13
+ AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
14
14
 
15
15
 
16
16
  class MessageAuthor(BaseModel):
@@ -18,14 +18,14 @@ class MessageAuthor(BaseModel):
18
18
 
19
19
  role: AuthorRole
20
20
  name: str | None = None
21
- metadata: dict[str, Any] = {}
21
+ metadata: dict[str, Any] = Field(default_factory=dict)
22
22
 
23
23
 
24
24
  class MessageContent(BaseModel):
25
25
  """Content of a message."""
26
26
 
27
27
  content_type: str
28
- parts: list[str] | None = None
28
+ parts: list[Any] | None = None
29
29
  text: str | None = None
30
30
  result: str | None = None
31
31
 
@@ -55,14 +55,56 @@ class Message(BaseModel):
55
55
  status: str
56
56
  end_turn: bool | None = None
57
57
  weight: float
58
- metadata: MessageMetadata
59
- recipient: str
58
+ metadata: MessageMetadata = Field(default_factory=MessageMetadata)
59
+ recipient: str | None = None
60
+
61
@property
def images(self) -> list[str]:
    """Extract image asset pointers from the message content.

    Only dict parts whose ``content_type`` is ``"image_asset_pointer"`` are
    considered. A leading ``file-service://`` or ``sediment://`` scheme is
    stripped from the pointer. Parts whose ``asset_pointer`` is missing,
    not a string (e.g. ``null`` in the export), or empty after stripping
    are skipped.

    Returns:
        The asset IDs in the order they appear in ``content.parts``; an
        empty list when there are no parts.
    """
    image_ids: list[str] = []
    for part in self.content.parts or []:
        if not isinstance(part, dict) or part.get("content_type") != "image_asset_pointer":
            continue

        pointer = part.get("asset_pointer")
        # Guard against null / malformed pointers instead of crashing on
        # ``.startswith``.
        if not isinstance(pointer, str):
            continue

        # Strip at most one known scheme prefix.
        for scheme in ("file-service://", "sediment://"):
            if pointer.startswith(scheme):
                pointer = pointer[len(scheme) :]
                break

        if pointer:
            image_ids.append(pointer)
    return image_ids
60
80
 
61
81
  @property
62
82
  def text(self) -> str:
63
83
  """Extract the text content of the message."""
64
84
  if self.content.parts is not None:
65
- return str(self.content.parts[0]) if self.content.parts else ""
85
+ # Handle multimodal content where parts can be mixed strings and dicts
86
+ text_parts = []
87
+ for part in self.content.parts:
88
+ if isinstance(part, str):
89
+ text_parts.append(part)
90
+ elif isinstance(part, dict) and "text" in part:
91
+ # Some parts might be dicts wrapping text (e.g. code interpreter?)
92
+ # But based on spec, usually text is just a string in the list.
93
+ # We'll stick to string extraction for now.
94
+ pass
95
+
96
+ # If we found string parts, join them.
97
+ # If parts existed but no strings (e.g. only images), return empty string?
98
+ # Or should we return a placeholder? For now, let's return joined text.
99
+ if text_parts:
100
+ return "".join(text_parts)
101
+
102
+ # If parts list is not empty but contains no strings, we might want to fall through
103
+ # or return empty string if we consider it "handled".
104
+ # The original code returned "" if parts was empty list.
105
+ if self.content.parts:
106
+ return ""
107
+
66
108
  if self.content.text is not None:
67
109
  return self.content.text
68
110
  if self.content.result is not None:
@@ -75,3 +117,41 @@ class Message(BaseModel):
75
117
  return bool(
76
118
  self.content.parts or self.content.text is not None or self.content.result is not None
77
119
  )
120
+
121
@property
def is_empty(self) -> bool:
    """Tell whether the message carries neither text nor images.

    A message whose text is only whitespace and that has no images counts
    as empty. If reading the content raises ``MessageContentError``, the
    message is treated as empty as well.
    """
    try:
        has_text = bool(self.text.strip())
        # ``images`` is only consulted when there is no text.
        return not (has_text or self.images)
    except MessageContentError:
        return True
128
+
129
@property
def is_hidden(self) -> bool:
    """Decide whether this message is left out of the export.

    A message is hidden when any of these holds:
      * it is empty (no text, no images);
      * its author is ``system`` and it is not flagged as a user system
        message (i.e. not Custom Instructions);
      * its author is the ``browser`` tool (intermediate search steps);
      * it is an assistant message addressed to ``browser`` or with content
        type ``code``;
      * its content type is ``tether_browsing_display`` (browsing status).
    """
    if self.is_empty:
        return True

    role = self.author.role

    # System messages: visible only when explicitly marked as user-provided.
    if role == "system":
        return not self.metadata.is_user_system_message

    if role == "tool":
        # Browser tool outputs are intermediate search steps.
        if self.author.name == "browser":
            return True
    elif role == "assistant":
        # Assistant calls to the browser tool (e.g. "search(...)") or code
        # interpreter input.
        if self.recipient == "browser" or self.content.content_type == "code":
            return True

    # Whatever remains is hidden only if it is a browsing status message.
    return self.content.content_type == "tether_browsing_display"