convoviz 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- convoviz/analysis/graphs.py +349 -18
- convoviz/analysis/wordcloud.py +20 -0
- convoviz/assets/stopwords.txt +75 -0
- convoviz/cli.py +18 -15
- convoviz/config.py +12 -7
- convoviz/interactive.py +22 -12
- convoviz/io/assets.py +82 -0
- convoviz/io/loaders.py +30 -2
- convoviz/io/writers.py +17 -2
- convoviz/models/__init__.py +0 -4
- convoviz/models/collection.py +2 -0
- convoviz/models/message.py +45 -3
- convoviz/pipeline.py +42 -19
- convoviz/renderers/markdown.py +46 -15
- convoviz/utils.py +54 -4
- {convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/METADATA +4 -24
- {convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/RECORD +19 -17
- {convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/WHEEL +0 -0
- {convoviz-0.2.1.dist-info → convoviz-0.2.3.dist-info}/entry_points.txt +0 -0
convoviz/interactive.py
CHANGED
|
@@ -7,7 +7,7 @@ from questionary import path as qst_path
|
|
|
7
7
|
from questionary import text as qst_text
|
|
8
8
|
|
|
9
9
|
from convoviz.config import ConvovizConfig, get_default_config
|
|
10
|
-
from convoviz.io.loaders import find_latest_zip
|
|
10
|
+
from convoviz.io.loaders import find_latest_zip
|
|
11
11
|
from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
|
|
12
12
|
|
|
13
13
|
CUSTOM_STYLE = Style(
|
|
@@ -38,26 +38,25 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
38
38
|
config = initial_config or get_default_config()
|
|
39
39
|
|
|
40
40
|
# Set sensible defaults if not already set
|
|
41
|
-
if not config.
|
|
41
|
+
if not config.input_path:
|
|
42
42
|
latest = find_latest_zip()
|
|
43
43
|
if latest:
|
|
44
|
-
config.
|
|
44
|
+
config.input_path = latest
|
|
45
45
|
|
|
46
46
|
if not config.wordcloud.font_path:
|
|
47
47
|
config.wordcloud.font_path = default_font_path()
|
|
48
48
|
|
|
49
|
-
# Prompt for
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
"Enter the path to the zip file:",
|
|
53
|
-
default=
|
|
54
|
-
validate=lambda p:
|
|
55
|
-
or "Invalid zip file (must contain conversations.json)",
|
|
49
|
+
# Prompt for input path
|
|
50
|
+
input_default = str(config.input_path) if config.input_path else ""
|
|
51
|
+
input_result = qst_path(
|
|
52
|
+
"Enter the path to the zip file or extracted directory:",
|
|
53
|
+
default=input_default,
|
|
54
|
+
validate=lambda p: Path(p).exists() or "Path must exist",
|
|
56
55
|
style=CUSTOM_STYLE,
|
|
57
56
|
).ask()
|
|
58
57
|
|
|
59
|
-
if
|
|
60
|
-
config.
|
|
58
|
+
if input_result:
|
|
59
|
+
config.input_path = Path(input_result)
|
|
61
60
|
|
|
62
61
|
# Prompt for output folder
|
|
63
62
|
output_result = qst_path(
|
|
@@ -94,6 +93,17 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
|
|
|
94
93
|
if latex_result:
|
|
95
94
|
config.conversation.markdown.latex_delimiters = latex_result
|
|
96
95
|
|
|
96
|
+
# Prompt for markdown flavor
|
|
97
|
+
flavor_result = select(
|
|
98
|
+
"Select the markdown flavor:",
|
|
99
|
+
choices=["obsidian", "standard"],
|
|
100
|
+
default=config.conversation.markdown.flavor,
|
|
101
|
+
style=CUSTOM_STYLE,
|
|
102
|
+
).ask()
|
|
103
|
+
|
|
104
|
+
if flavor_result:
|
|
105
|
+
config.conversation.markdown.flavor = flavor_result
|
|
106
|
+
|
|
97
107
|
# Prompt for YAML headers
|
|
98
108
|
yaml_config = config.conversation.yaml
|
|
99
109
|
yaml_choices = [
|
convoviz/io/assets.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"Asset management functions."
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def resolve_asset_path(source_dir: Path, asset_id: str) -> Path | None:
|
|
8
|
+
"""Find the actual file for a given asset ID in the source directory.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
source_dir: Directory to search in
|
|
12
|
+
asset_id: The asset ID (e.g., "file-uuid")
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
Path to the found file, or None
|
|
16
|
+
"""
|
|
17
|
+
if not source_dir.exists():
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
source_dir = source_dir.resolve()
|
|
21
|
+
|
|
22
|
+
# Safety check for asset_id
|
|
23
|
+
if ".." in asset_id or "/" in asset_id or "\\" in asset_id:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
# 1. Try exact match
|
|
27
|
+
exact_path = (source_dir / asset_id).resolve()
|
|
28
|
+
if exact_path.exists() and exact_path.is_file() and exact_path.is_relative_to(source_dir):
|
|
29
|
+
return exact_path
|
|
30
|
+
|
|
31
|
+
# 2. Try prefix match in root
|
|
32
|
+
try:
|
|
33
|
+
candidates = list(source_dir.glob(f"{asset_id}*"))
|
|
34
|
+
files = [
|
|
35
|
+
p.resolve()
|
|
36
|
+
for p in candidates
|
|
37
|
+
if p.is_file() and p.resolve().is_relative_to(source_dir)
|
|
38
|
+
]
|
|
39
|
+
if files:
|
|
40
|
+
return files[0]
|
|
41
|
+
except Exception:
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
# 3. Try prefix match in dalle-generations
|
|
45
|
+
dalle_dir = source_dir / "dalle-generations"
|
|
46
|
+
if dalle_dir.exists() and dalle_dir.is_dir():
|
|
47
|
+
dalle_dir = dalle_dir.resolve()
|
|
48
|
+
try:
|
|
49
|
+
candidates = list(dalle_dir.glob(f"{asset_id}*"))
|
|
50
|
+
files = [
|
|
51
|
+
p.resolve()
|
|
52
|
+
for p in candidates
|
|
53
|
+
if p.is_file() and p.resolve().is_relative_to(dalle_dir)
|
|
54
|
+
]
|
|
55
|
+
if files:
|
|
56
|
+
return files[0]
|
|
57
|
+
except Exception:
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def copy_asset(source_path: Path, dest_dir: Path) -> str:
    """Copy an asset to the destination directory.

    The file is placed in ``dest_dir/assets`` under its original name. The
    copy is skipped when a file of the same name and size is already there.

    Args:
        source_path: The source file path
        dest_dir: The root output directory (assets will be in dest_dir/assets)

    Returns:
        Relative path to the asset (e.g., "assets/image.png")
    """
    assets_dir = dest_dir / "assets"
    assets_dir.mkdir(parents=True, exist_ok=True)

    dest_path = assets_dir / source_path.name

    # Copy when the asset is missing, and also when an existing regular file
    # differs in size from the source. A size mismatch is what a copy that was
    # interrupted part-way (or a stale file from an older export) looks like;
    # an existence-only check would keep reusing that broken file.
    if not dest_path.exists():
        shutil.copy2(source_path, dest_path)
    elif dest_path.is_file() and dest_path.stat().st_size != source_path.stat().st_size:
        shutil.copy2(source_path, dest_path)

    # Return forward-slash path for Markdown compatibility even on Windows
    return f"assets/{source_path.name}"
|
convoviz/io/loaders.py
CHANGED
|
@@ -12,14 +12,36 @@ from convoviz.models import Conversation, ConversationCollection
|
|
|
12
12
|
def extract_archive(filepath: Path) -> Path:
    """Unpack a ZIP archive next to itself and return the extraction folder.

    The target folder is the archive path with its suffix removed; it is
    created if needed. Before anything is extracted, every member name is
    vetted against Path Traversal (Zip-Slip).

    Args:
        filepath: Path to the ZIP file

    Returns:
        Path to the extracted folder

    Raises:
        InvalidZipError: If a member path is absolute, contains a ".."
            component, or resolves to a location outside the target folder.
            Errors raised by ZipFile itself are not caught here.
    """
    destination = filepath.with_suffix("")
    destination.mkdir(parents=True, exist_ok=True)

    with ZipFile(filepath) as archive:
        for entry in archive.infolist():
            name = entry.filename
            entry_path = Path(name)
            # Cheap lexical checks first; the resolved-path comparison is the
            # backstop and only runs when the lexical checks pass.
            escapes = (
                entry_path.is_absolute()
                or ".." in entry_path.parts
                or not (destination / name).resolve().is_relative_to(destination.resolve())
            )
            if escapes:
                raise InvalidZipError(str(filepath), reason=f"Malicious path in ZIP: {name}")

        archive.extractall(destination)

    return destination
|
|
25
47
|
|
|
@@ -60,7 +82,8 @@ def load_conversation_from_json(filepath: Path | str) -> Conversation:
|
|
|
60
82
|
def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
|
|
61
83
|
"""Load a conversation collection from a JSON file.
|
|
62
84
|
|
|
63
|
-
The JSON file should contain an array of conversation objects
|
|
85
|
+
The JSON file should contain an array of conversation objects,
|
|
86
|
+
or an object with a "conversations" key.
|
|
64
87
|
|
|
65
88
|
Args:
|
|
66
89
|
filepath: Path to the JSON file
|
|
@@ -71,7 +94,12 @@ def load_collection_from_json(filepath: Path | str) -> ConversationCollection:
|
|
|
71
94
|
filepath = Path(filepath)
|
|
72
95
|
with filepath.open(encoding="utf-8") as f:
|
|
73
96
|
data = loads(f.read())
|
|
74
|
-
|
|
97
|
+
|
|
98
|
+
# Handle case where export is wrapped in a top-level object
|
|
99
|
+
if isinstance(data, dict) and "conversations" in data:
|
|
100
|
+
data = data["conversations"]
|
|
101
|
+
|
|
102
|
+
return ConversationCollection(conversations=data, source_path=filepath.parent)
|
|
75
103
|
|
|
76
104
|
|
|
77
105
|
def load_collection_from_zip(filepath: Path | str) -> ConversationCollection:
|
convoviz/io/writers.py
CHANGED
|
@@ -7,6 +7,7 @@ from orjson import OPT_INDENT_2, dumps
|
|
|
7
7
|
from tqdm import tqdm
|
|
8
8
|
|
|
9
9
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
10
|
+
from convoviz.io.assets import copy_asset, resolve_asset_path
|
|
10
11
|
from convoviz.models import Conversation, ConversationCollection
|
|
11
12
|
from convoviz.renderers import render_conversation
|
|
12
13
|
from convoviz.utils import sanitize
|
|
@@ -17,6 +18,7 @@ def save_conversation(
|
|
|
17
18
|
filepath: Path,
|
|
18
19
|
config: ConversationConfig,
|
|
19
20
|
headers: AuthorHeaders,
|
|
21
|
+
source_path: Path | None = None,
|
|
20
22
|
) -> Path:
|
|
21
23
|
"""Save a conversation to a markdown file.
|
|
22
24
|
|
|
@@ -28,6 +30,7 @@ def save_conversation(
|
|
|
28
30
|
filepath: Target file path
|
|
29
31
|
config: Conversation rendering configuration
|
|
30
32
|
headers: Author header configuration
|
|
33
|
+
source_path: Path to the source directory containing assets
|
|
31
34
|
|
|
32
35
|
Returns:
|
|
33
36
|
The actual path the file was saved to (may differ if there was a conflict)
|
|
@@ -41,8 +44,20 @@ def save_conversation(
|
|
|
41
44
|
counter += 1
|
|
42
45
|
final_path = filepath.with_name(f"{base_name} ({counter}){filepath.suffix}")
|
|
43
46
|
|
|
47
|
+
# Define asset resolver
|
|
48
|
+
def asset_resolver(asset_id: str) -> str | None:
|
|
49
|
+
if not source_path:
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
src_file = resolve_asset_path(source_path, asset_id)
|
|
53
|
+
if not src_file:
|
|
54
|
+
return None
|
|
55
|
+
|
|
56
|
+
# Copy to output directory (relative to the markdown file's directory)
|
|
57
|
+
return copy_asset(src_file, final_path.parent)
|
|
58
|
+
|
|
44
59
|
# Render and write
|
|
45
|
-
markdown = render_conversation(conversation, config, headers)
|
|
60
|
+
markdown = render_conversation(conversation, config, headers, asset_resolver=asset_resolver)
|
|
46
61
|
with final_path.open("w", encoding="utf-8") as f:
|
|
47
62
|
f.write(markdown)
|
|
48
63
|
|
|
@@ -78,7 +93,7 @@ def save_collection(
|
|
|
78
93
|
disable=not progress_bar,
|
|
79
94
|
):
|
|
80
95
|
filepath = directory / f"{sanitize(conv.title)}.md"
|
|
81
|
-
save_conversation(conv, filepath, config, headers)
|
|
96
|
+
save_conversation(conv, filepath, config, headers, source_path=collection.source_path)
|
|
82
97
|
|
|
83
98
|
|
|
84
99
|
def save_custom_instructions(
|
convoviz/models/__init__.py
CHANGED
|
@@ -11,14 +11,10 @@ from convoviz.models.message import (
|
|
|
11
11
|
)
|
|
12
12
|
from convoviz.models.node import Node, build_node_tree
|
|
13
13
|
|
|
14
|
-
# Backward compatibility alias
|
|
15
|
-
ConversationSet = ConversationCollection
|
|
16
|
-
|
|
17
14
|
__all__ = [
|
|
18
15
|
"AuthorRole",
|
|
19
16
|
"Conversation",
|
|
20
17
|
"ConversationCollection",
|
|
21
|
-
"ConversationSet",
|
|
22
18
|
"Message",
|
|
23
19
|
"MessageAuthor",
|
|
24
20
|
"MessageContent",
|
convoviz/models/collection.py
CHANGED
|
@@ -4,6 +4,7 @@ This is a pure data model - I/O and visualization logic are in separate modules.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
from pydantic import BaseModel, Field
|
|
@@ -19,6 +20,7 @@ class ConversationCollection(BaseModel):
|
|
|
19
20
|
"""
|
|
20
21
|
|
|
21
22
|
conversations: list[Conversation] = Field(default_factory=list)
|
|
23
|
+
source_path: Path | None = None
|
|
22
24
|
|
|
23
25
|
@property
|
|
24
26
|
def index(self) -> dict[str, Conversation]:
|
convoviz/models/message.py
CHANGED
|
@@ -10,7 +10,7 @@ from pydantic import BaseModel, ConfigDict
|
|
|
10
10
|
|
|
11
11
|
from convoviz.exceptions import MessageContentError
|
|
12
12
|
|
|
13
|
-
AuthorRole = Literal["user", "assistant", "system", "tool"]
|
|
13
|
+
AuthorRole = Literal["user", "assistant", "system", "tool", "function"]
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class MessageAuthor(BaseModel):
|
|
@@ -25,7 +25,7 @@ class MessageContent(BaseModel):
|
|
|
25
25
|
"""Content of a message."""
|
|
26
26
|
|
|
27
27
|
content_type: str
|
|
28
|
-
parts: list[
|
|
28
|
+
parts: list[Any] | None = None
|
|
29
29
|
text: str | None = None
|
|
30
30
|
result: str | None = None
|
|
31
31
|
|
|
@@ -58,11 +58,53 @@ class Message(BaseModel):
|
|
|
58
58
|
metadata: MessageMetadata
|
|
59
59
|
recipient: str
|
|
60
60
|
|
|
61
|
+
@property
def images(self) -> list[str]:
    """Extract image asset pointers from the message content.

    Only dict parts whose ``content_type`` is ``"image_asset_pointer"`` are
    considered. A leading ``file-service://`` or ``sediment://`` scheme is
    stripped from the pointer; pointers that are missing, empty, or not
    strings (e.g. an explicit JSON ``null``) are skipped instead of raising.

    Returns:
        The asset IDs in the order they appear in the message parts
        (empty when the message has no parts or no image parts).
    """
    image_ids: list[str] = []
    for part in self.content.parts or []:
        if not isinstance(part, dict) or part.get("content_type") != "image_asset_pointer":
            continue

        pointer = part.get("asset_pointer")
        if not isinstance(pointer, str):
            # A null/malformed pointer must not blow up here: a caller that
            # renders inside a broad try/except would drop the whole message.
            continue

        # Strip at most one known scheme prefix.
        for scheme in ("file-service://", "sediment://"):
            if pointer.startswith(scheme):
                pointer = pointer[len(scheme) :]
                break

        if pointer:
            image_ids.append(pointer)
    return image_ids
|
|
80
|
+
|
|
61
81
|
@property
|
|
62
82
|
def text(self) -> str:
|
|
63
83
|
"""Extract the text content of the message."""
|
|
64
84
|
if self.content.parts is not None:
|
|
65
|
-
|
|
85
|
+
# Handle multimodal content where parts can be mixed strings and dicts
|
|
86
|
+
text_parts = []
|
|
87
|
+
for part in self.content.parts:
|
|
88
|
+
if isinstance(part, str):
|
|
89
|
+
text_parts.append(part)
|
|
90
|
+
elif isinstance(part, dict) and "text" in part:
|
|
91
|
+
# Some parts might be dicts wrapping text (e.g. code interpreter?)
|
|
92
|
+
# But based on spec, usually text is just a string in the list.
|
|
93
|
+
# We'll stick to string extraction for now.
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
# If we found string parts, join them.
|
|
97
|
+
# If parts existed but no strings (e.g. only images), return empty string?
|
|
98
|
+
# Or should we return a placeholder? For now, let's return joined text.
|
|
99
|
+
if text_parts:
|
|
100
|
+
return "".join(text_parts)
|
|
101
|
+
|
|
102
|
+
# If parts list is not empty but contains no strings, we might want to fall through
|
|
103
|
+
# or return empty string if we consider it "handled".
|
|
104
|
+
# The original code returned "" if parts was empty list.
|
|
105
|
+
if self.content.parts:
|
|
106
|
+
return ""
|
|
107
|
+
|
|
66
108
|
if self.content.text is not None:
|
|
67
109
|
return self.content.text
|
|
68
110
|
if self.content.result is not None:
|
convoviz/pipeline.py
CHANGED
|
@@ -5,7 +5,7 @@ from shutil import rmtree
|
|
|
5
5
|
|
|
6
6
|
from rich.console import Console
|
|
7
7
|
|
|
8
|
-
from convoviz.analysis.graphs import
|
|
8
|
+
from convoviz.analysis.graphs import generate_graphs
|
|
9
9
|
from convoviz.analysis.wordcloud import generate_wordclouds
|
|
10
10
|
from convoviz.config import ConvovizConfig
|
|
11
11
|
from convoviz.exceptions import InvalidZipError
|
|
@@ -26,20 +26,32 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
26
26
|
config: Complete configuration for the pipeline
|
|
27
27
|
|
|
28
28
|
Raises:
|
|
29
|
-
InvalidZipError: If the
|
|
29
|
+
InvalidZipError: If the input is invalid
|
|
30
30
|
ConfigurationError: If configuration is incomplete
|
|
31
31
|
"""
|
|
32
|
-
if not config.
|
|
33
|
-
raise InvalidZipError("", reason="No
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if not
|
|
37
|
-
raise InvalidZipError(str(
|
|
38
|
-
|
|
39
|
-
console.print("Loading data [bold yellow]📂[/bold yellow] ...\n")
|
|
40
|
-
|
|
41
|
-
# Load
|
|
42
|
-
|
|
32
|
+
if not config.input_path:
|
|
33
|
+
raise InvalidZipError("", reason="No input path specified")
|
|
34
|
+
|
|
35
|
+
input_path = Path(config.input_path)
|
|
36
|
+
if not input_path.exists():
|
|
37
|
+
raise InvalidZipError(str(input_path), reason="File does not exist")
|
|
38
|
+
|
|
39
|
+
console.print(f"Loading data from {input_path} [bold yellow]📂[/bold yellow] ...\n")
|
|
40
|
+
|
|
41
|
+
# Load collection based on input type
|
|
42
|
+
if input_path.is_dir():
|
|
43
|
+
# Check for conversations.json inside
|
|
44
|
+
json_path = input_path / "conversations.json"
|
|
45
|
+
if not json_path.exists():
|
|
46
|
+
raise InvalidZipError(
|
|
47
|
+
str(input_path), reason="Directory must contain conversations.json"
|
|
48
|
+
)
|
|
49
|
+
collection = load_collection_from_json(json_path)
|
|
50
|
+
elif input_path.suffix == ".json":
|
|
51
|
+
collection = load_collection_from_json(input_path)
|
|
52
|
+
else:
|
|
53
|
+
# Assume zip
|
|
54
|
+
collection = load_collection_from_zip(input_path)
|
|
43
55
|
|
|
44
56
|
# Try to merge bookmarklet data if available
|
|
45
57
|
bookmarklet_json = find_latest_bookmarklet_json()
|
|
@@ -54,12 +66,23 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
54
66
|
)
|
|
55
67
|
|
|
56
68
|
output_folder = config.output_folder
|
|
57
|
-
|
|
58
|
-
# Clean and recreate output folder
|
|
59
|
-
if output_folder.exists() and output_folder.is_dir():
|
|
60
|
-
rmtree(output_folder)
|
|
61
69
|
output_folder.mkdir(parents=True, exist_ok=True)
|
|
62
70
|
|
|
71
|
+
# Clean only specific sub-directories we manage
|
|
72
|
+
managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
|
|
73
|
+
for d in managed_dirs:
|
|
74
|
+
sub_dir = output_folder / d
|
|
75
|
+
if sub_dir.exists() and sub_dir.is_dir():
|
|
76
|
+
rmtree(sub_dir)
|
|
77
|
+
sub_dir.mkdir(exist_ok=True)
|
|
78
|
+
|
|
79
|
+
# Clean specific files we manage
|
|
80
|
+
managed_files = ["custom_instructions.json"]
|
|
81
|
+
for f in managed_files:
|
|
82
|
+
managed_file = output_folder / f
|
|
83
|
+
if managed_file.exists():
|
|
84
|
+
managed_file.unlink()
|
|
85
|
+
|
|
63
86
|
# Save markdown files
|
|
64
87
|
markdown_folder = output_folder / "Markdown"
|
|
65
88
|
save_collection(
|
|
@@ -77,7 +100,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
77
100
|
# Generate graphs
|
|
78
101
|
graph_folder = output_folder / "Graphs"
|
|
79
102
|
graph_folder.mkdir(parents=True, exist_ok=True)
|
|
80
|
-
|
|
103
|
+
generate_graphs(
|
|
81
104
|
collection,
|
|
82
105
|
graph_folder,
|
|
83
106
|
config.graph,
|
|
@@ -89,7 +112,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
|
|
|
89
112
|
)
|
|
90
113
|
|
|
91
114
|
# Generate word clouds
|
|
92
|
-
wordcloud_folder = output_folder / "Word
|
|
115
|
+
wordcloud_folder = output_folder / "Word-Clouds"
|
|
93
116
|
wordcloud_folder.mkdir(parents=True, exist_ok=True)
|
|
94
117
|
generate_wordclouds(
|
|
95
118
|
collection,
|
convoviz/renderers/markdown.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Markdown rendering for conversations."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
from collections.abc import Callable
|
|
4
5
|
|
|
5
6
|
from convoviz.config import AuthorHeaders, ConversationConfig
|
|
6
7
|
from convoviz.models import Conversation, Node
|
|
@@ -79,7 +80,7 @@ def render_message_header(role: str, headers: AuthorHeaders) -> str:
|
|
|
79
80
|
return header_map.get(role, f"### {role.title()}")
|
|
80
81
|
|
|
81
82
|
|
|
82
|
-
def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
83
|
+
def render_node_header(node: Node, headers: AuthorHeaders, flavor: str = "obsidian") -> str:
|
|
83
84
|
"""Render the header section of a node.
|
|
84
85
|
|
|
85
86
|
Includes the node ID, parent link, and message author header.
|
|
@@ -87,6 +88,7 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
|
87
88
|
Args:
|
|
88
89
|
node: The node to render
|
|
89
90
|
headers: Configuration for author headers
|
|
91
|
+
flavor: Markdown flavor (obsidian, standard)
|
|
90
92
|
|
|
91
93
|
Returns:
|
|
92
94
|
The header markdown string
|
|
@@ -94,45 +96,57 @@ def render_node_header(node: Node, headers: AuthorHeaders) -> str:
|
|
|
94
96
|
if node.message is None:
|
|
95
97
|
return ""
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
if flavor == "standard":
|
|
100
|
+
return render_message_header(node.message.author.role, headers) + "\n"
|
|
101
|
+
|
|
102
|
+
# Obsidian flavor
|
|
103
|
+
parts = []
|
|
98
104
|
|
|
99
105
|
# Add parent link if parent has a message
|
|
100
106
|
if node.parent_node and node.parent_node.message:
|
|
101
|
-
parts.append(f"[
|
|
107
|
+
parts.append(f"[⬆️](#^{node.parent_node.id})")
|
|
102
108
|
|
|
103
|
-
|
|
109
|
+
author_header = render_message_header(node.message.author.role, headers)
|
|
110
|
+
parts.append(f"{author_header} ^{node.id}")
|
|
104
111
|
|
|
105
112
|
return "\n".join(parts) + "\n"
|
|
106
113
|
|
|
107
114
|
|
|
108
|
-
def render_node_footer(node: Node) -> str:
|
|
115
|
+
def render_node_footer(node: Node, flavor: str = "obsidian") -> str:
    """Build the navigation footer that links a node to its children.

    Args:
        node: The node to render
        flavor: Markdown flavor (obsidian, standard)

    Returns:
        An empty string for the "standard" flavor or a node without children;
        otherwise a line of ``#^<child id>`` links wrapped in newlines. A
        single child gets an unlabeled arrow, several children get numbered
        arrows separated by " | ".
    """
    # The standard flavor never emits navigation links.
    if flavor == "standard":
        return ""

    children = node.children_nodes
    if not children:
        return ""

    if len(children) > 1:
        numbered = [
            f"[{position} ⬇️](#^{child.id})" for position, child in enumerate(children, start=1)
        ]
        body = " | ".join(numbered)
    else:
        body = f"[⬇️](#^{children[0].id})"

    return f"\n{body}\n"
|
|
127
133
|
|
|
128
134
|
|
|
129
|
-
def render_node(
|
|
135
|
+
def render_node(
|
|
136
|
+
node: Node,
|
|
137
|
+
headers: AuthorHeaders,
|
|
138
|
+
use_dollar_latex: bool = False,
|
|
139
|
+
asset_resolver: Callable[[str], str | None] | None = None,
|
|
140
|
+
flavor: str = "obsidian",
|
|
141
|
+
) -> str:
|
|
130
142
|
"""Render a complete node as markdown.
|
|
131
143
|
|
|
132
144
|
Args:
|
|
133
145
|
node: The node to render
|
|
134
146
|
headers: Configuration for author headers
|
|
135
147
|
use_dollar_latex: Whether to convert LaTeX delimiters to dollars
|
|
148
|
+
asset_resolver: Function to resolve asset IDs to paths
|
|
149
|
+
flavor: Markdown flavor (obsidian, standard)
|
|
136
150
|
|
|
137
151
|
Returns:
|
|
138
152
|
Complete markdown string for the node
|
|
@@ -140,7 +154,7 @@ def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = Fal
|
|
|
140
154
|
if node.message is None:
|
|
141
155
|
return ""
|
|
142
156
|
|
|
143
|
-
header = render_node_header(node, headers)
|
|
157
|
+
header = render_node_header(node, headers, flavor=flavor)
|
|
144
158
|
|
|
145
159
|
# Get and process content
|
|
146
160
|
try:
|
|
@@ -148,16 +162,29 @@ def render_node(node: Node, headers: AuthorHeaders, use_dollar_latex: bool = Fal
|
|
|
148
162
|
content = f"\n{content}\n" if content else ""
|
|
149
163
|
if use_dollar_latex:
|
|
150
164
|
content = replace_latex_delimiters(content)
|
|
165
|
+
|
|
166
|
+
# Append images if resolver is provided and images exist
|
|
167
|
+
if asset_resolver and node.message.images:
|
|
168
|
+
for image_id in node.message.images:
|
|
169
|
+
rel_path = asset_resolver(image_id)
|
|
170
|
+
if rel_path:
|
|
171
|
+
# Using standard markdown image syntax.
|
|
172
|
+
# Obsidian handles this well.
|
|
173
|
+
content += f"\n\n"
|
|
174
|
+
|
|
151
175
|
except Exception:
|
|
152
176
|
content = ""
|
|
153
177
|
|
|
154
|
-
footer = render_node_footer(node)
|
|
178
|
+
footer = render_node_footer(node, flavor=flavor)
|
|
155
179
|
|
|
156
180
|
return f"\n{header}{content}{footer}\n---\n"
|
|
157
181
|
|
|
158
182
|
|
|
159
183
|
def render_conversation(
|
|
160
|
-
conversation: Conversation,
|
|
184
|
+
conversation: Conversation,
|
|
185
|
+
config: ConversationConfig,
|
|
186
|
+
headers: AuthorHeaders,
|
|
187
|
+
asset_resolver: Callable[[str], str | None] | None = None,
|
|
161
188
|
) -> str:
|
|
162
189
|
"""Render a complete conversation as markdown.
|
|
163
190
|
|
|
@@ -165,11 +192,13 @@ def render_conversation(
|
|
|
165
192
|
conversation: The conversation to render
|
|
166
193
|
config: Conversation rendering configuration
|
|
167
194
|
headers: Configuration for author headers
|
|
195
|
+
asset_resolver: Function to resolve asset IDs to paths
|
|
168
196
|
|
|
169
197
|
Returns:
|
|
170
198
|
Complete markdown document string
|
|
171
199
|
"""
|
|
172
200
|
use_dollar_latex = config.markdown.latex_delimiters == "dollars"
|
|
201
|
+
flavor = config.markdown.flavor
|
|
173
202
|
|
|
174
203
|
# Start with YAML header
|
|
175
204
|
markdown = render_yaml_header(conversation, config.yaml)
|
|
@@ -177,6 +206,8 @@ def render_conversation(
|
|
|
177
206
|
# Render all message nodes
|
|
178
207
|
for node in conversation.all_message_nodes:
|
|
179
208
|
if node.message:
|
|
180
|
-
markdown += render_node(
|
|
209
|
+
markdown += render_node(
|
|
210
|
+
node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
|
|
211
|
+
)
|
|
181
212
|
|
|
182
213
|
return markdown
|