convoviz 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {convoviz-0.2.3 → convoviz-0.2.4}/PKG-INFO +30 -5
- {convoviz-0.2.3 → convoviz-0.2.4}/README.md +29 -4
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/graphs.py +98 -40
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/wordcloud.py +1 -1
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/config.py +2 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/interactive.py +22 -3
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/loaders.py +28 -5
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/collection.py +12 -6
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/conversation.py +4 -6
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/message.py +42 -4
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/pipeline.py +31 -8
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/markdown.py +54 -18
- convoviz-0.2.4/convoviz/renderers/yaml.py +119 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/pyproject.toml +2 -3
- convoviz-0.2.3/convoviz/renderers/yaml.py +0 -42
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/__main__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/colormaps.txt +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/AmaticSC-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/ArchitectsDaughter-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/BebasNeue-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Borel-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Courgette-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/CroissantOne-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Handjet-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/IndieFlower-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Kalam-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Lobster-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/MartianMono-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Montserrat-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Mooli-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Pacifico-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/PlayfairDisplay-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Raleway-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoMono-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/RobotoSlab-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Ruwudu-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Sacramento-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/SedgwickAveDisplay-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/ShadowsIntoLight-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/TitilliumWeb-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Yellowtail-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauOffice-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/YsabeauSC-Thin.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/fonts/Zeyada-Regular.ttf +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/assets/stopwords.txt +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/cli.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/exceptions.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/assets.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/writers.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/node.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/py.typed +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/__init__.py +0 -0
- {convoviz-0.2.3 → convoviz-0.2.4}/convoviz/utils.py +0 -0
{convoviz-0.2.3 → convoviz-0.2.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: convoviz
-Version: 0.2.3
+Version: 0.2.4
 Summary: Get analytics and visualizations on your ChatGPT data!
 Keywords: markdown,chatgpt,openai,visualization,analytics,json,export,data-analysis,obsidian
 Author: Mohamed Cheikh Sidiya

@@ -24,7 +24,7 @@ Requires-Python: >=3.12
 Project-URL: Repository, https://github.com/mohamed-chs/chatgpt-history-export-to-md
 Description-Content-Type: text/markdown
 
-# Convoviz 📊: Visualize your entire ChatGPT data
+# Convoviz 📊: Visualize your entire ChatGPT data
 
 Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.
 

@@ -68,7 +68,7 @@ or pipx:
 pipx install convoviz
 ```
 
-### 3. Run the
+### 3. Run the tool 🏃♂️
 
 Simply run the command and follow the prompts:
 

@@ -81,9 +81,18 @@ convoviz
 You can provide arguments directly to skip the prompts:
 
 ```bash
-convoviz --
+convoviz --input path/to/your/export.zip --output path/to/output/folder
 ```
 
+Inputs can be any of:
+- A ChatGPT export ZIP (downloaded from OpenAI)
+- An extracted export directory containing `conversations.json`
+- A `conversations.json` file directly
+
+Notes:
+- `--zip` / `-z` is kept as an alias for `--input` for convenience.
+- You can force non-interactive mode with `--no-interactive`.
+
 For more options, run:
 
 ```bash

@@ -118,4 +127,20 @@ It was also a great opportunity to learn more about Python and type annotations.
 
 It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.
 
-
+### Offline / reproducible runs
+
+Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+```bash
+python -c "import nltk; nltk.download('stopwords')"
+```
+
+If you’re using `uv` without a global install, you can run:
+
+```bash
+uv run python -c "import nltk; nltk.download('stopwords')"
+```
+
+### Bookmarklet
+
+There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
{convoviz-0.2.3 → convoviz-0.2.4}/README.md

@@ -1,4 +1,4 @@
-# Convoviz 📊: Visualize your entire ChatGPT data
+# Convoviz 📊: Visualize your entire ChatGPT data
 
 Convert your ChatGPT history into well-formatted Markdown files. Additionally, visualize your data with word clouds 🔡☁️, view your prompt history graphs 📈, and access all your custom instructions 🤖 in a single location.
 

@@ -42,7 +42,7 @@ or pipx:
 pipx install convoviz
 ```
 
-### 3. Run the
+### 3. Run the tool 🏃♂️
 
 Simply run the command and follow the prompts:
 

@@ -55,9 +55,18 @@ convoviz
 You can provide arguments directly to skip the prompts:
 
 ```bash
-convoviz --
+convoviz --input path/to/your/export.zip --output path/to/output/folder
 ```
 
+Inputs can be any of:
+- A ChatGPT export ZIP (downloaded from OpenAI)
+- An extracted export directory containing `conversations.json`
+- A `conversations.json` file directly
+
+Notes:
+- `--zip` / `-z` is kept as an alias for `--input` for convenience.
+- You can force non-interactive mode with `--no-interactive`.
+
 For more options, run:
 
 ```bash

@@ -92,4 +101,20 @@ It was also a great opportunity to learn more about Python and type annotations.
 
 It should(?) also work as library, so you can import and use the models and functions. I need to add more documentation for that tho. Feel free to reach out if you need help.
 
-
+### Offline / reproducible runs
+
+Convoviz uses NLTK stopwords for word clouds. If you’re offline and NLTK data isn’t already installed, pre-download it once:
+
+```bash
+python -c "import nltk; nltk.download('stopwords')"
+```
+
+If you’re using `uv` without a global install, you can run:
+
+```bash
+uv run python -c "import nltk; nltk.download('stopwords')"
+```
+
+### Bookmarklet
+
+There’s also a JavaScript bookmarklet flow under `js/` (experimental) for exporting additional conversation data outside the official ZIP export.
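The README shows the one-time `nltk.download` command; as an optional helper (not part of convoviz itself, and the `ensure_stopwords` name below is hypothetical), the same check can be done programmatically so repeated runs skip the download once the corpus is present:

```python
# Optional helper sketch (not convoviz API): fetch the NLTK stopwords corpus
# only when it is missing, so offline/repeat runs never hit the network.
import nltk


def ensure_stopwords() -> None:
    try:
        nltk.data.find("corpora/stopwords")  # raises LookupError when not installed
    except LookupError:
        nltk.download("stopwords")           # one-time download while online


if __name__ == "__main__":
    ensure_stopwords()
```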
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/graphs.py

@@ -4,7 +4,9 @@ from collections import defaultdict
 from datetime import UTC, datetime
 from pathlib import Path
 
+import matplotlib.dates as mdates
 import matplotlib.font_manager as fm
+from matplotlib.axes import Axes
 from matplotlib.figure import Figure
 from tqdm import tqdm
 

@@ -23,10 +25,10 @@ WEEKDAYS = [
 ]
 
 
-def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
+def _setup_figure(config: GraphConfig) -> tuple[Figure, Axes, fm.FontProperties]:
     """Internal helper to setup a figure with common styling."""
-    fig = Figure(figsize=config.figsize, dpi=
-    ax = fig.add_subplot()
+    fig = Figure(figsize=config.figsize, dpi=config.dpi)
+    ax: Axes = fig.add_subplot()
 
     # Load custom font if possible
     font_path = get_asset_path(f"fonts/{config.font_name}")

@@ -35,12 +37,27 @@ def _setup_figure(config: GraphConfig) -> tuple[Figure, fm.FontProperties]:
     )
 
     # Styling
+    fig.set_facecolor("white")
+    ax.set_facecolor("white")
     ax.spines["top"].set_visible(False)
     ax.spines["right"].set_visible(False)
     if config.grid:
         ax.grid(axis="y", linestyle="--", alpha=0.7)
+        ax.set_axisbelow(True)
 
-    return fig, font_prop
+    return fig, ax, font_prop
+
+
+def _ts_to_dt(ts: float, config: GraphConfig) -> datetime:
+    """Convert epoch timestamps into aware datetimes based on config."""
+    dt_utc = datetime.fromtimestamp(ts, UTC)
+    if config.timezone == "utc":
+        return dt_utc
+    return dt_utc.astimezone()
+
+
+def _tz_label(config: GraphConfig) -> str:
+    return "UTC" if config.timezone == "utc" else "Local"
 
 
 def generate_week_barplot(

@@ -59,37 +76,37 @@ def generate_week_barplot(
         Matplotlib Figure object
     """
     cfg = config or get_default_config().graph
-    dates = [
+    dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
 
     weekday_counts: defaultdict[str, int] = defaultdict(int)
     for date in dates:
         weekday_counts[WEEKDAYS[date.weekday()]] += 1
 
-    x = WEEKDAYS
+    x = list(range(len(WEEKDAYS)))
     y = [weekday_counts[day] for day in WEEKDAYS]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
-    bars = ax.bar(x, y, color=cfg.color, alpha=0.
+    bars = ax.bar(x, y, color=cfg.color, alpha=0.85)
 
     if cfg.show_counts:
         for bar in bars:
             height = bar.get_height()
- [8 removed lines not captured in the source view]
+            if height > 0:
+                ax.text(
+                    bar.get_x() + bar.get_width() / 2.0,
+                    height,
+                    f"{int(height)}",
+                    ha="center",
+                    va="bottom",
+                    fontproperties=font_prop,
+                )
 
     ax.set_xlabel("Weekday", fontproperties=font_prop)
-    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title(title, fontproperties=font_prop, fontsize=16, pad=20)
-    ax.set_xticks(
-    ax.set_xticklabels(
+    ax.set_xticks(x)
+    ax.set_xticklabels(WEEKDAYS, rotation=45, fontproperties=font_prop)
 
     for label in ax.get_yticklabels():
         label.set_fontproperties(font_prop)

@@ -114,7 +131,7 @@ def generate_hour_barplot(
         Matplotlib Figure object
     """
     cfg = config or get_default_config().graph
-    dates = [
+    dates = [_ts_to_dt(ts, cfg) for ts in timestamps]
 
     hour_counts: dict[int, int] = dict.fromkeys(range(24), 0)
     for date in dates:

@@ -123,8 +140,7 @@ def generate_hour_barplot(
     x = [f"{i:02d}:00" for i in range(24)]
     y = [hour_counts[i] for i in range(24)]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     bars = ax.bar(range(24), y, color=cfg.color, alpha=0.8)
 

@@ -142,8 +158,8 @@
         fontsize=8,
     )
 
-    ax.set_xlabel("Hour of Day (
-    ax.set_ylabel("Prompt Count", fontproperties=font_prop)
+    ax.set_xlabel(f"Hour of Day ({_tz_label(cfg)})", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title(f"{title} - Hourly Distribution", fontproperties=font_prop, fontsize=16, pad=20)
     ax.set_xticks(range(24))
     ax.set_xticklabels(x, rotation=90, fontproperties=font_prop)

@@ -180,8 +196,7 @@ def generate_model_piechart(
     total = sum(model_counts.values())
     if total == 0:
         # Return empty figure or figure with "No Data"
-        fig, font_prop = _setup_figure(cfg)
-        ax = fig.gca()
+        fig, ax, font_prop = _setup_figure(cfg)
         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
         return fig
 

@@ -204,8 +219,7 @@ def generate_model_piechart(
     labels = [item[0] for item in sorted_items]
     sizes = [item[1] for item in sorted_items]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     colors = [
         "#4A90E2",

@@ -250,17 +264,16 @@ def generate_length_histogram(
     cfg = config or get_default_config().graph
     lengths = [conv.message_count("user") for conv in collection.conversations]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
     if not lengths:
         ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
         return fig
 
-    import numpy as np
-
     # Cap at 95th percentile to focus on most conversations
- [1 removed line not captured in the source view]
+    sorted_lengths = sorted(lengths)
+    idx = int(0.95 * (len(sorted_lengths) - 1))
+    cap = int(sorted_lengths[idx])
     cap = max(cap, 5)  # Ensure at least some range
 
     # Filter lengths for the histogram plot, but keep the data correct

@@ -306,10 +319,10 @@ def generate_monthly_activity_barplot(
     x = [m.strftime("%b '%y") for m in sorted_months]
     y = [len(month_groups[m].timestamps("user")) for m in sorted_months]
 
-    fig, font_prop = _setup_figure(cfg)
-    ax = fig.gca()
+    fig, ax, font_prop = _setup_figure(cfg)
 
- [1 removed line not captured in the source view]
+    positions = list(range(len(x)))
+    bars = ax.bar(positions, y, color=cfg.color, alpha=0.85)
 
     if cfg.show_counts:
         for bar in bars:

@@ -326,10 +339,12 @@ def generate_monthly_activity_barplot(
     )
 
     ax.set_xlabel("Month", fontproperties=font_prop)
-    ax.set_ylabel("
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
     ax.set_title("Monthly Activity History", fontproperties=font_prop, fontsize=16, pad=20)
- [2 removed lines not captured in the source view]
+    tick_step = max(1, len(positions) // 12)  # show ~12 labels max
+    shown = positions[::tick_step] if positions else []
+    ax.set_xticks(shown)
+    ax.set_xticklabels([x[i] for i in shown], rotation=45, fontproperties=font_prop)
 
     for label in ax.get_yticklabels():
         label.set_fontproperties(font_prop)

@@ -338,6 +353,45 @@ def generate_monthly_activity_barplot(
     return fig
 
 
+def generate_daily_activity_lineplot(
+    collection: ConversationCollection,
+    config: GraphConfig | None = None,
+) -> Figure:
+    """Create a line chart showing user prompt count per day."""
+    cfg = config or get_default_config().graph
+    timestamps = collection.timestamps("user")
+
+    fig, ax, font_prop = _setup_figure(cfg)
+    if not timestamps:
+        ax.text(0.5, 0.5, "No Data", ha="center", va="center", fontproperties=font_prop)
+        return fig
+
+    counts: defaultdict[datetime, int] = defaultdict(int)
+    for ts in timestamps:
+        dt = _ts_to_dt(ts, cfg)
+        day = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+        counts[day] += 1
+
+    days = sorted(counts.keys())
+    values = [counts[d] for d in days]
+
+    x = mdates.date2num(days)
+    ax.plot(x, values, color=cfg.color, linewidth=2.0)
+    ax.fill_between(x, values, color=cfg.color, alpha=0.15)
+    locator = mdates.AutoDateLocator()
+    ax.xaxis.set_major_locator(locator)
+    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
+    ax.set_title("Daily Activity History", fontproperties=font_prop, fontsize=16, pad=20)
+    ax.set_xlabel(f"Day ({_tz_label(cfg)})", fontproperties=font_prop)
+    ax.set_ylabel("User Prompt Count", fontproperties=font_prop)
+
+    for label in ax.get_xticklabels() + ax.get_yticklabels():
+        label.set_fontproperties(font_prop)
+
+    fig.tight_layout()
+    return fig
+
+
 def generate_summary_graphs(
     collection: ConversationCollection,
     output_dir: Path,

@@ -368,6 +422,10 @@ def generate_summary_graphs(
     fig_activity = generate_monthly_activity_barplot(collection, config)
     fig_activity.savefig(summary_dir / "monthly_activity.png")
 
+    # Daily activity
+    fig_daily = generate_daily_activity_lineplot(collection, config)
+    fig_daily.savefig(summary_dir / "daily_activity.png")
+
 
 def generate_graphs(
     collection: ConversationCollection,
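The timezone handling added in `_ts_to_dt` boils down to parsing epoch seconds as a UTC-aware datetime and only converting to the local zone when requested. A standalone illustration, stdlib only (the plain `timezone` string argument here stands in for the `GraphConfig.timezone` field):

```python
# Sketch of the timezone conversion used by the new graph helpers.
from datetime import UTC, datetime


def ts_to_dt(ts: float, timezone: str = "local") -> datetime:
    dt_utc = datetime.fromtimestamp(ts, UTC)  # always start from an aware UTC datetime
    if timezone == "utc":
        return dt_utc
    return dt_utc.astimezone()                # convert to the system's local zone


print(ts_to_dt(1_700_000_000.0, "utc").isoformat())  # e.g. 2023-11-14T22:13:20+00:00
print(ts_to_dt(1_700_000_000.0).isoformat())          # same instant, local offset
```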
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/analysis/wordcloud.py

@@ -62,7 +62,7 @@ def load_nltk_stopwords() -> frozenset[str]:
     return frozenset(words)
 
 
-def parse_custom_stopwords(stopwords_str: str) -> set[str]:
+def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
     """Parse a comma-separated string of custom stopwords.
 
     Args:
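The only change here is the widened signature: the function now tolerates `None`. The diff does not show the body, so the implementation below is an assumption, just to illustrate what a None-tolerant parse typically looks like:

```python
# Hypothetical body for a None-tolerant comma-separated stopword parser.
def parse_custom_stopwords(stopwords_str: str | None) -> set[str]:
    if not stopwords_str:          # handles both None and ""
        return set()
    return {w.strip().lower() for w in stopwords_str.split(",") if w.strip()}


assert parse_custom_stopwords(None) == set()
assert parse_custom_stopwords("The, quick,  Fox") == {"the", "quick", "fox"}
```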
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/interactive.py

@@ -7,7 +7,7 @@ from questionary import path as qst_path
 from questionary import text as qst_text
 
 from convoviz.config import ConvovizConfig, get_default_config
-from convoviz.io.loaders import find_latest_zip
+from convoviz.io.loaders import find_latest_zip, validate_zip
 from convoviz.utils import colormaps, default_font_path, font_names, font_path, validate_header
 
 CUSTOM_STYLE = Style(

@@ -26,6 +26,25 @@ CUSTOM_STYLE = Style(
 )
 
 
+def _validate_input_path(raw: str) -> bool | str:
+    path = Path(raw)
+    if not path.exists():
+        return "Path must exist"
+
+    if path.is_dir():
+        if (path / "conversations.json").exists():
+            return True
+        return "Directory must contain conversations.json"
+
+    if path.suffix.lower() == ".json":
+        return True
+
+    if path.suffix.lower() == ".zip":
+        return True if validate_zip(path) else "ZIP must contain conversations.json"
+
+    return "Input must be a .zip, a .json, or a directory containing conversations.json"
+
+
 def run_interactive_config(initial_config: ConvovizConfig | None = None) -> ConvovizConfig:
     """Run interactive prompts to configure convoviz.
 

@@ -49,9 +68,9 @@ def run_interactive_config(initial_config: ConvovizConfig | None = None) -> Conv
     # Prompt for input path
     input_default = str(config.input_path) if config.input_path else ""
     input_result = qst_path(
-        "Enter the path to the
+        "Enter the path to the export ZIP, conversations JSON, or extracted directory:",
         default=input_default,
-        validate=
+        validate=_validate_input_path,
         style=CUSTOM_STYLE,
     ).ask()
 
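The validator follows questionary's contract: returning `True` accepts the answer, returning a string rejects it and shows that string as the error message. A simplified standalone demonstration of that contract (the ZIP branch is omitted here, since it depends on convoviz's `validate_zip`; paths are illustrative):

```python
# Demonstrates the bool | str validator contract used with questionary prompts.
import tempfile
from pathlib import Path


def validate_input_path(raw: str) -> bool | str:
    path = Path(raw)
    if not path.exists():
        return "Path must exist"
    if path.is_dir():
        if (path / "conversations.json").exists():
            return True
        return "Directory must contain conversations.json"
    return True if path.suffix.lower() in {".json", ".zip"} else "Unsupported input"


with tempfile.TemporaryDirectory() as tmp:
    print(validate_input_path(tmp))                     # error string: no conversations.json yet
    (Path(tmp) / "conversations.json").write_text("[]")
    print(validate_input_path(tmp))                     # True
```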
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/io/loaders.py

@@ -1,6 +1,6 @@
 """Loading functions for conversations and collections."""
 
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from zipfile import ZipFile
 
 from orjson import loads

@@ -9,6 +9,27 @@ from convoviz.exceptions import InvalidZipError
 from convoviz.models import Conversation, ConversationCollection
 
 
+def _is_safe_zip_member_name(name: str) -> bool:
+    """Return True if a ZIP entry name is safe to extract.
+
+    This is intentionally OS-agnostic: it treats both ``/`` and ``\\`` as path
+    separators and rejects absolute paths, drive-letter paths, and ``..`` parts.
+    """
+    normalized = name.replace("\\", "/")
+    member_path = PurePosixPath(normalized)
+
+    # Absolute paths (e.g. "/etc/passwd") or empty names
+    if not normalized or member_path.is_absolute():
+        return False
+
+    # Windows drive letters / UNC-style prefixes stored in the archive
+    first = member_path.parts[0] if member_path.parts else ""
+    if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
+        return False
+
+    return ".." not in member_path.parts
+
+
 def extract_archive(filepath: Path) -> Path:
     """Extract a ZIP file and return the extraction folder path.
 

@@ -28,15 +49,17 @@ def extract_archive(filepath: Path) -> Path:
 
     with ZipFile(filepath) as zf:
         for member in zf.infolist():
-            # Check for path traversal (Zip-Slip)
- [2 removed lines not captured in the source view]
+            # Check for path traversal (Zip-Slip) in an OS-agnostic way.
+            # ZIP files are typically POSIX-path-like, but malicious archives can
+            # embed backslashes or drive-letter tricks.
+            if not _is_safe_zip_member_name(member.filename):
                 raise InvalidZipError(
                     str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
                 )
 
             # Additional check using resolved paths
- [1 removed line not captured in the source view]
+            normalized = member.filename.replace("\\", "/")
+            target_path = (folder / normalized).resolve()
             if not target_path.is_relative_to(folder.resolve()):
                 raise InvalidZipError(
                     str(filepath), reason=f"Malicious path in ZIP: {member.filename}"
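A standalone sketch of the hardened Zip-Slip check, mirroring the logic above; the entry names are made-up attack strings, not data from a real export:

```python
# OS-agnostic check for dangerous ZIP member names (Zip-Slip defence).
from pathlib import PurePosixPath


def is_safe_zip_member_name(name: str) -> bool:
    normalized = name.replace("\\", "/")          # treat backslashes as separators too
    member_path = PurePosixPath(normalized)
    if not normalized or member_path.is_absolute():
        return False                              # empty or absolute path
    first = member_path.parts[0] if member_path.parts else ""
    if first.endswith(":") or first.startswith("//") or first.startswith("\\\\"):
        return False                              # drive letter / UNC-style prefix
    return ".." not in member_path.parts          # no parent-directory escapes


for name in ["conversations.json", "../../etc/passwd", "C:\\Windows\\evil.dll", "/etc/passwd"]:
    print(f"{name!r}: {'ok' if is_safe_zip_member_name(name) else 'rejected'}")
```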
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/collection.py

@@ -37,14 +37,20 @@ class ConversationCollection(BaseModel):
     def update(self, other: "ConversationCollection") -> None:
         """Merge another collection into this one.
 
- [1 removed line not captured in the source view]
+        Merges per-conversation, keeping the newest version when IDs collide.
+
+        Note: We intentionally do *not* gate on ``other.last_updated`` because
+        "new" conversations can still have older timestamps than the most recent
+        conversation in this collection (e.g. bookmarklet downloads).
         """
- [2 removed lines not captured in the source view]
+        merged: dict[str, Conversation] = dict(self.index)
+
+        for conv_id, incoming in other.index.items():
+            existing = merged.get(conv_id)
+            if existing is None or incoming.update_time > existing.update_time:
+                merged[conv_id] = incoming
 
- [1 removed line not captured in the source view]
-        merged_index.update(other.index)
-        self.conversations = list(merged_index.values())
+        self.conversations = list(merged.values())
 
     def add(self, conversation: Conversation) -> None:
         """Add a conversation to the collection."""
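The new merge keeps, for each conversation id, whichever copy has the larger `update_time`, instead of blindly letting the incoming collection overwrite existing entries. A toy walk-through of that rule (`SimpleNamespace` stands in for the real `Conversation` model):

```python
# Keep-the-newest-per-id merge, illustrated on throwaway objects.
from types import SimpleNamespace

existing = {"a": SimpleNamespace(update_time=100), "b": SimpleNamespace(update_time=200)}
incoming = {"a": SimpleNamespace(update_time=150), "c": SimpleNamespace(update_time=50)}

merged = dict(existing)
for conv_id, conv in incoming.items():
    current = merged.get(conv_id)
    if current is None or conv.update_time > current.update_time:
        merged[conv_id] = conv

print({k: v.update_time for k, v in merged.items()})  # {'a': 150, 'b': 200, 'c': 50}
```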
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/conversation.py

@@ -98,12 +98,10 @@ class Conversation(BaseModel):
     def custom_instructions(self) -> dict[str, str]:
         """Get custom instructions used for this conversation."""
         system_nodes = self.nodes_by_author("system")
- [4 removed lines not captured in the source view]
-        if context_message and context_message.metadata.is_user_system_message:
-            return context_message.metadata.user_context_message_data or {}
+        for node in system_nodes:
+            context_message = node.message
+            if context_message and context_message.metadata.is_user_system_message:
+                return context_message.metadata.user_context_message_data or {}
         return {}
 
     def timestamps(self, *authors: AuthorRole) -> list[float]:
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/models/message.py

@@ -6,7 +6,7 @@ Object path: conversations.json -> conversation -> mapping -> mapping node -> me
 from datetime import datetime
 from typing import Any, Literal
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 from convoviz.exceptions import MessageContentError
 

@@ -18,7 +18,7 @@ class MessageAuthor(BaseModel):
 
     role: AuthorRole
     name: str | None = None
-    metadata: dict[str, Any] =
+    metadata: dict[str, Any] = Field(default_factory=dict)
 
 
 class MessageContent(BaseModel):

@@ -55,8 +55,8 @@ class Message(BaseModel):
     status: str
     end_turn: bool | None = None
     weight: float
-    metadata: MessageMetadata
-    recipient: str
+    metadata: MessageMetadata = Field(default_factory=MessageMetadata)
+    recipient: str | None = None
 
     @property
     def images(self) -> list[str]:

@@ -117,3 +117,41 @@
         return bool(
             self.content.parts or self.content.text is not None or self.content.result is not None
         )
+
+    @property
+    def is_empty(self) -> bool:
+        """Check if the message is effectively empty (no text, no images)."""
+        try:
+            return not self.text.strip() and not self.images
+        except MessageContentError:
+            return True
+
+    @property
+    def is_hidden(self) -> bool:
+        """Check if message should be hidden in export.
+
+        Hidden if:
+        1. It is empty (no text, no images).
+        2. It is an internal system message (not custom instructions).
+        3. It is a browser tool output (intermediate search steps).
+        """
+        if self.is_empty:
+            return True
+
+        # Hide internal system messages
+        if self.author.role == "system":
+            # Only show if explicitly marked as user system message (Custom Instructions)
+            return not self.metadata.is_user_system_message
+
+        # Hide browser tool outputs (usually intermediate search steps)
+        if self.author.role == "tool" and self.author.name == "browser":
+            return True
+
+        # Hide assistant calls to browser tool (e.g. "search(...)") or code interpreter
+        if self.author.role == "assistant" and (
+            self.recipient == "browser" or self.content.content_type == "code"
+        ):
+            return True
+
+        # Hide browsing status messages
+        return self.content.content_type == "tether_browsing_display"
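The `is_hidden` rules can be restated compactly as a standalone predicate. The sketch below uses plain dicts instead of the pydantic `Message` model, with field names mirroring the diff, just to show how the rules combine:

```python
# Compact restatement of the is_hidden rules on plain dicts (illustrative only).
def is_hidden(msg: dict) -> bool:
    if not msg.get("text", "").strip() and not msg.get("images"):
        return True                                            # empty message
    if msg["role"] == "system":
        return not msg.get("is_user_system_message", False)    # keep only Custom Instructions
    if msg["role"] == "tool" and msg.get("name") == "browser":
        return True                                            # intermediate browsing output
    if msg["role"] == "assistant" and (
        msg.get("recipient") == "browser" or msg.get("content_type") == "code"
    ):
        return True                                            # tool call, not user-facing text
    return msg.get("content_type") == "tether_browsing_display"


print(is_hidden({"role": "system", "text": "You are...", "is_user_system_message": True}))  # False
print(is_hidden({"role": "tool", "name": "browser", "text": "search results..."}))           # True
```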
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/pipeline.py

@@ -19,6 +19,18 @@ from convoviz.io.writers import save_collection, save_custom_instructions
 console = Console()
 
 
+def _safe_uri(path: Path) -> str:
+    """Best-effort URI for printing.
+
+    ``Path.as_uri()`` requires an absolute path; users often provide relative
+    output paths, so we resolve first and fall back to string form.
+    """
+    try:
+        return path.resolve().as_uri()
+    except Exception:
+        return str(path)
+
+
 def run_pipeline(config: ConvovizConfig) -> None:
     """Run the main processing pipeline.
 

@@ -72,8 +84,14 @@ def run_pipeline(config: ConvovizConfig) -> None:
     managed_dirs = ["Markdown", "Graphs", "Word-Clouds"]
     for d in managed_dirs:
         sub_dir = output_folder / d
-        if sub_dir.exists()
- [1 removed line not captured in the source view]
+        if sub_dir.exists():
+            # Never follow symlinks; just unlink them.
+            if sub_dir.is_symlink():
+                sub_dir.unlink()
+            elif sub_dir.is_dir():
+                rmtree(sub_dir)
+            else:
+                sub_dir.unlink()
         sub_dir.mkdir(exist_ok=True)
 
     # Clean specific files we manage

@@ -81,7 +99,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
     for f in managed_files:
         managed_file = output_folder / f
         if managed_file.exists():
-            managed_file.
+            if managed_file.is_symlink() or managed_file.is_file():
+                managed_file.unlink()
+            elif managed_file.is_dir():
+                rmtree(managed_file)
+            else:
+                managed_file.unlink()
 
     # Save markdown files
     markdown_folder = output_folder / "Markdown"

@@ -94,7 +117,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📄[/bold blue] here: {markdown_folder
+        f"Check the output [bold blue]📄[/bold blue] here: {_safe_uri(markdown_folder)} 🔗\n"
     )
 
     # Generate graphs

@@ -108,7 +131,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📈[/bold blue] here: {graph_folder
+        f"Check the output [bold blue]📈[/bold blue] here: {_safe_uri(graph_folder)} 🔗\n"
     )
 
     # Generate word clouds

@@ -122,7 +145,7 @@ def run_pipeline(config: ConvovizConfig) -> None:
     )
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]🔡☁️[/bold blue] here: {wordcloud_folder
+        f"Check the output [bold blue]🔡☁️[/bold blue] here: {_safe_uri(wordcloud_folder)} 🔗\n"
     )
 
     # Save custom instructions

@@ -131,12 +154,12 @@ def run_pipeline(config: ConvovizConfig) -> None:
     save_custom_instructions(collection, instructions_path)
     console.print(
         f"\nDone [bold green]✅[/bold green] ! "
-        f"Check the output [bold blue]📝[/bold blue] here: {instructions_path
+        f"Check the output [bold blue]📝[/bold blue] here: {_safe_uri(instructions_path)} 🔗\n"
     )
 
     console.print(
         "ALL DONE [bold green]🎉🎉🎉[/bold green] !\n\n"
-        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {output_folder
+        f"Explore the full gallery [bold yellow]🖼️[/bold yellow] at: {_safe_uri(output_folder)} 🔗\n\n"
         "I hope you enjoy the outcome 🤞.\n\n"
         "If you appreciate it, kindly give the project a star 🌟 on GitHub:\n\n"
        "➡️ https://github.com/mohamed-chs/chatgpt-history-export-to-md 🔗\n\n"
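The `_safe_uri` helper exists because `Path.as_uri()` raises `ValueError` on relative paths; resolving first (and falling back to plain string form) keeps the console output working no matter what the user passed as `--output`. Standalone sketch:

```python
# Best-effort file:// URI for printing, with a plain-string fallback.
from pathlib import Path


def safe_uri(path: Path) -> str:
    try:
        return path.resolve().as_uri()   # resolve() makes relative paths absolute
    except Exception:
        return str(path)                 # last resort: just show the path as given


print(safe_uri(Path("output/Markdown")))  # e.g. file:///home/user/project/output/Markdown
```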
{convoviz-0.2.3 → convoviz-0.2.4}/convoviz/renderers/markdown.py

@@ -4,6 +4,7 @@ import re
 from collections.abc import Callable
 
 from convoviz.config import AuthorHeaders, ConversationConfig
+from convoviz.exceptions import MessageContentError
 from convoviz.models import Conversation, Node
 from convoviz.renderers.yaml import render_yaml_header
 

@@ -154,32 +155,67 @@ def render_node(
     if node.message is None:
         return ""
 
+    if node.message.is_hidden:
+        return ""
+
     header = render_node_header(node, headers, flavor=flavor)
 
     # Get and process content
     try:
- [16 removed lines not captured in the source view]
+        text = node.message.text
+    except MessageContentError:
+        # Some message types only contain non-text parts; those still may have images.
+        text = ""
+
+    content = close_code_blocks(text)
+    content = f"\n{content}\n" if content else ""
+    if use_dollar_latex:
+        content = replace_latex_delimiters(content)
+
+    # Append images if resolver is provided and images exist
+    if asset_resolver and node.message.images:
+        for image_id in node.message.images:
+            rel_path = asset_resolver(image_id)
+            if rel_path:
+                # Using standard markdown image syntax.
+                # Obsidian handles this well.
+                content += f"\n\n"
 
     footer = render_node_footer(node, flavor=flavor)
 
     return f"\n{header}{content}{footer}\n---\n"
 
 
+def _ordered_nodes(conversation: Conversation) -> list[Node]:
+    """Return nodes in a deterministic depth-first traversal order.
+
+    ChatGPT exports store nodes in a mapping; dict iteration order is not a
+    reliable semantic ordering. For markdown output, we traverse from roots.
+    """
+    mapping = conversation.node_mapping
+    roots = sorted((n for n in mapping.values() if n.parent is None), key=lambda n: n.id)
+
+    visited: set[str] = set()
+    ordered: list[Node] = []
+
+    def dfs(node: Node) -> None:
+        if node.id in visited:
+            return
+        visited.add(node.id)
+        ordered.append(node)
+        for child in node.children_nodes:
+            dfs(child)
+
+    for root in roots:
+        dfs(root)
+
+    # Include any disconnected/orphan nodes deterministically at the end.
+    for node in sorted(mapping.values(), key=lambda n: n.id):
+        dfs(node)
+
+    return ordered
+
+
 def render_conversation(
     conversation: Conversation,
     config: ConversationConfig,

@@ -203,8 +239,8 @@ def render_conversation(
     # Start with YAML header
     markdown = render_yaml_header(conversation, config.yaml)
 
-    # Render
-    for node in conversation
+    # Render message nodes in a deterministic traversal order.
+    for node in _ordered_nodes(conversation):
         if node.message:
             markdown += render_node(
                 node, headers, use_dollar_latex, asset_resolver=asset_resolver, flavor=flavor
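The point of `_ordered_nodes` is that the export's node mapping is a dict keyed by id, so iteration order is not a meaningful conversation order; traversing depth-first from parentless roots (sorted by id) makes the Markdown output deterministic. A toy walk-through of that idea on a hand-made mapping:

```python
# Deterministic depth-first ordering over a tiny node mapping (illustrative data).
mapping = {
    "b": {"parent": None, "children": ["d"]},
    "a": {"parent": None, "children": ["c"]},
    "c": {"parent": "a", "children": []},
    "d": {"parent": "b", "children": []},
}

visited: set[str] = set()
ordered: list[str] = []


def dfs(node_id: str) -> None:
    if node_id in visited:
        return
    visited.add(node_id)
    ordered.append(node_id)
    for child in mapping[node_id]["children"]:
        dfs(child)


for root in sorted(nid for nid, node in mapping.items() if node["parent"] is None):
    dfs(root)

print(ordered)  # ['a', 'c', 'b', 'd'] regardless of dict insertion order
```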
convoviz-0.2.4/convoviz/renderers/yaml.py (new file)

@@ -0,0 +1,119 @@
+"""YAML frontmatter rendering for conversations."""
+
+from __future__ import annotations
+
+import re
+from datetime import datetime
+
+from convoviz.config import YAMLConfig
+from convoviz.models import Conversation
+
+_TAG_SAFE_RE = re.compile(r"[^a-z0-9/_\-]+")
+
+
+def _to_yaml_scalar(value: object) -> str:
+    if value is None:
+        return "null"
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)):
+        return str(value)
+    if isinstance(value, datetime):
+        # Frontmatter consumers generally expect ISO 8601 strings
+        return f'"{value.isoformat()}"'
+    if isinstance(value, str):
+        if "\n" in value:
+            # Multiline: use a block scalar
+            indented = "\n".join(f"  {line}" for line in value.splitlines())
+            return f"|-\n{indented}"
+        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
+        return f'"{escaped}"'
+
+    # Fallback: stringify and quote
+    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
+    return f'"{escaped}"'
+
+
+def _to_yaml(value: object, indent: int = 0) -> str:
+    pad = " " * indent
+
+    if isinstance(value, dict):
+        lines: list[str] = []
+        for k, v in value.items():
+            key = str(k)
+            if isinstance(v, (dict, list)):
+                lines.append(f"{pad}{key}:")
+                lines.append(_to_yaml(v, indent=indent + 2))
+            else:
+                scalar = _to_yaml_scalar(v)
+                # Block scalars already include newline + indentation
+                if scalar.startswith("|-"):
+                    lines.append(f"{pad}{key}: {scalar.splitlines()[0]}")
+                    lines.extend(f"{pad}{line}" for line in scalar.splitlines()[1:])
+                else:
+                    lines.append(f"{pad}{key}: {scalar}")
+        return "\n".join(lines)
+
+    if isinstance(value, list):
+        lines = []
+        for item in value:
+            if isinstance(item, (dict, list)):
+                lines.append(f"{pad}-")
+                lines.append(_to_yaml(item, indent=indent + 2))
+            else:
+                lines.append(f"{pad}- {_to_yaml_scalar(item)}")
+        return "\n".join(lines)
+
+    return f"{pad}{_to_yaml_scalar(value)}"
+
+
+def _default_tags(conversation: Conversation) -> list[str]:
+    tags: list[str] = ["chatgpt"]
+    tags.extend(conversation.plugins)
+    # Normalize to a tag-friendly form
+    normalized: list[str] = []
+    for t in tags:
+        t2 = _TAG_SAFE_RE.sub("-", t.strip().lower()).strip("-")
+        if t2 and t2 not in normalized:
+            normalized.append(t2)
+    return normalized
+
+
+def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
+    """Render the YAML frontmatter for a conversation.
+
+    Args:
+        conversation: The conversation to render
+        config: YAML configuration specifying which fields to include
+
+    Returns:
+        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
+    """
+    yaml_fields: dict[str, object] = {}
+
+    if config.title:
+        yaml_fields["title"] = conversation.title
+    if config.tags:
+        yaml_fields["tags"] = _default_tags(conversation)
+    if config.chat_link:
+        yaml_fields["chat_link"] = conversation.url
+    if config.create_time:
+        yaml_fields["create_time"] = conversation.create_time
+    if config.update_time:
+        yaml_fields["update_time"] = conversation.update_time
+    if config.model:
+        yaml_fields["model"] = conversation.model
+    if config.used_plugins:
+        yaml_fields["used_plugins"] = conversation.plugins
+    if config.message_count:
+        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
+    if config.content_types:
+        yaml_fields["content_types"] = conversation.content_types
+    if config.custom_instructions:
+        yaml_fields["custom_instructions"] = conversation.custom_instructions
+
+    if not yaml_fields:
+        return ""
+
+    body = _to_yaml(yaml_fields)
+    return f"---\n{body}\n---\n"
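Why this rewrite matters: the old renderer (removed further down) emitted `key: value` verbatim, so a conversation title containing a colon, quotes, or a newline produced invalid frontmatter. The new scalar quoting avoids that; a minimal standalone sketch of the same quoting idea, restricted to strings:

```python
# Quoting/escaping a string for YAML frontmatter, in the spirit of _to_yaml_scalar.
def to_yaml_scalar(value: str) -> str:
    if "\n" in value:
        indented = "\n".join(f"  {line}" for line in value.splitlines())
        return f"|-\n{indented}"                                   # block scalar for multiline text
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'                                          # double-quoted single-line scalar


print("title: " + to_yaml_scalar('Fix: "quotes" & colons'))
print("notes: " + to_yaml_scalar("line one\nline two"))
```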
{convoviz-0.2.3 → convoviz-0.2.4}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "convoviz"
-version = "0.2.3"
+version = "0.2.4"
 description = "Get analytics and visualizations on your ChatGPT data!"
 license = "MIT"
 keywords = [

@@ -48,8 +48,7 @@ source-exclude = [
     ".vscode",
     ".gitattributes",
     ".gitignore",
-    "
-    "NEXT_STEPS.md",
+    "dev",
     "playground.ipynb",
     "pyproject.toml.bak",
     "uv.lock",
convoviz-0.2.3/convoviz/renderers/yaml.py (removed)

@@ -1,42 +0,0 @@
-"""YAML frontmatter rendering for conversations."""
-
-from convoviz.config import YAMLConfig
-from convoviz.models import Conversation
-
-
-def render_yaml_header(conversation: Conversation, config: YAMLConfig) -> str:
-    """Render the YAML frontmatter for a conversation.
-
-    Args:
-        conversation: The conversation to render
-        config: YAML configuration specifying which fields to include
-
-    Returns:
-        YAML frontmatter string with --- delimiters, or empty string if no fields enabled
-    """
-    yaml_fields: dict[str, object] = {}
-
-    if config.title:
-        yaml_fields["title"] = conversation.title
-    if config.chat_link:
-        yaml_fields["chat_link"] = conversation.url
-    if config.create_time:
-        yaml_fields["create_time"] = conversation.create_time
-    if config.update_time:
-        yaml_fields["update_time"] = conversation.update_time
-    if config.model:
-        yaml_fields["model"] = conversation.model
-    if config.used_plugins:
-        yaml_fields["used_plugins"] = conversation.plugins
-    if config.message_count:
-        yaml_fields["message_count"] = conversation.message_count("user", "assistant")
-    if config.content_types:
-        yaml_fields["content_types"] = conversation.content_types
-    if config.custom_instructions:
-        yaml_fields["custom_instructions"] = conversation.custom_instructions
-
-    if not yaml_fields:
-        return ""
-
-    lines = [f"{key}: {value}" for key, value in yaml_fields.items()]
-    return f"---\n{chr(10).join(lines)}\n---\n"