PyPI - methodproof - Versions diffs - 0.8.3__tar.gz → 0.8.4__tar.gz - Mend

methodproof 0.8.3.tar.gz → 0.8.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

{methodproof-0.8.3 → methodproof-0.8.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: methodproof
-Version: 0.8.3
+Version: 0.8.4
 Summary: See how you code. Capture and visualize your engineering process.
 License-Expression: Apache-2.0
 License-File: LICENSE

{methodproof-0.8.3 → methodproof-0.8.4}/methodproof/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """MethodProof — see how you code."""
-__version__ = "0.8.1"
+__version__ = "0.8.2"

{methodproof-0.8.3 → methodproof-0.8.4}/methodproof/agents/watcher.py RENAMED Viewed

@@ -40,8 +40,15 @@ IGNORE_PATTERNS = re.compile(
     r"|\.build/|DerivedData/|Pods/"
     # Build output / artifacts
     r"|dist/|build/|\.output/"
-    # Logs and locks
-    r"|\.lock$|\.log$)"
+    # Logs and locks — runtime log output, never engineering source.
+    # ``/logs/`` excludes any file under a ``logs/`` directory regardless
+    # of extension. The prior ``\.log$`` check was too narrow — session
+    # 8c21 had 15,269 file_edit events captured on
+    # ``methodproof-platform/logs/methodproof-platform.jsonl`` (the
+    # platform's own runtime log, NOT ``.log`` extension) which polluted
+    # both the thread and step distributions. Any project with a
+    # ``logs/`` subdirectory for runtime output inherits the exclusion.
+    r"|/logs/|\.lock$|\.log$)"
 )

{methodproof-0.8.3 → methodproof-0.8.4}/methodproof/hooks/claude_code.py RENAMED Viewed

@@ -16,6 +16,22 @@ except ImportError:
     analyze_prompt = lambda _: {}
     compose_summary = lambda _: ""
+try:
+    from methodproof.hooks import model_cache
+except ImportError:
+    model_cache = None  # type: ignore[assignment]
+def _current_model(session_id: str) -> str | None:
+    """Return the most recently-seen model for this Claude session, from
+    the per-session cache. None if cache unavailable or no prior update."""
+    if model_cache is None or not session_id:
+        return None
+    try:
+        return model_cache.get_model(session_id)
+    except Exception:
+        return None
 def _extract_result_text(response) -> str:
     """Extract plain text from tool_response regardless of shape.
@@ -92,25 +108,34 @@ def _tool_input_preview(d: dict) -> str:
     return json.dumps(inp)[:300] if inp else ""
+def _with_model(meta: dict, d: dict) -> dict:
+    """Attach the session's currently-cached model to a metadata dict.
+    No-op when session_id is missing or cache has no entry."""
+    model = _current_model(d.get("session_id") or "")
+    if model:
+        meta["model"] = model
+    return meta
 _META_EXTRACTORS = {
-    "UserPromptSubmit": lambda d: {
+    "UserPromptSubmit": lambda d: _with_model({
         "tool": _TOOL,
         "prompt_text": d.get("prompt") or "",
         "prompt_preview": _build_prompt_meta(d.get("prompt") or "").get("prompt_summary", ""),
         "prompt_length": len(d.get("prompt") or ""),
-    },
-    "PreToolUse": lambda d: {
+    }, d),
+    "PreToolUse": lambda d: _with_model({
         "tool": _TOOL, "tool_name": d.get("tool_name", "unknown"),
         "tool_input": d.get("tool_input") or {},
         "tool_input_preview": _tool_input_preview(d),
-    },
-    "PostToolUse": lambda d: {
+    }, d),
+    "PostToolUse": lambda d: _with_model({
         "tool": _TOOL, "tool_name": d.get("tool_name", "unknown"), "success": True,
         "tool_input": d.get("tool_input") or {},
         "tool_response": d.get("tool_response") or {},
         "tool_input_preview": _tool_input_preview(d),
         "result_preview": _extract_result_text(d.get("tool_response")),
-    },
+    }, d),
     "PostToolUseFailure": lambda d: {
         "tool": _TOOL, "tool_name": d.get("tool_name", "unknown"),
         "success": False, "is_interrupt": d.get("is_interrupt", False),
@@ -124,9 +149,9 @@ _META_EXTRACTORS = {
     },
     "TaskCreated": lambda d: {"tool": _TOOL, "task_id": d.get("task_id", ""), "subject": d.get("task_subject", "")},
     "TaskCompleted": lambda d: {"tool": _TOOL, "task_id": d.get("task_id", "")},
-    "SessionStart": lambda d: {"tool": _TOOL, "session_id": d.get("session_id", ""), "cwd": d.get("cwd", "")},
+    "SessionStart": lambda d: _with_model({"tool": _TOOL, "session_id": d.get("session_id", ""), "cwd": d.get("cwd", "")}, d),
     "SessionEnd": lambda d: {"tool": _TOOL, "session_id": d.get("session_id", "")},
-    "Stop": lambda d: {"tool": _TOOL},
+    "Stop": lambda d: _with_model({"tool": _TOOL}, d),
     "StopFailure": lambda d: {"tool": _TOOL, "error": d.get("error", "")},
     "CwdChanged": lambda d: {
         "tool": _TOOL, "cwd": d.get("cwd", ""),
@@ -175,6 +200,24 @@ def main() -> None:
         return
     event = data.get("hook_event_name", "unknown")
+    session_id = data.get("session_id") or ""
+    transcript_path = data.get("transcript_path", "")
+    # Refresh the per-session model cache before running the metadata extractor
+    # so `_with_model` sees the freshest value. We include PreToolUse because
+    # the first turn's UserPromptSubmit fires before any assistant message is
+    # in the transcript — without a PreToolUse refresh, every tool event in
+    # that first turn lands with no `model` attribution. PostToolUse is left
+    # out: once PreToolUse has refreshed, the cache is warm for the rest of
+    # the turn, and PostToolUse would duplicate the transcript read.
+    if model_cache is not None and transcript_path and session_id and event in (
+        "UserPromptSubmit", "SessionStart", "Stop", "PreToolUse",
+    ):
+        try:
+            model_cache.update_from_transcript(session_id, transcript_path)
+        except Exception:
+            pass  # Cache is best-effort; hook must never raise.
     etype = _TYPE_MAP.get(event, "claude_code_event")
     extractor = _META_EXTRACTORS.get(event)
     meta = extractor(data) if extractor else {"tool": _TOOL, "event": event}
@@ -183,7 +226,6 @@ def main() -> None:
     events_out = [{"type": etype, "timestamp": ts, "metadata": meta}]
     # On Stop, grep transcript for recap (journal mode only)
-    transcript_path = data.get("transcript_path", "")
     if event == "Stop" and transcript_path:
         recap = _extract_recap(transcript_path)
         if recap:
@@ -193,6 +235,13 @@ def main() -> None:
                 "metadata": {"tool": _TOOL, "recap": recap[:2000]},
             })
+    # Clean up the cache entry on session end so the file stays bounded.
+    if model_cache is not None and event == "SessionEnd" and session_id:
+        try:
+            model_cache.clear_session(session_id)
+        except Exception:
+            pass
     payload = json.dumps({"events": events_out}).encode()
     req = urllib.request.Request(
         "http://localhost:9877/events", data=payload,

{methodproof-0.8.3 → methodproof-0.8.4}/methodproof/hooks/claude_code.sh RENAMED Viewed

@@ -8,6 +8,39 @@
 INPUT=$(cat)
+# Model cache: per-Claude-session model attribution.
+# The transcript JSONL is the only place Claude Code surfaces the active
+# model. Re-reading it on every PreToolUse is too expensive, so we refresh
+# a cache at the cheap once-per-turn waypoints (SessionStart / Stop — and
+# UserPromptSubmit which delegates to the Python hook that updates it too)
+# and read it via a cheap jq lookup on tool events.
+_MP_MODEL_CACHE="${HOME}/.methodproof/hook_state/models.json"
+# Read the current model for a session. Fast path — no Python subprocess.
+_mp_read_model() {
+  local sess="$1"
+  [ -z "$sess" ] || [ ! -f "$_MP_MODEL_CACHE" ] && return
+  command -v jq >/dev/null 2>&1 || return
+  jq -r --arg s "$sess" '.[$s].model // empty' "$_MP_MODEL_CACHE" 2>/dev/null
+}
+# Refresh the cache by shelling out to the Python module (handles JSON
+# safely + atomic write). Rare — called on SessionStart / Stop only.
+_mp_update_model() {
+  local sess="$1" transcript="$2"
+  [ -z "$sess" ] || [ -z "$transcript" ] && return
+  command -v python3 >/dev/null 2>&1 || return
+  python3 -m methodproof.hooks.model_cache update "$sess" "$transcript" \
+    >/dev/null 2>&1 || true
+}
+_mp_clear_model() {
+  local sess="$1"
+  [ -z "$sess" ] && return
+  command -v python3 >/dev/null 2>&1 || return
+  python3 -m methodproof.hooks.model_cache clear "$sess" >/dev/null 2>&1 || true
+}
 if command -v jq >/dev/null 2>&1; then
   EVENT=$(echo "$INPUT" | jq -r '.hook_event_name // "unknown"' 2>/dev/null || echo "unknown")
 else
@@ -25,6 +58,24 @@ fi
 # Build event JSON — use jq if available, else minimal Python
 if command -v jq >/dev/null 2>&1; then
+  # Pull session + transcript once — cache ops + model attribution use both.
+  SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // ""' 2>/dev/null)
+  TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path // ""' 2>/dev/null)
+  # Refresh model cache at once-per-turn waypoints. Cheap tool events
+  # (PreToolUse / PostToolUse) read the cache without touching the
+  # transcript.
+  case "$EVENT" in
+    SessionStart|Stop)
+      _mp_update_model "$SESSION_ID" "$TRANSCRIPT"
+      ;;
+    SessionEnd)
+      _mp_clear_model "$SESSION_ID"
+      ;;
+  esac
+  MP_MODEL=$(_mp_read_model "$SESSION_ID")
   case "$EVENT" in
     UserPromptSubmit)
       # Delegate to Python for structural analysis (shell can't do regex classification)
@@ -37,9 +88,10 @@ if command -v jq >/dev/null 2>&1; then
       ;;
     PreToolUse)
       TYPE="tool_call"
-      META=$(echo "$INPUT" | jq -c '{
+      META=$(echo "$INPUT" | jq -c --arg model "$MP_MODEL" '{
         tool: (.tool_name // "unknown"),
         tool_use_id: (.tool_use_id // ""),
+        model: (if $model == "" then null else $model end),
         tool_input: (.tool_input // {}),
         tool_input_preview: (
           (.tool_input // {}) as $ti |
@@ -58,10 +110,11 @@ if command -v jq >/dev/null 2>&1; then
       ;;
     PostToolUse)
       TYPE="tool_result"
-      META=$(echo "$INPUT" | jq -c '{
+      META=$(echo "$INPUT" | jq -c --arg model "$MP_MODEL" '{
         tool: (.tool_name // "unknown"),
         tool_use_id: (.tool_use_id // ""),
         success: true,
+        model: (if $model == "" then null else $model end),
         tool_input: (.tool_input // {}),
         tool_response: (.tool_response // {}),
         tool_input_preview: (
@@ -112,7 +165,11 @@ if command -v jq >/dev/null 2>&1; then
       ;;
     SessionStart)
       TYPE="claude_session_start"
-      META=$(echo "$INPUT" | jq -c '{claude_session_id: (.session_id // ""), cwd: (.cwd // "")}' 2>/dev/null || echo '{}')
+      META=$(echo "$INPUT" | jq -c --arg model "$MP_MODEL" '{
+        claude_session_id: (.session_id // ""),
+        cwd: (.cwd // ""),
+        model: (if $model == "" then null else $model end)
+      }' 2>/dev/null || echo '{}')
       ;;
     PostToolUseFailure)
       TYPE="tool_failure"
@@ -124,7 +181,11 @@ if command -v jq >/dev/null 2>&1; then
       ;;
     Stop)
       TYPE="agent_turn_end"
-      META='{"tool":"claude_code"}'
+      if [ -n "$MP_MODEL" ]; then
+        META="{\"tool\":\"claude_code\",\"model\":\"$MP_MODEL\"}"
+      else
+        META='{"tool":"claude_code"}'
+      fi
       # Extract recap from transcript if available (journal mode)
       TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path // empty' 2>/dev/null)
       if [ -n "$TRANSCRIPT" ] && [ -f "$TRANSCRIPT" ]; then

methodproof-0.8.4/methodproof/hooks/model_cache.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""Per-session model cache for Claude Code capture.
+Claude Code doesn't pass the active model in every hook payload — only the
+transcript JSONL carries it. Re-reading the transcript on every hook
+invocation would add latency to each one, so this module keeps a tiny
+JSON cache at ``~/.methodproof/hook_state/models.json`` mapping Claude
+session_id → {model, updated_at}. The hooks refresh it at selected events
+(the shell hook on ``SessionStart`` / ``Stop``; the Python hook on
+``SessionStart``, ``UserPromptSubmit``, ``Stop`` and ``PreToolUse``) and
+read it cheaply on the remaining tool events.
+Writes go through ``tempfile.NamedTemporaryFile`` + ``os.replace``, so a
+reader never sees a partially written file. The load–modify–save sequence
+itself is not locked, so concurrent updates can still overwrite each other.
+"""
+from __future__ import annotations
+import json
+import os
+import pathlib
+import tempfile
+import time
+CACHE_PATH = pathlib.Path.home() / ".methodproof" / "hook_state" / "models.json"
+# How many bytes from the end of a transcript to scan for the most recent
+# model. Transcripts are append-only JSONL, so the tail is all we need. The
+# 64 KiB window is meant to cover a typical turn plus headroom; we're not
+# trying to reconstruct history.
+_TAIL_BYTES = 64 * 1024
+def _load() -> dict:
+    try:
+        with CACHE_PATH.open("r") as f:
+            data = json.load(f)
+        return data if isinstance(data, dict) else {}
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {}
+def _save(data: dict) -> None:
+    CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
+    # Atomic write — write to a tmp file in the same dir, rename over.
+    with tempfile.NamedTemporaryFile(
+        mode="w", dir=str(CACHE_PATH.parent), delete=False, suffix=".json",
+    ) as tmp:
+        json.dump(data, tmp)
+        tmp_path = tmp.name
+    os.replace(tmp_path, CACHE_PATH)
+def _extract_last_model(transcript_path: str) -> str | None:
+    """Read the tail of a transcript JSONL and return the most recent
+    top-level ``model`` string found on any record (the record ``type`` is
+    not checked). ``None`` if the transcript is missing, unreadable, or
+    contains no such field.
+    """
+    path = pathlib.Path(transcript_path)
+    if not path.is_file():
+        return None
+    try:
+        size = path.stat().st_size
+        with path.open("rb") as f:
+            if size > _TAIL_BYTES:
+                f.seek(size - _TAIL_BYTES)
+                # Drop partial first line after a seek
+                f.readline()
+            blob = f.read().decode("utf-8", errors="replace")
+    except OSError:
+        return None
+    last_model: str | None = None
+    for line in blob.splitlines():
+        line = line.strip()
+        if not line or not line.startswith("{"):
+            continue
+        try:
+            rec = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if not isinstance(rec, dict):
+            continue
+        # Claude Code transcript shape: top-level "model" on assistant messages.
+        model = rec.get("model")
+        if isinstance(model, str) and model:
+            last_model = model
+    return last_model
+def update_from_transcript(session_id: str, transcript_path: str) -> str | None:
+    """Read the transcript tail, extract the most recent model, and persist
+    it in the cache keyed by ``session_id``. Returns the model string (or
+    ``None`` if extraction failed — cache untouched in that case).
+    """
+    if not session_id or not transcript_path:
+        return None
+    model = _extract_last_model(transcript_path)
+    if model is None:
+        return None
+    try:
+        data = _load()
+        data[session_id] = {"model": model, "updated_at": time.time()}
+        _save(data)
+    except OSError:
+        # Cache is best-effort. A write failure must not break the hook.
+        return model
+    return model
+def get_model(session_id: str) -> str | None:
+    """Return the cached model for ``session_id``, or ``None`` if no cache
+    entry exists. Never raises — the cache is best-effort."""
+    if not session_id:
+        return None
+    try:
+        data = _load()
+    except OSError:
+        return None
+    entry = data.get(session_id)
+    if not isinstance(entry, dict):
+        return None
+    model = entry.get("model")
+    return model if isinstance(model, str) and model else None
+def clear_session(session_id: str) -> None:
+    """Remove a session's cache entry. Called on ``SessionEnd`` so cache
+    size stays bounded over time."""
+    if not session_id:
+        return
+    try:
+        data = _load()
+        if session_id in data:
+            del data[session_id]
+            _save(data)
+    except OSError:
+        return
+# CLI entry so the shell hook can do `python3 -m methodproof.hooks.model_cache ...`
+# on rare events (SessionStart / Stop / SessionEnd). The hot read path in shell
+# uses jq directly on the cache file — no Python subprocess needed.
+def _main() -> int:
+    import sys
+    args = sys.argv[1:]
+    if len(args) < 1:
+        return 1
+    cmd = args[0]
+    if cmd == "update" and len(args) == 3:
+        model = update_from_transcript(args[1], args[2])
+        if model:
+            print(model)
+        return 0
+    if cmd == "get" and len(args) == 2:
+        model = get_model(args[1])
+        if model:
+            print(model)
+        return 0
+    if cmd == "clear" and len(args) == 2:
+        clear_session(args[1])
+        return 0
+    return 1
+if __name__ == "__main__":
+    raise SystemExit(_main())

{methodproof-0.8.3 → methodproof-0.8.4}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "methodproof"
-version = "0.8.3"
+version = "0.8.4"
 description = "See how you code. Capture and visualize your engineering process."
 requires-python = ">=3.11"
 dependencies = ["watchdog>=4.0", "websocket-client>=1.7", "cryptography>=43.0", "keyring>=25.0", "textual>=0.59", "rich>=13.7", "sqlcipher3>=0.6"]

methodproof-0.8.4/tests/test_model_cache.py ADDED Viewed

@@ -0,0 +1,278 @@
+"""Per-session model cache tests — drives the Claude Code hook's model
+attribution pipeline. See `methodproof/hooks/model_cache.py`.
+"""
+import json
+import pathlib
+import pytest
+from methodproof.hooks import model_cache
+@pytest.fixture(autouse=True)
+def isolated_cache(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
+    """Redirect cache to a tmpdir so tests don't touch a developer's real
+    `~/.methodproof/hook_state/models.json`. Uses a dedicated subdir so
+    the sibling-files assertion is tight."""
+    cache_dir = tmp_path / "hook_state"
+    cache_file = cache_dir / "models.json"
+    monkeypatch.setattr(model_cache, "CACHE_PATH", cache_file)
+    return cache_file
+def _write_transcript(path: pathlib.Path, records: list[dict]) -> None:
+    path.write_text("\n".join(json.dumps(r) for r in records) + "\n")
+# ── extract_last_model ─────────────────────────────────────────────────────
+def test_extract_returns_last_model_in_transcript(tmp_path: pathlib.Path) -> None:
+    """When the transcript carries multiple assistant messages with
+    different models, `_extract_last_model` returns the final one —
+    the model that was active most recently."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [
+        {"type": "user", "message": {"content": "hi"}},
+        {"type": "assistant", "model": "claude-haiku-4-5", "message": {}},
+        {"type": "user", "message": {}},
+        {"type": "assistant", "model": "claude-sonnet-4-5", "message": {}},
+    ])
+    assert model_cache._extract_last_model(str(transcript)) == "claude-sonnet-4-5"
+def test_extract_returns_none_when_no_model_field(tmp_path: pathlib.Path) -> None:
+    """Transcripts without an assistant message (or without a model on
+    any message) yield None — the hook falls back to 'no model attribution'."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [
+        {"type": "user", "message": {"content": "hi"}},
+    ])
+    assert model_cache._extract_last_model(str(transcript)) is None
+def test_extract_skips_malformed_lines(tmp_path: pathlib.Path) -> None:
+    """Corrupted / partial JSON lines must not crash extraction —
+    hooks run inline and cannot raise."""
+    transcript = tmp_path / "t.jsonl"
+    transcript.write_text(
+        'not-json\n'
+        + json.dumps({"type": "assistant", "model": "claude-opus-4-7"})
+        + "\n{incomplete\n"
+    )
+    assert model_cache._extract_last_model(str(transcript)) == "claude-opus-4-7"
+def test_extract_nonexistent_file_returns_none() -> None:
+    assert model_cache._extract_last_model("/nonexistent/transcript.jsonl") is None
+def test_extract_uses_tail_only_on_large_transcript(tmp_path: pathlib.Path) -> None:
+    """Long transcripts (>64KB) are tailed, not fully read. We seek past
+    everything except the final 64KB and drop the partial first line, so
+    a model that appears only before that window will NOT be seen. This
+    is intentional — we want the CURRENT model, not the original."""
+    transcript = tmp_path / "t.jsonl"
+    # Pad with ~96 KB of user records (400 × ~245 bytes); final assistant record at the very end.
+    padding = json.dumps({"type": "user", "message": {"content": "x" * 200}}) + "\n"
+    early_model = json.dumps({"type": "assistant", "model": "claude-haiku-4-5"}) + "\n"
+    late_model = json.dumps({"type": "assistant", "model": "claude-opus-4-7"}) + "\n"
+    transcript.write_text(early_model + padding * 400 + late_model)
+    assert model_cache._extract_last_model(str(transcript)) == "claude-opus-4-7"
+# ── update_from_transcript ─────────────────────────────────────────────────
+def test_update_persists_and_returns_model(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [
+        {"type": "assistant", "model": "claude-sonnet-4-5"},
+    ])
+    result = model_cache.update_from_transcript("sess-1", str(transcript))
+    assert result == "claude-sonnet-4-5"
+    data = json.loads(isolated_cache.read_text())
+    assert data["sess-1"]["model"] == "claude-sonnet-4-5"
+    assert isinstance(data["sess-1"]["updated_at"], (int, float))
+def test_update_preserves_other_sessions(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    """Multiple concurrent Claude Code sessions must coexist in the cache
+    without clobbering each other — e.g., two worktrees each running
+    `claude` simultaneously."""
+    t1 = tmp_path / "t1.jsonl"
+    t2 = tmp_path / "t2.jsonl"
+    _write_transcript(t1, [{"type": "assistant", "model": "claude-haiku-4-5"}])
+    _write_transcript(t2, [{"type": "assistant", "model": "claude-opus-4-7"}])
+    model_cache.update_from_transcript("sess-A", str(t1))
+    model_cache.update_from_transcript("sess-B", str(t2))
+    assert model_cache.get_model("sess-A") == "claude-haiku-4-5"
+    assert model_cache.get_model("sess-B") == "claude-opus-4-7"
+def test_update_with_no_model_in_transcript_leaves_cache_untouched(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    """Transcripts with no model yield None — the existing cache entry
+    (from a prior update) must not be wiped. Otherwise a mid-session
+    refresh against an incomplete transcript would erase attribution."""
+    t1 = tmp_path / "t1.jsonl"
+    _write_transcript(t1, [{"type": "assistant", "model": "claude-sonnet-4-5"}])
+    model_cache.update_from_transcript("sess-X", str(t1))
+    t2 = tmp_path / "t2.jsonl"
+    _write_transcript(t2, [{"type": "user", "message": {"content": "hi"}}])
+    result = model_cache.update_from_transcript("sess-X", str(t2))
+    assert result is None
+    # Cache preserved.
+    assert model_cache.get_model("sess-X") == "claude-sonnet-4-5"
+def test_update_with_missing_transcript_path_noops(isolated_cache: pathlib.Path) -> None:
+    assert model_cache.update_from_transcript("sess-1", "") is None
+    assert model_cache.update_from_transcript("", "/some/path.jsonl") is None
+    assert not isolated_cache.exists()
+# ── get_model / clear_session ──────────────────────────────────────────────
+def test_get_model_returns_none_when_no_cache() -> None:
+    assert model_cache.get_model("never-seen") is None
+def test_clear_session_removes_entry(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    t = tmp_path / "t.jsonl"
+    _write_transcript(t, [{"type": "assistant", "model": "claude-sonnet-4-5"}])
+    model_cache.update_from_transcript("sess-cleanup", str(t))
+    assert model_cache.get_model("sess-cleanup") == "claude-sonnet-4-5"
+    model_cache.clear_session("sess-cleanup")
+    assert model_cache.get_model("sess-cleanup") is None
+def test_corrupted_cache_file_does_not_raise(isolated_cache: pathlib.Path) -> None:
+    """A user who hand-edits (or a crash that truncates) the cache file
+    must not break the hook — we silently treat corrupt cache as empty."""
+    isolated_cache.parent.mkdir(parents=True, exist_ok=True)
+    isolated_cache.write_text("not valid JSON {{{")
+    assert model_cache.get_model("anything") is None
+def test_atomic_write_does_not_leave_stale_tmp_files(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    """The save path uses NamedTemporaryFile + os.replace. After a
+    successful update, the cache dir should contain only models.json —
+    no leftover .tmp* files."""
+    t = tmp_path / "t.jsonl"
+    _write_transcript(t, [{"type": "assistant", "model": "claude-sonnet-4-5"}])
+    model_cache.update_from_transcript("sess-1", str(t))
+    siblings = list(isolated_cache.parent.iterdir())
+    assert siblings == [isolated_cache], f"stale tmp files: {siblings}"
+# ── Hook integration: model flows into emitted event metadata ─────────────
+def test_claude_code_hook_pretooluse_attaches_cached_model(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+) -> None:
+    """End-to-end: a PreToolUse payload emitted by the Python hook
+    carries the session's currently-cached model. This is the whole
+    point of the cache — tool events need model attribution without
+    re-reading the transcript on every fire."""
+    from methodproof.hooks import claude_code as hook
+    # Prime the cache with this session's model.
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [
+        {"type": "assistant", "model": "claude-sonnet-4-5"},
+    ])
+    model_cache.update_from_transcript("sess-test", str(transcript))
+    # Simulate the PreToolUse stdin payload Claude Code would send.
+    payload = {
+        "hook_event_name": "PreToolUse",
+        "session_id": "sess-test",
+        "tool_name": "Edit",
+        "tool_use_id": "toolu_test",
+        "tool_input": {"file_path": "/abs/path/app.py",
+                       "old_string": "x", "new_string": "y"},
+    }
+    meta = hook._META_EXTRACTORS["PreToolUse"](payload)
+    assert meta["model"] == "claude-sonnet-4-5"
+    assert meta["tool_name"] == "Edit"
+    assert meta["tool_input"]["file_path"] == "/abs/path/app.py"
+def test_claude_code_hook_omits_model_when_cache_empty(tmp_path: pathlib.Path) -> None:
+    """No cache entry → no ``model`` key in metadata (not a ``None``
+    placeholder). Downstream consumers use `metadata.get("model")` and
+    a missing key is the honest answer when we don't know."""
+    from methodproof.hooks import claude_code as hook
+    payload = {
+        "hook_event_name": "PreToolUse",
+        "session_id": "never-cached",
+        "tool_name": "Edit",
+        "tool_use_id": "toolu_2",
+        "tool_input": {"file_path": "/abs/foo.py"},
+    }
+    meta = hook._META_EXTRACTORS["PreToolUse"](payload)
+    assert "model" not in meta
+def test_main_refreshes_cache_on_pretooluse_first_turn(
+    tmp_path: pathlib.Path, isolated_cache: pathlib.Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """First-turn PreToolUse arrives with a cold cache (UserPromptSubmit
+    ran before any assistant message hit the transcript). `main()` must
+    refresh the cache on PreToolUse so the emitted event carries `model`.
+    Without this, every tool event in the session's first turn lands
+    with no model attribution and downstream `model_switch` moments and
+    SENT_TO/CONSUMED edges silently fail."""
+    import io
+    from methodproof.hooks import claude_code as hook
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [
+        {"type": "assistant", "model": "claude-sonnet-4-5"},
+    ])
+    assert model_cache.get_model("sess-first-turn") is None  # cold
+    payload = {
+        "hook_event_name": "PreToolUse",
+        "session_id": "sess-first-turn",
+        "transcript_path": str(transcript),
+        "tool_name": "Edit",
+        "tool_input": {"file_path": "/abs/app.py"},
+    }
+    captured: dict = {}
+    def fake_urlopen(req, timeout):
+        captured["body"] = json.loads(req.data.decode())
+        class _R:
+            def __enter__(self): return self
+            def __exit__(self, *a): pass
+        return _R()
+    monkeypatch.setattr("sys.stdin", io.StringIO(json.dumps(payload)))
+    monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)
+    hook.main()
+    assert model_cache.get_model("sess-first-turn") == "claude-sonnet-4-5"
+    events = captured["body"]["events"]
+    assert events[0]["type"] == "tool_call"
+    assert events[0]["metadata"]["model"] == "claude-sonnet-4-5"

methodproof-0.8.4/tests/test_watcher_ignore.py ADDED Viewed

@@ -0,0 +1,63 @@
+"""IGNORE_PATTERNS — watcher exclusions for runtime log output.
+Regression guards for the 8c21 prod symptom: the platform's own log
+file ``methodproof-platform/logs/methodproof-platform.jsonl`` captured
+as file_edit events, polluting both thread and step distributions with
+15,269 spurious events.
+"""
+from methodproof.agents.watcher import IGNORE_PATTERNS
+def _ignored(path: str) -> bool:
+    return bool(IGNORE_PATTERNS.search(path))
+# ── log-output directories excluded ──────────────────────────────────
+def test_jsonl_log_in_logs_dir_excluded() -> None:
+    """The 8c21 pathology: jsonl logs under ``logs/`` must not capture."""
+    assert _ignored("/repo/methodproof-platform/logs/methodproof-platform.jsonl")
+def test_log_file_in_logs_dir_excluded() -> None:
+    assert _ignored("/repo/project/logs/app.log")
+def test_arbitrary_extension_in_logs_dir_excluded() -> None:
+    """Any file under ``logs/`` — txt, out, ndjson, etc. — is runtime output."""
+    assert _ignored("/repo/project/logs/events.ndjson")
+    assert _ignored("/repo/project/logs/stdout.txt")
+def test_nested_logs_dir_excluded() -> None:
+    """Deeper logs dirs still match (watchdog sees absolute paths)."""
+    assert _ignored("/repo/pkg/sub/logs/trace.jsonl")
+# ── legit source files with similar names NOT excluded ─────────────
+def test_source_file_named_logs_not_excluded() -> None:
+    """A source file named ``logs.py`` is not under ``/logs/`` and stays captured."""
+    assert not _ignored("/repo/project/app/logs.py")
+def test_logger_module_file_not_excluded() -> None:
+    """``app/logging/formatter.py`` has ``logging`` in path but no ``/logs/``."""
+    assert not _ignored("/repo/project/app/logging/formatter.py")
+# ── existing exclusions still work ──────────────────────────────────
+def test_log_extension_still_excluded() -> None:
+    """The original ``\\.log$`` check still fires on top-level .log files."""
+    assert _ignored("/repo/project/app.log")
+def test_lock_extension_still_excluded() -> None:
+    assert _ignored("/repo/project/package-lock.lock")
+def test_node_modules_still_excluded() -> None:
+    """Sanity — don't break the other exclusions."""
+    assert _ignored("/repo/project/node_modules/react/index.js")

{methodproof-0.8.3 → methodproof-0.8.4}/uv.lock RENAMED Viewed

@@ -1016,7 +1016,7 @@ wheels = [
 [[package]]
 name = "methodproof"
-version = "0.8.0"
+version = "0.8.4"
 source = { editable = "." }
 dependencies = [
     { name = "cryptography", version = "44.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },