PyPI - cctx-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

cctx-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

cctx/__init__.py +3 -0
cctx/cli.py +375 -0
cctx/diagnostician/__init__.py +81 -0
cctx/diagnostician/aggregate.py +40 -0
cctx/diagnostician/inflection.py +19 -0
cctx/diagnostician/patterns/__init__.py +1 -0
cctx/diagnostician/patterns/retry_loop.py +145 -0
cctx/diagnostician/patterns/scope_creep.py +87 -0
cctx/diagnostician/patterns/stale_context.py +147 -0
cctx/discovery.py +185 -0
cctx/exporters/__init__.py +0 -0
cctx/exporters/csv.py +64 -0
cctx/exporters/jsonl.py +64 -0
cctx/harvest.py +173 -0
cctx/models.py +269 -0
cctx/parsers/__init__.py +1 -0
cctx/parsers/claude_code.py +690 -0
cctx/pricing.py +18 -0
cctx/recommender/__init__.py +0 -0
cctx/recommender/claude_md.py +131 -0
cctx/recommender/evidence.py +46 -0
cctx/renderers/__init__.py +0 -0
cctx/renderers/report.py +58 -0
cctx/renderers/templates/autopsy.html.j2 +249 -0
cctx/renderers/terminal.py +251 -0
cctx/renderers/trace_tui.py +291 -0
cctx/tokenizer.py +77 -0
cctx_cli-0.1.0.dist-info/METADATA +159 -0
cctx_cli-0.1.0.dist-info/RECORD +31 -0
cctx_cli-0.1.0.dist-info/WHEEL +4 -0
cctx_cli-0.1.0.dist-info/entry_points.txt +2 -0

cctx/diagnostician/patterns/scope_creep.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""Scope-creep classifier.
+Fires only on explicit re-scoping phrases in assistant turn text (conservative
+v0). No structural heuristics. One Finding per session; all phrase matches
+bundled into evidence.
+"""
+from __future__ import annotations
+import re
+from typing import TYPE_CHECKING
+from cctx.models import Confidence, Finding, FindingKind, Severity
+if TYPE_CHECKING:
+    from cctx.models import SessionTrace
+# Case-insensitive phrase list. "i noticed that" requires a following action verb.
+_PLAIN_PHRASES = [
+    "i'll also fix",
+    "while i'm here",
+    "let me also",
+    "i also noticed",
+    "while we're at it",
+    "i should also",
+    "additionally, i'll",
+]
+_ACTION_VERBS = r"(?:fix|add|update|change|remove|clean|refactor|improve|address)"
+_NOTICED_THAT = re.compile(
+    r"i noticed that.{0,20}" + _ACTION_VERBS,
+    re.IGNORECASE,
+)
+def _matches(text: str) -> list[str]:
+    """Return all matched phrases found in text."""
+    low = text.lower()
+    found = [p for p in _PLAIN_PHRASES if p in low]
+    if _NOTICED_THAT.search(text):
+        found.append("i noticed that")
+    return found
+def classify(trace: SessionTrace) -> list[Finding]:
+    try:
+        return _classify_impl(trace)
+    except Exception:
+        return []
+def _classify_impl(trace: SessionTrace) -> list[Finding]:
+    phrases_found: list[dict] = []
+    for turn in trace.turns:
+        if turn.role != "assistant" or not turn.text:
+            continue
+        matched = _matches(turn.text)
+        for phrase in matched:
+            low = turn.text.lower()
+            idx = low.find(phrase)
+            start = max(0, idx - 20)
+            snippet = turn.text[start : start + 80]
+            phrases_found.append({
+                "turn": turn.turn_number,
+                "phrase": phrase,
+                "snippet": snippet,
+            })
+    if not phrases_found:
+        return []
+    first_turn = min(p["turn"] for p in phrases_found)
+    count = len(phrases_found)
+    first_phrase = phrases_found[0]["phrase"]
+    plural = "s" if count > 1 else ""
+    summary = f"'{first_phrase}' at turn {first_turn} ({count} scope expansion{plural} total)"
+    return [Finding(
+        kind=FindingKind.SCOPE_CREEP,
+        severity=Severity.MEDIUM,
+        confidence=Confidence.MEDIUM,
+        first_turn=first_turn,
+        last_turn=phrases_found[-1]["turn"] if len(phrases_found) > 1 else None,
+        evidence={"phrases": phrases_found},
+        cost_usd=None,
+        summary=summary,
+    )]

cctx/diagnostician/patterns/stale_context.py ADDED Viewed

@@ -0,0 +1,147 @@
+"""Stale-context classifier.
+Detects large tool results that remained in context well past their last
+reference. Uses 3-gram overlap to detect references. Compaction-aware:
+a result followed by any later compaction event is skipped, not reported.
+Thresholds (per spec):
+  T_size  = 2_000 tokens (minimum size to be a candidate)
+  N_stale = 5 turns after last reference before "stale"
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from cctx.models import Confidence, Finding, FindingKind, Severity
+if TYPE_CHECKING:
+    from cctx.models import SessionTrace, Turn
+# Minimum size, in tokens, for a tool result to be considered a candidate.
+T_SIZE = 2_000   # token threshold
+# A candidate is reported only when more than this many turns separate its
+# last reference from the final turn of the session.
+N_STALE = 5      # turns before officially stale
+# Summed token-turns above which the finding's severity and confidence are HIGH
+# instead of MEDIUM.
+STALE_HIGH_THRESHOLD = 500_000  # token-turns above which → HIGH
+def _estimate_tokens(text: str) -> int:
+    return int(len(text.split()) * 1.3)
+def _make_3grams(text: str) -> set[tuple[str, ...]]:
+    words = text.lower().split()
+    if len(words) < 3:
+        return set()
+    return {tuple(words[i : i + 3]) for i in range(len(words) - 2)}
+def _is_compaction(turn: Turn) -> bool:
+    return turn.role == "system" and "compact" in turn.text.lower()
+def _classify_impl(trace: SessionTrace) -> list[Finding]:
+    # Identify large tool results and their first_seen_turn
+    candidates: list[dict] = []  # {uid, tool_name, content, tokens, first_seen_turn}
+    for turn in trace.turns:
+        for tr in turn.tool_results:
+            tokens = tr.token_count if tr.token_count > 0 else _estimate_tokens(tr.content)
+            if tokens < T_SIZE:
+                continue
+            candidates.append({
+                "uid": tr.tool_use_id,
+                "tool_name": tr.tool_name,
+                "content": tr.content,
+                "tokens": tokens,
+                "first_seen_turn": turn.turn_number,
+                "content_3grams": _make_3grams(tr.content),
+            })
+    if not candidates:
+        return []
+    # Find the turn number of any compaction events
+    compaction_turns: set[int] = {
+        t.turn_number for t in trace.turns if _is_compaction(t)
+    }
+    last_turn_number = max((t.turn_number for t in trace.turns), default=0)
+    stale_items: list[dict] = []
+    for cand in candidates:
+        first_seen = cand["first_seen_turn"]
+        content_3grams = cand["content_3grams"]
+        # Find last assistant turn with a 3-gram reference to this content
+        last_ref = first_seen  # at minimum, the turn it appeared in counts as a reference
+        for turn in trace.turns:
+            if turn.turn_number <= first_seen:
+                continue
+            if turn.role != "assistant":
+                continue
+            turn_3grams = _make_3grams(turn.text)
+            if content_3grams & turn_3grams:
+                last_ref = turn.turn_number
+        # Check for compaction between first_seen and end: if any, skip this item
+        if any(ct > first_seen for ct in compaction_turns):
+            continue
+        turns_stale = last_turn_number - last_ref
+        if turns_stale <= N_STALE:
+            continue
+        # Cost is attributed only to API calls (assistant turns), not to
+        # user/tool_result turns. Using raw turn-number delta inflates waste
+        # by ~2× in typical alternating-turn sessions.
+        billed_stale = sum(
+            1 for t in trace.turns
+            if t.turn_number > last_ref and t.role == "assistant"
+        )
+        token_turns = cand["tokens"] * billed_stale
+        stale_items.append({
+            "tool_name": cand["tool_name"],
+            "content_tokens": cand["tokens"],
+            "first_seen_turn": first_seen,
+            "last_referenced_turn": last_ref,
+            "turns_stale": turns_stale,
+            "token_turns": token_turns,
+        })
+    if not stale_items:
+        return []
+    total_token_turns = sum(item["token_turns"] for item in stale_items)
+    level = Confidence.HIGH if total_token_turns > STALE_HIGH_THRESHOLD else Confidence.MEDIUM
+    severity = Severity.HIGH if total_token_turns > STALE_HIGH_THRESHOLD else Severity.MEDIUM
+    # first_turn = when the first item became officially stale
+    first_stale = min(
+        item["last_referenced_turn"] + N_STALE for item in stale_items
+    )
+    # Summary describes the worst offender
+    worst = max(stale_items, key=lambda i: i["token_turns"])
+    tokens_k = worst["content_tokens"] // 1000
+    summary = (
+        f"{tokens_k}K-token {worst['tool_name']} result stale "
+        f"{worst['turns_stale']} turns "
+        f"(~{total_token_turns:,} token-turns)"
+    )
+    return [Finding(
+        kind=FindingKind.STALE_CONTEXT,
+        severity=severity,
+        confidence=level,
+        first_turn=first_stale,
+        last_turn=last_turn_number,
+        evidence={"stale_items": stale_items, "total_token_turns": total_token_turns},
+        cost_usd=None,
+        summary=summary,
+    )]
+def classify(trace: SessionTrace) -> list[Finding]:
+    try:
+        return _classify_impl(trace)
+    except Exception:
+        return []

cctx/discovery.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""Session and project discovery for ~/.claude/projects/.
+Public API:
+    claude_projects_dir() -> Path
+    find_project_dir(cwd) -> Path | None
+    list_projects(base) -> list[ProjectInfo]
+    list_sessions(project_dir) -> list[SessionMeta]
+    latest_session(project_dir) -> Path | None
+    complete_project(ctx, param, incomplete) -> list[CompletionItem]
+"""
+from __future__ import annotations
+import json
+import os
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+@dataclass
+class SessionMeta:
+    """Metadata for one session log file, as gathered by ``_read_session_meta``."""
+    path: Path  # the session file itself
+    session_id: str  # a "sessionId" value from the log; defaults to the file stem
+    start_time: datetime | None  # first parseable "timestamp" seen, else None
+    cwd: str | None  # first "cwd" value seen, else None
+    git_branch: str | None  # first "gitBranch" value seen, else None
+@dataclass
+class ProjectInfo:
+    project_dir: Path          # ~/.claude/projects/-Users-...
+    display_name: str          # ~/Projects/cctx  (from cwd in first session)
+    sessions: list[SessionMeta] = field(default_factory=list)
+    @property
+    def session_count(self) -> int:
+        return len(self.sessions)
+    @property
+    def latest_time(self) -> datetime | None:
+        times = [s.start_time for s in self.sessions if s.start_time]
+        return max(times) if times else None
+def claude_projects_dir() -> Path:
+    if override := os.environ.get("CCTX_PROJECTS_DIR"):
+        return Path(override)
+    return Path.home() / ".claude" / "projects"
+def _encode_path(path: Path) -> str:
+    return path.resolve().as_posix().replace("/", "-")
+def find_project_dir(cwd: Path, *, base: Path | None = None) -> Path | None:
+    """Return the ~/.claude/projects/<encoded> dir that corresponds to cwd."""
+    base = base or claude_projects_dir()
+    encoded = _encode_path(cwd)
+    candidate = base / encoded
+    return candidate if candidate.is_dir() else None
+def _read_session_meta(path: Path) -> SessionMeta:
+    """Quick scan: read enough lines to get session metadata without full parse."""
+    session_id = path.stem
+    start_time: datetime | None = None
+    cwd: str | None = None
+    git_branch: str | None = None
+    try:
+        with path.open(encoding="utf-8", errors="replace") as fh:
+            for _ in range(50):  # cap at 50 lines — metadata is always early
+                line = fh.readline()
+                if not line:
+                    break
+                try:
+                    obj = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                if "sessionId" in obj:
+                    session_id = obj["sessionId"]
+                if "timestamp" in obj and start_time is None:
+                    try:
+                        raw = obj["timestamp"].replace("Z", "+00:00")
+                        start_time = datetime.fromisoformat(raw)
+                    except (ValueError, AttributeError):
+                        pass
+                if "cwd" in obj and cwd is None:
+                    cwd = obj["cwd"]
+                if "gitBranch" in obj and git_branch is None:
+                    git_branch = obj["gitBranch"]
+                if start_time and cwd:
+                    break
+    except OSError:
+        pass
+    return SessionMeta(
+        path=path,
+        session_id=session_id,
+        start_time=start_time,
+        cwd=cwd,
+        git_branch=git_branch,
+    )
+def list_sessions(project_dir: Path) -> list[SessionMeta]:
+    """List sessions in a project directory, newest first."""
+    sessions = [
+        _read_session_meta(p)
+        for p in project_dir.glob("*.jsonl")
+    ]
+    _epoch = datetime.min.replace(tzinfo=timezone.utc)
+    sessions.sort(key=lambda s: s.start_time or _epoch, reverse=True)
+    return sessions
+def _project_display_name(project_dir: Path) -> str:
+    """Derive a human-readable name from cwd in session files, or decode best-effort."""
+    for path in sorted(project_dir.glob("*.jsonl"))[:3]:
+        meta = _read_session_meta(path)
+        if meta.cwd:
+            home = str(Path.home())
+            if meta.cwd.startswith(home):
+                return "~" + meta.cwd[len(home):]
+            return meta.cwd
+    # Fallback: decode -Users-bryan-Projects-cctx → ~/Projects/cctx
+    encoded = project_dir.name
+    home_prefix = _encode_path(Path.home())  # -Users-bryan
+    if encoded.startswith(home_prefix):
+        tail = encoded[len(home_prefix):]  # -Projects-cctx
+        return "~" + tail.replace("-", "/")
+    return encoded
+def list_projects(base: Path | None = None) -> list[ProjectInfo]:
+    """List all projects in the claude projects directory, newest-activity first."""
+    base = base or claude_projects_dir()
+    if not base.is_dir():
+        return []
+    projects: list[ProjectInfo] = []
+    for entry in base.iterdir():
+        if not entry.is_dir():
+            continue
+        if not any(entry.glob("*.jsonl")):
+            continue
+        sessions = list_sessions(entry)
+        projects.append(ProjectInfo(
+            project_dir=entry,
+            display_name=_project_display_name(entry),
+            sessions=sessions,
+        ))
+    projects.sort(
+        key=lambda p: p.latest_time or datetime.min.replace(tzinfo=timezone.utc),
+        reverse=True,
+    )
+    return projects
+def latest_session(project_dir: Path) -> Path | None:
+    """Return the path of the most recent session JSONL in a project dir."""
+    sessions = list_sessions(project_dir)
+    return sessions[0].path if sessions else None
+def complete_project(ctx: object, param: object, incomplete: str) -> list[object]:
+    """Click shell_complete callback — returns local project paths matching incomplete."""
+    from click.shell_completion import CompletionItem
+    try:
+        projects = list_projects()
+    except Exception:
+        return []
+    home = str(Path.home())
+    results = []
+    for p in projects:
+        actual = p.display_name.replace("~", home)
+        if incomplete.lower() in actual.lower():
+            results.append(
+                CompletionItem(actual, help=f"{p.session_count} session(s)")
+            )
+    return results

cctx/exporters/__init__.py ADDED Viewed

File without changes

cctx/exporters/csv.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""CSV exporter — one row per turn, one header row."""
+from __future__ import annotations
+import csv as _csv
+from typing import IO, TYPE_CHECKING
+from cctx.pricing import price_per_tok as _price_per_tok
+if TYPE_CHECKING:
+    from cctx.models import Diagnosis, SessionTrace
+# Header row and field order of the CSV export. export_turn_rows() builds one
+# dict per turn keyed by exactly these names, and write() hands this list to
+# csv.DictWriter as its fieldnames.
+COLUMNS = [
+    "session_id",
+    "turn_number",
+    "role",
+    "model",
+    "input_tokens",
+    "cost_usd",
+    "tool_names",
+    "finding_kinds",
+    "is_inflection_turn",
+]
+def export_turn_rows(diagnosis: Diagnosis, trace: SessionTrace) -> list[dict[str, str]]:
+    finding_at: dict[int, list[str]] = {}
+    for f in diagnosis.findings:
+        finding_at.setdefault(f.first_turn, []).append(f.kind.value)
+    rows = []
+    for turn in trace.turns:
+        input_tokens = turn.usage.input_tokens if turn.usage else 0
+        if turn.usage:
+            p = _price_per_tok(turn.model)
+            cost_usd = (
+                turn.usage.input_tokens * p
+                + turn.usage.cache_read * p * 0.1
+                + (turn.usage.cache_creation_5m + turn.usage.cache_creation_1h) * p * 1.25
+            )
+        else:
+            cost_usd = 0.0
+        is_inflection = turn.turn_number == diagnosis.inflection_turn
+        rows.append({
+            "session_id": trace.session_id,
+            "turn_number": str(turn.turn_number),
+            "role": turn.role,
+            "model": turn.model or "",
+            "input_tokens": str(input_tokens),
+            "cost_usd": f"{cost_usd:.6f}",
+            "tool_names": ",".join(tu.tool_name for tu in turn.tool_uses),
+            "finding_kinds": ",".join(finding_at.get(turn.turn_number, [])),
+            "is_inflection_turn": "true" if is_inflection else "false",
+        })
+    return rows
+def write(
+    diagnoses: list[tuple[Diagnosis, SessionTrace]],
+    out: IO[str],
+) -> None:
+    writer = _csv.DictWriter(out, fieldnames=COLUMNS)
+    writer.writeheader()
+    for diagnosis, trace in diagnoses:
+        writer.writerows(export_turn_rows(diagnosis, trace))

cctx/exporters/jsonl.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""JSONL exporter — one JSON object per session line."""
+from __future__ import annotations
+import json
+from typing import IO, TYPE_CHECKING
+if TYPE_CHECKING:
+    from cctx.models import Diagnosis, SessionTrace
+def export_diagnosis(
+    diagnosis: Diagnosis,
+    trace: SessionTrace,
+    *,
+    include_content: bool = True,
+) -> str:
+    findings = []
+    for f in diagnosis.findings:
+        d: dict[str, object] = {
+            "kind": f.kind.value,
+            "severity": f.severity.value,
+            "confidence": f.confidence.value,
+            "first_turn": f.first_turn,
+            "last_turn": f.last_turn,
+            "cost_usd": f.cost_usd,
+        }
+        if include_content:
+            d["summary"] = f.summary
+        findings.append(d)
+    patches = []
+    for p in diagnosis.patches:
+        d = {
+            "target_file": p.target_file,
+            "finding_kind": p.finding_kind.value,
+            "description": p.description,
+        }
+        if include_content:
+            d["evidence_summary"] = p.evidence_summary
+        patches.append(d)
+    obj = {
+        "session_id": diagnosis.session_id,
+        "analysed_at": diagnosis.analysed_at.isoformat(),
+        "total_cost_usd": diagnosis.total_cost_usd,
+        "waste_cost_usd": diagnosis.waste_cost_usd,
+        "inflection_turn": diagnosis.inflection_turn,
+        "finding_count": len(diagnosis.findings),
+        "findings": findings,
+        "patches": patches,
+        "turn_count": len(trace.turns),
+        "model": trace.primary_model,
+    }
+    return json.dumps(obj)
+def write(
+    diagnoses: list[tuple[Diagnosis, SessionTrace]],
+    out: IO[str],
+    *,
+    include_content: bool = True,
+) -> None:
+    for diagnosis, trace in diagnoses:
+        out.write(export_diagnosis(diagnosis, trace, include_content=include_content) + "\n")