cctx-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ """Scope-creep classifier.
2
+
3
+ Fires only on explicit re-scoping phrases in assistant turn text (conservative
4
+ v0). No structural heuristics. One Finding per session; all phrase matches
5
+ bundled into evidence.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from typing import TYPE_CHECKING
11
+
12
+ from cctx.models import Confidence, Finding, FindingKind, Severity
13
+
14
+ if TYPE_CHECKING:
15
+ from cctx.models import SessionTrace
16
+
17
# Case-insensitive phrase list, matched as plain substrings of the lowercased
# turn text. "i noticed that" is handled separately by _NOTICED_THAT because it
# only counts when an action verb follows shortly after.
_PLAIN_PHRASES = [
    "i'll also fix",
    "while i'm here",
    "let me also",
    "i also noticed",
    "while we're at it",
    "i should also",
    "additionally, i'll",
]

_ACTION_VERBS = r"(?:fix|add|update|change|remove|clean|refactor|improve|address)"
# The \b anchors pin both the leading "i" and the verb to the start of a word,
# so "prefix" or "padding" no longer count as "fix" / "add". Only the start of
# the verb is anchored, so inflected forms ("fixed", "adding") still match.
_NOTICED_THAT = re.compile(
    r"\bi noticed that.{0,20}\b" + _ACTION_VERBS,
    re.IGNORECASE,
)


def _matches(text: str) -> list[str]:
    """Return the re-scoping phrases found in ``text``.

    Plain phrases are reported in ``_PLAIN_PHRASES`` order, each at most once.
    The label ``"i noticed that"`` is appended last when that phrase is
    followed, within 20 characters on the same line, by a word starting with
    one of the action verbs.
    """
    low = text.lower()
    found = [p for p in _PLAIN_PHRASES if p in low]
    if _NOTICED_THAT.search(text):
        found.append("i noticed that")
    return found
42
+
43
+
44
def classify(trace: SessionTrace) -> list[Finding]:
    """Run the scope-creep classifier without ever raising.

    Best-effort wrapper around ``_classify_impl``: any exception results in an
    empty list. The failure is logged at DEBUG level with its traceback, so it
    stays invisible by default but can be surfaced when debugging.
    """
    import logging  # local import: not part of this module's import block

    try:
        return _classify_impl(trace)
    except Exception:
        logging.getLogger(__name__).debug(
            "scope-creep classifier failed; returning no findings",
            exc_info=True,
        )
        return []
49
+
50
+
51
def _classify_impl(trace: SessionTrace) -> list[Finding]:
    """Collect re-scoping phrases from assistant turns into one Finding.

    Every phrase reported by ``_matches`` for a non-empty assistant turn is
    recorded with its turn number and a snippet of up to 80 characters that
    starts at most 20 characters before the phrase.

    Returns:
        An empty list when no phrase matched; otherwise a single
        ``SCOPE_CREEP`` Finding whose evidence bundles every match.
    """
    phrases_found: list[dict] = []

    for turn in trace.turns:
        if turn.role != "assistant" or not turn.text:
            continue
        matched = _matches(turn.text)
        if not matched:
            continue
        low = turn.text.lower()  # one lowercase copy per turn, not per phrase
        for phrase in matched:
            if phrase == "i noticed that":
                # Anchor the snippet on the occurrence the regex accepted; the
                # first literal occurrence may be one with no action verb.
                hit = _NOTICED_THAT.search(turn.text)
                idx = hit.start() if hit else low.find(phrase)
            else:
                idx = low.find(phrase)
            start = max(0, idx - 20)
            snippet = turn.text[start : start + 80]
            phrases_found.append({
                "turn": turn.turn_number,
                "phrase": phrase,
                "snippet": snippet,
            })

    if not phrases_found:
        return []

    count = len(phrases_found)
    # Derive first/last from the turn numbers themselves so the summary phrase
    # and the turn range stay consistent even if turns are not in order.
    earliest = min(phrases_found, key=lambda p: p["turn"])
    first_turn = earliest["turn"]
    first_phrase = earliest["phrase"]
    last_turn = max(p["turn"] for p in phrases_found) if count > 1 else None
    plural = "s" if count > 1 else ""
    summary = f"'{first_phrase}' at turn {first_turn} ({count} scope expansion{plural} total)"

    return [Finding(
        kind=FindingKind.SCOPE_CREEP,
        severity=Severity.MEDIUM,
        confidence=Confidence.MEDIUM,
        first_turn=first_turn,
        last_turn=last_turn,
        evidence={"phrases": phrases_found},
        cost_usd=None,
        summary=summary,
    )]
@@ -0,0 +1,147 @@
1
+ """Stale-context classifier.
2
+
3
+ Detects large tool results that remained in context well past their last
4
+ reference. Uses 3-gram overlap to detect references. Compaction-aware:
5
+ staleness resets to zero at compaction events.
6
+
7
+ Thresholds (per spec):
8
+ T_size = 2_000 tokens (minimum size to be a candidate)
9
+ N_stale = 5 turns after last reference before "stale"
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from typing import TYPE_CHECKING
14
+
15
+ from cctx.models import Confidence, Finding, FindingKind, Severity
16
+
17
+ if TYPE_CHECKING:
18
+ from cctx.models import SessionTrace, Turn
19
+
20
# Thresholds for the stale-context classifier.
T_SIZE = 2_000  # minimum tool-result size, in tokens, to be a candidate
N_STALE = 5  # turns that must pass after the last reference before "stale"
STALE_HIGH_THRESHOLD = 500_000  # total token-turns above which the rating is HIGH
23
+
24
+
25
def _estimate_tokens(text: str) -> int:
    """Roughly estimate the token count of ``text``.

    Counts whitespace-separated words, scales by 1.3 and truncates to an int.
    """
    word_count = len(text.split())
    return int(word_count * 1.3)
27
+
28
+
29
def _make_3grams(text: str) -> set[tuple[str, ...]]:
    """Return the set of consecutive lowercase word triples in ``text``.

    Words are whitespace-separated. Text with fewer than three words yields
    an empty set.
    """
    tokens = text.lower().split()
    # zip stops at the shortest slice, so short inputs naturally produce nothing.
    return set(zip(tokens, tokens[1:], tokens[2:]))
34
+
35
+
36
def _is_compaction(turn: Turn) -> bool:
    """Return True when ``turn`` is a system turn whose text mentions "compact".

    The match is a case-insensitive substring test. A turn without text
    (``None`` or empty) is never treated as a compaction marker, rather than
    raising on ``None``.
    """
    if turn.role != "system":
        return False
    return "compact" in (turn.text or "").lower()
38
+
39
+
40
def _classify_impl(trace: SessionTrace) -> list[Finding]:
    """Detect large tool results that stayed in context long after their last use.

    A tool result is a candidate when it has at least ``T_SIZE`` tokens (its
    reported ``token_count``, or a word-based estimate when that is not
    positive). A later assistant turn "references" a candidate when its text
    shares at least one word 3-gram with the result's content. A candidate is
    stale when more than ``N_STALE`` turns separate its last reference from the
    final turn of the session. Candidates that are followed by any compaction
    turn are skipped entirely.

    Returns:
        An empty list when nothing is stale; otherwise a single
        ``STALE_CONTEXT`` Finding that aggregates every stale item, with a
        summary describing the item that has the most token-turns.
    """
    # Large tool results and the turn each one first appeared in.
    candidates: list[dict] = []
    for turn in trace.turns:
        for tr in turn.tool_results:
            tokens = tr.token_count if tr.token_count > 0 else _estimate_tokens(tr.content)
            if tokens < T_SIZE:
                continue
            candidates.append({
                "tool_name": tr.tool_name,
                "tokens": tokens,
                "first_seen_turn": turn.turn_number,
                "content_3grams": _make_3grams(tr.content),
            })

    if not candidates:
        return []

    # Only the latest compaction matters: a candidate is skipped when ANY
    # compaction happened after it first appeared.
    last_compaction = max(
        (t.turn_number for t in trace.turns if _is_compaction(t)),
        default=None,
    )
    last_turn_number = max((t.turn_number for t in trace.turns), default=0)

    # 3-grams of every assistant turn, built once instead of once per candidate.
    # Missing text is treated as empty so one text-less turn cannot abort the scan.
    assistant_grams = [
        (t.turn_number, _make_3grams(t.text or ""))
        for t in trace.turns
        if t.role == "assistant"
    ]

    stale_items: list[dict] = []

    for cand in candidates:
        first_seen = cand["first_seen_turn"]

        # Cheap check first: skip before doing any 3-gram comparison.
        if last_compaction is not None and last_compaction > first_seen:
            continue

        content_3grams = cand["content_3grams"]

        # The turn the result appeared in counts as its first reference; the
        # last matching assistant turn (in trace order) wins after that.
        last_ref = first_seen
        for turn_number, grams in assistant_grams:
            if turn_number > first_seen and not content_3grams.isdisjoint(grams):
                last_ref = turn_number

        turns_stale = last_turn_number - last_ref
        if turns_stale <= N_STALE:
            continue

        # Cost is attributed only to API calls (assistant turns), not to
        # user/tool_result turns. Using the raw turn-number delta would inflate
        # waste by ~2x in typical alternating-turn sessions.
        billed_stale = sum(
            1 for turn_number, _ in assistant_grams if turn_number > last_ref
        )
        token_turns = cand["tokens"] * billed_stale
        stale_items.append({
            "tool_name": cand["tool_name"],
            "content_tokens": cand["tokens"],
            "first_seen_turn": first_seen,
            "last_referenced_turn": last_ref,
            "turns_stale": turns_stale,
            "token_turns": token_turns,
        })

    if not stale_items:
        return []

    total_token_turns = sum(item["token_turns"] for item in stale_items)
    is_high = total_token_turns > STALE_HIGH_THRESHOLD
    level = Confidence.HIGH if is_high else Confidence.MEDIUM
    severity = Severity.HIGH if is_high else Severity.MEDIUM

    # first_turn = when the first item became officially stale
    first_stale = min(
        item["last_referenced_turn"] + N_STALE for item in stale_items
    )

    # Summary describes the worst offender
    worst = max(stale_items, key=lambda i: i["token_turns"])
    tokens_k = worst["content_tokens"] // 1000
    summary = (
        f"{tokens_k}K-token {worst['tool_name']} result stale "
        f"{worst['turns_stale']} turns "
        f"(~{total_token_turns:,} token-turns)"
    )

    return [Finding(
        kind=FindingKind.STALE_CONTEXT,
        severity=severity,
        confidence=level,
        first_turn=first_stale,
        last_turn=last_turn_number,
        evidence={"stale_items": stale_items, "total_token_turns": total_token_turns},
        cost_usd=None,
        summary=summary,
    )]
141
+
142
+
143
def classify(trace: SessionTrace) -> list[Finding]:
    """Run the stale-context classifier without ever raising.

    Best-effort wrapper around ``_classify_impl``: any exception results in an
    empty list. The failure is logged at DEBUG level with its traceback, so it
    stays invisible by default but can be surfaced when debugging.
    """
    import logging  # local import: not part of this module's import block

    try:
        return _classify_impl(trace)
    except Exception:
        logging.getLogger(__name__).debug(
            "stale-context classifier failed; returning no findings",
            exc_info=True,
        )
        return []
cctx/discovery.py ADDED
@@ -0,0 +1,185 @@
1
+ """Session and project discovery for ~/.claude/projects/.
2
+
3
+ Public API:
4
+ claude_projects_dir() -> Path
5
+ find_project_dir(cwd) -> Path | None
6
+ list_projects(base) -> list[ProjectInfo]
7
+ list_sessions(project_dir) -> list[SessionMeta]
8
+ latest_session(project_dir) -> Path | None
9
+ complete_project(ctx, param, incomplete) -> list[CompletionItem]
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timezone
17
+ from pathlib import Path
18
+
19
+
20
@dataclass
class SessionMeta:
    """Lightweight metadata describing one session ``.jsonl`` file."""

    path: Path  # location of the session file
    session_id: str
    start_time: datetime | None  # None when no timestamp could be read
    cwd: str | None  # working directory recorded in the session, if any
    git_branch: str | None  # git branch recorded in the session, if any
27
+
28
+
29
@dataclass
class ProjectInfo:
    """A project directory together with the sessions found in it."""

    project_dir: Path  # ~/.claude/projects/-Users-...
    display_name: str  # ~/Projects/cctx (from cwd in first session)
    sessions: list[SessionMeta] = field(default_factory=list)

    @property
    def session_count(self) -> int:
        """Number of sessions recorded for this project."""
        return len(self.sessions)

    @property
    def latest_time(self) -> datetime | None:
        """Most recent known session start time, or None when none is known."""
        known = (s.start_time for s in self.sessions if s.start_time)
        return max(known, default=None)
43
+
44
+
45
def claude_projects_dir() -> Path:
    """Return the directory that holds the per-project session folders.

    A non-empty ``CCTX_PROJECTS_DIR`` environment variable takes precedence;
    otherwise ``~/.claude/projects`` is used.
    """
    override = os.environ.get("CCTX_PROJECTS_DIR")
    if override:
        return Path(override)
    return Path.home().joinpath(".claude", "projects")
49
+
50
+
51
def _encode_path(path: Path) -> str:
    """Encode ``path`` as a flat name: resolve it, then turn every "/" into "-"."""
    posix = path.resolve().as_posix()
    return "-".join(posix.split("/"))
53
+
54
+
55
def find_project_dir(cwd: Path, *, base: Path | None = None) -> Path | None:
    """Return the project directory under ``base`` that corresponds to ``cwd``.

    ``base`` defaults to ``claude_projects_dir()``. The result is ``None`` when
    no directory with the encoded name of ``cwd`` exists there.
    """
    root = base or claude_projects_dir()
    candidate = root / _encode_path(cwd)
    if candidate.is_dir():
        return candidate
    return None
61
+
62
+
63
def _read_session_meta(path: Path) -> SessionMeta:
    """Quick scan: read the first lines of a session file for its metadata.

    At most 50 lines are examined, and scanning stops early once both a start
    time and a cwd are known. Lines that are not valid JSON, or that decode to
    something other than a JSON object, are skipped. An unreadable file yields
    metadata with only ``path`` and the stem-derived ``session_id`` filled in.

    A timestamp without UTC offset is assumed to be UTC, so every returned
    ``start_time`` is timezone-aware and can be compared with the others.
    """
    session_id = path.stem
    start_time: datetime | None = None
    cwd: str | None = None
    git_branch: str | None = None

    try:
        with path.open(encoding="utf-8", errors="replace") as fh:
            # zip with range caps the scan at 50 lines — metadata is always early.
            for _, line in zip(range(50), fh):
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(obj, dict):
                    # A bare number/string/list is valid JSON but has no keys;
                    # `"key" in obj` would raise or mis-match on it.
                    continue
                if "sessionId" in obj:
                    session_id = obj["sessionId"]
                if "timestamp" in obj and start_time is None:
                    try:
                        raw = obj["timestamp"].replace("Z", "+00:00")
                        parsed = datetime.fromisoformat(raw)
                    except (ValueError, AttributeError):
                        pass
                    else:
                        if parsed.tzinfo is None:
                            parsed = parsed.replace(tzinfo=timezone.utc)
                        start_time = parsed
                if "cwd" in obj and cwd is None:
                    cwd = obj["cwd"]
                if "gitBranch" in obj and git_branch is None:
                    git_branch = obj["gitBranch"]
                if start_time and cwd:
                    break
    except OSError:
        pass

    return SessionMeta(
        path=path,
        session_id=session_id,
        start_time=start_time,
        cwd=cwd,
        git_branch=git_branch,
    )
104
+
105
+
106
def list_sessions(project_dir: Path) -> list[SessionMeta]:
    """Return metadata for every ``*.jsonl`` file in ``project_dir``, newest first.

    Sessions without a start time sort as if they were the oldest possible.
    """
    oldest = datetime.min.replace(tzinfo=timezone.utc)

    def sort_key(meta: SessionMeta) -> datetime:
        return meta.start_time or oldest

    return sorted(
        (_read_session_meta(p) for p in project_dir.glob("*.jsonl")),
        key=sort_key,
        reverse=True,
    )
115
+
116
+
117
def _project_display_name(project_dir: Path) -> str:
    """Derive a human-readable name for a project directory.

    The cwd recorded in one of the first three session files (by sorted
    filename) is preferred, with the home directory abbreviated to "~". When
    no session records a cwd, the directory name is decoded best-effort by
    turning "-" back into "/".

    The home prefix is only abbreviated at a path-component boundary, so a
    sibling such as ``/Users/bryanna`` is not mangled when home is
    ``/Users/bryan``.
    """
    home = str(Path.home())
    for path in sorted(project_dir.glob("*.jsonl"))[:3]:
        cwd = _read_session_meta(path).cwd
        if not cwd:
            continue
        rest = cwd[len(home):]
        if cwd.startswith(home) and (not rest or rest[0] in "/\\"):
            return "~" + rest
        return cwd

    # Fallback: decode -Users-bryan-Projects-cctx → ~/Projects/cctx
    encoded = project_dir.name
    home_prefix = _encode_path(Path.home())  # -Users-bryan
    tail = encoded[len(home_prefix):]  # -Projects-cctx
    if encoded.startswith(home_prefix) and (not tail or tail.startswith("-")):
        return "~" + tail.replace("-", "/")
    return encoded
134
+
135
+
136
def list_projects(base: Path | None = None) -> list[ProjectInfo]:
    """Return every project under ``base``, most recent activity first.

    ``base`` defaults to ``claude_projects_dir()``. Only sub-directories that
    contain at least one ``*.jsonl`` file count as projects. A missing base
    directory yields an empty list. Projects with no known session time sort
    last.
    """
    root = base or claude_projects_dir()
    if not root.is_dir():
        return []

    found: list[ProjectInfo] = []
    for entry in root.iterdir():
        if not (entry.is_dir() and any(entry.glob("*.jsonl"))):
            continue
        entry_sessions = list_sessions(entry)
        found.append(
            ProjectInfo(
                project_dir=entry,
                display_name=_project_display_name(entry),
                sessions=entry_sessions,
            )
        )

    oldest = datetime.min.replace(tzinfo=timezone.utc)
    return sorted(found, key=lambda proj: proj.latest_time or oldest, reverse=True)
160
+
161
+
162
def latest_session(project_dir: Path) -> Path | None:
    """Return the path of the newest session file in ``project_dir``, if any."""
    newest_first = list_sessions(project_dir)
    if not newest_first:
        return None
    return newest_first[0].path
166
+
167
+
168
def complete_project(ctx: object, param: object, incomplete: str) -> list[object]:
    """Click shell_complete callback — returns local project paths matching incomplete.

    Each project's display name has its leading "~" expanded to the home
    directory; a project is offered when ``incomplete`` occurs anywhere in that
    path, ignoring case. Any failure while listing projects yields no
    completions. ``ctx`` and ``param`` are accepted but not used.
    """
    from click.shell_completion import CompletionItem

    try:
        projects = list_projects()
    except Exception:
        return []

    home = str(Path.home())
    needle = incomplete.lower()
    results = []
    for p in projects:
        name = p.display_name
        # Expand only the leading "~"; a "~" elsewhere in the path is literal.
        actual = home + name[1:] if name.startswith("~") else name
        if needle in actual.lower():
            results.append(
                CompletionItem(actual, help=f"{p.session_count} session(s)")
            )
    return results
File without changes
cctx/exporters/csv.py ADDED
@@ -0,0 +1,64 @@
1
+ """CSV exporter — one row per turn, one header row."""
2
+ from __future__ import annotations
3
+
4
+ import csv as _csv
5
+ from typing import IO, TYPE_CHECKING
6
+
7
+ from cctx.pricing import price_per_tok as _price_per_tok
8
+
9
+ if TYPE_CHECKING:
10
+ from cctx.models import Diagnosis, SessionTrace
11
+
12
# Header names, in output order, for the per-turn CSV export.
COLUMNS = [
    "session_id",
    "turn_number",
    "role",
    "model",
    "input_tokens",
    "cost_usd",
    "tool_names",
    "finding_kinds",
    "is_inflection_turn",
]
23
+
24
+
25
def export_turn_rows(diagnosis: Diagnosis, trace: SessionTrace) -> list[dict[str, str]]:
    """Build one CSV row (a dict of strings) per turn of ``trace``.

    ``finding_kinds`` lists the kinds of the findings whose ``first_turn`` is
    that turn, comma-separated. ``cost_usd`` covers the input side only:
    uncached input tokens, cache reads and cache writes. Turns without usage
    data get zero tokens and zero cost.

    NOTE(review): the multipliers assume ``_price_per_tok`` returns the base
    input price per token. Cache reads are billed at 0.1x, 5-minute cache
    writes at 1.25x and 1-hour cache writes at 2x that price; the 1-hour tier
    was previously charged at the 5-minute rate. Confirm that any other cost
    calculation in the package uses the same multipliers.
    """
    # Finding kinds keyed by the turn at which each finding starts.
    finding_at: dict[int, list[str]] = {}
    for f in diagnosis.findings:
        finding_at.setdefault(f.first_turn, []).append(f.kind.value)

    rows = []
    for turn in trace.turns:
        usage = turn.usage
        if usage:
            input_tokens = usage.input_tokens
            p = _price_per_tok(turn.model)
            cost_usd = (
                usage.input_tokens * p
                + usage.cache_read * p * 0.1
                + usage.cache_creation_5m * p * 1.25
                + usage.cache_creation_1h * p * 2.0
            )
        else:
            input_tokens = 0
            cost_usd = 0.0
        is_inflection = turn.turn_number == diagnosis.inflection_turn
        rows.append({
            "session_id": trace.session_id,
            "turn_number": str(turn.turn_number),
            "role": turn.role,
            "model": turn.model or "",
            "input_tokens": str(input_tokens),
            "cost_usd": f"{cost_usd:.6f}",
            "tool_names": ",".join(tu.tool_name for tu in turn.tool_uses),
            "finding_kinds": ",".join(finding_at.get(turn.turn_number, [])),
            "is_inflection_turn": "true" if is_inflection else "false",
        })
    return rows
55
+
56
+
57
def write(
    diagnoses: list[tuple[Diagnosis, SessionTrace]],
    out: IO[str],
) -> None:
    """Write a header row to ``out``, then one CSV row per turn of every session."""
    csv_writer = _csv.DictWriter(out, fieldnames=COLUMNS)
    csv_writer.writeheader()
    for pair in diagnoses:
        diagnosis, trace = pair
        for row in export_turn_rows(diagnosis, trace):
            csv_writer.writerow(row)
@@ -0,0 +1,64 @@
1
+ """JSONL exporter — one JSON object per session line."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from typing import IO, TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING:
8
+ from cctx.models import Diagnosis, SessionTrace
9
+
10
+
11
def export_diagnosis(
    diagnosis: Diagnosis,
    trace: SessionTrace,
    *,
    include_content: bool = True,
) -> str:
    """Serialise one diagnosis as a single-line JSON object.

    With ``include_content=False`` the free-text ``summary`` of each finding
    and the ``evidence_summary`` of each patch are left out.
    """

    def finding_entry(finding) -> dict[str, object]:
        entry: dict[str, object] = {
            "kind": finding.kind.value,
            "severity": finding.severity.value,
            "confidence": finding.confidence.value,
            "first_turn": finding.first_turn,
            "last_turn": finding.last_turn,
            "cost_usd": finding.cost_usd,
        }
        if include_content:
            entry["summary"] = finding.summary
        return entry

    def patch_entry(patch) -> dict[str, object]:
        entry: dict[str, object] = {
            "target_file": patch.target_file,
            "finding_kind": patch.finding_kind.value,
            "description": patch.description,
        }
        if include_content:
            entry["evidence_summary"] = patch.evidence_summary
        return entry

    finding_rows = [finding_entry(f) for f in diagnosis.findings]
    patch_rows = [patch_entry(p) for p in diagnosis.patches]

    payload = {
        "session_id": diagnosis.session_id,
        "analysed_at": diagnosis.analysed_at.isoformat(),
        "total_cost_usd": diagnosis.total_cost_usd,
        "waste_cost_usd": diagnosis.waste_cost_usd,
        "inflection_turn": diagnosis.inflection_turn,
        "finding_count": len(diagnosis.findings),
        "findings": finding_rows,
        "patches": patch_rows,
        "turn_count": len(trace.turns),
        "model": trace.primary_model,
    }
    return json.dumps(payload)
55
+
56
+
57
def write(
    diagnoses: list[tuple[Diagnosis, SessionTrace]],
    out: IO[str],
    *,
    include_content: bool = True,
) -> None:
    """Write one JSON line per (diagnosis, trace) pair to ``out``."""
    for diagnosis, trace in diagnoses:
        line = export_diagnosis(diagnosis, trace, include_content=include_content)
        out.write(f"{line}\n")