cctx-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cctx/__init__.py +3 -0
- cctx/cli.py +375 -0
- cctx/diagnostician/__init__.py +81 -0
- cctx/diagnostician/aggregate.py +40 -0
- cctx/diagnostician/inflection.py +19 -0
- cctx/diagnostician/patterns/__init__.py +1 -0
- cctx/diagnostician/patterns/retry_loop.py +145 -0
- cctx/diagnostician/patterns/scope_creep.py +87 -0
- cctx/diagnostician/patterns/stale_context.py +147 -0
- cctx/discovery.py +185 -0
- cctx/exporters/__init__.py +0 -0
- cctx/exporters/csv.py +64 -0
- cctx/exporters/jsonl.py +64 -0
- cctx/harvest.py +173 -0
- cctx/models.py +269 -0
- cctx/parsers/__init__.py +1 -0
- cctx/parsers/claude_code.py +690 -0
- cctx/pricing.py +18 -0
- cctx/recommender/__init__.py +0 -0
- cctx/recommender/claude_md.py +131 -0
- cctx/recommender/evidence.py +46 -0
- cctx/renderers/__init__.py +0 -0
- cctx/renderers/report.py +58 -0
- cctx/renderers/templates/autopsy.html.j2 +249 -0
- cctx/renderers/terminal.py +251 -0
- cctx/renderers/trace_tui.py +291 -0
- cctx/tokenizer.py +77 -0
- cctx_cli-0.1.0.dist-info/METADATA +159 -0
- cctx_cli-0.1.0.dist-info/RECORD +31 -0
- cctx_cli-0.1.0.dist-info/WHEEL +4 -0
- cctx_cli-0.1.0.dist-info/entry_points.txt +2 -0
cctx/harvest.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Harvest — apply Patch objects to CLAUDE.md on disk.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
apply_patch(patch, target_dir) -> ApplyResult
|
|
5
|
+
preview_patches(patches, target_dir) -> list[ApplyResult]
|
|
6
|
+
apply_patches(patches, target_dir) -> list[ApplyResult]
|
|
7
|
+
|
|
8
|
+
Layering rules (MUST respect):
|
|
9
|
+
- Does NOT import click, rich_click, or anthropic.
|
|
10
|
+
- Does NOT import from diagnostician or recommender.
|
|
11
|
+
- Receives list[Patch] from the caller (cli.py).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from cctx.models import Patch
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ApplyStatus(str, Enum):
    """Outcome category attached to every ApplyResult.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # The patch body was appended (or, in a preview, would be appended).
    APPLIED = "applied"
    # Nothing was written: unsupported target or heading already present.
    SKIPPED = "skipped"
    # An exception was caught while applying the patch.
    ERROR = "error"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ApplyResult:
    """What happened (or would happen) when one patch was processed."""

    # The patch this result describes.
    patch: Patch
    # Outcome category.
    status: ApplyStatus
    # File the patch was aimed at.
    target_path: Path
    # Human-readable detail accompanying the status.
    message: str
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
# Internal helpers
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _extract_body(unified_diff: str) -> str:
|
|
45
|
+
"""Strip leading '+' from each line. A lone '+' becomes a blank line."""
|
|
46
|
+
lines = []
|
|
47
|
+
for line in unified_diff.splitlines():
|
|
48
|
+
if line.startswith("+"):
|
|
49
|
+
lines.append(line[1:])
|
|
50
|
+
else:
|
|
51
|
+
lines.append(line)
|
|
52
|
+
return "\n".join(lines)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _fingerprint(body: str) -> str | None:
|
|
56
|
+
"""Return the first '## ...' heading in body, or None."""
|
|
57
|
+
for line in body.splitlines():
|
|
58
|
+
if line.startswith("## "):
|
|
59
|
+
return line.rstrip()
|
|
60
|
+
return None
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _already_present(content: str, fingerprint: str) -> bool:
|
|
64
|
+
"""Case-sensitive line-anchored match for the heading."""
|
|
65
|
+
pattern = re.compile(rf"^{re.escape(fingerprint)}\s*$", re.MULTILINE)
|
|
66
|
+
return bool(pattern.search(content))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _is_supported_target(patch: Patch) -> bool:
|
|
70
|
+
return patch.target_file == "CLAUDE.md"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Public API
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def apply_patch(patch: Patch, target_dir: Path) -> ApplyResult:
    """Append one patch body to its target file and report the outcome.

    Steps, in order:
      1. Extract the body and its '## ' heading fingerprint from the diff.
      2. Return SKIPPED when the target file is not supported.
      3. Create the target file if it is missing, then read it.
      4. Return SKIPPED when the heading is already present in the file.
      5. Otherwise append the body, separated from existing content by a
         blank line, and return APPLIED.

    Any exception raised during these steps is caught and reported as an
    ERROR result whose message is ``str(exc)``.
    """
    target_path = target_dir / patch.target_file

    def _outcome(status: ApplyStatus, message: str) -> ApplyResult:
        # Every result shares the same patch and target path.
        return ApplyResult(
            patch=patch,
            status=status,
            target_path=target_path,
            message=message,
        )

    try:
        body = _extract_body(patch.unified_diff)
        heading = _fingerprint(body)

        if not _is_supported_target(patch):
            return _outcome(
                ApplyStatus.SKIPPED,
                f"target not supported in v0: {patch.target_file}",
            )

        if not target_path.exists():
            target_path.touch()
        existing = target_path.read_text(encoding="utf-8")

        if heading is not None and _already_present(existing, heading):
            return _outcome(ApplyStatus.SKIPPED, f"already present: {heading}")

        # Pad so exactly one blank line sits between old content and the new
        # body; an empty file or one already ending in a blank line needs none.
        if not existing or existing.endswith("\n\n"):
            separator = ""
        elif existing.endswith("\n"):
            separator = "\n"
        else:
            separator = "\n\n"

        with target_path.open("a", encoding="utf-8") as fh:
            fh.write(f"{separator}{body}\n")

        return _outcome(
            ApplyStatus.APPLIED,
            f"appended: {heading or patch.description}",
        )
    except Exception as exc:  # noqa: BLE001
        return _outcome(ApplyStatus.ERROR, str(exc))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def preview_patches(patches: list[Patch], target_dir: Path) -> list[ApplyResult]:
    """Compute what applying *patches* would do, without writing anything.

    Args:
        patches: Patches to evaluate, in the order they would be applied.
        target_dir: Directory that each patch's ``target_file`` is joined to.

    Returns:
        One ApplyResult per patch, in input order. The status is SKIPPED for
        an unsupported target or for a heading that is already in the file or
        was already claimed by an earlier patch in this run; otherwise it is
        APPLIED with a "would append: ..." message.

    Raises:
        Whatever ``Path.read_text`` raises if an existing target file cannot
        be read; unlike ``apply_patch``, errors are not converted to results.
    """
    results: list[ApplyResult] = []
    # Headings already claimed by an earlier patch in this run, so duplicates
    # within one batch are reported as skipped.
    seen_fingerprints: set[str] = set()
    # A preview never writes, so file content cannot change between patches:
    # read each target once instead of once per patch.
    content_by_path: dict[Path, str] = {}

    for patch in patches:
        target_path = target_dir / patch.target_file

        if not _is_supported_target(patch):
            results.append(ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"target not supported in v0: {patch.target_file}",
            ))
            continue

        body = _extract_body(patch.unified_diff)
        fp = _fingerprint(body)

        if target_path not in content_by_path:
            content_by_path[target_path] = (
                target_path.read_text(encoding="utf-8") if target_path.exists() else ""
            )
        content = content_by_path[target_path]

        if fp is not None and (_already_present(content, fp) or fp in seen_fingerprints):
            results.append(ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"already present: {fp}",
            ))
            continue

        if fp is not None:
            seen_fingerprints.add(fp)
        results.append(ApplyResult(
            patch=patch,
            status=ApplyStatus.APPLIED,
            target_path=target_path,
            message=f"would append: {fp or patch.description}",
        ))

    return results
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def apply_patches(patches: list[Patch], target_dir: Path) -> list[ApplyResult]:
    """Run apply_patch on each patch in order and collect the results."""
    outcomes: list[ApplyResult] = []
    for item in patches:
        outcomes.append(apply_patch(item, target_dir))
    return outcomes
|
cctx/models.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Shared data model for cctx.
|
|
2
|
+
|
|
3
|
+
All dataclasses live here. Pure data containers; no behavior except
|
|
4
|
+
the module-level group_into_exchanges() helper.
|
|
5
|
+
|
|
6
|
+
No imports from: anthropic, click, cctx.parsers, cctx.analyzers,
|
|
7
|
+
cctx.renderers, cctx.exporters, cctx.tokenizer.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from datetime import datetime, timedelta
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Low-level building blocks
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Usage:
    """Token counts recorded for one assistant API call."""

    input_tokens: int
    output_tokens: int
    # Cache-creation counts: ephemeral_5m_input_tokens and
    # ephemeral_1h_input_tokens respectively.
    cache_creation_5m: int
    cache_creation_1h: int
    cache_read: int
    # For example "standard" or "priority"; may be None.
    service_tier: str | None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ToolUse:
    """One tool_use content block found in an assistant turn."""

    tool_name: str
    tool_use_id: str
    tool_input: dict
    token_count: int = 0
    # Populated when tool_name == "Agent" and the child session was found.
    subagent_session_id: str | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class ToolResult:
    """One tool_result content block found in a user turn.

    ``content`` always comes from the inline JSONL content; sidecar files
    are not the source of truth.
    """

    # Resolved by pairing on tool_use_id.
    tool_name: str
    tool_use_id: str
    # Inline content; always populated.
    content: str
    # Parallel toolUseResult field (bash, file, etc.), when present.
    structured: dict | None
    is_error: bool
    token_count: int = 0
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass
class Turn:
    """Canonical form of a single JSONL line.

    Fields without defaults come first. The nullable required fields
    (parent_uuid, usage, model, stop_reason, duration_ms) deliberately have
    no default, so callers must always pass them explicitly.
    """

    # 1-based index in SessionTrace.turns.
    turn_number: int
    # The JSONL line's uuid.
    uuid: str
    parent_uuid: str | None
    # One of "user", "assistant", "tool_result", "system".
    role: str
    # Flattened text; image blocks appear as "<image:{media_type},{N}B>".
    text: str
    # Extended thinking, kept apart because it is its own cost category.
    thinking: str
    tool_uses: list[ToolUse]
    tool_results: list[ToolResult]
    # usage and model are set on assistant turns only.
    usage: Usage | None
    model: str | None
    # "end_turn", "tool_use", "stop_sequence", or None.
    stop_reason: str | None
    # Timezone-aware UTC.
    timestamp: datetime
    # Gap to the next turn; None for the last turn.
    duration_ms: int | None

    # --- defaulted fields ---
    # Filled in by the tokenizer pass.
    token_count: int = 0
    # Defensive insurance against future format drift.
    is_sidechain: bool = False
    # Set when isApiErrorMessage was true.
    error: str | None = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class Attachment:
    """Classified form of a JSONL line whose type is "attachment"."""

    # One of "hook_output", "mcp_servers", "skills", "allowed_tools",
    # "items", or "other".
    kind: str
    # Original attachment payload, verbatim.
    raw: dict
    # Convenience copy of the extracted text content, if any.
    content: str | None
    timestamp: datetime | None
    parent_uuid: str | None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class RawToolResultFile:
    """Sidecar tool-result file found on disk; the parser does NOT read it."""

    path: Path
    size_bytes: int
    # Always None in v1; matching is deferred to v1.1.
    tool_use_id: str | None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Error / warning types
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ParserError(Exception):
    """Hard parse failure — only raised on unreadable files.

    Attributes:
        reason: Human-readable explanation; also the exception's sole
            positional argument, so ``str(exc)`` yields it.
        path: File the failure relates to.
        line_number: Line the failure relates to, or None.
    """

    def __init__(self, reason: str, *, path: Path, line_number: int | None = None) -> None:
        super().__init__(reason)
        self.reason = reason
        self.path = path
        self.line_number = line_number

    def __reduce__(self):
        """Support pickling and ``copy.copy``.

        The default exception reduce rebuilds the instance from ``self.args``
        alone (just ``reason``), which fails here because ``path`` is a
        required keyword-only argument. Rebuild through a partial that
        supplies the keyword arguments instead.
        """
        # Local import keeps this block independent of the module's imports.
        from functools import partial

        rebuild = partial(type(self), path=self.path, line_number=self.line_number)
        # Third element restores any extra instance attributes as well.
        return rebuild, (self.reason,), vars(self)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@dataclass
class ParserWarning:
    """Soft parse failure, recorded on SessionTrace.warnings."""

    # Short code such as "unknown_type", "malformed_json",
    # "orphan_agent_call", ...
    code: str
    detail: str
    line_number: int | None = None
    path: Path | None = None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Session-level aggregate
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@dataclass
class SessionTrace:
    """A fully parsed session.

    Every other module works from this object, never from raw JSONL.
    """

    session_id: str
    # Set on subagent traces.
    parent_session_id: str | None
    # Decoded from the directory name: "-Users-bryan-..." -> "/Users/bryan/...".
    project_path: str
    # Actual cwd observed on the lines.
    cwd: str
    # Most frequent model; None if there are no assistant turns.
    primary_model: str | None
    claude_code_version: str | None
    turns: list[Turn]
    subagents: list[SessionTrace]
    attachments: list[Attachment]
    raw_tool_result_files: list[RawToolResultFile]
    # cache_creation_input_tokens from the first assistant turn.
    initial_context_tokens: int
    # Union of MCP names and names seen in tool_uses.
    tool_names_loaded: list[str]
    # Min / max timestamp; both None for bookkeeping-only sessions.
    start_time: datetime | None
    end_time: datetime | None
    # The JSONL file this trace came from.
    source_path: Path
    # Verbatim .meta.json contents (empty for the root session).
    subagent_meta: dict
    warnings: list[ParserWarning]
    # Entries shaped like {"path": Path, "reason": str}.
    subagent_parse_errors: list[dict]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
# Autopsy types — M2
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class FindingKind(str, Enum):
    """Category of a finding; members are also plain ``str`` values."""

    RETRY_LOOP = "retry_loop"
    SCOPE_CREEP = "scope_creep"
    STALE_CONTEXT = "stale_context"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class Severity(str, Enum):
    """Severity level of a finding; members are also plain ``str`` values."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class Confidence(str, Enum):
    """Confidence level of a finding; only HIGH and MEDIUM are defined."""

    HIGH = "high"
    MEDIUM = "medium"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass
class Finding:
    """One diagnosed issue within a session."""

    # Classification of the finding.
    kind: FindingKind
    severity: Severity
    confidence: Confidence
    # Turn range the finding covers; last_turn may be None.
    first_turn: int
    last_turn: int | None
    # Free-form supporting data keyed by string.
    evidence: dict[str, Any]
    # May be None.
    cost_usd: float | None
    summary: str
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass
class Patch:
    """A proposed change to a target file, tied to a finding kind."""

    # Name of the file the patch is aimed at.
    target_file: str
    description: str
    # Patch text.
    unified_diff: str
    finding_kind: FindingKind
    evidence_summary: str
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@dataclass
class Diagnosis:
    """Analysis result for a single session."""

    session_id: str
    findings: list[Finding]
    # May be None.
    inflection_turn: int | None
    patches: list[Patch]
    # Cost figures in USD.
    total_cost_usd: float
    waste_cost_usd: float
    analysed_at: datetime
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@dataclass
class KindEvidence:
    """Aggregated evidence for one finding kind."""

    kind: FindingKind
    session_count: int
    total_waste_usd: float
    example_summaries: list[str]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@dataclass
class AggregateReport:
    """Roll-up of findings across the sessions in a time window."""

    window: timedelta
    sessions_analysed: int
    sessions_with_findings: int
    # Cost figures in USD.
    total_cost_usd: float
    waste_cost_usd: float
    # Per-kind evidence, keyed by finding kind.
    by_kind: dict[FindingKind, KindEvidence]
    patches: list[Patch]
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
# Renderer helper
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def group_into_exchanges(turns: list[Turn]) -> list[list[Turn]]:
    """Split a flat turn list into exchanges for rendering.

    Every turn whose role is ``"user"`` or ``"tool_result"`` opens a new
    exchange; each following turn with any other role joins that exchange
    until the next opener.

    Turns that come before the first opener (for example an initial system
    notice) form their own exchange at index 0.

    An empty input yields an empty list.
    """
    if not turns:
        return []

    openers = ("user", "tool_result")
    grouped: list[list[Turn]] = []

    for item in turns:
        # The very first turn always opens an exchange, whatever its role.
        if not grouped or item.role in openers:
            grouped.append([item])
        else:
            grouped[-1].append(item)

    return grouped
|
cctx/parsers/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Parsers for session log formats."""
|