cctx-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cctx/harvest.py ADDED
@@ -0,0 +1,173 @@
1
+ """Harvest — apply Patch objects to CLAUDE.md on disk.
2
+
3
+ Public API:
4
+ apply_patch(patch, target_dir) -> ApplyResult
5
+ preview_patches(patches, target_dir) -> list[ApplyResult]
6
+ apply_patches(patches, target_dir) -> list[ApplyResult]
7
+
8
+ Layering rules (MUST respect):
9
+ - Does NOT import click, rich_click, or anthropic.
10
+ - Does NOT import from diagnostician or recommender.
11
+ - Receives list[Patch] from the caller (cli.py).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+ from pathlib import Path
19
+ from typing import TYPE_CHECKING
20
+
21
+ if TYPE_CHECKING:
22
+ from cctx.models import Patch
23
+
24
+
25
class ApplyStatus(str, Enum):
    """Outcome label attached to every ApplyResult.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value.
    """

    # The patch body was appended (or, in a preview, would be appended).
    APPLIED = "applied"
    # Nothing was written: unsupported target or heading already present.
    SKIPPED = "skipped"
    # An exception was caught while applying; see ApplyResult.message.
    ERROR = "error"
29
+
30
+
31
@dataclass
class ApplyResult:
    """Record of what happened (or would happen) to a single patch."""

    # The patch this result describes.
    patch: Patch
    # APPLIED, SKIPPED or ERROR.
    status: ApplyStatus
    # Location the patch targets (target directory joined with the patch's file name).
    target_path: Path
    # Human-readable explanation of the status.
    message: str
37
+
38
+
39
+ # ---------------------------------------------------------------------------
40
+ # Internal helpers
41
+ # ---------------------------------------------------------------------------
42
+
43
+
44
+ def _extract_body(unified_diff: str) -> str:
45
+ """Strip leading '+' from each line. A lone '+' becomes a blank line."""
46
+ lines = []
47
+ for line in unified_diff.splitlines():
48
+ if line.startswith("+"):
49
+ lines.append(line[1:])
50
+ else:
51
+ lines.append(line)
52
+ return "\n".join(lines)
53
+
54
+
55
+ def _fingerprint(body: str) -> str | None:
56
+ """Return the first '## ...' heading in body, or None."""
57
+ for line in body.splitlines():
58
+ if line.startswith("## "):
59
+ return line.rstrip()
60
+ return None
61
+
62
+
63
+ def _already_present(content: str, fingerprint: str) -> bool:
64
+ """Case-sensitive line-anchored match for the heading."""
65
+ pattern = re.compile(rf"^{re.escape(fingerprint)}\s*$", re.MULTILINE)
66
+ return bool(pattern.search(content))
67
+
68
+
69
+ def _is_supported_target(patch: Patch) -> bool:
70
+ return patch.target_file == "CLAUDE.md"
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Public API
75
+ # ---------------------------------------------------------------------------
76
+
77
+
78
def apply_patch(patch: Patch, target_dir: Path) -> ApplyResult:
    """Apply one patch by appending its body to the target file.

    Args:
        patch: Patch whose ``unified_diff`` holds the '+'-prefixed text to add.
        target_dir: Directory containing (or that will contain) the target file.

    Returns:
        ApplyResult with status
        - SKIPPED when the target is unsupported or the patch's first ``## ``
          heading already appears in the file,
        - APPLIED when the body was appended,
        - ERROR when any exception was caught; the message is ``str(exc)``.
    """
    # Path() makes the join work even when a caller passes a plain string.
    target_path = Path(target_dir) / patch.target_file
    try:
        # Support is checked before the diff is parsed, in the same order as
        # preview_patches, so an unsupported target is always SKIPPED.
        if not _is_supported_target(patch):
            return ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"target not supported in v0: {patch.target_file}",
            )

        body = _extract_body(patch.unified_diff)
        fp = _fingerprint(body)

        # A missing file is treated as empty; the append below creates it.
        content = target_path.read_text(encoding="utf-8") if target_path.exists() else ""

        if fp is not None and _already_present(content, fp):
            return ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"already present: {fp}",
            )

        # Keep exactly one blank line between existing text and the new
        # section; an empty file gets no leading separator.
        if not content or content.endswith("\n\n"):
            separator = ""
        elif content.endswith("\n"):
            separator = "\n"
        else:
            separator = "\n\n"

        with target_path.open("a", encoding="utf-8") as fh:
            fh.write(f"{separator}{body}\n")

        return ApplyResult(
            patch=patch,
            status=ApplyStatus.APPLIED,
            target_path=target_path,
            message=f"appended: {fp or patch.description}",
        )

    except Exception as exc:  # noqa: BLE001
        # Deliberate catch-all: failures are reported in the result, not raised.
        return ApplyResult(
            patch=patch,
            status=ApplyStatus.ERROR,
            target_path=target_path,
            message=str(exc),
        )
126
+
127
+
128
def preview_patches(patches: list[Patch], target_dir: Path) -> list[ApplyResult]:
    """Compute what applying ``patches`` would do, without writing anything.

    Args:
        patches: Patches to evaluate, in the order they would be applied.
        target_dir: Directory containing the target file.

    Returns:
        One ApplyResult per patch, in input order. Status is SKIPPED for an
        unsupported target or a heading that is already in the file or was
        claimed by an earlier patch in this run; otherwise APPLIED with a
        "would append" message.
    """
    results: list[ApplyResult] = []
    # Headings claimed by earlier patches in this run, so duplicates are
    # reported as skipped just as sequential application would.
    seen_fingerprints: set[str] = set()
    # Preview never writes, so each target file is read at most once.
    content_by_path: dict[Path, str] = {}

    for patch in patches:
        target_path = target_dir / patch.target_file

        if not _is_supported_target(patch):
            results.append(ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"target not supported in v0: {patch.target_file}",
            ))
            continue

        body = _extract_body(patch.unified_diff)
        fp = _fingerprint(body)

        if target_path not in content_by_path:
            # A missing file previews as empty content.
            content_by_path[target_path] = (
                target_path.read_text(encoding="utf-8") if target_path.exists() else ""
            )
        content = content_by_path[target_path]

        if fp is not None and (_already_present(content, fp) or fp in seen_fingerprints):
            results.append(ApplyResult(
                patch=patch,
                status=ApplyStatus.SKIPPED,
                target_path=target_path,
                message=f"already present: {fp}",
            ))
            continue

        if fp is not None:
            seen_fingerprints.add(fp)
        results.append(ApplyResult(
            patch=patch,
            status=ApplyStatus.APPLIED,
            target_path=target_path,
            message=f"would append: {fp or patch.description}",
        ))

    return results
169
+
170
+
171
def apply_patches(patches: list[Patch], target_dir: Path) -> list[ApplyResult]:
    """Run apply_patch on every patch, in order, and collect the results."""
    outcomes = []
    for item in patches:
        outcomes.append(apply_patch(item, target_dir))
    return outcomes
cctx/models.py ADDED
@@ -0,0 +1,269 @@
1
+ """Shared data model for cctx.
2
+
3
+ All dataclasses live here. Pure data containers; no behavior except
4
+ the module-level group_into_exchanges() helper.
5
+
6
+ No imports from: anthropic, click, cctx.parsers, cctx.analyzers,
7
+ cctx.renderers, cctx.exporters, cctx.tokenizer.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from datetime import datetime, timedelta
14
+ from enum import Enum
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Low-level building blocks
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
@dataclass
class Usage:
    """Token accounting for one assistant API call."""

    input_tokens: int
    output_tokens: int
    # ephemeral_5m_input_tokens
    cache_creation_5m: int
    # ephemeral_1h_input_tokens
    cache_creation_1h: int
    cache_read: int
    # "standard" | "priority" | ...
    service_tier: str | None
33
+
34
+
35
@dataclass
class ToolUse:
    """One tool_use content block found in an assistant turn."""

    tool_name: str
    tool_use_id: str
    tool_input: dict
    token_count: int = 0
    # Set when tool_name == "Agent" and the child session was found.
    subagent_session_id: str | None = None
44
+
45
+
46
@dataclass
class ToolResult:
    """One tool_result content block found in a user turn.

    ``content`` always comes from the inline JSONL content; sidecar files
    are NOT the source of truth.
    """

    # Resolved by pairing on tool_use_id.
    tool_name: str
    tool_use_id: str
    # Inline content; always populated.
    content: str
    # Parallel toolUseResult field (bash, file, etc.).
    structured: dict | None
    is_error: bool
    token_count: int = 0
60
+
61
+
62
@dataclass
class Turn:
    """Canonical form of a single JSONL line.

    Fields without defaults come first. The nullable required fields
    (parent_uuid, usage, model, stop_reason, duration_ms) deliberately have
    no default, so callers must pass them explicitly.
    """

    # 1-based index in SessionTrace.turns.
    turn_number: int
    # The JSONL line's uuid.
    uuid: str
    parent_uuid: str | None
    # "user" | "assistant" | "tool_result" | "system"
    role: str
    # Flattened text; image blocks become "<image:{media_type},{N}B>".
    text: str
    # Extended thinking is its own cost category.
    thinking: str
    tool_uses: list[ToolUse]
    tool_results: list[ToolResult]
    # Assistant turns only.
    usage: Usage | None
    # Assistant turns only.
    model: str | None
    # "end_turn" | "tool_use" | "stop_sequence" | None
    stop_reason: str | None
    # tz-aware UTC.
    timestamp: datetime
    # Gap to the next turn; None for the last turn.
    duration_ms: int | None

    # --- defaulted fields ---
    # Filled by the tokenizer pass.
    token_count: int = 0
    # Defensive insurance against future format drift.
    is_sidechain: bool = False
    # Set when isApiErrorMessage was true.
    error: str | None = None
88
+
89
+
90
@dataclass
class Attachment:
    """Classified attachment line (a JSONL line with type == "attachment")."""

    # "hook_output"|"mcp_servers"|"skills"|"allowed_tools"|"items"|"other"
    kind: str
    # Original attachment payload, verbatim.
    raw: dict
    # Convenience: extracted text content, if any.
    content: str | None
    timestamp: datetime | None
    parent_uuid: str | None
99
+
100
+
101
@dataclass
class RawToolResultFile:
    """Sidecar tool-result file found on disk (NOT read by the parser)."""

    path: Path
    size_bytes: int
    # Always None in v1; matching is deferred to v1.1.
    tool_use_id: str | None
108
+
109
+
110
+ # ---------------------------------------------------------------------------
111
+ # Error / warning types
112
+ # ---------------------------------------------------------------------------
113
+
114
+
115
def _rebuild_parser_error(cls, reason, path, line_number):
    """Recreate a ParserError (or subclass) from its pickled fields."""
    return cls(reason, path=path, line_number=line_number)


class ParserError(Exception):
    """Hard parse failure — only raised on unreadable files.

    Attributes:
        reason: Description of the failure; also the exception message.
        path: File the failure relates to.
        line_number: Line within ``path``, or None when not supplied.
    """

    def __init__(self, reason: str, *, path: Path, line_number: int | None = None) -> None:
        super().__init__(reason)
        self.reason = reason
        self.path = path
        self.line_number = line_number

    def __reduce__(self):
        # The default exception reduce re-creates the object as cls(*args),
        # which fails here because ``path`` is a required keyword-only
        # argument. Supplying the fields explicitly keeps pickle and copy working.
        return (
            _rebuild_parser_error,
            (type(self), self.reason, self.path, self.line_number),
        )
123
+
124
+
125
@dataclass
class ParserWarning:
    """Soft parse failure, recorded on SessionTrace.warnings."""

    # "unknown_type"|"malformed_json"|"orphan_agent_call"|...
    code: str
    detail: str
    line_number: int | None = None
    path: Path | None = None
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Session-level aggregate
137
+ # ---------------------------------------------------------------------------
138
+
139
+
140
@dataclass
class SessionTrace:
    """A fully-parsed session.

    Every other module works from this structure, never from raw JSONL.
    """

    session_id: str
    # Set on subagent traces.
    parent_session_id: str | None
    # Decoded from dir name: "-Users-bryan-..." → "/Users/bryan/..."
    project_path: str
    # Actual cwd observed on the lines.
    cwd: str
    # Most-frequent model; None if there are no assistant turns.
    primary_model: str | None
    claude_code_version: str | None
    turns: list[Turn]
    subagents: list[SessionTrace]
    attachments: list[Attachment]
    raw_tool_result_files: list[RawToolResultFile]
    # cache_creation_input_tokens from the first assistant turn.
    initial_context_tokens: int
    # Union of MCP names + names seen in tool_uses.
    tool_names_loaded: list[str]
    # Min timestamp; None for bookkeeping-only sessions.
    start_time: datetime | None
    # Max timestamp; None for bookkeeping-only sessions.
    end_time: datetime | None
    # The JSONL file this came from.
    source_path: Path
    # Verbatim .meta.json contents (empty for root).
    subagent_meta: dict
    warnings: list[ParserWarning]
    # Each entry: {"path": Path, "reason": str}
    subagent_parse_errors: list[dict]
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # Autopsy types — M2
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
class FindingKind(str, Enum):
    """Categories a Finding can belong to; members double as plain strings."""

    RETRY_LOOP = "retry_loop"
    SCOPE_CREEP = "scope_creep"
    STALE_CONTEXT = "stale_context"
173
+
174
+
175
class Severity(str, Enum):
    """Severity level of a Finding; members double as plain strings."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
179
+
180
+
181
class Confidence(str, Enum):
    """Confidence level of a Finding; only HIGH and MEDIUM are defined."""

    HIGH = "high"
    MEDIUM = "medium"
184
+
185
+
186
@dataclass
class Finding:
    """One diagnosed issue, with its classification and supporting data."""

    kind: FindingKind
    severity: Severity
    confidence: Confidence
    first_turn: int
    last_turn: int | None
    evidence: dict[str, Any]
    cost_usd: float | None
    summary: str
196
+
197
+
198
@dataclass
class Patch:
    """A proposed change to a file, linked to the kind of finding behind it."""

    target_file: str
    description: str
    unified_diff: str
    finding_kind: FindingKind
    evidence_summary: str
205
+
206
+
207
@dataclass
class Diagnosis:
    """Analysis result for one session: findings, patches and cost totals."""

    session_id: str
    findings: list[Finding]
    inflection_turn: int | None
    patches: list[Patch]
    total_cost_usd: float
    waste_cost_usd: float
    analysed_at: datetime
216
+
217
+
218
@dataclass
class KindEvidence:
    """Per-kind roll-up: session count, waste total and example summaries."""

    kind: FindingKind
    session_count: int
    total_waste_usd: float
    example_summaries: list[str]
224
+
225
+
226
@dataclass
class AggregateReport:
    """Roll-up across the sessions analysed within a time window."""

    window: timedelta
    sessions_analysed: int
    sessions_with_findings: int
    total_cost_usd: float
    waste_cost_usd: float
    by_kind: dict[FindingKind, KindEvidence]
    patches: list[Patch]
235
+
236
+
237
+ # ---------------------------------------------------------------------------
238
+ # Renderer helper
239
+ # ---------------------------------------------------------------------------
240
+
241
+
242
def group_into_exchanges(turns: list[Turn]) -> list[list[Turn]]:
    """Split a flat turn list into exchanges for rendering.

    A turn whose role is ``"user"`` or ``"tool_result"`` opens a new
    exchange; every following turn joins that exchange until the next
    opener.

    Turns that appear before the first opener (e.g. an initial system notice)
    form their own exchange at index 0.

    An empty input produces an empty list.
    """
    if not turns:
        return []

    grouped: list[list[Turn]] = []
    for item in turns:
        opens_exchange = item.role == "user" or item.role == "tool_result"
        if opens_exchange or not grouped:
            # Either a real opener, or the very first turn of the session.
            grouped.append([item])
        else:
            grouped[-1].append(item)
    return grouped
@@ -0,0 +1 @@
1
+ """Parsers for session log formats."""