haid 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. haid/__init__.py +9 -0
  2. haid/__main__.py +4 -0
  3. haid/bridge/__init__.py +172 -0
  4. haid/bridge/reconstruct.py +222 -0
  5. haid/bridge/usage.py +71 -0
  6. haid/cli.py +612 -0
  7. haid/data/anchor_diffs/U00.diff +378 -0
  8. haid/data/anchor_diffs/U01.diff +317 -0
  9. haid/data/anchor_diffs/U07.diff +218 -0
  10. haid/data/anchor_diffs/U10.diff +129 -0
  11. haid/data/anchor_diffs/U11.diff +352 -0
  12. haid/data/anchor_diffs/U13.diff +135 -0
  13. haid/data/anchor_diffs/U16.diff +152 -0
  14. haid/data/anchor_diffs/U18.diff +254 -0
  15. haid/data/anchor_diffs/U19.diff +403 -0
  16. haid/data/anchor_diffs/U22.diff +144 -0
  17. haid/data/anchor_diffs/U24.diff +337 -0
  18. haid/data/anchor_diffs/U29.diff +43 -0
  19. haid/data/anchor_diffs/U37.diff +38 -0
  20. haid/data/anchor_diffs/U39.diff +94 -0
  21. haid/data/anchor_diffs/U40.diff +339 -0
  22. haid/data/anchor_diffs/U43.diff +51 -0
  23. haid/data/anchor_diffs/U46.diff +159 -0
  24. haid/data/anchor_diffs/U48.diff +290 -0
  25. haid/data/anchor_diffs/U50.diff +323 -0
  26. haid/data/cleanliness_anchors.json +282 -0
  27. haid/data/difficulty_anchors.json +53 -0
  28. haid/data/metric_baselines.json +184 -0
  29. haid/data/treatments.json +356 -0
  30. haid/diffio.py +139 -0
  31. haid/episodes/__init__.py +110 -0
  32. haid/episodes/grouping.py +112 -0
  33. haid/episodes/model.py +77 -0
  34. haid/episodes/score.py +188 -0
  35. haid/episodes/segment.py +163 -0
  36. haid/episodes/summarize.py +64 -0
  37. haid/filekind.py +100 -0
  38. haid/graph/__init__.py +19 -0
  39. haid/graph/bash_read.py +229 -0
  40. haid/graph/bash_write.py +201 -0
  41. haid/graph/build.py +248 -0
  42. haid/graph/model.py +130 -0
  43. haid/graph/signature.py +49 -0
  44. haid/intent/__init__.py +90 -0
  45. haid/intent/classify.py +132 -0
  46. haid/intent/messages.py +110 -0
  47. haid/intent/taxonomy.py +100 -0
  48. haid/metrics/__init__.py +68 -0
  49. haid/metrics/base.py +112 -0
  50. haid/metrics/baseline.py +64 -0
  51. haid/metrics/json_out.py +171 -0
  52. haid/metrics/rereads.py +136 -0
  53. haid/metrics/retouched.py +75 -0
  54. haid/metrics/retries.py +108 -0
  55. haid/metrics/unused_context.py +68 -0
  56. haid/metrics/view.py +114 -0
  57. haid/report/__init__.py +21 -0
  58. haid/report/benchmark.py +114 -0
  59. haid/report/compose.py +419 -0
  60. haid/report/treatments.py +107 -0
  61. haid/scoring/__init__.py +13 -0
  62. haid/scoring/anchors.py +70 -0
  63. haid/scoring/compare.py +272 -0
  64. haid/scoring/cost.py +230 -0
  65. haid/scoring/placement.py +80 -0
  66. haid/scoring/value.py +233 -0
  67. haid/scoring/volume.py +84 -0
  68. haid/session/__init__.py +28 -0
  69. haid/session/cache.py +105 -0
  70. haid/session/discover.py +56 -0
  71. haid/session/forest.py +192 -0
  72. haid/session/loader.py +96 -0
  73. haid/session/overflow.py +81 -0
  74. haid/session/parse.py +74 -0
  75. haid/session/records.py +153 -0
  76. haid/session/subagents.py +72 -0
  77. haid/why/__init__.py +64 -0
  78. haid/why/anchors.py +69 -0
  79. haid/why/investigate.py +144 -0
  80. haid/why/prompts.py +181 -0
  81. haid/window.py +71 -0
  82. haid-0.0.1.dist-info/METADATA +144 -0
  83. haid-0.0.1.dist-info/RECORD +86 -0
  84. haid-0.0.1.dist-info/WHEEL +5 -0
  85. haid-0.0.1.dist-info/entry_points.txt +2 -0
  86. haid-0.0.1.dist-info/top_level.txt +1 -0
haid/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """HAID — "How Am I Doing": local-only self-audit & coaching for Claude Code sessions.
2
+
3
+ This package is the product code (stdlib only). The scoring subpackage places a session
4
+ diff against fixed reference ladders to produce relative achievement scores; the model
5
+ judgment those placements need is delegated to the host agent (Claude Code subagents),
6
+ never an in-process API call — see haid.scoring.compare.
7
+ """
8
+
9
+ __version__ = "0.0.1"
haid/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
haid/bridge/__init__.py ADDED
@@ -0,0 +1,172 @@
1
+ """The bridge: window → (diff, usage) — the join between the real-session pipeline and the
2
+ scoring stack.
3
+
4
+ The scorer (volume / difficulty / cleanliness / value) was built and validated against
5
+ calibration diffs; the session pipeline (session → graph → metrics) ingests real transcripts.
6
+ This package connects them: given an analysis window it produces the two inputs the scorer
7
+ needs — a reconstructed unified **diff** and a normalized-token **cost** — so `haid value` runs
8
+ on real work.
9
+
10
+ Design (recorded in the project notes, decided after measuring the gap):
11
+ - **Replay-primary, no git.** The diff is reconstructed from the transcript (see
12
+ `reconstruct`). The bash-write-to-source gap was measured at ~0–1% on real projects; what
13
+ little it misses is detected and FLAGGED, never silently dropped.
14
+ - **Grain-agnostic core.** `window_inputs` slices the whole window; the same engine will slice
15
+ by episode once episodes exist (Phase 2 — episode↔PR alignment is explicitly TBD, not v1).
16
+
17
+ Stdlib only; no model.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import re
23
+ from dataclasses import dataclass, field
24
+
25
+ from .reconstruct import FileRecon, ReconResult, reconstruct
26
+ from .usage import extract_cost
27
+
28
+ __all__ = ["BridgeResult", "window_inputs", "episode_inputs", "reconstruct", "extract_cost",
29
+ "FileRecon", "ReconResult"]
30
+
31
# Matches an absolute path: posix root, drive letter, or UNC prefix.
_ABS = re.compile(r"^(?:/|[A-Za-z]:[\\/]|\\\\)")


def _is_external(file_id: str) -> bool:
    """Return True when ``file_id`` is an absolute path rather than a repo-relative id.

    Absolute ids (temp files, other repos, /etc) are treated as outside the project work
    product, so callers keep them out of the scored diff.
    """
    return _ABS.match(file_id) is not None
39
+
40
+
41
@dataclass
class BridgeResult:
    """The scorer inputs built for one slice of work, plus data kept for inspection."""

    diff: str  # reconstructed unified diff (scorer input)
    cost: object  # cost.CostResult (scorer denominator)
    files: list = field(default_factory=list)  # per-file FileRecon (kept for inspection)
    caveats: list = field(default_factory=list)  # honesty surface — no silent gaps

    def summary(self) -> str:
        """Return a multi-line text summary: file counts, the cost summary, then caveats."""
        n_changed = sum(1 for entry in self.files if entry.changed)
        n_incomplete = sum(1 for entry in self.files if not entry.complete)
        headline = (f"bridge: {n_changed} changed file(s) reconstructed, "
                    f"{n_incomplete} flagged incomplete")
        parts = [headline, self.cost.summary()]
        if self.caveats:
            parts.append("caveats:")
            for caveat in self.caveats:
                parts.append(f" {caveat}")
        return "\n".join(parts)
58
+
59
+
60
def window_inputs(view, sessions) -> BridgeResult:
    """Build the scorer inputs (diff, cost) for a whole analysis window.

    Args:
        view: object whose ``active_stream`` yields ``(session_id, tool_call)`` pairs in
            order (a metrics.WindowView per the package notes).
        sessions: the loaded Session objects; they supply tool results, baselines,
            subagent records and token usage.

    Returns:
        A BridgeResult carrying the reconstructed diff, the cost, the per-file
        reconstructions, and every caveat raised along the way.
    """
    from ..graph.model import is_write

    results_by_call = _tur_index(sessions)
    ordered_writes = []
    external_count = 0
    for _session_id, call in view.active_stream:
        # Only write calls that resolved to a file take part in the diff.
        if not (is_write(call) and call.target_file_id):
            continue
        target = call.target_file_id
        if _is_external(target):
            external_count += 1
            continue
        ordered_writes.append((
            target,
            call.tool,
            results_by_call.get(call.id, {}),
            call.write_op,
            call.write_content,
            call.derived_write,
        ))

    recon = reconstruct(ordered_writes, baselines=_baselines(sessions))
    recon.excluded_external = external_count

    notes = list(recon.caveats)
    if external_count:
        notes.append(f"{external_count} write(s) to files outside the project tree "
                     "(temp / other repos) excluded from the diff")
    pending_subagent = _subagent_write_count(sessions)
    if pending_subagent:
        notes.append(f"{pending_subagent} subagent file-write call(s) are not yet folded into "
                     "the diff (subagent edit stitching is deferred)")

    return BridgeResult(
        diff=recon.diff,
        cost=extract_cost(sessions),
        files=recon.files,
        caveats=notes,
    )
97
+
98
+
99
def episode_inputs(episode_sessions) -> BridgeResult:
    """Build the scorer inputs (diff, cost) for one episode, given as a set of whole sessions.

    This is `window_inputs` run over a view built from just these sessions, so both the
    diff baselines and the cost are derived from this subset only.
    """
    from ..window import build_view

    return window_inputs(build_view(episode_sessions), episode_sessions)
113
+
114
+
115
def _tur_index(sessions) -> dict:
    """Map tool_use id -> toolUseResult dict over main and subagent records of every session.

    A record contributes only when its raw ``toolUseResult`` is a dict and its content is a
    list; the key is the ``tool_use_id`` of the first ``tool_result`` block that has one.
    A later record with the same id overwrites an earlier one.
    """
    index: dict[str, dict] = {}
    for session in sessions:
        records = list(session.parse.records)
        for agent in session.subagents:
            records.extend(agent.parse.records)
        for record in records:
            result = record.raw.get("toolUseResult")
            if not isinstance(result, dict):
                continue
            blocks = record.content
            if not isinstance(blocks, list):
                continue
            call_id = next(
                (blk["tool_use_id"] for blk in blocks
                 if isinstance(blk, dict)
                 and blk.get("type") == "tool_result"
                 and blk.get("tool_use_id")),
                None,
            )
            if call_id is not None:
                index[call_id] = result
    return index
135
+
136
+
137
def _baselines(sessions) -> dict:
    """Map file_id -> the first captured ``originalFile`` content for that file.

    Sessions are visited in order of their earliest main-record timestamp; within a session
    the main records come first, then each subagent's. The first record carrying a non-None
    ``originalFile`` for a file id wins. Paths are turned into ids with ``_file_id`` using the
    first ``cwd`` found on the session's main records. Files with no captured ``originalFile``
    are simply absent from the result.
    """
    from ..graph.build import _file_id

    def earliest_stamp(session):
        stamps = [rec.timestamp for rec in session.parse.records if rec.timestamp]
        return min(stamps) if stamps else ""

    seeds: dict[str, str] = {}
    for session in sorted(sessions, key=earliest_stamp):
        cwd = None
        for rec in session.parse.records:
            candidate = rec.raw.get("cwd")
            if candidate:
                cwd = candidate
                break

        records = list(session.parse.records)
        for agent in session.subagents:
            records.extend(agent.parse.records)

        for rec in records:
            result = rec.raw.get("toolUseResult")
            if not isinstance(result, dict):
                continue
            if result.get("originalFile") is None:
                continue
            path = result.get("filePath") or (result.get("file") or {}).get("filePath")
            fid = _file_id(path, cwd)
            if fid:
                # keep the first capture only
                seeds.setdefault(fid, result["originalFile"])
    return seeds
161
+
162
+
163
def _subagent_write_count(sessions) -> int:
    """Count subagent tool calls that write to a non-external target file.

    A graph is built per subagent from its records; a call counts when ``is_write`` accepts
    it, it has a target file id, and that id is not external.
    """
    from ..graph.model import is_write
    from ..graph.build import build_graph

    total = 0
    for session in sessions:
        for agent in session.subagents:
            graph = build_graph(agent.parse.records)
            for call in graph.toolcalls.values():
                if is_write(call) and call.target_file_id \
                        and not _is_external(call.target_file_id):
                    total += 1
    return total
haid/bridge/reconstruct.py ADDED
@@ -0,0 +1,222 @@
1
+ """Reconstruct the net code diff a body of work produced — from the transcript alone.
2
+
3
+ This is the diff half of the window→(diff, usage) bridge: the join between the real-session
4
+ pipeline (session→graph) and the scoring stack (volume/difficulty/cleanliness), which until
5
+ now only ever saw calibration diffs. It is **replay-primary, no git** (decision recorded after
6
+ measuring the bash-write-to-source gap at ~0–1% across three real projects; see the project
7
+ notes). The same data Claude Code's own rewind uses:
8
+
9
+ - Edit/MultiEdit → `originalFile` (full pre-edit content) + exact `oldString`→`newString`.
10
+ - Write → full `content` (and `originalFile` for overwrites; None on create).
11
+ - Bash heredoc → recovered `write_content` (see graph/bash_write.parse_heredoc_write).
12
+
13
+ Two reconstruction modes, picked per file:
14
+
15
+ * **buffer (preferred)** — when we have the file's content as it entered the window (the
16
+ earliest captured `originalFile`), we replay every write onto a running string and emit
17
+ `unified_diff(baseline, final)`. This is **net by construction** (a line written then
18
+ rewritten appears once, in final form — exactly what `volume` wants; the churn lives on
19
+ the cost side) and **self-detects gaps**: each edit's `originalFile` must equal our running
20
+ content, so an untracked shell write in between is caught and flagged, never silently wrong.
21
+ * **hunks (fallback)** — Claude Code omits `originalFile` on some edits (e.g. large files),
22
+ so a pre-existing file may have no full baseline anywhere in the window. There we emit the
23
+ edits' `structuredPatch` hunks directly (always present). Correct for the changed lines,
24
+ but flagged: overlapping re-edits of the same lines can double-count (no net dedup).
25
+
26
+ No silent caps — every shortfall lands in `FileRecon.reasons` and surfaces as a caveat.
27
+ Grain-agnostic: `reconstruct()` takes an ordered list of writes, so the caller slices by
28
+ window now and by episode later. Stdlib only.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import difflib
34
+ from dataclasses import dataclass, field
35
+
36
+ _NATIVE_EDIT = {"Edit", "MultiEdit"}
37
+ _NATIVE_WRITE = {"Write"}
38
+
39
+
40
@dataclass
class FileRecon:
    """Reconstruction state for a single file: mode, contents, and honesty flags."""

    file_id: str
    mode: str = "buffer"  # "buffer" | "hunks"
    baseline: str = ""
    final: str = ""
    hunks: list = field(default_factory=list)  # structuredPatch hunks (hunks mode)
    ops: int = 0
    complete: bool = True
    reasons: list[str] = field(default_factory=list)

    @property
    def changed(self) -> bool:
        """True if any hunk exists (hunks mode) or baseline and final differ (buffer mode)."""
        if self.mode == "hunks":
            return bool(self.hunks)
        return self.baseline != self.final

    def _flag(self, reason: str) -> None:
        """Mark the file incomplete and record ``reason`` once."""
        self.complete = False
        if reason in self.reasons:
            return
        self.reasons.append(reason)
60
+
61
+
62
@dataclass
class ReconResult:
    """Outcome of a reconstruction run: the combined diff plus per-file detail."""

    diff: str  # concatenated git-style unified diff
    files: list[FileRecon]
    caveats: list[str] = field(default_factory=list)
    excluded_external: int = 0  # writes to paths outside the project tree

    @property
    def incomplete(self) -> list[FileRecon]:
        """The files whose ``complete`` flag is false, in their original order."""
        flagged = []
        for entry in self.files:
            if not entry.complete:
                flagged.append(entry)
        return flagged
72
+
73
+
74
+ # --- per-tool application ---------------------------------------------------------------
75
+
76
def _seed_baseline(fr: FileRecon, original, baselines: dict, fid: str) -> bool:
    """Try to put ``fr`` into buffer mode with a known starting content.

    The seed is ``original`` when it is not None, otherwise ``baselines.get(fid)``. With a
    seed, ``fr`` is set to buffer mode with baseline and final both equal to it and True is
    returned. Without one, ``fr`` is left untouched and False is returned.
    """
    seed = baselines.get(fid) if original is None else original
    if seed is None:
        return False
    fr.mode = "buffer"
    fr.baseline = fr.final = seed
    return True
85
+
86
+
87
def _apply_native_edit(fr: FileRecon, tur: dict, first: bool, baselines: dict) -> None:
    """Replay one Edit/MultiEdit tool result onto ``fr`` (updated in place).

    Args:
        fr: per-file reconstruction state.
        tur: the call's toolUseResult dict. Reads ``originalFile``, ``structuredPatch`` and
            either ``edits`` (dicts with ``old_string`` / ``new_string`` / ``replace_all``)
            or the single-edit keys ``oldString`` / ``newString`` / ``replaceAll``.
        first: True when this is the first write seen for the file.
        baselines: file_id -> starting content, used to seed buffer mode when the edit has
            no ``originalFile`` of its own.

    Every shortfall is recorded through ``fr._flag``; nothing is raised.
    """
    original = tur.get("originalFile")
    if first and not _seed_baseline(fr, original, baselines, fr.file_id):
        fr.mode = "hunks"
        fr._flag("no full baseline captured for this pre-existing file — reconstructed from "
                 "diff hunks (overlapping re-edits may double-count)")
    if fr.mode == "hunks":
        # No running buffer to edit: collect the patch hunks as given.
        fr.hunks.extend(tur.get("structuredPatch") or [])
        return
    if not first and original is not None and fr.final != original:
        # The file changed behind our back; trust the captured pre-edit content.
        fr._flag("untracked change before an edit (resynced to the file's actual state)")
        fr.final = original

    pairs = tur.get("edits") or [{"old_string": tur.get("oldString", ""),
                                  "new_string": tur.get("newString", ""),
                                  "replace_all": tur.get("replaceAll", False)}]
    for e in pairs:
        old, new = e.get("old_string", ""), e.get("new_string", "")
        if old == "":  # pure insertion into the buffer
            if not fr.final or new.startswith(fr.final):
                # Empty buffer, or `new` already begins with the current content: `new` is
                # the resulting text. (Previously the second case kept the old buffer and
                # silently dropped the added content.)
                fr.final = new
            else:
                fr.final = fr.final + new
            continue
        if old not in fr.final:
            fr._flag("edit oldString not found in reconstructed content")
            continue
        if e.get("replace_all"):
            fr.final = fr.final.replace(old, new)
        else:
            fr.final = fr.final.replace(old, new, 1)
112
+
113
+
114
def _apply_native_write(fr: FileRecon, tur: dict, first: bool, baselines: dict) -> None:
    """Replay one Write tool result onto ``fr`` (updated in place).

    On the first touch the baseline is seeded from ``originalFile`` or ``baselines``; with
    neither, the file starts empty and is flagged if the result carries patch hunks. On
    later touches a mismatch between ``originalFile`` and the running content is flagged and
    resynced. The running content then becomes ``content``, unless ``content`` is missing,
    which is flagged instead.
    """
    new_content = tur.get("content")
    original = tur.get("originalFile")
    patch = tur.get("structuredPatch") or []

    if first:
        seeded = _seed_baseline(fr, original, baselines, fr.file_id)
        if not seeded:
            # create (patch empty) or unknown overwrite
            fr.baseline = fr.final = ""
            if patch:
                fr._flag("Write overwrote an existing file with no captured baseline")
    elif original is not None and fr.final != original:
        fr._flag("untracked change before a write (resynced to the file's actual state)")
        fr.final = original

    if new_content is not None:
        fr.final = new_content
    else:
        fr._flag("Write result had no content")
130
+
131
+
132
def _apply_shell_write(fr: FileRecon, op: str | None, content: str | None, first: bool) -> None:
    """Replay one shell (Bash) write onto ``fr`` (updated in place).

    Shell writes carry no originalFile, so a first touch starts from empty content and an
    append as first touch is flagged. Unrecoverable content is flagged and leaves the buffer
    alone. Otherwise an ``append`` extends the buffer and any other op replaces it; replacing
    an already-tracked file is flagged.
    """
    is_append = op == "append"

    if first:
        # Pre-state unknown: start from nothing.
        fr.baseline = fr.final = ""
        if is_append:
            fr._flag("shell append as first write — prior file content is unknown")

    if content is None:
        fr._flag(f"shell {op or 'write'} content unrecoverable (sed -i / plain redirect)")
        return

    if is_append:
        fr.final = fr.final + content
        return

    if not first:
        fr._flag("shell overwrite of a tracked file (prior content replaced)")
    fr.final = content
148
+
149
+
150
+ # --- the engine -------------------------------------------------------------------------
151
+
152
def reconstruct(writes, baselines: dict | None = None) -> ReconResult:
    """Reconstruct per-file diffs from an ordered list of writes.

    Args:
        writes: iterable of ``(file_id, tool, tur, write_op, write_content, derived)``
            tuples, in the order they should be replayed.
        baselines: optional file_id -> starting content, used to seed buffer mode when a
            native edit or write has no ``originalFile`` of its own.

    Returns:
        A ReconResult whose ``files`` follow first-touch order, whose ``diff`` concatenates
        the blocks of the changed files, and whose ``caveats`` describe incomplete files.
    """
    seeds = baselines or {}
    by_file: dict[str, FileRecon] = {}  # insertion order == first-touch order
    for file_id, tool, tur, write_op, write_content, derived in writes:
        first = file_id not in by_file
        if first:
            by_file[file_id] = FileRecon(file_id=file_id)
        fr = by_file[file_id]
        fr.ops += 1
        result = tur or {}

        if derived or tool == "Bash":
            _apply_shell_write(fr, write_op, write_content, first)
        elif tool in _NATIVE_WRITE:
            _apply_native_write(fr, result, first, seeds)
        elif tool in _NATIVE_EDIT:
            _apply_native_edit(fr, result, first, seeds)
        else:
            fr._flag(f"unhandled write tool {tool!r}")

    files = list(by_file.values())
    blocks = [_emit(fr) for fr in files if fr.changed]
    return ReconResult(diff="".join(blocks), files=files, caveats=_caveats(files))
184
+
185
+
186
def _caveats(files: list[FileRecon]) -> list[str]:
    """Return caveat lines for the incomplete files, or an empty list if there are none.

    The first line is a count header; one line per incomplete file follows, joining that
    file's reasons with "; ".
    """
    flagged = [entry for entry in files if not entry.complete]
    if not flagged:
        return []
    header = (f"{len(flagged)} of {len(files)} changed file(s) could not be fully reconstructed "
              "from the transcript — the diff may be incomplete for these:")
    lines = [header]
    for entry in flagged:
        lines.append(f" - {entry.file_id}: " + "; ".join(entry.reasons))
    return lines
194
+
195
+
196
def _emit(fr: FileRecon) -> str:
    """Render one file's diff block, using hunks or full contents according to its mode."""
    return (_hunks_diff(fr.file_id, fr.hunks) if fr.mode == "hunks"
            else _file_diff(fr.file_id, fr.baseline, fr.final))
200
+
201
+
202
def _file_diff(path: str, baseline: str, final: str) -> str:
    """Build a git-style unified-diff block from full before/after content.

    An empty ``baseline`` is rendered as a new file (from ``/dev/null``); an empty ``final``
    with a non-empty baseline is rendered as a deletion (to ``/dev/null``).
    """
    old_label = f"a/{path}"
    new_label = f"b/{path}"
    if baseline == "":
        old_label = "/dev/null"
    elif final == "":
        new_label = "/dev/null"

    diff_lines = list(difflib.unified_diff(
        baseline.splitlines(),
        final.splitlines(),
        fromfile=old_label,
        tofile=new_label,
        lineterm="",
    ))
    header = f"diff --git a/{path} b/{path}\n"
    return header + "\n".join(diff_lines) + "\n"
211
+
212
+
213
def _hunks_diff(path: str, hunks: list) -> str:
    """Assemble a git-style block directly from structuredPatch hunks (fallback mode).

    Entries that are not dicts are skipped; missing hunk coordinates default to 0 and a
    missing or empty ``lines`` list contributes only the hunk header.
    """
    rendered = [
        f"diff --git a/{path} b/{path}",
        f"--- a/{path}",
        f"+++ b/{path}",
    ]
    for hunk in hunks:
        if not isinstance(hunk, dict):
            continue
        old_span = f"-{hunk.get('oldStart', 0)},{hunk.get('oldLines', 0)}"
        new_span = f"+{hunk.get('newStart', 0)},{hunk.get('newLines', 0)}"
        rendered.append(f"@@ {old_span} {new_span} @@")
        rendered += hunk.get("lines") or []
    return "\n".join(rendered) + "\n"
haid/bridge/usage.py ADDED
@@ -0,0 +1,71 @@
1
+ """Extract the cost denominator (normalized tokens) from a window's sessions.
2
+
3
+ The easy half of the bridge: walk every assistant record's `message.usage` block, map it to a
4
+ `cost.Usage`, and fold with `cost.measure`. Two deliberate choices:
5
+
6
+ * **Cost counts ALL branches, including abandoned ones** — you paid for the tokens spent on a
7
+ rewound/abandoned attempt even though its code didn't survive. (The DIFF, by contrast, is
8
+ the *active* end-state only — that asymmetry is the point.) `parse.records` is the full,
9
+ uuid-deduped record set across all branches, so summing over it is correct by construction.
10
+ * **Subagent tokens count** — a spawned agent's tokens are real spend, so we include each
11
+ subagent's records too.
12
+
13
+ Process costs (turns, tool-calls, compactions, wall-clock) are carried separately by
14
+ `cost.CostResult`, never folded into the token total. Stdlib only; no model.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from datetime import datetime
20
+
21
+ from ..scoring import cost
22
+
23
+
24
def _all_records(session):
    """Yield the session's main records, then the records of each of its subagents."""
    for record in session.parse.records:
        yield record
    for agent in session.subagents:
        for record in agent.parse.records:
            yield record
28
+
29
+
30
def extract_cost(sessions) -> cost.CostResult:
    """Measure normalized-token cost over every record of every session (subagents included).

    Each record with a dict ``message.usage`` contributes one ``cost.Usage`` (tagged with the
    message's model). Alongside that, the walk counts tool_use blocks on assistant records,
    user prompts, and ``compact_boundary`` system records, and gathers timestamps for the
    wall-clock span. Everything is handed to ``cost.measure``.
    """
    usages: list[cost.Usage] = []
    stamps: list[str] = []
    n_turns = 0
    n_tool_calls = 0
    n_compactions = 0

    for session in sessions:
        for record in _all_records(session):
            message = record.raw.get("message") or {}
            usage = message.get("usage")
            if isinstance(usage, dict):
                usages.append(cost.Usage.from_dict({**usage, "model": message.get("model", "")}))

            if record.type == "assistant" and isinstance(record.content, list):
                for blk in record.content:
                    if isinstance(blk, dict) and blk.get("type") == "tool_use":
                        n_tool_calls += 1

            if record.is_user_prompt():
                n_turns += 1

            raw = record.raw
            if raw.get("type") == "system" and raw.get("subtype") == "compact_boundary":
                n_compactions += 1

            if record.timestamp:
                stamps.append(record.timestamp)

    return cost.measure(
        usages,
        turns=n_turns,
        tool_calls=n_tool_calls,
        compactions=n_compactions,
        wall_clock_s=_wall_clock(stamps),
    )
61
+
62
+
63
def _wall_clock(timestamps: list[str]) -> float | None:
    """Return the seconds between the earliest and latest timestamp, or None.

    Args:
        timestamps: ISO-8601 strings; a trailing ``Z`` is read as ``+00:00``.

    Returns:
        The non-negative span in seconds, or None when fewer than two timestamps can be
        parsed or when naive and offset-aware values are mixed and so cannot be compared.

    The instants are compared as datetimes, not as strings: string order is wrong when
    fractional-second precision or UTC offsets differ between entries, which could produce
    a negative span.
    """
    if len(timestamps) < 2:
        return None
    moments = []
    for stamp in timestamps:
        try:
            moments.append(datetime.fromisoformat(stamp.replace("Z", "+00:00")))
        except ValueError:
            continue  # best effort: one malformed stamp should not void the measure
    if len(moments) < 2:
        return None
    try:
        return (max(moments) - min(moments)).total_seconds()
    except TypeError:
        # naive and aware datetimes cannot be ordered or subtracted
        return None