haid 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. haid-0.0.1/PKG-INFO +144 -0
  2. haid-0.0.1/README.md +122 -0
  3. haid-0.0.1/pyproject.toml +39 -0
  4. haid-0.0.1/setup.cfg +4 -0
  5. haid-0.0.1/src/haid/__init__.py +9 -0
  6. haid-0.0.1/src/haid/__main__.py +4 -0
  7. haid-0.0.1/src/haid/bridge/__init__.py +172 -0
  8. haid-0.0.1/src/haid/bridge/reconstruct.py +222 -0
  9. haid-0.0.1/src/haid/bridge/usage.py +71 -0
  10. haid-0.0.1/src/haid/cli.py +612 -0
  11. haid-0.0.1/src/haid/data/anchor_diffs/U00.diff +378 -0
  12. haid-0.0.1/src/haid/data/anchor_diffs/U01.diff +317 -0
  13. haid-0.0.1/src/haid/data/anchor_diffs/U07.diff +218 -0
  14. haid-0.0.1/src/haid/data/anchor_diffs/U10.diff +129 -0
  15. haid-0.0.1/src/haid/data/anchor_diffs/U11.diff +352 -0
  16. haid-0.0.1/src/haid/data/anchor_diffs/U13.diff +135 -0
  17. haid-0.0.1/src/haid/data/anchor_diffs/U16.diff +152 -0
  18. haid-0.0.1/src/haid/data/anchor_diffs/U18.diff +254 -0
  19. haid-0.0.1/src/haid/data/anchor_diffs/U19.diff +403 -0
  20. haid-0.0.1/src/haid/data/anchor_diffs/U22.diff +144 -0
  21. haid-0.0.1/src/haid/data/anchor_diffs/U24.diff +337 -0
  22. haid-0.0.1/src/haid/data/anchor_diffs/U29.diff +43 -0
  23. haid-0.0.1/src/haid/data/anchor_diffs/U37.diff +38 -0
  24. haid-0.0.1/src/haid/data/anchor_diffs/U39.diff +94 -0
  25. haid-0.0.1/src/haid/data/anchor_diffs/U40.diff +339 -0
  26. haid-0.0.1/src/haid/data/anchor_diffs/U43.diff +51 -0
  27. haid-0.0.1/src/haid/data/anchor_diffs/U46.diff +159 -0
  28. haid-0.0.1/src/haid/data/anchor_diffs/U48.diff +290 -0
  29. haid-0.0.1/src/haid/data/anchor_diffs/U50.diff +323 -0
  30. haid-0.0.1/src/haid/data/cleanliness_anchors.json +282 -0
  31. haid-0.0.1/src/haid/data/difficulty_anchors.json +53 -0
  32. haid-0.0.1/src/haid/data/metric_baselines.json +184 -0
  33. haid-0.0.1/src/haid/data/treatments.json +356 -0
  34. haid-0.0.1/src/haid/diffio.py +139 -0
  35. haid-0.0.1/src/haid/episodes/__init__.py +110 -0
  36. haid-0.0.1/src/haid/episodes/grouping.py +112 -0
  37. haid-0.0.1/src/haid/episodes/model.py +77 -0
  38. haid-0.0.1/src/haid/episodes/score.py +188 -0
  39. haid-0.0.1/src/haid/episodes/segment.py +163 -0
  40. haid-0.0.1/src/haid/episodes/summarize.py +64 -0
  41. haid-0.0.1/src/haid/filekind.py +100 -0
  42. haid-0.0.1/src/haid/graph/__init__.py +19 -0
  43. haid-0.0.1/src/haid/graph/bash_read.py +229 -0
  44. haid-0.0.1/src/haid/graph/bash_write.py +201 -0
  45. haid-0.0.1/src/haid/graph/build.py +248 -0
  46. haid-0.0.1/src/haid/graph/model.py +130 -0
  47. haid-0.0.1/src/haid/graph/signature.py +49 -0
  48. haid-0.0.1/src/haid/intent/__init__.py +90 -0
  49. haid-0.0.1/src/haid/intent/classify.py +132 -0
  50. haid-0.0.1/src/haid/intent/messages.py +110 -0
  51. haid-0.0.1/src/haid/intent/taxonomy.py +100 -0
  52. haid-0.0.1/src/haid/metrics/__init__.py +68 -0
  53. haid-0.0.1/src/haid/metrics/base.py +112 -0
  54. haid-0.0.1/src/haid/metrics/baseline.py +64 -0
  55. haid-0.0.1/src/haid/metrics/json_out.py +171 -0
  56. haid-0.0.1/src/haid/metrics/rereads.py +136 -0
  57. haid-0.0.1/src/haid/metrics/retouched.py +75 -0
  58. haid-0.0.1/src/haid/metrics/retries.py +108 -0
  59. haid-0.0.1/src/haid/metrics/unused_context.py +68 -0
  60. haid-0.0.1/src/haid/metrics/view.py +114 -0
  61. haid-0.0.1/src/haid/report/__init__.py +21 -0
  62. haid-0.0.1/src/haid/report/benchmark.py +114 -0
  63. haid-0.0.1/src/haid/report/compose.py +419 -0
  64. haid-0.0.1/src/haid/report/treatments.py +107 -0
  65. haid-0.0.1/src/haid/scoring/__init__.py +13 -0
  66. haid-0.0.1/src/haid/scoring/anchors.py +70 -0
  67. haid-0.0.1/src/haid/scoring/compare.py +272 -0
  68. haid-0.0.1/src/haid/scoring/cost.py +230 -0
  69. haid-0.0.1/src/haid/scoring/placement.py +80 -0
  70. haid-0.0.1/src/haid/scoring/value.py +233 -0
  71. haid-0.0.1/src/haid/scoring/volume.py +84 -0
  72. haid-0.0.1/src/haid/session/__init__.py +28 -0
  73. haid-0.0.1/src/haid/session/cache.py +105 -0
  74. haid-0.0.1/src/haid/session/discover.py +56 -0
  75. haid-0.0.1/src/haid/session/forest.py +192 -0
  76. haid-0.0.1/src/haid/session/loader.py +96 -0
  77. haid-0.0.1/src/haid/session/overflow.py +81 -0
  78. haid-0.0.1/src/haid/session/parse.py +74 -0
  79. haid-0.0.1/src/haid/session/records.py +153 -0
  80. haid-0.0.1/src/haid/session/subagents.py +72 -0
  81. haid-0.0.1/src/haid/why/__init__.py +64 -0
  82. haid-0.0.1/src/haid/why/anchors.py +69 -0
  83. haid-0.0.1/src/haid/why/investigate.py +144 -0
  84. haid-0.0.1/src/haid/why/prompts.py +181 -0
  85. haid-0.0.1/src/haid/window.py +71 -0
  86. haid-0.0.1/src/haid.egg-info/PKG-INFO +144 -0
  87. haid-0.0.1/src/haid.egg-info/SOURCES.txt +88 -0
  88. haid-0.0.1/src/haid.egg-info/dependency_links.txt +1 -0
  89. haid-0.0.1/src/haid.egg-info/entry_points.txt +2 -0
  90. haid-0.0.1/src/haid.egg-info/top_level.txt +1 -0
haid-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.4
2
+ Name: haid
3
+ Version: 0.0.1
4
+ Summary: How Am I Doing — local-only self-audit & coaching for Claude Code sessions
5
+ Author-email: dv-hart <jhart@datavine.us>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/dv-hart/haid
8
+ Project-URL: Repository, https://github.com/dv-hart/haid
9
+ Project-URL: Issues, https://github.com/dv-hart/haid/issues
10
+ Keywords: claude-code,self-audit,coaching,llm,agent,telemetry
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+
23
+ # How Am I Doing (HAID)
24
+
25
+ *A self-audit and coaching layer for Claude Code sessions.*
26
+
27
+ HAID reads your own Claude Code session transcripts, builds a graph of what
28
+ happened, and produces annotated, **coaching-oriented** reports. The aim is less
29
+ "here is your bill" and more "here is where you and the agent diverged, why, and
30
+ what to change."
31
+
32
+ **Nothing leaves your machine** unless you explicitly choose to submit aggregate
33
+ metrics.
34
+
35
+ > Status: **Phase 1 complete; the full scoring stack now runs on real sessions** — the
36
+ > deterministic pipeline runs end to end on real transcripts (163 tests, stdlib-only, no model
37
+ > in the loop):
38
+ > - **Session parsing** (`src/haid/session/`) — forest-aware JSONL parsing: dedup,
39
+ > branch/rewind classification, subagent stitching, overflow resolution, SQLite cache.
40
+ > - **Session graph** (`src/haid/graph/`) — L0 spine + L1 action/IO graph
41
+ > (reads/produces/edits from `structuredPatch`), signatures, per-timeline scoping.
42
+ > - **Waste metrics** (`src/haid/metrics/`) — `rereads`, `retries`, `retouched`,
43
+ > `unused_context`: one rule each, run at **session and window scope**, as benchmarkable
44
+ > token-rates placed against a per-scope baseline.
45
+ > - **Analysis window** (`src/haid/window.py`) — the multi-session unit metrics run over
46
+ > (a project over a timeframe, default 30 days).
47
+ > - **Scoring** (`src/haid/scoring/`) — the relative achievement/cost value scorer
48
+ > (difficulty + cleanliness placement, volume, normalized-token cost, value combiner),
49
+ > calibration-validated. Built ahead of the earlier phases.
50
+ >
51
+ > - **`haid metrics`** (`src/haid/metrics/{json_out,view}.py` + CLI) — the measured substrate:
52
+ > four waste metrics at **session and window scope**, each placed against a per-scope
53
+ > baseline, as a Markdown inspection view + a JSON hand-off to the later "why" passes.
54
+ > - **Bridge** (`src/haid/bridge/`) — reconstructs an analysis window's net code diff from the
55
+ > **transcript alone** (replay, no git) plus its normalized-token cost, so `haid bridge` and
56
+ > `haid value --project/--session` now run the **full scoring stack on real sessions**
57
+ > (previously the scorer only ran on supplied/calibration diffs).
58
+ >
59
+ > All validated on real transcripts (163 tests). The user-facing **report and visualization are
60
+ > the final product**, composing this substrate with the Phase-2/3 why-analysis and the value
61
+ > score. Next: the diagnosis router, episode segmentation (Phase 2), and the visualization
62
+ > (Phase 1.5). See [plans/roadmap.md](plans/roadmap.md) and [plans/phase1-build.md](plans/phase1-build.md).
63
+
64
+ ## What this is not
65
+
66
+ Not another token counter. Raw usage accounting is already well covered
67
+ ([ccusage](https://github.com/ryoppippi/ccusage) and similar). The entire value
68
+ lives one layer up, in **diagnosis and coaching** — telling you not what you
69
+ spent but how to get better. A tool that confidently misdiagnoses is worse than
70
+ nothing, because people act on it, so trustworthiness of the diagnosis is the
71
+ central design constraint throughout. See
72
+ [docs/trust-discipline.md](docs/trust-discipline.md).
73
+
74
+ ## The one big idea: the session graph
75
+
76
+ Underneath everything is one data structure: a graph of the session(s). Turns
77
+ and tool-calls are nodes; edges capture *responds-to*, *reads*, and *produces*
78
+ relationships. The two headline features are just two operations on this one
79
+ graph:
80
+
81
+ - **"Why did you do X?"** → a backwards traversal from X to its trigger.
82
+ - **"Where did the tokens go?"** → a weighting over the same nodes.
83
+
84
+ Build the graph once; get both views from it. Design in
85
+ [docs/session-graph-design.md](docs/session-graph-design.md).
86
+
87
+ ## Two orthogonal analysis passes
88
+
89
+ 1. **User-anchored pass** — catches *misalignment*. Works backwards from user
90
+ messages; **corrections are ground truth** ("no, I meant…", "that's wrong").
91
+ 2. **Signature-scanning pass** — catches *silent inefficiency*. Scans for
92
+ objective, reasoning-free waste signatures (redundant re-reads, retry loops,
93
+ re-touched lines, unused context).
94
+
95
+ The two are orthogonal: one finds where the agent did the *wrong thing*, the
96
+ other where it did the *right thing wastefully*. See
97
+ [docs/architecture.md](docs/architecture.md).
98
+
99
+ ## Documentation map
100
+
101
+ | Doc | What's in it |
102
+ |-----|--------------|
103
+ | [docs/vision.md](docs/vision.md) | The full concept, goals, and the canonical test case |
104
+ | [docs/architecture.md](docs/architecture.md) | The two-pass method and how the pieces fit |
105
+ | [docs/session-graph-design.md](docs/session-graph-design.md) | Node/edge taxonomy, episodes, the two core operations |
106
+ | [docs/detectors.md](docs/detectors.md) | Detector catalog + waste metrics as graph queries |
107
+ | [docs/intent-taxonomy.md](docs/intent-taxonomy.md) | Two-axis message classification + purpose timeline + drift |
108
+ | [docs/scoring-rubric.md](docs/scoring-rubric.md) | Achievement vs. cost — the **relative** value verdict (revised; see ladder/playbook) |
109
+ | [docs/difficulty-ladder.md](docs/difficulty-ladder.md) | The validated difficulty scorer (reference ladder + placement) |
110
+ | [docs/cleanliness-ladder.md](docs/cleanliness-ladder.md) | The cleanliness/parsimony scorer (reference ladder + placement) |
111
+ | [docs/axis-calibration-playbook.md](docs/axis-calibration-playbook.md) | Self-contained recipe to calibrate a new scoring axis (worked example: cleanliness; originality calibrated then dropped) |
112
+ | [docs/calibration-pilot-1.md](docs/calibration-pilot-1.md) | Pilot report: why mined review-signals don't validate difficulty |
113
+ | [docs/visualization.md](docs/visualization.md) | The time-layered bus diagram (left-in/right-out, bundled) |
114
+ | [docs/claude-code-data-format.md](docs/claude-code-data-format.md) | **Verified** Claude Code on-disk data reference |
115
+ | [docs/data-inventory.md](docs/data-inventory.md) | Field catalog from 38 real sessions: what's auto-taggable vs. inferred |
116
+ | [docs/data-structure-report.md](docs/data-structure-report.md) | Real annotated records → the graph they produce (Tier 1 & Tier 2 walkthrough) |
117
+ | [docs/trust-discipline.md](docs/trust-discipline.md) | Cite-or-unknown, hedging, no-traceable-origin |
118
+ | [docs/tooling-landscape.md](docs/tooling-landscape.md) | Existing tools and what to build on |
119
+ | [docs/decisions/](docs/decisions/) | Architecture Decision Records (ADRs) |
120
+ | [plans/roadmap.md](plans/roadmap.md) | Phased delivery plan |
121
+ | [plans/mvp.md](plans/mvp.md) | The minimum thing that tests the core risk |
122
+ | [plans/phase1-build.md](plans/phase1-build.md) | The concrete Phase-1 build sequence + progress |
123
+ | [plans/open-questions.md](plans/open-questions.md) | Decisions to make / behaviors to verify early |
124
+
125
+ ## Repository layout
126
+
127
+ ```
128
+ HAID/
129
+ ├── README.md # you are here
130
+ ├── docs/ # design & reference documentation
131
+ │ └── decisions/ # ADRs
132
+ ├── plans/ # roadmap, MVP spec, open questions
133
+ ├── research/ # raw research reports (inputs to the docs)
134
+ ├── calibration/ # the scoring-axis calibration harness (experiment code)
135
+ ├── src/haid/ # implementation
136
+ │ ├── session/ # Phase-1 parse: forest model, subagents, overflow, cache
137
+ │ ├── graph/ # L0 spine + L1 IO graph (incl. Bash read/write parsing)
138
+ │ ├── metrics/ # the four waste metrics + baseline + `haid metrics` emitter
139
+ │ ├── window.py # the multi-session analysis window
140
+ │ ├── scoring/ # relative value scorer (difficulty/cleanliness/volume/cost/value)
141
+ │ └── bridge/ # transcript→(diff, usage) reconstruction (the bridge)
142
+ ├── tests/ # session/ graph/ metrics/ scoring/ bridge/ suites (+ fixtures/)
143
+ └── scripts/ # build_metric_baselines.py + CLI helpers
144
+ ```
haid-0.0.1/README.md ADDED
@@ -0,0 +1,122 @@
1
+ # How Am I Doing (HAID)
2
+
3
+ *A self-audit and coaching layer for Claude Code sessions.*
4
+
5
+ HAID reads your own Claude Code session transcripts, builds a graph of what
6
+ happened, and produces annotated, **coaching-oriented** reports. The aim is less
7
+ "here is your bill" and more "here is where you and the agent diverged, why, and
8
+ what to change."
9
+
10
+ **Nothing leaves your machine** unless you explicitly choose to submit aggregate
11
+ metrics.
12
+
13
+ > Status: **Phase 1 complete; the full scoring stack now runs on real sessions** — the
14
+ > deterministic pipeline runs end to end on real transcripts (163 tests, stdlib-only, no model
15
+ > in the loop):
16
+ > - **Session parsing** (`src/haid/session/`) — forest-aware JSONL parsing: dedup,
17
+ > branch/rewind classification, subagent stitching, overflow resolution, SQLite cache.
18
+ > - **Session graph** (`src/haid/graph/`) — L0 spine + L1 action/IO graph
19
+ > (reads/produces/edits from `structuredPatch`), signatures, per-timeline scoping.
20
+ > - **Waste metrics** (`src/haid/metrics/`) — `rereads`, `retries`, `retouched`,
21
+ > `unused_context`: one rule each, run at **session and window scope**, as benchmarkable
22
+ > token-rates placed against a per-scope baseline.
23
+ > - **Analysis window** (`src/haid/window.py`) — the multi-session unit metrics run over
24
+ > (a project over a timeframe, default 30 days).
25
+ > - **Scoring** (`src/haid/scoring/`) — the relative achievement/cost value scorer
26
+ > (difficulty + cleanliness placement, volume, normalized-token cost, value combiner),
27
+ > calibration-validated. Built ahead of the earlier phases.
28
+ >
29
+ > - **`haid metrics`** (`src/haid/metrics/{json_out,view}.py` + CLI) — the measured substrate:
30
+ > four waste metrics at **session and window scope**, each placed against a per-scope
31
+ > baseline, as a Markdown inspection view + a JSON hand-off to the later "why" passes.
32
+ > - **Bridge** (`src/haid/bridge/`) — reconstructs an analysis window's net code diff from the
33
+ > **transcript alone** (replay, no git) plus its normalized-token cost, so `haid bridge` and
34
+ > `haid value --project/--session` now run the **full scoring stack on real sessions**
35
+ > (previously the scorer only ran on supplied/calibration diffs).
36
+ >
37
+ > All validated on real transcripts (163 tests). The user-facing **report and visualization are
38
+ > the final product**, composing this substrate with the Phase-2/3 why-analysis and the value
39
+ > score. Next: the diagnosis router, episode segmentation (Phase 2), and the visualization
40
+ > (Phase 1.5). See [plans/roadmap.md](plans/roadmap.md) and [plans/phase1-build.md](plans/phase1-build.md).
41
+
42
+ ## What this is not
43
+
44
+ Not another token counter. Raw usage accounting is already well covered
45
+ ([ccusage](https://github.com/ryoppippi/ccusage) and similar). The entire value
46
+ lives one layer up, in **diagnosis and coaching** — telling you not what you
47
+ spent but how to get better. A tool that confidently misdiagnoses is worse than
48
+ nothing, because people act on it, so trustworthiness of the diagnosis is the
49
+ central design constraint throughout. See
50
+ [docs/trust-discipline.md](docs/trust-discipline.md).
51
+
52
+ ## The one big idea: the session graph
53
+
54
+ Underneath everything is one data structure: a graph of the session(s). Turns
55
+ and tool-calls are nodes; edges capture *responds-to*, *reads*, and *produces*
56
+ relationships. The two headline features are just two operations on this one
57
+ graph:
58
+
59
+ - **"Why did you do X?"** → a backwards traversal from X to its trigger.
60
+ - **"Where did the tokens go?"** → a weighting over the same nodes.
61
+
62
+ Build the graph once; get both views from it. Design in
63
+ [docs/session-graph-design.md](docs/session-graph-design.md).
64
+
65
+ ## Two orthogonal analysis passes
66
+
67
+ 1. **User-anchored pass** — catches *misalignment*. Works backwards from user
68
+ messages; **corrections are ground truth** ("no, I meant…", "that's wrong").
69
+ 2. **Signature-scanning pass** — catches *silent inefficiency*. Scans for
70
+ objective, reasoning-free waste signatures (redundant re-reads, retry loops,
71
+ re-touched lines, unused context).
72
+
73
+ The two are orthogonal: one finds where the agent did the *wrong thing*, the
74
+ other where it did the *right thing wastefully*. See
75
+ [docs/architecture.md](docs/architecture.md).
76
+
77
+ ## Documentation map
78
+
79
+ | Doc | What's in it |
80
+ |-----|--------------|
81
+ | [docs/vision.md](docs/vision.md) | The full concept, goals, and the canonical test case |
82
+ | [docs/architecture.md](docs/architecture.md) | The two-pass method and how the pieces fit |
83
+ | [docs/session-graph-design.md](docs/session-graph-design.md) | Node/edge taxonomy, episodes, the two core operations |
84
+ | [docs/detectors.md](docs/detectors.md) | Detector catalog + waste metrics as graph queries |
85
+ | [docs/intent-taxonomy.md](docs/intent-taxonomy.md) | Two-axis message classification + purpose timeline + drift |
86
+ | [docs/scoring-rubric.md](docs/scoring-rubric.md) | Achievement vs. cost — the **relative** value verdict (revised; see ladder/playbook) |
87
+ | [docs/difficulty-ladder.md](docs/difficulty-ladder.md) | The validated difficulty scorer (reference ladder + placement) |
88
+ | [docs/cleanliness-ladder.md](docs/cleanliness-ladder.md) | The cleanliness/parsimony scorer (reference ladder + placement) |
89
+ | [docs/axis-calibration-playbook.md](docs/axis-calibration-playbook.md) | Self-contained recipe to calibrate a new scoring axis (worked example: cleanliness; originality calibrated then dropped) |
90
+ | [docs/calibration-pilot-1.md](docs/calibration-pilot-1.md) | Pilot report: why mined review-signals don't validate difficulty |
91
+ | [docs/visualization.md](docs/visualization.md) | The time-layered bus diagram (left-in/right-out, bundled) |
92
+ | [docs/claude-code-data-format.md](docs/claude-code-data-format.md) | **Verified** Claude Code on-disk data reference |
93
+ | [docs/data-inventory.md](docs/data-inventory.md) | Field catalog from 38 real sessions: what's auto-taggable vs. inferred |
94
+ | [docs/data-structure-report.md](docs/data-structure-report.md) | Real annotated records → the graph they produce (Tier 1 & Tier 2 walkthrough) |
95
+ | [docs/trust-discipline.md](docs/trust-discipline.md) | Cite-or-unknown, hedging, no-traceable-origin |
96
+ | [docs/tooling-landscape.md](docs/tooling-landscape.md) | Existing tools and what to build on |
97
+ | [docs/decisions/](docs/decisions/) | Architecture Decision Records (ADRs) |
98
+ | [plans/roadmap.md](plans/roadmap.md) | Phased delivery plan |
99
+ | [plans/mvp.md](plans/mvp.md) | The minimum thing that tests the core risk |
100
+ | [plans/phase1-build.md](plans/phase1-build.md) | The concrete Phase-1 build sequence + progress |
101
+ | [plans/open-questions.md](plans/open-questions.md) | Decisions to make / behaviors to verify early |
102
+
103
+ ## Repository layout
104
+
105
+ ```
106
+ HAID/
107
+ ├── README.md # you are here
108
+ ├── docs/ # design & reference documentation
109
+ │ └── decisions/ # ADRs
110
+ ├── plans/ # roadmap, MVP spec, open questions
111
+ ├── research/ # raw research reports (inputs to the docs)
112
+ ├── calibration/ # the scoring-axis calibration harness (experiment code)
113
+ ├── src/haid/ # implementation
114
+ │ ├── session/ # Phase-1 parse: forest model, subagents, overflow, cache
115
+ │ ├── graph/ # L0 spine + L1 IO graph (incl. Bash read/write parsing)
116
+ │ ├── metrics/ # the four waste metrics + baseline + `haid metrics` emitter
117
+ │ ├── window.py # the multi-session analysis window
118
+ │ ├── scoring/ # relative value scorer (difficulty/cleanliness/volume/cost/value)
119
+ │ └── bridge/ # transcript→(diff, usage) reconstruction (the bridge)
120
+ ├── tests/ # session/ graph/ metrics/ scoring/ bridge/ suites (+ fixtures/)
121
+ └── scripts/ # build_metric_baselines.py + CLI helpers
122
+ ```
haid-0.0.1/pyproject.toml ADDED
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "haid"
7
+ version = "0.0.1"
8
+ description = "How Am I Doing — local-only self-audit & coaching for Claude Code sessions"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "dv-hart", email = "jhart@datavine.us" }]
13
+ keywords = ["claude-code", "self-audit", "coaching", "llm", "agent", "telemetry"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Software Development :: Quality Assurance",
24
+ ]
25
+ dependencies = [] # stdlib only — model judgment is delegated to the host agent (see scoring/compare.py)
26
+
27
+ [project.urls]
28
+ Homepage = "https://github.com/dv-hart/haid"
29
+ Repository = "https://github.com/dv-hart/haid"
30
+ Issues = "https://github.com/dv-hart/haid/issues"
31
+
32
+ [project.scripts]
33
+ haid = "haid.cli:main"
34
+
35
+ [tool.setuptools.packages.find]
36
+ where = ["src"]
37
+
38
+ [tool.setuptools.package-data]
39
+ haid = ["data/*.json", "data/anchor_diffs/*.diff"]
haid-0.0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
haid-0.0.1/src/haid/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """HAID — "How Am I Doing": local-only self-audit & coaching for Claude Code sessions.
2
+
3
+ This package is the product code (stdlib only). The scoring subpackage places a session
4
+ diff against fixed reference ladders to produce relative achievement scores; the model
5
+ judgment those placements need is delegated to the host agent (Claude Code subagents),
6
+ never an in-process API call — see haid.scoring.compare.
7
+ """
8
+
9
+ __version__ = "0.0.1"
haid-0.0.1/src/haid/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ raise SystemExit(main())
haid-0.0.1/src/haid/bridge/__init__.py ADDED
@@ -0,0 +1,172 @@
1
+ """The bridge: window → (diff, usage) — the join between the real-session pipeline and the
2
+ scoring stack.
3
+
4
+ The scorer (volume / difficulty / cleanliness / value) was built and validated against
5
+ calibration diffs; the session pipeline (session → graph → metrics) ingests real transcripts.
6
+ This package connects them: given an analysis window it produces the two inputs the scorer
7
+ needs — a reconstructed unified **diff** and a normalized-token **cost** — so `haid value` runs
8
+ on real work.
9
+
10
+ Design (recorded in the project notes, decided after measuring the gap):
11
+ - **Replay-primary, no git.** The diff is reconstructed from the transcript (see
12
+ `reconstruct`). The bash-write-to-source gap was measured at ~0–1% on real projects; what
13
+ little it misses is detected and FLAGGED, never silently dropped.
14
+ - **Grain-agnostic core.** `window_inputs` slices the whole window; the same engine will slice
15
+ by episode once episodes exist (Phase 2 — episode↔PR alignment is explicitly TBD, not v1).
16
+
17
+ Stdlib only; no model.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import re
23
+ from dataclasses import dataclass, field
24
+
25
+ from .reconstruct import FileRecon, ReconResult, reconstruct
26
+ from .usage import extract_cost
27
+
28
+ __all__ = ["BridgeResult", "window_inputs", "episode_inputs", "reconstruct", "extract_cost",
29
+ "FileRecon", "ReconResult"]
30
+
31
# Absolute-path prefix: a POSIX root, a Windows drive letter, or a UNC share.
_ABS = re.compile(r"^(?:/|[A-Za-z]:[\\/]|\\\\)")


def _is_external(file_id: str) -> bool:
    """Return True when `file_id` is an absolute path instead of a repo-relative id.

    Such ids (temp files, other repos, /etc) are not part of the project work product, so
    their writes must stay out of the scored diff. (build.py makes ids repo-relative only
    when the path is under the session cwd; everything else stays absolute.)
    """
    return _ABS.match(file_id) is not None
39
+
40
+
41
@dataclass
class BridgeResult:
    """The scorer inputs produced by the bridge, plus the material needed to audit them."""

    # Reconstructed unified diff (scorer input).
    diff: str
    # cost.CostResult (scorer denominator); only its `summary()` method is used here.
    cost: object
    # Per-file FileRecon entries, kept for inspection.
    files: list = field(default_factory=list)
    # Honesty surface: every known gap is listed here rather than dropped silently.
    caveats: list = field(default_factory=list)

    def summary(self) -> str:
        """Render a short multi-line, human-readable description of this result.

        The first line counts the changed and the incomplete files, the second is the cost's
        own summary, and any caveats follow under a `caveats:` heading, one per line.
        """
        n_changed = sum(1 for entry in self.files if entry.changed)
        n_incomplete = sum(1 for entry in self.files if not entry.complete)
        report = [
            f"bridge: {n_changed} changed file(s) reconstructed, "
            f"{n_incomplete} flagged incomplete",
            self.cost.summary(),
        ]
        if self.caveats:
            report.append("caveats:")
            for caveat in self.caveats:
                report.append(f" {caveat}")
        return "\n".join(report)
58
+
59
+
60
def window_inputs(view, sessions) -> BridgeResult:
    """Produce the scorer inputs (diff, cost) covering an entire analysis window.

    `view` is a metrics.WindowView; its `active_stream` yields the active-branch tool calls
    in order. `sessions` are the loaded Session objects, which supply the token usage and the
    edit content.

    Returns a BridgeResult holding the reconstructed diff, the extracted cost, the per-file
    reconstruction records, and every caveat raised along the way.
    """
    from ..graph.model import is_write

    results_by_call = _tur_index(sessions)
    project_writes = []
    n_external = 0
    for _session_id, call in view.active_stream:
        if not is_write(call):
            continue
        target = call.target_file_id
        if not target:
            continue
        if _is_external(target):
            # Writes outside the project tree are counted, not replayed.
            n_external += 1
            continue
        paired_result = results_by_call.get(call.id, {})
        project_writes.append(
            (target, call.tool, paired_result, call.write_op, call.write_content,
             call.derived_write)
        )

    recon = reconstruct(project_writes, baselines=_baselines(sessions))
    recon.excluded_external = n_external

    # Start from the reconstruction's own caveats, then append the gaps found at this level.
    notes = [*recon.caveats]
    if n_external:
        notes.append(f"{n_external} write(s) to files outside the project tree "
                     "(temp / other repos) excluded from the diff")
    n_subagent = _subagent_write_count(sessions)
    if n_subagent:
        notes.append(f"{n_subagent} subagent file-write call(s) are not yet folded into "
                     "the diff (subagent edit stitching is deferred)")

    return BridgeResult(diff=recon.diff, cost=extract_cost(sessions),
                        files=recon.files, caveats=notes)
97
+
98
+
99
def episode_inputs(episode_sessions) -> BridgeResult:
    """Produce the scorer inputs (diff, cost) for a single episode.

    An episode is a set of *whole sessions* (grain decision 2026-06-08), so no separate
    slicing engine is needed: a view is built over just those sessions and handed to
    `window_inputs`. Restricting the input to the episode's sessions gives two properties:
    - **episode-relative diff baseline**: `_baselines` picks the earliest captured
      `originalFile` among these sessions only, i.e. each file's state as it ENTERED the
      episode (after any earlier episodes touched it), so the diff is the episode's own delta;
    - **clean cost**: `extract_cost` sums these sessions' per-context-window costs, with no
      entangled sub-session token split (the reason the session is the atomic floor).
    """
    from ..window import build_view

    return window_inputs(build_view(episode_sessions), episode_sessions)
113
+
114
+
115
def _tur_index(sessions) -> dict:
    """Map each tool_use id to its toolUseResult dict, over every session's main and
    subagent records.

    The pairing key is the `tool_use_id` carried by the `tool_result` block inside the
    record's content (verified on real data — there is no top-level sourceToolUseID).
    Records whose `toolUseResult` is not a dict, or whose content is not a list, are skipped.
    Only the first qualifying block of a record is used.
    """
    index: dict[str, dict] = {}
    for session in sessions:
        records = list(session.parse.records)
        for sub in session.subagents:
            records.extend(sub.parse.records)
        for rec in records:
            result = rec.raw.get("toolUseResult")
            if not isinstance(result, dict):
                continue
            blocks = rec.content
            if not isinstance(blocks, list):
                continue
            first_match = next(
                (blk for blk in blocks
                 if isinstance(blk, dict)
                 and blk.get("type") == "tool_result"
                 and blk.get("tool_use_id")),
                None,
            )
            if first_match is not None:
                index[first_match["tool_use_id"]] = result
    return index
135
+
136
+
137
def _baselines(sessions) -> dict:
    """Map each file_id to the file's content as it ENTERED the window.

    That content is the first captured `originalFile` found for the file when sessions are
    walked in order of their earliest main-record timestamp, and each session's main records
    are walked before its subagent records (any branch).

    Claude Code omits originalFile on some edits (e.g. large files), so the first edit seen
    in the active stream may lack it even though an earlier touch captured it. Taking the
    earliest capture window-wide gives buffer-mode reconstruction a correct seed; files that
    never captured it fall back to hunks mode in reconstruct().
    """
    from ..graph.build import _file_id

    def _first_timestamp(session):
        # Sessions with no timestamped record get "" as their sort key.
        return min((r.timestamp for r in session.parse.records if r.timestamp), default="")

    seeds: dict[str, str] = {}
    for session in sorted(sessions, key=_first_timestamp):
        # The first non-empty cwd among the main records is used to resolve file ids.
        cwd = next(
            (r.raw.get("cwd") for r in session.parse.records if r.raw.get("cwd")), None)
        records = list(session.parse.records)
        for sub in session.subagents:
            records.extend(sub.parse.records)
        for rec in records:
            result = rec.raw.get("toolUseResult")
            if not isinstance(result, dict):
                continue
            original = result.get("originalFile")
            if original is None:
                continue
            path = result.get("filePath") or (result.get("file") or {}).get("filePath")
            fid = _file_id(path, cwd)
            if fid and fid not in seeds:
                # First capture wins; later ones describe an already-edited file.
                seeds[fid] = original
    return seeds
161
+
162
+
163
def _subagent_write_count(sessions) -> int:
    """Count subagent tool calls that write to a non-external target file.

    A graph is built from each subagent's records; a call is counted when `is_write` accepts
    it, it has a `target_file_id`, and that id is not external per `_is_external`.
    """
    from ..graph.model import is_write
    from ..graph.build import build_graph

    total = 0
    for session in sessions:
        for sub in session.subagents:
            graph = build_graph(sub.parse.records)
            for call in graph.toolcalls.values():
                if (is_write(call) and call.target_file_id
                        and not _is_external(call.target_file_id)):
                    total += 1
    return total