PyPI - haid - Versions diffs - 0.0.1__py3-none-any.whl - Mend

haid 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86) hide show

haid/__init__.py +9 -0
haid/__main__.py +4 -0
haid/bridge/__init__.py +172 -0
haid/bridge/reconstruct.py +222 -0
haid/bridge/usage.py +71 -0
haid/cli.py +612 -0
haid/data/anchor_diffs/U00.diff +378 -0
haid/data/anchor_diffs/U01.diff +317 -0
haid/data/anchor_diffs/U07.diff +218 -0
haid/data/anchor_diffs/U10.diff +129 -0
haid/data/anchor_diffs/U11.diff +352 -0
haid/data/anchor_diffs/U13.diff +135 -0
haid/data/anchor_diffs/U16.diff +152 -0
haid/data/anchor_diffs/U18.diff +254 -0
haid/data/anchor_diffs/U19.diff +403 -0
haid/data/anchor_diffs/U22.diff +144 -0
haid/data/anchor_diffs/U24.diff +337 -0
haid/data/anchor_diffs/U29.diff +43 -0
haid/data/anchor_diffs/U37.diff +38 -0
haid/data/anchor_diffs/U39.diff +94 -0
haid/data/anchor_diffs/U40.diff +339 -0
haid/data/anchor_diffs/U43.diff +51 -0
haid/data/anchor_diffs/U46.diff +159 -0
haid/data/anchor_diffs/U48.diff +290 -0
haid/data/anchor_diffs/U50.diff +323 -0
haid/data/cleanliness_anchors.json +282 -0
haid/data/difficulty_anchors.json +53 -0
haid/data/metric_baselines.json +184 -0
haid/data/treatments.json +356 -0
haid/diffio.py +139 -0
haid/episodes/__init__.py +110 -0
haid/episodes/grouping.py +112 -0
haid/episodes/model.py +77 -0
haid/episodes/score.py +188 -0
haid/episodes/segment.py +163 -0
haid/episodes/summarize.py +64 -0
haid/filekind.py +100 -0
haid/graph/__init__.py +19 -0
haid/graph/bash_read.py +229 -0
haid/graph/bash_write.py +201 -0
haid/graph/build.py +248 -0
haid/graph/model.py +130 -0
haid/graph/signature.py +49 -0
haid/intent/__init__.py +90 -0
haid/intent/classify.py +132 -0
haid/intent/messages.py +110 -0
haid/intent/taxonomy.py +100 -0
haid/metrics/__init__.py +68 -0
haid/metrics/base.py +112 -0
haid/metrics/baseline.py +64 -0
haid/metrics/json_out.py +171 -0
haid/metrics/rereads.py +136 -0
haid/metrics/retouched.py +75 -0
haid/metrics/retries.py +108 -0
haid/metrics/unused_context.py +68 -0
haid/metrics/view.py +114 -0
haid/report/__init__.py +21 -0
haid/report/benchmark.py +114 -0
haid/report/compose.py +419 -0
haid/report/treatments.py +107 -0
haid/scoring/__init__.py +13 -0
haid/scoring/anchors.py +70 -0
haid/scoring/compare.py +272 -0
haid/scoring/cost.py +230 -0
haid/scoring/placement.py +80 -0
haid/scoring/value.py +233 -0
haid/scoring/volume.py +84 -0
haid/session/__init__.py +28 -0
haid/session/cache.py +105 -0
haid/session/discover.py +56 -0
haid/session/forest.py +192 -0
haid/session/loader.py +96 -0
haid/session/overflow.py +81 -0
haid/session/parse.py +74 -0
haid/session/records.py +153 -0
haid/session/subagents.py +72 -0
haid/why/__init__.py +64 -0
haid/why/anchors.py +69 -0
haid/why/investigate.py +144 -0
haid/why/prompts.py +181 -0
haid/window.py +71 -0
haid-0.0.1.dist-info/METADATA +144 -0
haid-0.0.1.dist-info/RECORD +86 -0
haid-0.0.1.dist-info/WHEEL +5 -0
haid-0.0.1.dist-info/entry_points.txt +2 -0
haid-0.0.1.dist-info/top_level.txt +1 -0

haid/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""HAID — "How Am I Doing": local-only self-audit & coaching for Claude Code sessions.
+This package is the product code (stdlib only). The scoring subpackage places a session
+diff against fixed reference ladders to produce relative achievement scores; the model
+judgment those placements need is delegated to the host agent (Claude Code subagents),
+never an in-process API call — see haid.scoring.compare.
+"""
+# Package version string, exposed as haid.__version__.
+__version__ = "0.0.1"

haid/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .cli import main
+# Script entry (`python -m haid`): run the CLI and exit with whatever main() returns.
+if __name__ == "__main__":
+    raise SystemExit(main())

haid/bridge/__init__.py ADDED Viewed

@@ -0,0 +1,172 @@
+"""The bridge: window → (diff, usage) — the join between the real-session pipeline and the
+scoring stack.
+The scorer (volume / difficulty / cleanliness / value) was built and validated against
+calibration diffs; the session pipeline (session → graph → metrics) ingests real transcripts.
+This package connects them: given an analysis window it produces the two inputs the scorer
+needs — a reconstructed unified **diff** and a normalized-token **cost** — so `haid value` runs
+on real work.
+Design (recorded in the project notes, decided after measuring the gap):
+  - **Replay-primary, no git.** The diff is reconstructed from the transcript (see
+    `reconstruct`). The bash-write-to-source gap was measured at ~0–1% on real projects; what
+    little it misses is detected and FLAGGED, never silently dropped.
+  - **Grain-agnostic core.** `window_inputs` slices the whole window; the same engine will slice
+    by episode once episodes exist (Phase 2 — episode↔PR alignment is explicitly TBD, not v1).
+Stdlib only; no model.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+from .reconstruct import FileRecon, ReconResult, reconstruct
+from .usage import extract_cost
+# Public names of the bridge package, including the re-exported reconstruct/usage helpers.
+__all__ = ["BridgeResult", "window_inputs", "episode_inputs", "reconstruct", "extract_cost",
+           "FileRecon", "ReconResult"]
+# Anchored at the start of the string: matches ids that are absolute paths, not repo-relative.
+_ABS = re.compile(r"^(?:/|[A-Za-z]:[\\/]|\\\\)")   # posix root, drive letter, or UNC
+def _is_external(file_id: str) -> bool:
+    """Return True when `file_id` is an absolute path instead of a repo-relative id.
+    Absolute ids (posix root, drive letter, UNC) denote files outside the project tree —
+    temp files, other repos, /etc — which are not project work product and are kept out of
+    the scored diff."""
+    return _ABS.match(file_id) is not None
+@dataclass
+class BridgeResult:
+    """The scorer inputs produced for one window (or episode), plus inspection data."""
+    diff: str                                    # reconstructed unified diff (scorer input)
+    cost: object                                 # cost result; must offer .summary()
+    files: list = field(default_factory=list)    # per-file reconstruction records
+    caveats: list = field(default_factory=list)  # human-readable notes on known gaps
+    def summary(self) -> str:
+        """Render a multi-line text summary: file counts, the cost summary, then caveats."""
+        n_changed = sum(1 for rec in self.files if rec.changed)
+        n_incomplete = sum(1 for rec in self.files if not rec.complete)
+        headline = (f"bridge: {n_changed} changed file(s) reconstructed, "
+                    f"{n_incomplete} flagged incomplete")
+        parts = [headline, self.cost.summary()]
+        if self.caveats:
+            parts.append("caveats:")
+            parts += [f"  {note}" for note in self.caveats]
+        return "\n".join(parts)
+def window_inputs(view, sessions) -> BridgeResult:
+    """Produce the scorer inputs (diff, cost) for an entire analysis window.
+    `view.active_stream` is iterated as (session id, tool call) pairs in order; only write
+    calls that target a repo-relative file are replayed. `sessions` supplies the tool
+    results, the baselines and the token usage. Writes to external paths and subagent
+    writes are counted and reported as caveats rather than folded into the diff.
+    """
+    from ..graph.model import is_write
+    results_by_call = _tur_index(sessions)
+    project_writes = []
+    skipped_external = 0
+    for _session_id, call in view.active_stream:
+        if not is_write(call):
+            continue
+        target = call.target_file_id
+        if not target:
+            continue
+        if _is_external(target):
+            skipped_external += 1
+            continue
+        project_writes.append((target, call.tool, results_by_call.get(call.id, {}),
+                               call.write_op, call.write_content, call.derived_write))
+    recon = reconstruct(project_writes, baselines=_baselines(sessions))
+    recon.excluded_external = skipped_external
+    notes = list(recon.caveats)
+    if skipped_external:
+        notes.append(f"{skipped_external} write(s) to files outside the project tree "
+                     "(temp / other repos) excluded from the diff")
+    n_subagent = _subagent_write_count(sessions)
+    if n_subagent:
+        notes.append(f"{n_subagent} subagent file-write call(s) are not yet folded into "
+                     "the diff (subagent edit stitching is deferred)")
+    return BridgeResult(diff=recon.diff, cost=extract_cost(sessions),
+                        files=recon.files, caveats=notes)
+def episode_inputs(episode_sessions) -> BridgeResult:
+    """Produce the scorer inputs (diff, cost) for a single episode.
+    An episode is handled as a subset of whole sessions: a view is built over just those
+    sessions and handed to `window_inputs`, so no separate slicing engine is involved.
+    Baselines and cost are therefore derived from these sessions only.
+    """
+    from ..window import build_view
+    return window_inputs(build_view(episode_sessions), episode_sessions)
+def _tur_index(sessions) -> dict:
+    """Map tool_use id -> toolUseResult dict over main and subagent records of all sessions.
+    A record contributes only when its `toolUseResult` is a dict and its content is a list;
+    the key is the `tool_use_id` of the first `tool_result` block in that content. Later
+    records with the same id overwrite earlier ones."""
+    index: dict[str, dict] = {}
+    for session in sessions:
+        records = list(session.parse.records)
+        for agent in session.subagents:
+            records.extend(agent.parse.records)
+        for rec in records:
+            result = rec.raw.get("toolUseResult")
+            if not isinstance(result, dict):
+                continue
+            blocks = rec.content
+            if not isinstance(blocks, list):
+                continue
+            # Only the first matching tool_result block is used for pairing.
+            call_id = next((blk["tool_use_id"] for blk in blocks
+                            if isinstance(blk, dict) and blk.get("type") == "tool_result"
+                            and blk.get("tool_use_id")), None)
+            if call_id is not None:
+                index[call_id] = result
+    return index
+def _baselines(sessions) -> dict:
+    """Map file_id -> the first captured `originalFile` content for that file.
+    Sessions are visited in order of their earliest main-record timestamp (sessions with no
+    timestamp sort first); within a session, main records are scanned before subagent
+    records. The first non-None `originalFile` seen for a file id wins. File ids are resolved
+    against the first `cwd` found in the session's main records. Files that never captured
+    an `originalFile` are simply absent from the result."""
+    from ..graph.build import _file_id
+    def _first_stamp(session):
+        return min((rec.timestamp for rec in session.parse.records if rec.timestamp),
+                   default="")
+    seeds: dict[str, str] = {}
+    for session in sorted(sessions, key=_first_stamp):
+        cwd = next((rec.raw.get("cwd") for rec in session.parse.records
+                    if rec.raw.get("cwd")), None)
+        records = list(session.parse.records)
+        for agent in session.subagents:
+            records.extend(agent.parse.records)
+        for rec in records:
+            result = rec.raw.get("toolUseResult")
+            if not isinstance(result, dict):
+                continue
+            original = result.get("originalFile")
+            if original is None:
+                continue
+            # The path is either top-level or nested under a "file" mapping.
+            path = result.get("filePath") or (result.get("file") or {}).get("filePath")
+            fid = _file_id(path, cwd)
+            if fid and fid not in seeds:
+                seeds[fid] = original
+    return seeds
+def _subagent_write_count(sessions) -> int:
+    """Count subagent tool calls that write to a repo-relative (non-external) file.
+    A graph is built per subagent from its records; each write call with a target file id
+    that is not an absolute path adds one to the total."""
+    from ..graph.model import is_write
+    from ..graph.build import build_graph
+    total = 0
+    for session in sessions:
+        for agent in session.subagents:
+            graph = build_graph(agent.parse.records)
+            for call in graph.toolcalls.values():
+                if is_write(call) and call.target_file_id and not _is_external(call.target_file_id):
+                    total += 1
+    return total

haid/bridge/reconstruct.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""Reconstruct the net code diff a body of work produced — from the transcript alone.
+This is the diff half of the window→(diff, usage) bridge: the join between the real-session
+pipeline (session→graph) and the scoring stack (volume/difficulty/cleanliness), which until
+now only ever saw calibration diffs. It is **replay-primary, no git** (decision recorded after
+measuring the bash-write-to-source gap at ~0–1% across three real projects; see the project
+notes). The same data Claude Code's own rewind uses:
+  - Edit/MultiEdit  → `originalFile` (full pre-edit content) + exact `oldString`→`newString`.
+  - Write           → full `content` (and `originalFile` for overwrites; None on create).
+  - Bash heredoc    → recovered `write_content` (see graph/bash_write.parse_heredoc_write).
+Two reconstruction modes, picked per file:
+  * **buffer (preferred)** — when we have the file's content as it entered the window (the
+    earliest captured `originalFile`), we replay every write onto a running string and emit
+    `unified_diff(baseline, final)`. This is **net by construction** (a line written then
+    rewritten appears once, in final form — exactly what `volume` wants; the churn lives on
+    the cost side) and **self-detects gaps**: each edit's `originalFile` must equal our running
+    content, so an untracked shell write in between is caught and flagged, never silently wrong.
+  * **hunks (fallback)** — Claude Code omits `originalFile` on some edits (e.g. large files),
+    so a pre-existing file may have no full baseline anywhere in the window. There we emit the
+    edits' `structuredPatch` hunks directly (always present). Correct for the changed lines,
+    but flagged: overlapping re-edits of the same lines can double-count (no net dedup).
+No silent caps — every shortfall lands in `FileRecon.reasons` and surfaces as a caveat.
+Grain-agnostic: `reconstruct()` takes an ordered list of writes, so the caller slices by
+window now and by episode later. Stdlib only.
+"""
+from __future__ import annotations
+import difflib
+from dataclasses import dataclass, field
+# Tool names dispatched to _apply_native_edit by reconstruct().
+_NATIVE_EDIT = {"Edit", "MultiEdit"}
+# Tool names dispatched to _apply_native_write by reconstruct().
+_NATIVE_WRITE = {"Write"}
+@dataclass
+class FileRecon:
+    """Reconstruction state for a single file: its mode, contents and any honesty flags."""
+    file_id: str
+    mode: str = "buffer"                         # "buffer" | "hunks"
+    baseline: str = ""                           # content before the replayed writes
+    final: str = ""                              # running / resulting content
+    hunks: list = field(default_factory=list)    # structuredPatch hunks (hunks mode)
+    ops: int = 0                                 # number of writes applied to this file
+    complete: bool = True                        # False once any shortfall is flagged
+    reasons: list[str] = field(default_factory=list)
+    @property
+    def changed(self) -> bool:
+        """True if there is anything to emit: hunks in hunks mode, else baseline != final."""
+        if self.mode == "hunks":
+            return bool(self.hunks)
+        return self.baseline != self.final
+    def _flag(self, reason: str) -> None:
+        """Mark the file incomplete and record `reason` once (duplicates are ignored)."""
+        self.complete = False
+        if reason in self.reasons:
+            return
+        self.reasons.append(reason)
+@dataclass
+class ReconResult:
+    """Output of reconstruct(): the combined diff text plus the per-file records."""
+    diff: str                                   # concatenated git-style unified diff
+    files: list[FileRecon]                      # one record per touched file
+    caveats: list[str] = field(default_factory=list)
+    excluded_external: int = 0                  # writes to paths outside the project tree
+    @property
+    def incomplete(self) -> list[FileRecon]:
+        """The subset of `files` whose `complete` flag is false, in original order."""
+        return list(filter(lambda rec: not rec.complete, self.files))
+# --- per-tool application ---------------------------------------------------------------
+def _seed_baseline(fr: FileRecon, original, baselines: dict, fid: str):
+    """Try to seed `fr` for buffer mode; return True on success, False if no seed exists.
+    The seed is `original` when it is not None, otherwise `baselines[fid]` if present. On
+    success `fr.mode` becomes "buffer" and both `baseline` and `final` are set to the seed;
+    on failure `fr` is left untouched so the caller can pick another mode."""
+    seed = baselines.get(fid) if original is None else original
+    if seed is None:
+        return False
+    fr.mode = "buffer"
+    fr.baseline = fr.final = seed
+    return True
+def _apply_native_edit(fr: FileRecon, tur: dict, first: bool, baselines: dict) -> None:
+    """Fold one Edit/MultiEdit tool result into `fr`.
+    On the file's first touch the baseline is seeded from the result's `originalFile` or
+    from `baselines`; when neither exists the file switches to hunks mode and is flagged.
+    In hunks mode the result's `structuredPatch` hunks are collected verbatim. In buffer
+    mode each (old, new) pair — taken from `edits` when present, else from
+    `oldString`/`newString`/`replaceAll` — is replayed onto `fr.final`:
+      - a later edit whose `originalFile` disagrees with the running content is flagged
+        and the buffer is resynced to that `originalFile`;
+      - an `old` string missing from the buffer is flagged and that pair is skipped.
+    """
+    original = tur.get("originalFile")
+    if first and not _seed_baseline(fr, original, baselines, fr.file_id):
+        fr.mode = "hunks"
+        fr._flag("no full baseline captured for this pre-existing file — reconstructed from "
+                 "diff hunks (overlapping re-edits may double-count)")
+    if fr.mode == "hunks":
+        fr.hunks.extend(tur.get("structuredPatch") or [])
+        return
+    if not first and original is not None and fr.final != original:
+        fr._flag("untracked change before an edit (resynced to the file's actual state)")
+        fr.final = original
+    pairs = tur.get("edits") or [{"old_string": tur.get("oldString", ""),
+                                  "new_string": tur.get("newString", ""),
+                                  "replace_all": tur.get("replaceAll", False)}]
+    for e in pairs:
+        old, new = e.get("old_string", ""), e.get("new_string", "")
+        if old == "":                            # pure insertion: no anchor to replace
+            if not fr.final or new.startswith(fr.final):
+                # Empty buffer, or `new` already carries the buffer as its prefix: `new`
+                # IS the resulting content. (Keeping the old buffer here would silently
+                # drop the inserted text.)
+                fr.final = new
+            else:
+                fr.final = fr.final + new
+            continue
+        if old not in fr.final:
+            fr._flag("edit oldString not found in reconstructed content")
+            continue
+        fr.final = fr.final.replace(old, new) if e.get("replace_all") else fr.final.replace(old, new, 1)
+def _apply_native_write(fr: FileRecon, tur: dict, first: bool, baselines: dict) -> None:
+    """Fold one Write tool result into `fr`; the written `content` becomes `fr.final`.
+    First touch: seed the baseline from `originalFile`/`baselines`; failing that, start
+    from an empty baseline and flag it when a `structuredPatch` is present (an overwrite
+    whose prior content was not captured). Later touches: if `originalFile` disagrees with
+    the running content, flag and resync to it. A result without `content` is flagged and
+    leaves the buffer otherwise untouched."""
+    content = tur.get("content")
+    original = tur.get("originalFile")
+    patch = tur.get("structuredPatch") or []
+    if not first:
+        if original is not None and fr.final != original:
+            fr._flag("untracked change before a write (resynced to the file's actual state)")
+            fr.final = original
+    elif not _seed_baseline(fr, original, baselines, fr.file_id):
+        fr.baseline = fr.final = ""              # create (no patch) or unknown overwrite
+        if patch:
+            fr._flag("Write overwrote an existing file with no captured baseline")
+    if content is not None:
+        fr.final = content
+    else:
+        fr._flag("Write result had no content")
+def _apply_shell_write(fr: FileRecon, op: str | None, content: str | None, first: bool) -> None:
+    """Fold one shell (Bash) write into `fr`.
+    Shell writes carry no pre-edit content, so a first touch starts from an empty baseline
+    (and an append as first touch is flagged because the prior content is unknown).
+    Unrecoverable content is flagged and leaves the buffer as is. Otherwise an append
+    extends the buffer, while any other op replaces it — flagged when the file was
+    already being tracked."""
+    appending = op == "append"
+    if first:
+        fr.baseline = fr.final = ""
+        if appending:
+            fr._flag("shell append as first write — prior file content is unknown")
+    if content is None:
+        fr._flag(f"shell {op or 'write'} content unrecoverable (sed -i / plain redirect)")
+        return
+    if appending:
+        fr.final = fr.final + content
+        return
+    if not first:
+        fr._flag("shell overwrite of a tracked file (prior content replaced)")
+    fr.final = content
+# --- the engine -------------------------------------------------------------------------
+def reconstruct(writes, baselines: dict | None = None) -> ReconResult:
+    """Replay an ordered list of writes into per-file records and one combined diff.
+    Each item of `writes` is `(file_id, tool, tur, write_op, write_content, derived)`, in
+    the order the writes should be applied. `baselines` maps file_id -> content used to
+    seed buffer mode when a native edit/write carries no `originalFile` of its own.
+    Derived or Bash writes go through the shell path; Write and Edit/MultiEdit through the
+    native paths; any other tool is flagged on the file. Files appear in first-touch order
+    and only changed files contribute to the diff text.
+    """
+    seeds = baselines or {}
+    by_file: dict[str, FileRecon] = {}           # insertion order == first-touch order
+    for file_id, tool, tur, write_op, write_content, derived in writes:
+        first = file_id not in by_file
+        if first:
+            by_file[file_id] = FileRecon(file_id=file_id)
+        fr = by_file[file_id]
+        fr.ops += 1
+        result = tur or {}
+        if derived or tool == "Bash":
+            _apply_shell_write(fr, write_op, write_content, first)
+        elif tool in _NATIVE_WRITE:
+            _apply_native_write(fr, result, first, seeds)
+        elif tool in _NATIVE_EDIT:
+            _apply_native_edit(fr, result, first, seeds)
+        else:
+            fr._flag(f"unhandled write tool {tool!r}")
+    files = list(by_file.values())
+    pieces = [_emit(fr) for fr in files if fr.changed]
+    return ReconResult(diff="".join(pieces), files=files, caveats=_caveats(files))
+def _caveats(files: list[FileRecon]) -> list[str]:
+    """Build the caveat lines for incompletely reconstructed files.
+    Returns an empty list when every file is complete; otherwise a header line with the
+    incomplete/total counts followed by one indented line per incomplete file listing its
+    reasons joined with "; "."""
+    flagged = [rec for rec in files if not rec.complete]
+    if flagged:
+        header = (f"{len(flagged)} of {len(files)} changed file(s) could not be fully reconstructed "
+                  "from the transcript — the diff may be incomplete for these:")
+        details = [f"  - {rec.file_id}: " + "; ".join(rec.reasons) for rec in flagged]
+        return [header] + details
+    return []
+def _emit(fr: FileRecon) -> str:
+    """Render one file's diff block, choosing the renderer that matches its mode."""
+    hunk_mode = fr.mode == "hunks"
+    return (_hunks_diff(fr.file_id, fr.hunks) if hunk_mode
+            else _file_diff(fr.file_id, fr.baseline, fr.final))
+def _file_diff(path: str, baseline: str, final: str) -> str:
+    """Render a git-style unified-diff block from a file's full before/after content.
+    An empty `baseline` is labelled as coming from /dev/null (new file); an empty `final`
+    with a non-empty baseline is labelled as going to /dev/null (deletion). Content is
+    compared line by line after `splitlines()`, so line terminators are not part of the
+    comparison."""
+    old_label = f"a/{path}" if baseline != "" else "/dev/null"
+    new_label = "/dev/null" if (final == "" and baseline != "") else f"b/{path}"
+    body_lines = list(difflib.unified_diff(baseline.splitlines(), final.splitlines(),
+                                           fromfile=old_label, tofile=new_label, lineterm=""))
+    header = f"diff --git a/{path} b/{path}\n"
+    return header + "\n".join(body_lines) + "\n"
+def _hunks_diff(path: str, hunks: list) -> str:
+    """Render a git-style block straight from structuredPatch hunks (fallback mode).
+    Each dict hunk yields an `@@` header built from its oldStart/oldLines/newStart/newLines
+    (missing keys default to 0) followed by its `lines`; non-dict entries are skipped."""
+    header = [f"diff --git a/{path} b/{path}", f"--- a/{path}", f"+++ b/{path}"]
+    body = []
+    for hunk in hunks:
+        if isinstance(hunk, dict):
+            body.append("@@ -{},{} +{},{} @@".format(
+                hunk.get("oldStart", 0), hunk.get("oldLines", 0),
+                hunk.get("newStart", 0), hunk.get("newLines", 0)))
+            body += hunk.get("lines") or []
+    return "\n".join(header + body) + "\n"

haid/bridge/usage.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Extract the cost denominator (normalized tokens) from a window's sessions.
+The easy half of the bridge: walk every assistant record's `message.usage` block, map it to a
+`cost.Usage`, and fold with `cost.measure`. Two deliberate choices:
+  * **Cost counts ALL branches, including abandoned ones** — you paid for the tokens spent on a
+    rewound/abandoned attempt even though its code didn't survive. (The DIFF, by contrast, is
+    the *active* end-state only — that asymmetry is the point.) `parse.records` is the full,
+    uuid-deduped record set across all branches, so summing over it is correct by construction.
+  * **Subagent tokens count** — a spawned agent's tokens are real spend, so we include each
+    subagent's records too.
+Process costs (turns, tool-calls, compactions, wall-clock) are carried separately by
+`cost.CostResult`, never folded into the token total. Stdlib only; no model.
+"""
+from __future__ import annotations
+from datetime import datetime
+from ..scoring import cost
+def _all_records(session):
+    """Yield the session's own records first, then each subagent's records in turn."""
+    for rec in session.parse.records:
+        yield rec
+    for agent in session.subagents:
+        for rec in agent.parse.records:
+            yield rec
+def extract_cost(sessions) -> cost.CostResult:
+    """Measure the cost of every record (main + subagent) in the given sessions.
+    Each record whose `message.usage` is a dict contributes one `cost.Usage`, tagged with
+    the message's `model`. Alongside the token usages this tallies tool_use blocks in
+    assistant records, user prompts (turns), `compact_boundary` system records
+    (compactions) and the wall-clock span of all record timestamps, and passes them to
+    `cost.measure` as separate arguments."""
+    # NOTE(review): if one API response is logged as several records that each repeat the
+    # same usage block, it would be counted more than once here — confirm upstream dedup.
+    usages: list[cost.Usage] = []
+    n_turns = 0
+    n_tool_calls = 0
+    n_compactions = 0
+    stamps: list[str] = []
+    for session in sessions:
+        for rec in _all_records(session):
+            message = rec.raw.get("message") or {}
+            usage = message.get("usage")
+            if isinstance(usage, dict):
+                tagged = {**usage, "model": message.get("model", "")}
+                usages.append(cost.Usage.from_dict(tagged))
+            if rec.type == "assistant" and isinstance(rec.content, list):
+                for blk in rec.content:
+                    if isinstance(blk, dict) and blk.get("type") == "tool_use":
+                        n_tool_calls += 1
+            if rec.is_user_prompt():
+                n_turns += 1
+            if rec.raw.get("type") == "system" and rec.raw.get("subtype") == "compact_boundary":
+                n_compactions += 1
+            if rec.timestamp:
+                stamps.append(rec.timestamp)
+    return cost.measure(
+        usages,
+        turns=n_turns,
+        tool_calls=n_tool_calls,
+        compactions=n_compactions,
+        wall_clock_s=_wall_clock(stamps),
+    )
+def _wall_clock(timestamps: list[str]) -> float | None:
+    """Return the seconds between the earliest and latest timestamp, or None.
+    Timestamps are ISO-8601 strings; a trailing "Z" is accepted as UTC. Each stamp is
+    parsed first and the extremes are taken over the parsed datetimes, so stamps with
+    different UTC offsets or formats are ordered by actual time rather than by string
+    order. Unparsable or non-string entries are skipped instead of voiding the result.
+    Returns None when fewer than two stamps parse, or when naive and offset-aware stamps
+    are mixed and therefore cannot be compared."""
+    moments = []
+    for stamp in timestamps:
+        if not isinstance(stamp, str):
+            continue
+        try:
+            moments.append(datetime.fromisoformat(stamp.replace("Z", "+00:00")))
+        except ValueError:
+            continue                             # malformed stamp: ignore just this one
+    if len(moments) < 2:
+        return None
+    try:
+        return (max(moments) - min(moments)).total_seconds()
+    except TypeError:                            # naive vs. aware datetimes don't compare
+        return None