claude-sql 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
claude_sql/binding.py ADDED
@@ -0,0 +1,740 @@
1
+ """Transcript-to-PR binding via commit trailers + git notes.
2
+
3
+ Implements RFC 0001 (`docs/rfc/0001-transcript-pr-binding.md`). Pure-stdlib
4
+ helpers for writing and reading the three-trailer + JSON-note convention
5
+ that points a merged commit at the AI-agent transcript that produced it.
6
+
7
+ Design boundaries:
8
+
9
+ * No new dependencies. ``hashlib``, ``subprocess``, ``pathlib``,
10
+ ``dataclasses``, ``json``, ``os``, ``re`` — all stdlib.
11
+ * Subprocess to ``git`` only via ``subprocess.run([...], check=False,
12
+ capture_output=True, text=True)``; we inspect ``returncode`` and
13
+ ``stderr`` explicitly. No ``shell=True``; no ``check=True``. The
14
+ caller's branch is responsible for raising; this keeps the helpers
15
+ composable.
16
+ * All public functions carry full type hints and pass ``ty`` strict
17
+ mode. The dataclass is ``frozen=True`` so a ``TranscriptBinding`` is
18
+ hashable and safe to share across threads.
19
+ * Every function is independently unit-testable against a real
20
+ ``git init`` repository under ``tmp_path``. No Bedrock, no live
21
+ filesystem outside the JSONL discovery helpers.
22
+
23
+ Implements strategy-memo §Coherent Actions #1.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import hashlib
29
+ import json
30
+ import os
31
+ import re
32
+ import subprocess
33
+ from dataclasses import asdict, dataclass
34
+ from datetime import UTC, datetime
35
+ from pathlib import Path
36
+ from typing import Any, ClassVar, TypedDict
37
+
38
+
39
class BindingSources(TypedDict):
    """Diagnostic dict shape returned by :func:`resolve_all_sources`.

    Defined as a ``TypedDict`` rather than a ``dataclass`` so callers
    can structurally narrow on the ``trailer`` / ``note`` keys without
    importing the type. Note that the *values* are
    :class:`TranscriptBinding` dataclass instances (or ``None``), so
    the dict is not directly ``json.dumps``-able; convert each present
    value with ``TranscriptBinding.to_dict()`` first.

    The forward references resolve under
    ``from __future__ import annotations``; ``TranscriptBinding`` is
    declared further down in this module.
    """

    # Binding parsed from the commit-message trailers, or ``None`` when
    # the trailer surface is absent or incomplete.
    trailer: TranscriptBinding | None
    # Binding parsed from the JSON git note, or ``None`` when the note
    # surface is absent or unusable.
    note: TranscriptBinding | None
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Public constants — wire format. RFC 0001 §Specification.
56
+ # ---------------------------------------------------------------------------
57
+
58
# These literals are the on-disk wire format: the writers in this module
# embed them in commit trailers and the notes ref, and the readers look
# them up again. Changing a value would stop the readers from finding
# bindings that were recorded with the old spelling.
DIGEST_PREFIX: str = "sha256:"
"""Prefix on every ``Claude-Transcript-Digest:`` value. The rest is
a 64-character hex digest from ``hashlib.sha256``."""

NOTES_REF: str = "transcripts"
"""Short ref name passed to ``git notes --ref=...``. Resolves to
``refs/notes/transcripts`` per ``git-notes(1)``."""

TRAILER_DIGEST: str = "Claude-Transcript-Digest"
"""Trailer key carrying the SHA-256 digest of the JSONL transcript."""

TRAILER_URI: str = "Claude-Transcript-URI"
"""Trailer key carrying the URI where the transcript can be retrieved.

One of ``file://<path>``, ``s3://<bucket>/<key>``, or
``git-notes://<refname>``. The reference implementation in v0 only
emits ``file://``; the other two are spec-only entry points for future
emitters and readers per RFC 0001 §Specification.URI scheme.
"""

TRAILER_RUNTIME: str = "Claude-Agent-Runtime"
"""Trailer key carrying the agent runtime identifier (``vendor/version``)."""
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # Errors
84
+ # ---------------------------------------------------------------------------
85
+
86
+
87
class BindingMismatchError(RuntimeError):
    """Signal that the trailer and the note point at different transcripts.

    Both parsed surfaces are attached so the caller can build a useful
    diagnostic (the CLI reports this as exit 70 with a structured
    payload).

    Attributes:
        trailer: Binding parsed from the commit trailers, or ``None``.
        note: Binding parsed from the git note, or ``None``.
    """

    def __init__(
        self,
        message: str,
        *,
        trailer: TranscriptBinding | None,
        note: TranscriptBinding | None,
    ) -> None:
        """Store the human-readable ``message`` plus both parsed surfaces."""
        super().__init__(message)
        self.trailer: TranscriptBinding | None = trailer
        self.note: TranscriptBinding | None = note
107
+
108
+
109
class GitInvocationError(RuntimeError):
    """Signal that a ``git`` subprocess exited with a non-zero status.

    The full invocation result is kept on the exception so a caller can
    classify the failure (commit not found, empty notes ref, ...) from
    ``returncode`` / ``stderr`` without running ``git`` a second time.

    Attributes:
        argv: The argument vector that was executed.
        returncode: Exit status reported by the process.
        stdout: Captured standard output, unmodified.
        stderr: Captured standard error, unmodified.
    """

    def __init__(
        self,
        argv: list[str],
        *,
        returncode: int,
        stdout: str,
        stderr: str,
    ) -> None:
        """Build the message from the command line and trimmed stderr."""
        command_line = " ".join(argv)
        detail = stderr.strip()
        super().__init__(f"git command failed (exit {returncode}): {command_line}\n{detail}")
        self.argv = argv
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # Core dataclass
136
+ # ---------------------------------------------------------------------------
137
+
138
+
139
@dataclass(frozen=True)
class TranscriptBinding:
    """Immutable record linking one commit to the transcript behind it.

    Each field stores the raw wire-format string from RFC 0001
    §Specification. The class performs no parsing or validation of its
    own; readers that need structure (for instance the URI scheme) work
    on the string fields directly.
    """

    digest: str
    """``sha256:<64-hex-chars>`` — identical to the
    ``Claude-Transcript-Digest:`` trailer value."""

    uri: str
    """``file://...`` | ``s3://...`` | ``git-notes://...`` — identical to
    the ``Claude-Transcript-URI:`` trailer value."""

    agent_runtime: str
    """``vendor/version`` — identical to the ``Claude-Agent-Runtime:``
    trailer value (e.g., ``claude-code/0.42.1``)."""

    transcript_id: str
    """Opaque session identifier taken from inside the JSONL artifact.
    Stored in the note only; the trailer readers leave it empty."""

    captured_at: str
    """ISO-8601 UTC timestamp of when the binding was written. Note-only."""

    # Reserved JSON keys of the note payload. Unknown keys are ignored on
    # read for forward-compatibility (RFC 0001 §Compatibility).
    _NOTE_KEYS: ClassVar[tuple[str, ...]] = (
        "digest",
        "uri",
        "agent_runtime",
        "transcript_id",
        "captured_at",
    )

    def to_note_payload(self) -> dict[str, str]:
        """Return the JSON-ready mapping stored in ``refs/notes/transcripts``.

        Key order is part of the serialized output: ``uri`` first, then
        ``digest``, ``agent_runtime``, ``transcript_id``, ``captured_at``.
        """
        note_order = ("uri", "digest", "agent_runtime", "transcript_id", "captured_at")
        return {field_name: getattr(self, field_name) for field_name in note_order}

    def to_dict(self) -> dict[str, str]:
        """Return a plain dict of all fields, in declaration order."""
        as_mapping = asdict(self)
        return as_mapping
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Pure helpers (no subprocess, no I/O beyond reading a path)
199
+ # ---------------------------------------------------------------------------
200
+
201
+
202
def projectify(cwd: Path) -> str:
    """Turn a working directory into Claude Code's project-folder name.

    Claude Code keeps transcripts in
    ``~/.claude/projects/<projectified>/``. This helper derives
    ``<projectified>`` from ``str(cwd)`` by dropping the leading ``/``
    characters, turning every remaining ``/`` into ``-``, and putting a
    single ``-`` in front.

    Examples:
        ``/foo/bar`` → ``-foo-bar``
        ``/efs/laith/workplace/claude-sql`` →
        ``-efs-laith-workplace-claude-sql``
        ``/`` → ``-``

    The result is a single relative path component; callers append it
    to ``~/.claude/projects/`` to obtain the discovery root.

    NOTE(review): only ``/`` is rewritten here. Confirm against Claude
    Code whether other characters (e.g. ``.``) are also mapped to ``-``.
    """
    raw = str(cwd)
    # The filesystem root would strip down to an empty string, so it is
    # answered directly.
    if raw == "/":
        return "-"
    segments = raw.lstrip("/").split("/")
    return "-" + "-".join(segments)
227
+
228
+
229
def compute_digest(jsonl_path: Path) -> str:
    """Return the prefixed SHA-256 hex digest of the file's raw bytes.

    The digest is computed over the bytes exactly as stored on disk
    (not over canonicalized JSON), so it can be reproduced with any
    standard SHA-256 tool. See RFC 0001 §Security.Digest determinism for
    the trade-off.

    The file is hashed incrementally in fixed-size chunks, so memory use
    stays constant no matter how large the transcript has grown.

    Args:
        jsonl_path: Path of the JSONL transcript to hash.

    Returns:
        ``DIGEST_PREFIX`` followed by the 64-character hex digest.

    Raises:
        FileNotFoundError: ``jsonl_path`` does not exist. There is no
            recovery; a binding cannot be written without a transcript.
        OSError: Any other failure while opening or reading the file.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with jsonl_path.open("rb") as fh:
        while chunk := fh.read(chunk_size):
            hasher.update(chunk)
    return f"{DIGEST_PREFIX}{hasher.hexdigest()}"
244
+
245
+
246
def detect_agent_runtime() -> str:
    """Identify the agent runtime that emitted the transcript.

    Reads the ``CLAUDE_AGENT_RUNTIME`` environment variable, which the
    runtime (or its hook wiring) is expected to set before invoking the
    bind hook, e.g. ``CLAUDE_AGENT_RUNTIME=cursor/0.42``.

    Returns:
        The variable's value with surrounding whitespace removed, or
        ``"claude-code/unknown"`` when the variable is unset, empty, or
        contains only whitespace. The result is therefore never an empty
        string, which the trailer reader would treat as "no binding".
    """
    # Strip before the truthiness test: a whitespace-only value must fall
    # back to the default instead of producing an empty runtime.
    explicit = os.environ.get("CLAUDE_AGENT_RUNTIME", "").strip()
    return explicit or "claude-code/unknown"
260
+
261
+
262
def find_active_transcript(cwd: Path) -> Path | None:
    """Resolve the most recently modified transcript JSONL for ``cwd``.

    Scans ``~/.claude/projects/<projectified-cwd>/*.jsonl`` and returns
    the entry with the largest modification time.

    Args:
        cwd: Working directory whose Claude Code project folder is searched.

    Returns:
        The newest ``*.jsonl`` path, or ``None`` when the project folder
        does not exist or holds no readable JSONL file (for example the
        directory has never been used inside Claude Code). The caller
        (``claude-sql bind``) treats ``None`` as "nothing to bind".
    """
    home = Path(os.path.expanduser("~"))
    projects_root = home / ".claude" / "projects" / projectify(cwd)
    if not projects_root.is_dir():
        return None

    newest: Path | None = None
    newest_mtime = float("-inf")
    for candidate in projects_root.glob("*.jsonl"):
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            # The file vanished (or became unreadable) between the glob
            # and the stat; skip it rather than abort discovery.
            continue
        # Strict comparison keeps the first-listed file on an mtime tie.
        if mtime > newest_mtime:
            newest, newest_mtime = candidate, mtime
    return newest
283
+
284
+
285
def _read_transcript_id(jsonl_path: Path) -> str:
    """Best-effort transcript-id extraction from the first JSON line.

    Only the first line of the file is read and parsed. If it is a JSON
    object, the keys ``sessionId``, ``session_id`` and ``uuid`` are
    tried in that order and the first non-empty string value wins.

    Args:
        jsonl_path: Path of the JSONL transcript.

    Returns:
        The discovered id, or ``jsonl_path.stem`` when the file cannot
        be read, the first line is blank, is not valid UTF-8 or JSON, is
        not an object, or carries none of the known keys. The field is
        therefore always populated.

    This function does not raise for unreadable or malformed input: the
    id is forensic metadata and must never fail the binding.
    """
    fallback = jsonl_path.stem
    try:
        with jsonl_path.open(encoding="utf-8") as fh:
            first_line = fh.readline()
        record = json.loads(first_line) if first_line.strip() else None
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a non-UTF-8 first line.
        return fallback

    if isinstance(record, dict):
        for key in ("sessionId", "session_id", "uuid"):
            value = record.get(key)
            if isinstance(value, str) and value:
                return value
    return fallback
315
+
316
+
317
def build_binding(
    *,
    transcript_path: Path,
    runtime: str | None = None,
    captured_at: str | None = None,
) -> TranscriptBinding:
    """Assemble a fully populated ``TranscriptBinding`` for a JSONL file.

    Args:
        transcript_path: Transcript to bind; it is hashed, turned into a
            ``file://`` URI from its resolved path, and scanned for an id.
        runtime: Runtime identifier; when ``None`` the value comes from
            :func:`detect_agent_runtime`.
        captured_at: Timestamp string; when ``None`` the current UTC
            time is used via ``datetime.now(UTC).isoformat()``.

    Returns:
        The binding with all five fields set.

    The two optional arguments exist mainly so tests can pin
    deterministic values.
    """
    # Keyword arguments are evaluated top to bottom, so the file is
    # hashed first and the clock is read last.
    return TranscriptBinding(
        digest=compute_digest(transcript_path),
        uri=transcript_path.resolve().as_uri(),
        transcript_id=_read_transcript_id(transcript_path),
        agent_runtime=detect_agent_runtime() if runtime is None else runtime,
        captured_at=datetime.now(UTC).isoformat() if captured_at is None else captured_at,
    )
345
+
346
+
347
+ # ---------------------------------------------------------------------------
348
+ # Subprocess helpers
349
+ # ---------------------------------------------------------------------------
350
+
351
+
352
def _run_git(
    argv: list[str],
    *,
    cwd: Path | None = None,
    stdin: str | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run a ``git`` subprocess with this module's standard options.

    Every git invocation in the module goes through this helper so the
    options stay uniform.

    Args:
        argv: Full argument vector, including the ``git`` executable.
            Passed as a list; ``shell=True`` is never used.
        cwd: Working directory for the child, or ``None`` to inherit.
        stdin: Text to feed to the child's standard input, if any.

    Returns:
        The ``CompletedProcess`` with ``stdout`` / ``stderr`` captured
        as text. The exit status is not checked here (``check=False``);
        callers inspect ``returncode`` themselves.
    """
    return subprocess.run(
        argv,
        cwd=cwd,
        capture_output=True,
        text=True,
        # Decode and encode as UTF-8 explicitly instead of relying on the
        # locale default, which may be ASCII in a bare hook environment.
        encoding="utf-8",
        # Undecodable bytes become U+FFFD rather than raising
        # UnicodeDecodeError out of a helper meant to surface failures
        # through ``returncode``.
        errors="replace",
        check=False,
        input=stdin,
    )
374
+
375
+
376
def _resolve_repo(repo: Path | None) -> Path:
    """Return the absolute repository path to operate on.

    Args:
        repo: Explicit repository path, or ``None`` to ask git for the
            top-level directory of the process's current working tree
            (``git rev-parse --show-toplevel``).

    Returns:
        The path after ``Path.resolve()``; this applies to an explicit
        ``repo`` as well as to the discovered one.

    Raises:
        GitInvocationError: ``repo`` is ``None`` and ``git rev-parse``
            exits non-zero.
    """
    if repo is None:
        toplevel_argv = ["git", "rev-parse", "--show-toplevel"]
        result = _run_git(toplevel_argv)
        if result.returncode != 0:
            raise GitInvocationError(
                toplevel_argv,
                returncode=result.returncode,
                stdout=result.stdout,
                stderr=result.stderr,
            )
        repo = Path(result.stdout.strip())
    return repo.resolve()
394
+
395
+
396
+ # ---------------------------------------------------------------------------
397
+ # Writers
398
+ # ---------------------------------------------------------------------------
399
+
400
+
401
def write_trailer(commit_msg_path: Path, binding: TranscriptBinding) -> None:
    """Add the three binding trailers to a commit-message file in place.

    Runs ``git interpret-trailers --in-place --if-exists replace`` once
    per trailer, in wire-format order: digest, URI, runtime. Because of
    ``--if-exists replace``, running it again on a message that already
    has the trailer (``git commit --amend``, a repeated
    ``prepare-commit-msg`` hook) replaces the value instead of adding a
    duplicate line.

    Args:
        commit_msg_path: Commit-message file to edit.
        binding: Source of the three trailer values.

    Raises:
        GitInvocationError: Any of the three git calls exits non-zero.
            Trailers written by earlier calls stay in the file.
    """
    message_file = str(commit_msg_path)
    # Insertion order of the dict is the emission order.
    trailers = {
        TRAILER_DIGEST: binding.digest,
        TRAILER_URI: binding.uri,
        TRAILER_RUNTIME: binding.agent_runtime,
    }
    for trailer_key, trailer_value in trailers.items():
        command = [
            "git",
            "interpret-trailers",
            "--in-place",
            "--if-exists",
            "replace",
            "--trailer",
            f"{trailer_key}: {trailer_value}",
            message_file,
        ]
        result = _run_git(command)
        if result.returncode == 0:
            continue
        raise GitInvocationError(
            command,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )
440
+
441
+
442
def write_note(repo: Path, commit_sha: str, binding: TranscriptBinding) -> None:
    """Store ``binding`` as a JSON git note on ``commit_sha``.

    Runs ``git -C <repo> notes --ref=transcripts add -f -m <json> <sha>``.
    The ``-f`` flag overwrites any note already attached to the commit,
    so re-binding (for example after an amend) replaces the entry
    instead of failing with "note already exists".

    The payload is compact single-line JSON (no indentation, no spaces
    after separators) and keeps non-ASCII characters unescaped.

    Args:
        repo: Repository the note is written into.
        commit_sha: Commit the note is attached to.
        binding: Binding to serialize via ``to_note_payload()``.

    Raises:
        GitInvocationError: ``git notes add`` exits non-zero.
    """
    note_json = json.dumps(
        binding.to_note_payload(),
        ensure_ascii=False,
        separators=(",", ":"),
    )
    command = ["git", "-C", str(repo), "notes", f"--ref={NOTES_REF}"]
    command += ["add", "-f", "-m", note_json, commit_sha]
    result = _run_git(command)
    if result.returncode != 0:
        raise GitInvocationError(
            command,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )
475
+
476
+
477
+ # ---------------------------------------------------------------------------
478
+ # Readers
479
+ # ---------------------------------------------------------------------------
480
+
481
+
482
_TRAILER_LINE_RE: re.Pattern[str] = re.compile(r"^([A-Za-z][A-Za-z0-9-]*):\s*(.*)$")


def _parse_trailer_block(text: str) -> dict[str, str]:
    """Turn ``git interpret-trailers --parse`` output into a key/value dict.

    The input is expected to hold one ``Key: Value`` trailer per line.
    Each line is stripped; blank lines and lines that do not look like a
    trailer are skipped.

    Duplicate detection ignores case, and the *first* occurrence of a
    key wins (RFC 0001 §Compatibility.Trailer survival: rebase / fixup
    squashes may duplicate trailers). The returned dict keeps the key
    exactly as that first occurrence spelled it, with the value
    stripped of surrounding whitespace.
    """
    # Lower-cased key -> (key as written, value); ``setdefault`` keeps
    # the earliest entry and the dict keeps first-seen order.
    first_by_folded: dict[str, tuple[str, str]] = {}
    for candidate in map(str.strip, text.splitlines()):
        hit = _TRAILER_LINE_RE.match(candidate)
        if hit is None:
            # Also covers blank lines: the pattern needs a leading letter.
            continue
        written_key, raw_value = hit.groups()
        first_by_folded.setdefault(written_key.lower(), (written_key, raw_value.strip()))
    return dict(first_by_folded.values())
510
+
511
+
512
def read_trailer(
    commit_sha: str,
    *,
    repo: Path | None = None,
) -> TranscriptBinding | None:
    """Read the binding trailer set off a commit's message, if present.

    Subprocess sequence:

    1. ``git -C <repo> log --format=%B -1 <sha>`` — the commit message body.
    2. ``git interpret-trailers --parse`` (message piped on stdin) —
       emits one line per trailer.

    Trailer keys are looked up case-insensitively, matching the
    case-insensitive de-duplication done by ``_parse_trailer_block``;
    a trailer spelled ``claude-transcript-digest:`` is still found.

    Args:
        commit_sha: Commit (or any revision git accepts) to inspect.
        repo: Repository path; ``None`` resolves the repository from the
            current working directory.

    Returns:
        A :class:`TranscriptBinding` when all three trailers are present
        with non-empty values. ``transcript_id`` and ``captured_at`` are
        note-only fields and are set to ``""`` here; callers wanting
        them fall back to the note (RFC 0001 §Specification.Resolution
        precedence: trailer first, note fallback).

        ``None`` when the message has no trailers, or when one or more
        of the three required trailers is missing or empty.

    Raises:
        GitInvocationError: ``git log`` fails (e.g. the commit does not
            exist; callers map this to exit 65) or
            ``git interpret-trailers`` fails.
    """
    repo_path = _resolve_repo(repo)
    log_argv = ["git", "-C", str(repo_path), "log", "--format=%B", "-1", commit_sha]
    cp = _run_git(log_argv)
    if cp.returncode != 0:
        raise GitInvocationError(
            log_argv, returncode=cp.returncode, stdout=cp.stdout, stderr=cp.stderr
        )

    parse_argv = ["git", "interpret-trailers", "--parse"]
    parsed_cp = _run_git(parse_argv, stdin=cp.stdout)
    if parsed_cp.returncode != 0:
        raise GitInvocationError(
            parse_argv,
            returncode=parsed_cp.returncode,
            stdout=parsed_cp.stdout,
            stderr=parsed_cp.stderr,
        )

    # The parser keeps only the first spelling of each key (compared
    # case-insensitively), so folding the keys cannot create collisions.
    by_folded_key = {
        key.lower(): value for key, value in _parse_trailer_block(parsed_cp.stdout).items()
    }
    digest = by_folded_key.get(TRAILER_DIGEST.lower())
    uri = by_folded_key.get(TRAILER_URI.lower())
    runtime = by_folded_key.get(TRAILER_RUNTIME.lower())
    if not (digest and uri and runtime):
        return None
    return TranscriptBinding(
        digest=digest,
        uri=uri,
        agent_runtime=runtime,
        transcript_id="",
        captured_at="",
    )
570
+
571
+
572
def _from_note_payload(payload: dict[str, Any]) -> TranscriptBinding | None:
    """Convert a decoded JSON note object into a binding.

    Args:
        payload: The note body after ``json.loads``.

    Returns:
        ``None`` unless ``digest``, ``uri`` and ``agent_runtime`` are
        all present as strings. Otherwise a :class:`TranscriptBinding`;
        ``transcript_id`` and ``captured_at`` fall back to ``""`` when
        absent or not a string. Keys this module does not know are
        ignored (RFC 0001 §Compatibility.Forward compatibility).
    """
    digest = payload.get("digest")
    uri = payload.get("uri")
    runtime = payload.get("agent_runtime")
    wire_fields_ok = (
        isinstance(digest, str) and isinstance(uri, str) and isinstance(runtime, str)
    )
    if not wire_fields_ok:
        return None

    transcript_id = payload.get("transcript_id", "")
    if not isinstance(transcript_id, str):
        transcript_id = ""
    captured_at = payload.get("captured_at", "")
    if not isinstance(captured_at, str):
        captured_at = ""

    return TranscriptBinding(
        digest=digest,
        uri=uri,
        agent_runtime=runtime,
        transcript_id=transcript_id,
        captured_at=captured_at,
    )
594
+
595
+
596
def read_note(commit_sha: str, *, repo: Path) -> TranscriptBinding | None:
    """Read the binding stored as a JSON note on ``commit_sha``.

    Subprocess: ``git -C <repo> notes --ref=transcripts show <sha>``.

    Args:
        commit_sha: Commit whose note is requested.
        repo: Repository to read from.

    Returns:
        The parsed binding, or ``None`` when

        * git exits non-zero and its stderr says no note exists for the
          object (absence, not an error),
        * the note body is empty or is not valid JSON,
        * the JSON is not an object, or lacks one of the three
          wire-format fields.

    Raises:
        GitInvocationError: git exits non-zero for any other reason
            (e.g. the commit itself does not exist), so callers can tell
            that apart from "note absent".

    NOTE(review): absence is detected by matching English stderr text;
    presumably a localized git would not match — confirm whether the
    environment pins the message language.
    """
    command = [
        "git",
        "-C",
        str(repo),
        "notes",
        f"--ref={NOTES_REF}",
        "show",
        commit_sha,
    ]
    result = _run_git(command)
    if result.returncode != 0:
        lowered = result.stderr.lower()
        absence_markers = ("no note found", "no note for object")
        if any(marker in lowered for marker in absence_markers):
            return None
        raise GitInvocationError(
            command,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )

    note_text = result.stdout.strip()
    if not note_text:
        return None
    try:
        decoded = json.loads(note_text)
    except json.JSONDecodeError:
        return None
    return _from_note_payload(decoded) if isinstance(decoded, dict) else None
643
+
644
+
645
+ # ---------------------------------------------------------------------------
646
+ # Resolution — the public read-side entry point
647
+ # ---------------------------------------------------------------------------
648
+
649
+
650
def resolve_all_sources(
    commit_sha: str,
    *,
    repo: Path | None = None,
) -> BindingSources:
    """Return the trailer and note bindings side by side, unmerged.

    This is the diagnostic entry point: it does not compare the two
    surfaces, so a trailer/note disagreement is reported as data rather
    than raised.

    Args:
        commit_sha: Commit to inspect.
        repo: Repository path; ``None`` resolves it from the current
            working directory.

    Returns:
        A dict with ``trailer`` and ``note`` keys, each a
        :class:`TranscriptBinding` or ``None``.

    Raises:
        GitInvocationError: An underlying git call fails (e.g. the
            commit does not exist).
    """
    root = _resolve_repo(repo)
    # The trailer is read before the note.
    return {
        "trailer": read_trailer(commit_sha, repo=root),
        "note": read_note(commit_sha, repo=root),
    }
669
+
670
+
671
def resolve_commit_to_transcript(
    commit_sha: str,
    *,
    repo: Path | None = None,
    all_sources: bool = False,
) -> TranscriptBinding:
    """Resolve a commit SHA to its bound transcript.

    Follows RFC 0001 §Specification.Resolution precedence:

    1. Trailer first.
    2. Note fallback.
    3. Loud failure on disagreement.

    Args:
        commit_sha: Commit to resolve.
        repo: Repository path; ``None`` resolves it from the current
            working directory.
        all_sources: Kept only for signature compatibility. Passing
            ``True`` is rejected with ``TypeError``; call
            :func:`resolve_all_sources` for the two-surface dict.

    Returns:
        * Only the trailer present: the trailer binding (its
          ``transcript_id`` / ``captured_at`` are empty).
        * Only the note present: the note binding.
        * Both present with equal digests: a merged binding taking
          ``digest``, ``uri`` and ``agent_runtime`` from the trailer and
          ``transcript_id`` and ``captured_at`` from the note.

    Raises:
        TypeError: ``all_sources`` is true.
        LookupError: Neither surface is present (CLI maps to exit 2).
        BindingMismatchError: Both surfaces are present and their
            digests differ.
        GitInvocationError: An underlying git call fails.
    """
    if all_sources:
        raise TypeError(
            "resolve_commit_to_transcript(all_sources=True) is not supported; "
            "call resolve_all_sources() directly for the diagnostic dict shape"
        )
    root = _resolve_repo(repo)
    from_trailer = read_trailer(commit_sha, repo=root)
    from_note = read_note(commit_sha, repo=root)

    # Single-surface and no-surface cases are settled first.
    if from_trailer is None:
        if from_note is None:
            raise LookupError(f"no transcript binding for commit {commit_sha}")
        return from_note
    if from_note is None:
        return from_trailer

    # Both surfaces exist; only the digest is compared.
    if from_trailer.digest != from_note.digest:
        raise BindingMismatchError(
            (
                f"trailer/note disagreement on commit {commit_sha}: "
                f"trailer digest={from_trailer.digest!r}, note digest={from_note.digest!r}"
            ),
            trailer=from_trailer,
            note=from_note,
        )
    # The trailer supplies the wire values (RFC 0001 precedence); the
    # note contributes the forensic fields the trailer cannot carry.
    return TranscriptBinding(
        digest=from_trailer.digest,
        uri=from_trailer.uri,
        agent_runtime=from_trailer.agent_runtime,
        transcript_id=from_note.transcript_id,
        captured_at=from_note.captured_at,
    )