claude-sql 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_sql/__init__.py +5 -0
- claude_sql/binding.py +740 -0
- claude_sql/blind_handover.py +155 -0
- claude_sql/checkpointer.py +202 -0
- claude_sql/cli.py +2344 -0
- claude_sql/cluster_worker.py +208 -0
- claude_sql/community_worker.py +306 -0
- claude_sql/config.py +380 -0
- claude_sql/embed_worker.py +482 -0
- claude_sql/freeze.py +189 -0
- claude_sql/friction_worker.py +561 -0
- claude_sql/install_source.py +77 -0
- claude_sql/judge_worker.py +459 -0
- claude_sql/judges.py +239 -0
- claude_sql/kappa_worker.py +257 -0
- claude_sql/llm_worker.py +1760 -0
- claude_sql/logging_setup.py +95 -0
- claude_sql/output.py +248 -0
- claude_sql/parquet_shards.py +172 -0
- claude_sql/retry_queue.py +180 -0
- claude_sql/review_sheet_render.py +167 -0
- claude_sql/review_sheet_worker.py +463 -0
- claude_sql/schemas.py +454 -0
- claude_sql/session_text.py +387 -0
- claude_sql/skills_catalog.py +354 -0
- claude_sql/sql_views.py +1751 -0
- claude_sql/terms_worker.py +145 -0
- claude_sql/ungrounded_worker.py +190 -0
- claude_sql-0.4.0.dist-info/METADATA +530 -0
- claude_sql-0.4.0.dist-info/RECORD +32 -0
- claude_sql-0.4.0.dist-info/WHEEL +4 -0
- claude_sql-0.4.0.dist-info/entry_points.txt +3 -0
claude_sql/binding.py
ADDED
|
@@ -0,0 +1,740 @@
|
|
|
1
|
+
"""Transcript-to-PR binding via commit trailers + git notes.
|
|
2
|
+
|
|
3
|
+
Implements RFC 0001 (`docs/rfc/0001-transcript-pr-binding.md`). Pure-stdlib
|
|
4
|
+
helpers for writing and reading the three-trailer + JSON-note convention
|
|
5
|
+
that points a merged commit at the AI-agent transcript that produced it.
|
|
6
|
+
|
|
7
|
+
Design boundaries:
|
|
8
|
+
|
|
9
|
+
* No new dependencies. ``hashlib``, ``subprocess``, ``pathlib``,
|
|
10
|
+
``dataclasses``, ``json``, ``os``, ``re`` — all stdlib.
|
|
11
|
+
* Subprocess to ``git`` only via ``subprocess.run([...], check=False,
|
|
12
|
+
capture_output=True, text=True)``; we inspect ``returncode`` and
|
|
13
|
+
``stderr`` explicitly. No ``shell=True``; no ``check=True``. The
|
|
14
|
+
caller's branch is responsible for raising; this keeps the helpers
|
|
15
|
+
composable.
|
|
16
|
+
* All public functions carry full type hints and pass ``ty`` strict
|
|
17
|
+
mode. The dataclass is ``frozen=True`` so a ``TranscriptBinding`` is
|
|
18
|
+
hashable and safe to share across threads.
|
|
19
|
+
* Every function is independently unit-testable against a real
|
|
20
|
+
``git init`` repository under ``tmp_path``. No Bedrock, no live
|
|
21
|
+
filesystem outside the JSONL discovery helpers.
|
|
22
|
+
|
|
23
|
+
Implements strategy-memo §Coherent Actions #1.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import hashlib
|
|
29
|
+
import json
|
|
30
|
+
import os
|
|
31
|
+
import re
|
|
32
|
+
import subprocess
|
|
33
|
+
from dataclasses import asdict, dataclass
|
|
34
|
+
from datetime import UTC, datetime
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Any, ClassVar, TypedDict
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BindingSources(TypedDict):
    """Shape of the diagnostic mapping produced by :func:`resolve_all_sources`.

    A ``TypedDict`` is used so the value is an ordinary ``dict`` at
    runtime and callers can index ``trailer`` / ``note`` without
    importing this type. ``TranscriptBinding`` is defined later in the
    module; the forward references rely on
    ``from __future__ import annotations``.
    """

    # Binding parsed from the commit-message trailers, or None if absent.
    trailer: TranscriptBinding | None
    # Binding parsed from the git note, or None if absent.
    note: TranscriptBinding | None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
# Public constants — wire format. RFC 0001 §Specification.
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
DIGEST_PREFIX: str = "sha256:"
"""Algorithm prefix placed in front of every ``Claude-Transcript-Digest:``
value; what follows is the 64-character hex digest from ``hashlib.sha256``."""

NOTES_REF: str = "transcripts"
"""Short ref name handed to ``git notes --ref=...``; per ``git-notes(1)``
it resolves to ``refs/notes/transcripts``."""

TRAILER_DIGEST: str = "Claude-Transcript-Digest"
"""Trailer key whose value is the SHA-256 digest of the JSONL transcript."""

TRAILER_URI: str = "Claude-Transcript-URI"
"""Trailer key whose value is the URI the transcript can be fetched from.

The value is one of ``file://<path>``, ``s3://<bucket>/<key>``, or
``git-notes://<refname>``. The v0 reference implementation only emits
``file://``; the other two schemes are spec-only entry points for future
emitters and readers per RFC 0001 §Specification.URI scheme.
"""

TRAILER_RUNTIME: str = "Claude-Agent-Runtime"
"""Trailer key whose value identifies the agent runtime (``vendor/version``)."""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
# Errors
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class BindingMismatchError(RuntimeError):
    """Signal that the trailer and the note point at different transcripts.

    Both parsed surfaces are attached as ``trailer`` and ``note`` so a
    caller can build a useful report (the CLI surfaces this as exit 70
    with a structured payload). Either attribute may be ``None`` when
    only one side could be parsed.
    """

    def __init__(
        self,
        message: str,
        *,
        trailer: TranscriptBinding | None,
        note: TranscriptBinding | None,
    ) -> None:
        # Attach both surfaces before delegating the message to RuntimeError.
        self.trailer, self.note = trailer, note
        super().__init__(message)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class GitInvocationError(RuntimeError):
    """Signal that a ``git`` subprocess exited with a non-zero status.

    The failing ``argv`` plus the process's ``returncode``, ``stdout``
    and ``stderr`` are kept as attributes so a caller can classify the
    failure without running ``git`` again.
    """

    def __init__(
        self,
        argv: list[str],
        *,
        returncode: int,
        stdout: str,
        stderr: str,
    ) -> None:
        command_line = " ".join(argv)
        detail = stderr.strip()
        super().__init__(f"git command failed (exit {returncode}): {command_line}\n{detail}")
        self.argv = argv
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# Core dataclass
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@dataclass(frozen=True)
class TranscriptBinding:
    """Immutable record tying one commit to the transcript that authored it.

    Each field stores the raw wire-format string from RFC 0001
    §Specification. The class itself does no parsing or validation;
    readers that need structure (for example the URI scheme) work on the
    string fields directly.
    """

    digest: str
    """``sha256:<64-hex-chars>``, identical to the
    ``Claude-Transcript-Digest:`` trailer value."""

    uri: str
    """``file://...`` | ``s3://...`` | ``git-notes://...``, identical to the
    ``Claude-Transcript-URI:`` trailer value."""

    agent_runtime: str
    """``vendor/version``, identical to the ``Claude-Agent-Runtime:``
    trailer value (e.g., ``claude-code/0.42.1``)."""

    transcript_id: str
    """Opaque session identifier taken from inside the JSONL artifact
    (e.g., Claude Code's session-id field). Stored in the note only;
    trailers omit it because it is forensic metadata rather than
    human-readable provenance."""

    captured_at: str
    """ISO-8601 UTC timestamp of when the binding was written. Note-only."""

    # Reserved JSON keys for the note body, mirrored by ``to_note_payload``
    # and ``_from_note_payload``. Unknown keys are ignored on read for
    # forward-compatibility per RFC 0001 §Compatibility.
    _NOTE_KEYS: ClassVar[tuple[str, ...]] = (
        "digest",
        "uri",
        "agent_runtime",
        "transcript_id",
        "captured_at",
    )

    def to_note_payload(self) -> dict[str, str]:
        """Return the mapping that is stored as JSON in ``refs/notes/transcripts``."""
        # Emission order is part of the serialized note: ``uri`` comes first.
        emit_order = ("uri", "digest", "agent_runtime", "transcript_id", "captured_at")
        return {field_name: getattr(self, field_name) for field_name in emit_order}

    def to_dict(self) -> dict[str, str]:
        """Return a plain ``dict`` copy for ``emit_json`` / structured CLI output."""
        return asdict(self)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Pure helpers (no subprocess, no I/O beyond reading a path)
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def projectify(cwd: Path) -> str:
    """Turn a working directory into Claude Code's project-folder name.

    Claude Code keeps transcripts in
    ``~/.claude/projects/<projectified>/``. This function derives
    ``<projectified>`` from ``str(cwd)`` by dropping every leading ``/``,
    turning each remaining ``/`` into ``-``, and prefixing one ``-``.

    Examples:
        ``/foo/bar`` → ``-foo-bar``
        ``/efs/laith/workplace/claude-sql`` →
        ``-efs-laith-workplace-claude-sql``
        ``/`` → ``-``

    The return value is a single relative path component; callers join
    it onto ``~/.claude/projects/`` to get the discovery root.
    """
    # For the root path the stripped remainder is empty, so the result is
    # just the "-" prefix; no separate special case is needed.
    segments = str(cwd).lstrip("/").split("/")
    return "-" + "-".join(segments)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def compute_digest(jsonl_path: Path) -> str:
    """Return the prefixed SHA-256 hex digest of the file's raw bytes.

    The digest covers the bytes exactly as stored (not a canonical-JSON
    form per line), so it can be recomputed with one line in any
    language and matches the artifact reviewers inspect. See RFC 0001
    §Security.Digest determinism for the trade-off.

    The file is hashed in fixed-size chunks rather than read in one
    call, so memory use stays constant however large the transcript is.
    The resulting digest is identical to hashing the whole file at once.

    Args:
        jsonl_path: Path of the JSONL transcript to hash.

    Returns:
        ``DIGEST_PREFIX`` followed by the 64-character hex digest.

    Raises:
        FileNotFoundError: ``jsonl_path`` does not exist. There is no
            recovery; a binding cannot be written without a transcript.
    """
    hasher = hashlib.sha256()
    with jsonl_path.open("rb") as fh:
        # 1 MiB reads keep the syscall count low while bounding memory use.
        while chunk := fh.read(1 << 20):
            hasher.update(chunk)
    return f"{DIGEST_PREFIX}{hasher.hexdigest()}"
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def detect_agent_runtime() -> str:
    """Identify the agent runtime that emitted the transcript.

    Reads ``CLAUDE_AGENT_RUNTIME`` from the environment; the convention
    is that the runtime sets it before invoking the bind hook (for
    example ``CLAUDE_AGENT_RUNTIME=cursor/0.42``). Surrounding whitespace
    is removed.

    Returns:
        The stripped environment value, or ``claude-code/unknown`` when
        the variable is unset, empty, or contains only whitespace.
    """
    # Strip before the truthiness test: a whitespace-only value must fall
    # back to the default instead of producing an empty runtime string.
    explicit = os.environ.get("CLAUDE_AGENT_RUNTIME", "").strip()
    return explicit or "claude-code/unknown"
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def find_active_transcript(cwd: Path) -> Path | None:
    """Locate the newest transcript JSONL recorded for ``cwd``.

    Looks in ``~/.claude/projects/<projectified-cwd>/`` for ``*.jsonl``
    files and picks the one with the greatest modification time.

    Returns:
        The most recently modified ``*.jsonl`` path, or ``None`` when
        the project directory is missing or holds no JSONL files (for
        example the directory has never been used inside Claude Code).
        The ``claude-sql bind`` caller treats ``None`` as "no transcript
        to bind, exit cleanly with no trailer."
    """
    session_dir = Path(os.path.expanduser("~")) / ".claude" / "projects" / projectify(cwd)
    if not session_dir.is_dir():
        return None
    # ``max`` returns the first entry among equal mtimes, the same pick as a
    # stable descending sort followed by taking index 0.
    return max(
        session_dir.glob("*.jsonl"),
        key=lambda entry: entry.stat().st_mtime,
        default=None,
    )
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _read_transcript_id(jsonl_path: Path) -> str:
|
|
286
|
+
"""Best-effort transcript-id extraction from the first JSON line.
|
|
287
|
+
|
|
288
|
+
Claude Code's JSONL emits a ``sessionId`` (or ``session_id``) field
|
|
289
|
+
on each line. We read just the first line — same convention as the
|
|
290
|
+
on-disk JSONL invariant — and parse it to find the id. Falls back
|
|
291
|
+
to the file's basename (stem) if no id is present, so the field
|
|
292
|
+
is always populated for the note.
|
|
293
|
+
|
|
294
|
+
Raises nothing: a malformed first line falls through to the
|
|
295
|
+
basename fallback. Forensic data; we don't fail the binding over
|
|
296
|
+
a parse error here.
|
|
297
|
+
"""
|
|
298
|
+
try:
|
|
299
|
+
with jsonl_path.open(encoding="utf-8") as fh:
|
|
300
|
+
first_line = fh.readline()
|
|
301
|
+
if not first_line.strip():
|
|
302
|
+
return jsonl_path.stem
|
|
303
|
+
record = json.loads(first_line)
|
|
304
|
+
if not isinstance(record, dict):
|
|
305
|
+
return jsonl_path.stem
|
|
306
|
+
for key in ("sessionId", "session_id", "uuid"):
|
|
307
|
+
value = record.get(key)
|
|
308
|
+
if isinstance(value, str) and value:
|
|
309
|
+
return value
|
|
310
|
+
except (OSError, json.JSONDecodeError):
|
|
311
|
+
# Fallthrough: forensic metadata, we'd rather have a stable
|
|
312
|
+
# filename-derived id than a ``KeyError`` on bind.
|
|
313
|
+
pass
|
|
314
|
+
return jsonl_path.stem
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def build_binding(
    *,
    transcript_path: Path,
    runtime: str | None = None,
    captured_at: str | None = None,
) -> TranscriptBinding:
    """Assemble a complete ``TranscriptBinding`` from a JSONL path.

    Args:
        transcript_path: The transcript file. It is hashed, its resolved
            absolute location becomes the ``file://`` URI, and its first
            line is inspected for a session id.
        runtime: Agent runtime string; when ``None`` the value comes
            from :func:`detect_agent_runtime`.
        captured_at: Timestamp string; when ``None`` the value is
            ``datetime.now(UTC).isoformat()``.

    Both overrides exist mainly so tests can get deterministic output.
    """
    return TranscriptBinding(
        digest=compute_digest(transcript_path),
        uri=transcript_path.resolve().as_uri(),
        transcript_id=_read_transcript_id(transcript_path),
        agent_runtime=detect_agent_runtime() if runtime is None else runtime,
        captured_at=datetime.now(UTC).isoformat() if captured_at is None else captured_at,
    )
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# ---------------------------------------------------------------------------
|
|
348
|
+
# Subprocess helpers
|
|
349
|
+
# ---------------------------------------------------------------------------
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _run_git(
|
|
353
|
+
argv: list[str],
|
|
354
|
+
*,
|
|
355
|
+
cwd: Path | None = None,
|
|
356
|
+
stdin: str | None = None,
|
|
357
|
+
) -> subprocess.CompletedProcess[str]:
|
|
358
|
+
"""Run a ``git`` subprocess with our standard knobs.
|
|
359
|
+
|
|
360
|
+
All git invocations in this module funnel through here so the
|
|
361
|
+
options are uniform: ``check=False`` (we inspect ``returncode``
|
|
362
|
+
explicitly), ``capture_output=True`` (we always want stdout +
|
|
363
|
+
stderr captured), ``text=True`` (we treat git output as UTF-8
|
|
364
|
+
text). Never ``shell=True``.
|
|
365
|
+
"""
|
|
366
|
+
return subprocess.run(
|
|
367
|
+
argv,
|
|
368
|
+
cwd=cwd,
|
|
369
|
+
capture_output=True,
|
|
370
|
+
text=True,
|
|
371
|
+
check=False,
|
|
372
|
+
input=stdin,
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _resolve_repo(repo: Path | None) -> Path:
|
|
377
|
+
"""Resolve the repo root.
|
|
378
|
+
|
|
379
|
+
``None`` means ``git rev-parse --show-toplevel`` from the current
|
|
380
|
+
process cwd. An explicit ``Path`` is passed through verbatim. The
|
|
381
|
+
resolved path is always absolute.
|
|
382
|
+
"""
|
|
383
|
+
if repo is not None:
|
|
384
|
+
return repo.resolve()
|
|
385
|
+
cp = _run_git(["git", "rev-parse", "--show-toplevel"])
|
|
386
|
+
if cp.returncode != 0:
|
|
387
|
+
raise GitInvocationError(
|
|
388
|
+
["git", "rev-parse", "--show-toplevel"],
|
|
389
|
+
returncode=cp.returncode,
|
|
390
|
+
stdout=cp.stdout,
|
|
391
|
+
stderr=cp.stderr,
|
|
392
|
+
)
|
|
393
|
+
return Path(cp.stdout.strip()).resolve()
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
# ---------------------------------------------------------------------------
|
|
397
|
+
# Writers
|
|
398
|
+
# ---------------------------------------------------------------------------
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def write_trailer(commit_msg_path: Path, binding: TranscriptBinding) -> None:
    """Add the three binding trailers to the commit-message file in place.

    Runs ``git interpret-trailers --in-place`` once per trailer, in the
    RFC 0001 wire-format order: digest, URI, runtime. Each call passes
    ``--if-exists replace`` so that re-running the bind step (for
    example under ``git commit --amend`` or a repeated
    ``prepare-commit-msg`` hook) replaces an existing trailer instead of
    appending a duplicate. ``git interpret-trailers`` takes care of the
    blank line in front of the trailer block.

    Args:
        commit_msg_path: Commit-message file to edit.
        binding: Source of the three trailer values.

    Raises:
        GitInvocationError: Any of the three ``git`` calls exits
            non-zero. Trailers written by earlier calls stay in the file.
    """
    target = str(commit_msg_path)
    # dicts keep insertion order, which fixes the wire-format ordering.
    trailers = {
        TRAILER_DIGEST: binding.digest,
        TRAILER_URI: binding.uri,
        TRAILER_RUNTIME: binding.agent_runtime,
    }
    for key, value in trailers.items():
        argv = [
            "git",
            "interpret-trailers",
            "--in-place",
            "--if-exists",
            "replace",
            "--trailer",
            f"{key}: {value}",
            target,
        ]
        result = _run_git(argv)
        if result.returncode == 0:
            continue
        raise GitInvocationError(
            argv,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def write_note(repo: Path, commit_sha: str, binding: TranscriptBinding) -> None:
    """Store ``binding`` as a JSON note on ``commit_sha``.

    Runs ``git -C <repo> notes --ref=transcripts add -f -m '<json>' <sha>``.
    ``-f`` overwrites any note already attached to the commit, so
    re-running the bind step (e.g., after an amend, or with a different
    transcript) replaces the entry rather than failing with "note
    already exists."

    The JSON is serialized without indentation and with compact
    separators, so the note body is a single line.

    Args:
        repo: Repository to run git in (passed via ``-C``).
        commit_sha: Commit the note is attached to.
        binding: Binding whose note payload is written.

    Raises:
        GitInvocationError: ``git notes add`` exits non-zero.
    """
    note_body = json.dumps(
        binding.to_note_payload(),
        ensure_ascii=False,
        separators=(",", ":"),
    )
    argv = ["git", "-C", str(repo), "notes", f"--ref={NOTES_REF}", "add", "-f", "-m"]
    argv += [note_body, commit_sha]
    result = _run_git(argv)
    if result.returncode != 0:
        raise GitInvocationError(
            argv,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
# ---------------------------------------------------------------------------
|
|
478
|
+
# Readers
|
|
479
|
+
# ---------------------------------------------------------------------------
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
_TRAILER_LINE_RE: re.Pattern[str] = re.compile(r"^([A-Za-z][A-Za-z0-9-]*):\s*(.*)$")
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _parse_trailer_block(text: str) -> dict[str, str]:
|
|
486
|
+
"""Parse the ``key: value`` pairs that ``git interpret-trailers --parse`` emits.
|
|
487
|
+
|
|
488
|
+
The output format is one trailer per line, ``Key: Value``. We
|
|
489
|
+
case-fold keys for the lookup map but preserve the original
|
|
490
|
+
capitalization in the returned dict so callers see exactly what
|
|
491
|
+
the writer emitted. When a key appears multiple times, the *first*
|
|
492
|
+
occurrence wins — matches RFC 0001 §Compatibility.Trailer survival
|
|
493
|
+
(rebase / fixup squash duplication tolerance).
|
|
494
|
+
"""
|
|
495
|
+
parsed: dict[str, str] = {}
|
|
496
|
+
seen_lower: set[str] = set()
|
|
497
|
+
for raw_line in text.splitlines():
|
|
498
|
+
line = raw_line.strip()
|
|
499
|
+
if not line:
|
|
500
|
+
continue
|
|
501
|
+
match = _TRAILER_LINE_RE.match(line)
|
|
502
|
+
if match is None:
|
|
503
|
+
continue
|
|
504
|
+
key, value = match.group(1), match.group(2).strip()
|
|
505
|
+
if key.lower() in seen_lower:
|
|
506
|
+
continue
|
|
507
|
+
parsed[key] = value
|
|
508
|
+
seen_lower.add(key.lower())
|
|
509
|
+
return parsed
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def read_trailer(
    commit_sha: str,
    *,
    repo: Path | None = None,
) -> TranscriptBinding | None:
    """Read the binding trailer set off a commit's message, if present.

    Subprocess sequence:

    1. ``git -C <repo> log --format=%B -1 <sha>`` fetches the commit
       message body.
    2. ``git interpret-trailers --parse`` (message piped on stdin) emits
       one line per trailer.

    The three required trailer keys are matched case-insensitively, so a
    trailer whose key was written with different capitalization is still
    found.

    Args:
        commit_sha: Commit whose message is inspected.
        repo: Repository path; ``None`` resolves the repository from the
            current working directory.

    Returns:
        ``None`` when the message has no trailers at all, or when one or
        more of the three required trailers is missing or has an empty
        value. Otherwise a :class:`TranscriptBinding` with ``digest``,
        ``uri`` and ``agent_runtime`` filled in. ``transcript_id`` and
        ``captured_at`` are note-only fields and are empty strings here;
        callers that need them fall back to the note. This matches RFC
        0001 §Specification.Resolution precedence: trailer first, note
        fallback.

    Raises:
        GitInvocationError: Either ``git`` call exits non-zero, for
            example because the commit does not exist (callers map this
            to exit 65).
    """
    repo_path = _resolve_repo(repo)
    log_argv = ["git", "-C", str(repo_path), "log", "--format=%B", "-1", commit_sha]
    cp = _run_git(log_argv)
    if cp.returncode != 0:
        raise GitInvocationError(
            log_argv, returncode=cp.returncode, stdout=cp.stdout, stderr=cp.stderr
        )
    parse_argv = ["git", "interpret-trailers", "--parse"]
    parsed_cp = _run_git(parse_argv, stdin=cp.stdout)
    if parsed_cp.returncode != 0:
        raise GitInvocationError(
            parse_argv,
            returncode=parsed_cp.returncode,
            stdout=parsed_cp.stdout,
            stderr=parsed_cp.stderr,
        )
    # The parser keeps each key's original capitalization, so fold the keys
    # before lookup. It already dedups case-insensitively, so folding cannot
    # make two entries collide.
    folded = {
        key.lower(): value
        for key, value in _parse_trailer_block(parsed_cp.stdout).items()
    }
    digest = folded.get(TRAILER_DIGEST.lower())
    uri = folded.get(TRAILER_URI.lower())
    runtime = folded.get(TRAILER_RUNTIME.lower())
    if not (digest and uri and runtime):
        return None
    return TranscriptBinding(
        digest=digest,
        uri=uri,
        agent_runtime=runtime,
        transcript_id="",
        captured_at="",
    )
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _from_note_payload(payload: dict[str, Any]) -> TranscriptBinding | None:
|
|
573
|
+
"""Build a binding from a parsed JSON note dict.
|
|
574
|
+
|
|
575
|
+
Returns ``None`` when any of the three wire-format fields
|
|
576
|
+
(``digest``, ``uri``, ``agent_runtime``) is missing or non-string.
|
|
577
|
+
Unknown keys are ignored — forward-compatibility per RFC 0001
|
|
578
|
+
§Compatibility.Forward compatibility.
|
|
579
|
+
"""
|
|
580
|
+
digest = payload.get("digest")
|
|
581
|
+
uri = payload.get("uri")
|
|
582
|
+
runtime = payload.get("agent_runtime")
|
|
583
|
+
if not isinstance(digest, str) or not isinstance(uri, str) or not isinstance(runtime, str):
|
|
584
|
+
return None
|
|
585
|
+
transcript_id = payload.get("transcript_id", "")
|
|
586
|
+
captured_at = payload.get("captured_at", "")
|
|
587
|
+
return TranscriptBinding(
|
|
588
|
+
digest=digest,
|
|
589
|
+
uri=uri,
|
|
590
|
+
agent_runtime=runtime,
|
|
591
|
+
transcript_id=transcript_id if isinstance(transcript_id, str) else "",
|
|
592
|
+
captured_at=captured_at if isinstance(captured_at, str) else "",
|
|
593
|
+
)
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def read_note(commit_sha: str, *, repo: Path) -> TranscriptBinding | None:
    """Load the binding stored as a JSON note on ``commit_sha``.

    Subprocess: ``git -C <repo> notes --ref=transcripts show <sha>``.

    Returns:
        ``None`` when the commit has no note (git exits non-zero and its
        stderr contains "no note found" or "no note for object"; this is
        treated as absence, not an error), when the note body is empty
        or is not valid JSON, when the JSON is not an object, or when
        the object lacks one of the three wire-format fields. Otherwise
        the parsed :class:`TranscriptBinding`.

    Raises:
        GitInvocationError: ``git`` exits non-zero for any other reason
            (e.g., the commit SHA itself does not exist), so callers can
            tell that apart from "note absent".
    """
    argv = ["git", "-C", str(repo), "notes", f"--ref={NOTES_REF}", "show", commit_sha]
    result = _run_git(argv)
    if result.returncode != 0:
        # NOTE(review): absence is detected from git's English message text;
        # presumably a git emitting translated messages would not match and
        # would surface as GitInvocationError. Confirm against deployments.
        complaint = result.stderr.lower()
        absence_markers = ("no note found", "no note for object")
        if any(marker in complaint for marker in absence_markers):
            return None
        raise GitInvocationError(
            argv,
            returncode=result.returncode,
            stdout=result.stdout,
            stderr=result.stderr,
        )
    body = result.stdout.strip()
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        return None
    return _from_note_payload(decoded) if isinstance(decoded, dict) else None
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
# ---------------------------------------------------------------------------
|
|
646
|
+
# Resolution — the public read-side entry point
|
|
647
|
+
# ---------------------------------------------------------------------------
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def resolve_all_sources(
    commit_sha: str,
    *,
    repo: Path | None = None,
) -> BindingSources:
    """Diagnostic lookup: report the trailer and the note side by side.

    Nothing is merged or compared here, so a trailer/note disagreement
    never raises; investigating such disagreement is the point of this
    function.

    Returns:
        A dict with ``trailer`` and ``note`` keys, each holding either a
        :class:`TranscriptBinding` or ``None``.

    Raises:
        GitInvocationError: An underlying ``git`` call fails (e.g., the
            commit SHA does not exist).
    """
    root = _resolve_repo(repo)
    # Keyword arguments are evaluated left to right: trailer first, then note.
    return BindingSources(
        trailer=read_trailer(commit_sha, repo=root),
        note=read_note(commit_sha, repo=root),
    )
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def resolve_commit_to_transcript(
    commit_sha: str,
    *,
    repo: Path | None = None,
    all_sources: bool = False,
) -> TranscriptBinding:
    """Map a commit SHA to the transcript bound to it.

    Follows RFC 0001 §Specification.Resolution precedence:

    1. Trailer first.
    2. Note fallback.
    3. Loud failure on disagreement.

    Returns:
        * Only the trailer present: the trailer binding as read.
        * Only the note present: the note binding as read.
        * Both present with equal digests: a merged binding whose
          ``digest``, ``uri`` and ``agent_runtime`` come from the
          trailer and whose ``transcript_id`` and ``captured_at`` come
          from the note.

    Raises:
        BindingMismatchError: Both surfaces are present and their
            digests differ.
        LookupError: Neither surface is present (CLI maps to exit 2).
        TypeError: ``all_sources`` is true. The parameter is kept only
            for signature compatibility; callers who want the
            two-surface dict call :func:`resolve_all_sources` directly
            so the return type stays statically narrowable.
    """
    if all_sources:
        raise TypeError(
            "resolve_commit_to_transcript(all_sources=True) is not supported; "
            "call resolve_all_sources() directly for the diagnostic dict shape"
        )
    root = _resolve_repo(repo)
    trailer = read_trailer(commit_sha, repo=root)
    note = read_note(commit_sha, repo=root)

    if trailer is None:
        if note is None:
            raise LookupError(f"no transcript binding for commit {commit_sha}")
        # Note-only commit.
        return note
    if note is None:
        # Trailer-only commit.
        return trailer

    # Both surfaces exist from here on.
    if trailer.digest != note.digest:
        raise BindingMismatchError(
            (
                f"trailer/note disagreement on commit {commit_sha}: "
                f"trailer digest={trailer.digest!r}, note digest={note.digest!r}"
            ),
            trailer=trailer,
            note=note,
        )
    # Digests agree: the trailer supplies the wire-format values (RFC 0001
    # precedence) and the note supplies the forensic fields.
    return TranscriptBinding(
        digest=trailer.digest,
        uri=trailer.uri,
        agent_runtime=trailer.agent_runtime,
        transcript_id=note.transcript_id,
        captured_at=note.captured_at,
    )
|