claude-sql 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_sql/__init__.py +5 -0
- claude_sql/binding.py +740 -0
- claude_sql/blind_handover.py +155 -0
- claude_sql/checkpointer.py +202 -0
- claude_sql/cli.py +2344 -0
- claude_sql/cluster_worker.py +208 -0
- claude_sql/community_worker.py +306 -0
- claude_sql/config.py +380 -0
- claude_sql/embed_worker.py +482 -0
- claude_sql/freeze.py +189 -0
- claude_sql/friction_worker.py +561 -0
- claude_sql/install_source.py +77 -0
- claude_sql/judge_worker.py +459 -0
- claude_sql/judges.py +239 -0
- claude_sql/kappa_worker.py +257 -0
- claude_sql/llm_worker.py +1760 -0
- claude_sql/logging_setup.py +95 -0
- claude_sql/output.py +248 -0
- claude_sql/parquet_shards.py +172 -0
- claude_sql/retry_queue.py +180 -0
- claude_sql/review_sheet_render.py +167 -0
- claude_sql/review_sheet_worker.py +463 -0
- claude_sql/schemas.py +454 -0
- claude_sql/session_text.py +387 -0
- claude_sql/skills_catalog.py +354 -0
- claude_sql/sql_views.py +1751 -0
- claude_sql/terms_worker.py +145 -0
- claude_sql/ungrounded_worker.py +190 -0
- claude_sql-0.4.0.dist-info/METADATA +530 -0
- claude_sql-0.4.0.dist-info/RECORD +32 -0
- claude_sql-0.4.0.dist-info/WHEEL +4 -0
- claude_sql-0.4.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Markdown renderer for the PR review sheet.
|
|
2
|
+
|
|
3
|
+
Pure formatting — no schema validation, no Bedrock, no I/O. Takes the
|
|
4
|
+
structured dict produced by :func:`review_sheet_worker.generate_review_sheet`
|
|
5
|
+
and returns a Markdown string per strategy-memo §Coherent Actions #2.
|
|
6
|
+
|
|
7
|
+
The renderer is split into a separate module from the worker so the CLI
|
|
8
|
+
can decide whether to emit JSON (off-TTY default) or Markdown (TTY
|
|
9
|
+
default) without dragging Bedrock imports through the JSON path.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _short_sha(commit_sha: str, *, length: int = 12) -> str:
|
|
18
|
+
"""Trim a commit SHA to the conventional review-sheet length."""
|
|
19
|
+
return commit_sha[:length] if len(commit_sha) > length else commit_sha
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _short_digest(digest: str, *, length: int = 16) -> str:
|
|
23
|
+
"""Trim ``sha256:<hex>`` for header display.
|
|
24
|
+
|
|
25
|
+
Preserves the ``sha256:`` prefix and the leading hex chars so the
|
|
26
|
+
user can still spot-check the digest against the binding output;
|
|
27
|
+
the rest is noise in a header line.
|
|
28
|
+
"""
|
|
29
|
+
if ":" not in digest:
|
|
30
|
+
return digest[:length]
|
|
31
|
+
prefix, rest = digest.split(":", 1)
|
|
32
|
+
return f"{prefix}:{rest[:length]}"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _format_corrections(corrections: list[dict[str, Any]]) -> list[str]:
|
|
36
|
+
"""Render the corrections list. ``_None._`` placeholder if empty."""
|
|
37
|
+
if not corrections:
|
|
38
|
+
return ["_None._"]
|
|
39
|
+
out: list[str] = []
|
|
40
|
+
for entry in corrections:
|
|
41
|
+
if not isinstance(entry, dict):
|
|
42
|
+
continue
|
|
43
|
+
what = entry.get("what_agent_did", "").strip()
|
|
44
|
+
correction = entry.get("correction", "").strip()
|
|
45
|
+
if what and correction:
|
|
46
|
+
out.append(f"- *{what}* → {correction}")
|
|
47
|
+
elif what:
|
|
48
|
+
out.append(f"- *{what}*")
|
|
49
|
+
elif correction:
|
|
50
|
+
out.append(f"- {correction}")
|
|
51
|
+
return out or ["_None._"]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _format_inline_code_list(values: list[str]) -> str:
|
|
55
|
+
"""Render a list of tool names as backtick-wrapped inline code."""
|
|
56
|
+
if not values:
|
|
57
|
+
return "_None._"
|
|
58
|
+
return ", ".join(f"`{v}`" for v in values if v)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _format_refusal_lines(values: list[str]) -> list[str]:
|
|
62
|
+
"""Render ``tools_refused`` as one bullet per entry, "_None._" if empty."""
|
|
63
|
+
if not values:
|
|
64
|
+
return ["_None._"]
|
|
65
|
+
return [f"- `{entry}`" for entry in values if entry]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def render_markdown(sheet: dict[str, Any], metadata: dict[str, Any]) -> str:
    """Render a PR review sheet dict into the canonical Markdown shape.

    Parameters
    ----------
    sheet
        The structured review-sheet dict — schema-shaped per
        :class:`claude_sql.schemas.PRReviewSheet`. Either the ``sheet``
        sub-key from :func:`generate_review_sheet`'s success return or
        an arbitrary dict in the same shape. Missing, ``None`` or blank
        ``human_intent`` / ``diff_rationale`` render as ``_(missing)_``;
        missing or empty list fields render as ``_None._``.
    metadata
        ``{commit_sha, transcript_uri, transcript_digest, model_id,
        captured_at}`` — populated by the worker on the success path.
        Absent keys render as empty strings.

    Returns
    -------
    str
        Markdown with a ``# PR Review Sheet — <sha-short>`` header (the
        short SHA is wrapped in backticks), three metadata lines, and six
        sections: what the human asked for, what the agent explored,
        corrections, tools used, tools refused, and why this diff. The
        output ends with a newline so it stays pipe-friendly.
    """

    def _prose(key: str) -> str:
        # ``or ""`` folds an explicit ``None`` into the missing case; a bare
        # ``str(None)`` would print the literal word "None" in the sheet.
        return str(sheet.get(key) or "").strip() or "_(missing)_"

    commit_sha = str(metadata.get("commit_sha", ""))
    transcript_uri = str(metadata.get("transcript_uri", ""))
    transcript_digest = str(metadata.get("transcript_digest", ""))
    runtime = str(metadata.get("model_id", ""))
    captured_at = str(metadata.get("captured_at", ""))

    human_intent = _prose("human_intent")
    exploration = sheet.get("agent_exploration") or []
    corrections = sheet.get("corrections") or []
    tools_used = sheet.get("tools_used") or []
    tools_refused = sheet.get("tools_refused") or []
    diff_rationale = _prose("diff_rationale")

    parts: list[str] = []
    parts.append(f"# PR Review Sheet — `{_short_sha(commit_sha)}`")
    parts.append("")
    parts.append(
        f"**Transcript:** `{transcript_uri}` (digest `{_short_digest(transcript_digest)}`)"
    )
    parts.append(f"**Agent runtime:** {runtime}")
    parts.append(f"**Generated:** {captured_at}")
    parts.append("")
    parts.append("## What the human asked for")
    parts.append(human_intent)
    parts.append("")
    parts.append("## What the agent explored")
    if exploration:
        parts.extend(f"- {str(bullet).strip()}" for bullet in exploration)
    else:
        parts.append("_None._")
    parts.append("")
    parts.append("## Corrections")
    parts.extend(_format_corrections(list(corrections)))
    parts.append("")
    parts.append("## Tools used")
    parts.append(_format_inline_code_list([str(v) for v in tools_used]))
    parts.append("")
    parts.append("## Tools refused")
    parts.extend(_format_refusal_lines([str(v) for v in tools_refused]))
    parts.append("")
    parts.append("## Why this diff")
    parts.append(diff_rationale)
    # The final empty element makes the joined text end with "\n".
    parts.append("")
    return "\n".join(parts)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def render_refusal_markdown(reason: str, metadata: dict[str, Any]) -> str:
    """Render the Markdown document for a refused review-sheet call.

    Emits the same title and metadata lines as :func:`render_markdown` so
    downstream consumers can still file the output by commit, then prints
    the canonical refusal note and the refusal reason as a block quote in
    place of the review sections.

    Parameters
    ----------
    reason
        Human-readable refusal reason. Surrounding whitespace is stripped;
        every line is quoted so a multi-line reason stays inside one block
        quote.
    metadata
        ``{commit_sha, transcript_uri, transcript_digest, model_id,
        captured_at}``. Absent keys render as empty strings.

    Returns
    -------
    str
        Markdown text ending with a newline.
    """
    commit_sha = str(metadata.get("commit_sha", ""))
    transcript_uri = str(metadata.get("transcript_uri", ""))
    transcript_digest = str(metadata.get("transcript_digest", ""))
    runtime = str(metadata.get("model_id", ""))
    captured_at = str(metadata.get("captured_at", ""))

    # Prefix every line: with only the first line quoted, a blank line inside
    # the reason would end the block quote and spill the rest into body text.
    # The ``or`` keeps the original "> " output for an empty reason.
    quoted = [f"> {line}" for line in reason.strip().splitlines()] or ["> "]

    parts: list[str] = []
    parts.append(f"# PR Review Sheet — `{_short_sha(commit_sha)}`")
    parts.append("")
    parts.append(
        f"**Transcript:** `{transcript_uri}` (digest `{_short_digest(transcript_digest)}`)"
    )
    parts.append(f"**Agent runtime:** {runtime}")
    parts.append(f"**Generated:** {captured_at}")
    parts.append("")
    parts.append("_Review sheet refused; see metadata._")
    parts.append("")
    parts.extend(quoted)
    # The final empty element makes the joined text end with "\n".
    parts.append("")
    return "\n".join(parts)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
__all__ = [
|
|
165
|
+
"render_markdown",
|
|
166
|
+
"render_refusal_markdown",
|
|
167
|
+
]
|
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""Single-shot PR review-sheet worker.
|
|
2
|
+
|
|
3
|
+
Compresses the bound transcript for one merged commit into a 1K-token PR
|
|
4
|
+
review sheet via Sonnet 4.6 with ``output_config.format`` structured
|
|
5
|
+
output. Implements strategy-memo §Coherent Actions #2.
|
|
6
|
+
|
|
7
|
+
Pipeline shape
|
|
8
|
+
--------------
|
|
9
|
+
1. Resolve ``commit_sha`` → ``transcript_uri`` via
|
|
10
|
+
:func:`claude_sql.binding.resolve_commit_to_transcript`. The override
|
|
11
|
+
parameter ``transcript_uri_override`` is the test / direct-invocation
|
|
12
|
+
bypass that loads a JSONL by path without touching git.
|
|
13
|
+
2. Compress the JSONL into a flat session text. We avoid the full DuckDB
|
|
14
|
+
``SessionTextCorpus.assemble`` round-trip on the override path so the
|
|
15
|
+
worker is invokable from tests without a populated database.
|
|
16
|
+
3. Build a system prompt with Anthropic XML tags
|
|
17
|
+
(``<instructions>``, ``<context>``, ``<examples><example>``,
|
|
18
|
+
``<anti_patterns>``) that frames the task as "compress the bound
|
|
19
|
+
transcript into a 1K-token PR review sheet."
|
|
20
|
+
4. Call ``llm_worker._invoke_classifier_sync`` with
|
|
21
|
+
:data:`PR_REVIEW_SHEET_SCHEMA`. Adaptive thinking is on by default;
|
|
22
|
+
``no_thinking=True`` disables it.
|
|
23
|
+
5. Return ``{"sheet": <PRReviewSheet dict>, "metadata": {...}}`` on
|
|
24
|
+
success, ``{"refused": True, "reason": ..., "metadata": {...}}`` on
|
|
25
|
+
:class:`BedrockRefusalError`.
|
|
26
|
+
|
|
27
|
+
Out of scope: caching (single-shot, no parquet), batched processing,
|
|
28
|
+
DuckDB views. The worker is a thin wrapper around the structured-output
|
|
29
|
+
call so it can be re-used by future review-fleet code without a rewrite.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import hashlib
|
|
35
|
+
import json
|
|
36
|
+
from datetime import UTC, datetime
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import TYPE_CHECKING, Any
|
|
39
|
+
from urllib.parse import unquote, urlparse
|
|
40
|
+
|
|
41
|
+
from loguru import logger
|
|
42
|
+
|
|
43
|
+
from claude_sql.binding import resolve_commit_to_transcript
|
|
44
|
+
from claude_sql.llm_worker import (
|
|
45
|
+
BedrockRefusalError,
|
|
46
|
+
_build_bedrock_client,
|
|
47
|
+
_invoke_classifier_sync,
|
|
48
|
+
)
|
|
49
|
+
from claude_sql.schemas import PR_REVIEW_SHEET_SCHEMA
|
|
50
|
+
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
import duckdb
|
|
53
|
+
|
|
54
|
+
from claude_sql.config import Settings
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# System prompt — Anthropic XML canonical tags only.
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
REVIEW_SHEET_SYSTEM_PROMPT = """\
|
|
63
|
+
<instructions>
|
|
64
|
+
You compress one Claude Code coding session — bound to a merged commit —
|
|
65
|
+
into a structured PR review sheet. The user message contains the bound
|
|
66
|
+
JSONL transcript, already flattened to one event per line in chronological
|
|
67
|
+
order (user turns, assistant turns, tool calls, tool results).
|
|
68
|
+
|
|
69
|
+
Your job is to populate the schema with a faithful, factual summary in
|
|
70
|
+
under ~1K rendered tokens. Six fields, no surrounding prose, no markdown
|
|
71
|
+
fences.
|
|
72
|
+
</instructions>
|
|
73
|
+
|
|
74
|
+
<context>
|
|
75
|
+
How to read the transcript:
|
|
76
|
+
|
|
77
|
+
- Opening user-role messages state the human's intent. Goal restatements
|
|
78
|
+
later in the session refine it.
|
|
79
|
+
- Tool calls (``[tool_use:<name>...]``) are the strongest evidence of
|
|
80
|
+
what the agent actually explored — read past chitchat to the actions.
|
|
81
|
+
- Tool results (``[tool_result ...]``) confirm what landed; pay attention
|
|
82
|
+
to errors, refusals, and "blocked" markers — those become
|
|
83
|
+
``tools_refused`` entries.
|
|
84
|
+
- Closing exchanges state whether the goal was met; the diff_rationale
|
|
85
|
+
field captures the WHY of the merged change.
|
|
86
|
+
|
|
87
|
+
Field semantics:
|
|
88
|
+
|
|
89
|
+
- human_intent: 1-3 sentences in present tense, paraphrased from the
|
|
90
|
+
human's opening turn(s). Not a literal quote; not the agent's
|
|
91
|
+
restatement.
|
|
92
|
+
- agent_exploration: 3-8 short bullets, each a noun phrase or short
|
|
93
|
+
clause. Drawn from tool calls, search queries, and file reads.
|
|
94
|
+
Examples: "Read src/auth/middleware.py", "Searched for token rotator
|
|
95
|
+
call sites", "Inspected failing test fixture".
|
|
96
|
+
- corrections: human-redirected agent actions only. Up to 5 entries,
|
|
97
|
+
empty list if none. Skip surface-level acknowledgements ("ok", "thanks").
|
|
98
|
+
Each entry pairs what the agent did with the human's correction.
|
|
99
|
+
- tools_used: deduplicated tool names (Read, Edit, Write, Bash, Grep,
|
|
100
|
+
Glob, etc.). Order by first use.
|
|
101
|
+
- tools_refused: tool calls the agent declined or that hooks/permissions
|
|
102
|
+
blocked. Format: "ToolName: brief reason". Empty list when nothing
|
|
103
|
+
was blocked.
|
|
104
|
+
- diff_rationale: 2-4 sentences naming the files / modules touched and
|
|
105
|
+
the user-facing change. The WHY, not a line-by-line summary.
|
|
106
|
+
</context>
|
|
107
|
+
|
|
108
|
+
<examples>
|
|
109
|
+
<example>
|
|
110
|
+
<input>A 30-minute session: user asks "fix the off-by-one in the
|
|
111
|
+
pagination cursor". Agent reads src/api/pagination.py, runs the failing
|
|
112
|
+
test, edits the cursor math, re-runs tests (green), commits.</input>
|
|
113
|
+
<output>human_intent="Fix the off-by-one bug in the pagination cursor.";
|
|
114
|
+
agent_exploration=["Read src/api/pagination.py", "Ran the failing
|
|
115
|
+
pagination test", "Inspected cursor advance math"]; corrections=[];
|
|
116
|
+
tools_used=["Read", "Bash", "Edit"]; tools_refused=[];
|
|
117
|
+
diff_rationale="Adjusted the cursor advance math in
|
|
118
|
+
src/api/pagination.py from len(rows) to len(rows)-1 so the next page
|
|
119
|
+
starts on the correct row. Test pagination_test.py::test_cursor_edge
|
|
120
|
+
covers the regression."</output>
|
|
121
|
+
</example>
|
|
122
|
+
<example>
|
|
123
|
+
<input>User asks for a refactor; agent starts rewriting from scratch;
|
|
124
|
+
user says "no, keep the existing module and only update the rotator
|
|
125
|
+
call". Agent pivots, edits the rotator, ships.</input>
|
|
126
|
+
<output>human_intent="Refactor the auth middleware to use the new
|
|
127
|
+
token rotator without rewriting the existing module.";
|
|
128
|
+
agent_exploration=["Read src/auth/middleware.py", "Surveyed token
|
|
129
|
+
rotator call sites", "Reviewed the existing rotator interface"];
|
|
130
|
+
corrections=[{what_agent_did="Started rewriting the auth middleware
|
|
131
|
+
from scratch.", correction="Asked to keep the existing middleware and
|
|
132
|
+
only update the rotator call."}]; tools_used=["Read", "Grep", "Edit"];
|
|
133
|
+
tools_refused=[]; diff_rationale="Updated the rotator invocation in
|
|
134
|
+
src/auth/middleware.py to call rotate_v2() instead of rotate_v1(),
|
|
135
|
+
preserving the surrounding control flow per the user's redirection."
|
|
136
|
+
</output>
|
|
137
|
+
</example>
|
|
138
|
+
</examples>
|
|
139
|
+
|
|
140
|
+
<anti_patterns>
|
|
141
|
+
- Do not invent files or tool calls that aren't in the transcript. The
|
|
142
|
+
review sheet is forensic; fabrication corrupts the audit trail.
|
|
143
|
+
- Do not restate the agent's narration as exploration. "I will now
|
|
144
|
+
read the file" is narration; the matching ``[tool_use:Read ...]``
|
|
145
|
+
block is the exploration evidence.
|
|
146
|
+
- Do not pad ``corrections`` with surface acknowledgements ("ok",
|
|
147
|
+
"thanks", "sounds good"). Empty list is correct when nothing
|
|
148
|
+
substantive was redirected.
|
|
149
|
+
- Do not echo the diff line-by-line in ``diff_rationale``. Reviewers
|
|
150
|
+
read the diff itself; the field is the WHY in 2-4 sentences.
|
|
151
|
+
- Do not include subagent or sidecar transcripts unless they appear
|
|
152
|
+
in the bound JSONL — the session text we hand you is the ground
|
|
153
|
+
truth.
|
|
154
|
+
- ``tools_refused`` is for declined / blocked calls only. A
|
|
155
|
+
successfully-executed Bash command does not belong here.
|
|
156
|
+
</anti_patterns>
|
|
157
|
+
"""
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# ---------------------------------------------------------------------------
|
|
161
|
+
# Transcript loading
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Maximum number of characters kept from any single message body or JSON
# preview before it is placed on an event line. The cap is applied by plain
# slicing, so longer values are cut without an ellipsis marker.
_MAX_LINE_PREVIEW: int = 800
"""Per-event preview cap when flattening a JSONL into review-sheet text."""
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _resolve_transcript_path(uri: str) -> Path:
|
|
170
|
+
"""Translate a ``file://`` URI back into an absolute :class:`Path`.
|
|
171
|
+
|
|
172
|
+
The binding module emits ``Path.resolve().as_uri()``, which produces
|
|
173
|
+
a percent-encoded ``file://`` URI. Round-trip through ``urlparse`` so
|
|
174
|
+
spaces and other escaped characters survive.
|
|
175
|
+
|
|
176
|
+
Raises
|
|
177
|
+
------
|
|
178
|
+
ValueError
|
|
179
|
+
If the URI scheme isn't ``file``. The review-sheet worker has
|
|
180
|
+
no S3 / git-notes loader yet — those entry points are reserved
|
|
181
|
+
for future emitters per RFC 0001.
|
|
182
|
+
"""
|
|
183
|
+
parsed = urlparse(uri)
|
|
184
|
+
if parsed.scheme != "file":
|
|
185
|
+
raise ValueError(
|
|
186
|
+
f"transcript URI scheme {parsed.scheme!r} not supported by review-sheet worker; "
|
|
187
|
+
"expected file://"
|
|
188
|
+
)
|
|
189
|
+
return Path(unquote(parsed.path))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _flatten_jsonl_to_text(jsonl_path: Path, *, total_max_chars: int = 32_000) -> str:
|
|
193
|
+
"""Compress a Claude Code JSONL into one event-per-line review text.
|
|
194
|
+
|
|
195
|
+
Mirrors the shape of :meth:`SessionTextCorpus.assemble` but works
|
|
196
|
+
directly off a single JSONL — no DuckDB connection required. This is
|
|
197
|
+
what makes the worker testable in isolation: hand it a tmp_path
|
|
198
|
+
JSONL and a ``file://`` URI override and it produces the same
|
|
199
|
+
flattened format that the prompt expects.
|
|
200
|
+
|
|
201
|
+
The output cap matches ``Settings.session_text_total_max_chars``
|
|
202
|
+
defaults so a long session still fits inside the Sonnet 4.6 context
|
|
203
|
+
after the system prompt and schema overhead.
|
|
204
|
+
"""
|
|
205
|
+
lines: list[str] = []
|
|
206
|
+
running = 0
|
|
207
|
+
with jsonl_path.open(encoding="utf-8") as fh:
|
|
208
|
+
for raw in fh:
|
|
209
|
+
stripped = raw.strip()
|
|
210
|
+
if not stripped:
|
|
211
|
+
continue
|
|
212
|
+
try:
|
|
213
|
+
record = json.loads(stripped)
|
|
214
|
+
except json.JSONDecodeError:
|
|
215
|
+
# Skip malformed lines — same forgiving posture as
|
|
216
|
+
# ``read_json(..., ignore_errors=true)`` upstream.
|
|
217
|
+
continue
|
|
218
|
+
line = _render_event_line(record)
|
|
219
|
+
if not line:
|
|
220
|
+
continue
|
|
221
|
+
if running + len(line) + 1 > total_max_chars:
|
|
222
|
+
lines.append(
|
|
223
|
+
f"…(transcript truncated at {total_max_chars} chars; "
|
|
224
|
+
f"{len(lines)} events rendered)"
|
|
225
|
+
)
|
|
226
|
+
break
|
|
227
|
+
lines.append(line)
|
|
228
|
+
running += len(line) + 1
|
|
229
|
+
return "\n".join(lines)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _render_event_line(record: dict[str, Any]) -> str:
|
|
233
|
+
"""Render one Claude Code JSONL record as a single review-text line.
|
|
234
|
+
|
|
235
|
+
Falls back to ``""`` (skipped) for record types we don't surface in
|
|
236
|
+
the review sheet — snapshots, permission updates, and other
|
|
237
|
+
bookkeeping events. The renderer is deliberately conservative:
|
|
238
|
+
review-sheet quality depends on the prompt seeing user/assistant
|
|
239
|
+
turns plus tool calls, not internal CLI events.
|
|
240
|
+
"""
|
|
241
|
+
rec_type = record.get("type")
|
|
242
|
+
ts = record.get("timestamp") or record.get("ts") or ""
|
|
243
|
+
if rec_type in ("user", "assistant"):
|
|
244
|
+
return _render_message(record, ts)
|
|
245
|
+
return ""
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _render_message(record: dict[str, Any], ts: str) -> str:
|
|
249
|
+
"""Format a user/assistant message record into the review-text shape."""
|
|
250
|
+
message = record.get("message")
|
|
251
|
+
if not isinstance(message, dict):
|
|
252
|
+
return ""
|
|
253
|
+
role = message.get("role") or record.get("type") or "?"
|
|
254
|
+
content = message.get("content")
|
|
255
|
+
if isinstance(content, str):
|
|
256
|
+
body = content[:_MAX_LINE_PREVIEW]
|
|
257
|
+
return f"[{role} {ts}] {body}"
|
|
258
|
+
if not isinstance(content, list):
|
|
259
|
+
return ""
|
|
260
|
+
rendered: list[str] = []
|
|
261
|
+
for block in content:
|
|
262
|
+
if not isinstance(block, dict):
|
|
263
|
+
continue
|
|
264
|
+
block_line = _render_content_block(block, ts)
|
|
265
|
+
if block_line:
|
|
266
|
+
rendered.append(block_line)
|
|
267
|
+
if not rendered:
|
|
268
|
+
return ""
|
|
269
|
+
if len(rendered) == 1 and rendered[0].startswith("["):
|
|
270
|
+
# Tool-only block — return as its own line so the prompt sees
|
|
271
|
+
# the tool_use / tool_result framing the system prompt expects.
|
|
272
|
+
return rendered[0]
|
|
273
|
+
body = " ".join(rendered)[:_MAX_LINE_PREVIEW]
|
|
274
|
+
return f"[{role} {ts}] {body}"
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _render_content_block(block: dict[str, Any], ts: str) -> str:
|
|
278
|
+
"""Format one content block (text / tool_use / tool_result)."""
|
|
279
|
+
btype = block.get("type")
|
|
280
|
+
if btype == "text":
|
|
281
|
+
text = block.get("text", "")
|
|
282
|
+
if not isinstance(text, str):
|
|
283
|
+
return ""
|
|
284
|
+
return text
|
|
285
|
+
if btype == "tool_use":
|
|
286
|
+
name = block.get("name") or "tool"
|
|
287
|
+
tool_input = block.get("input")
|
|
288
|
+
return f"[tool_use:{name} {ts}] {_safe_preview(tool_input)}"
|
|
289
|
+
if btype == "tool_result":
|
|
290
|
+
tu_id = block.get("tool_use_id") or "?"
|
|
291
|
+
return f"[tool_result {tu_id} {ts}] {_safe_preview(block.get('content'))}"
|
|
292
|
+
if btype == "thinking":
|
|
293
|
+
# Skip thinking blocks — they're agent-internal, not review evidence.
|
|
294
|
+
return ""
|
|
295
|
+
return ""
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _safe_preview(value: Any) -> str:
|
|
299
|
+
"""Compact one-line preview of arbitrary JSON content."""
|
|
300
|
+
if value is None:
|
|
301
|
+
return ""
|
|
302
|
+
if isinstance(value, str):
|
|
303
|
+
return value[:_MAX_LINE_PREVIEW]
|
|
304
|
+
try:
|
|
305
|
+
rendered = json.dumps(value, ensure_ascii=False, default=str)
|
|
306
|
+
except (TypeError, ValueError):
|
|
307
|
+
rendered = str(value)
|
|
308
|
+
return rendered[:_MAX_LINE_PREVIEW]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# ---------------------------------------------------------------------------
|
|
312
|
+
# Worker entry point
|
|
313
|
+
# ---------------------------------------------------------------------------
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _digest_transcript_text(text: str) -> str:
|
|
317
|
+
"""Stable digest of the flattened transcript for plan/metadata output.
|
|
318
|
+
|
|
319
|
+
Uses a short SHA-256 prefix so dry-run plans are diff-friendly across
|
|
320
|
+
runs. Distinct from :func:`claude_sql.binding.compute_digest`, which
|
|
321
|
+
hashes the raw JSONL bytes — this digest covers what the LLM saw,
|
|
322
|
+
not what's on disk.
|
|
323
|
+
"""
|
|
324
|
+
h = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
|
325
|
+
return f"sha256:{h[:16]}"
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _resolve_uri_for_commit(
|
|
329
|
+
commit_sha: str,
|
|
330
|
+
*,
|
|
331
|
+
transcript_uri_override: str | None,
|
|
332
|
+
) -> str:
|
|
333
|
+
"""Pick the transcript URI: override if set, else the bound trailer."""
|
|
334
|
+
if transcript_uri_override is not None:
|
|
335
|
+
return transcript_uri_override
|
|
336
|
+
binding = resolve_commit_to_transcript(commit_sha)
|
|
337
|
+
return binding.uri
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def generate_review_sheet(
    con: duckdb.DuckDBPyConnection | None,
    settings: Settings,
    *,
    commit_sha: str,
    transcript_uri_override: str | None = None,
    dry_run: bool = True,
    no_thinking: bool = False,
) -> dict[str, Any]:
    """Produce a PR review sheet for ``commit_sha``.

    Parameters
    ----------
    con
        DuckDB connection (unused on the override / file:// path; kept
        in the signature so future S3 / git-notes resolution can scan
        the views without changing the public API).
    settings
        :class:`Settings`; this function reads ``sonnet_model_id``,
        ``classify_thinking`` and ``classify_max_tokens`` and passes the
        object to the Bedrock client builder.
    commit_sha
        Merged commit whose transcript should be summarized.
    transcript_uri_override
        Skip the binding lookup and load this URI directly. Reserved for
        tests and direct invocation; production callers pass ``None``.
    dry_run
        When ``True`` (the default) returns a plan dict without invoking
        Bedrock. The transcript is still resolved, read and digested.
    no_thinking
        Force ``thinking_mode='disabled'``. Otherwise
        ``settings.classify_thinking`` is used; per the original author's
        note adaptive thinking adds ~10-30% latency but improves field
        synthesis quality on edge cases.

    Returns
    -------
    dict
        Under ``dry_run=True``: ``{"plan": {commit_sha, transcript_uri,
        transcript_digest, model_id, prompt_chars_estimate, dry_run}}``.

        Under successful Bedrock call: ``{"sheet": <PRReviewSheet dict>,
        "metadata": {commit_sha, transcript_uri, transcript_digest,
        model_id, captured_at}}``.

        Under :class:`BedrockRefusalError`: ``{"refused": True,
        "reason": <str>, "metadata": {...}}`` with the same metadata keys
        as the success path.

    Raises
    ------
    ValueError
        If the resolved transcript URI does not use the ``file`` scheme.
    FileNotFoundError
        If the resolved transcript path is not an existing file.
    """
    del con  # connection is not consumed on the file:// path; reserved for future loaders.
    transcript_uri = _resolve_uri_for_commit(
        commit_sha, transcript_uri_override=transcript_uri_override
    )
    transcript_path = _resolve_transcript_path(transcript_uri)
    if not transcript_path.is_file():
        raise FileNotFoundError(
            f"transcript JSONL not found at resolved path {transcript_path!s} "
            f"(uri={transcript_uri})"
        )
    transcript_text = _flatten_jsonl_to_text(transcript_path)
    transcript_digest = _digest_transcript_text(transcript_text)

    if dry_run:
        return {
            "plan": {
                "commit_sha": commit_sha,
                "transcript_uri": transcript_uri,
                "transcript_digest": transcript_digest,
                "model_id": settings.sonnet_model_id,
                "prompt_chars_estimate": len(transcript_text),
                "dry_run": True,
            }
        }

    def _metadata() -> dict[str, Any]:
        # Built at return time (not before the call) so ``captured_at`` is
        # stamped after Bedrock has answered or refused. Shared by both
        # outcomes so the two metadata shapes cannot drift apart.
        return {
            "commit_sha": commit_sha,
            "transcript_uri": transcript_uri,
            "transcript_digest": transcript_digest,
            "model_id": settings.sonnet_model_id,
            "captured_at": datetime.now(UTC).isoformat(),
        }

    thinking_mode = "disabled" if no_thinking else settings.classify_thinking
    user_text = (
        "Compress the following bound transcript into a PR review sheet "
        "matching the schema. Be faithful to the events; do not invent "
        "files, tools, or corrections.\n\n"
        f"COMMIT: {commit_sha}\n"
        f"TRANSCRIPT URI: {transcript_uri}\n"
        f"TRANSCRIPT DIGEST: {transcript_digest}\n\n"
        "TRANSCRIPT (chronological events):\n"
        "```\n"
        f"{transcript_text}\n"
        "```\n"
    )

    client = _build_bedrock_client(settings)
    try:
        sheet = _invoke_classifier_sync(
            client,
            settings.sonnet_model_id,
            PR_REVIEW_SHEET_SCHEMA,
            user_text,
            max_tokens=settings.classify_max_tokens,
            thinking_mode=thinking_mode,
            system=REVIEW_SHEET_SYSTEM_PROMPT,
        )
    except BedrockRefusalError as exc:
        logger.info("review-sheet: refused by Bedrock — {}", exc)
        return {
            "refused": True,
            "reason": str(exc),
            "metadata": _metadata(),
        }

    return {
        "sheet": sheet,
        "metadata": _metadata(),
    }
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
__all__ = [
|
|
461
|
+
"REVIEW_SHEET_SYSTEM_PROMPT",
|
|
462
|
+
"generate_review_sheet",
|
|
463
|
+
]
|