typeclaw 0.36.5 → 0.36.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.36.5",
3
+ "version": "0.36.7",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -48,7 +48,7 @@
48
48
  "@mariozechner/pi-tui": "^0.67.3",
49
49
  "@modelcontextprotocol/sdk": "^1.29.0",
50
50
  "@mozilla/readability": "^0.6.0",
51
- "agent-messenger": "2.19.5",
51
+ "agent-messenger": "2.20.1",
52
52
  "cheerio": "^1.2.0",
53
53
  "citty": "^0.2.2",
54
54
  "cron-parser": "^5.5.0",
@@ -33,6 +33,13 @@ export type ReviewVerdictGuard = {
33
33
  verdict: ReviewVerdict
34
34
  }) => Promise<ApproveBlock | null>
35
35
  release: (args: { callId: string; succeeded: boolean }) => Promise<void>
36
+ // Arms the read-after-write lag shield for a verdict that landed WITHOUT a prior
37
+ // guard() reservation. The pre-execution detector can miss a review-submission
38
+ // command shape, so the verdict is only recovered post-hoc from the REST result
39
+ // (review-recorder's backstop). Without this, `release()` has no reservation for
40
+ // that callId and never writes `recentLandedByPr`, leaving the next same-commit
41
+ // submission undeduped — the exact gap the backstop was meant to close.
42
+ noteLandedReview: (args: { workspace: string; prNumber: number; verdict: ReviewVerdict }) => Promise<void>
36
43
  }
37
44
 
38
45
  // Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
@@ -73,10 +80,15 @@ const LEASE_TTL_MS = 5 * 60_000
73
80
  // read-after-write lag rather than a genuine absence. GitHub's `/pulls/<n>/reviews`
74
81
  // list lags a write by up to ~10s, so a second engagement turn firing in that
75
82
  // window reads NONE and would land a duplicate. Observed duplicates were ~10-18s
76
- // apart; 60s is a comfortable lag margin without making a legitimate re-verdict
77
- // wait long. This window only shadows a raw NONE on the SAME verdict (+ same or
83
+ // apart originally; a later fan-out incident spread FOUR sequential APPROVEs over
84
+ // ~15s (one channel session per inline review thread), each ~3-7s after the last —
85
+ // well inside the read lag yet beyond a single turn. 120s gives margin for that
86
+ // thread fan-out plus slow API indexing without turning the shield into a human-
87
+ // facing rate limit (a legitimate re-verdict after a new push carries a new head
88
+ // SHA and bypasses this entirely; staying under ~5min avoids blocking genuine
89
+ // re-reviews). This window only shadows a raw NONE on the SAME verdict (+ same or
78
90
  // uncertain head) — a DISMISSED/CHANGES_REQUESTED/flipped-verdict all bypass it.
79
- const RECENT_LANDED_TTL_MS = 60_000
91
+ const RECENT_LANDED_TTL_MS = 120_000
80
92
 
81
93
  type Reservation = {
82
94
  key: string
@@ -230,6 +242,20 @@ export function createApproveIdempotencyGuard(deps: {
230
242
  releaseReservation(args.callId, reservation)
231
243
  }
232
244
  },
245
+
246
+ async noteLandedReview(args): Promise<void> {
247
+ if (args.verdict !== 'APPROVE' && args.verdict !== 'REQUEST_CHANGES') return
248
+ // No pre-submit head was captured (guard() never ran), so the best pin we
249
+ // can prove is the CURRENT head. A null resolve becomes the uncertainty
250
+ // sentinel, which still matches the current head for the lag window — the
251
+ // same conservative behaviour release() uses for a push-during-review.
252
+ const headSha = (await deps.resolveHeadSha?.({ workspace: args.workspace, prNumber: args.prNumber })) ?? null
253
+ recentLandedByPr.set(prKey(args.workspace, args.prNumber), {
254
+ verdict: args.verdict,
255
+ headSha,
256
+ landedAt: now(),
257
+ })
258
+ },
233
259
  }
234
260
  }
235
261
 
@@ -6,6 +6,15 @@ import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
6
6
  // inline `-f/-F event=...`, and the `gh pr review` porcelain. Returns null when
7
7
  // the command is not a verdict-bearing review submission (incl. COMMENT reviews,
8
8
  // which carry no false-receipt risk and are not tracked).
9
+ //
10
+ // The `gh` invocation does NOT have to lead the command. The observed duplicate-
11
+ // approval incident used four different shapes — `cd /agent && gh api …`,
12
+ // `tmp=$(mktemp); … ; gh api --input "$tmp"`, a heredoc-then-`gh` two-stager, and
13
+ // the canonical bare `gh api …`. Only the bare shape was detected, so the
14
+ // idempotency guard never armed for the other three and the duplicates landed.
15
+ // Detection therefore scans every shell-separated segment for a `gh` invocation,
16
+ // independent of `analyzeGhCommand` (which is a token-injection-safety gate, not a
17
+ // proxy for "will this command execute" — a classic PAT skips that block).
9
18
 
10
19
  // `source` drives success detection downstream: the REST endpoints echo the
11
20
  // created review JSON, while the `gh pr review` porcelain prints a plain
@@ -24,17 +33,80 @@ export type GhReviewDetectInput = {
24
33
  inputFileContents?: string | null
25
34
  }
26
35
 
27
- const REVIEWS_ENDPOINT = /\/repos\/([^/\s]+)\/([^/\s]+)\/pulls\/(\d+)\/reviews\b/
36
+ // `gh api` accepts the endpoint with or without a leading slash
37
+ // (`repos/o/r/pulls/N/reviews` and `/repos/…` both work), so the match is
38
+ // anchored on a `repos/` boundary, not a slash. The observed compound shape
39
+ // `cd /agent && gh api -X POST repos/o/r/pulls/224/reviews …` used the
40
+ // slash-less form and was missed by the slash-anchored pattern.
41
+ const REVIEWS_ENDPOINT = /(?:^|\/)repos\/([^/\s]+)\/([^/\s]+)\/pulls\/(\d+)\/reviews\b/
28
42
 
29
43
  export function detectReviewSubmission(input: GhReviewDetectInput): DetectedReview | null {
30
- const args = splitArgs(input.command)
31
- if (args[0] !== 'gh') return null
44
+ const fileContents = input.inputFileContents ?? null
45
+ for (const segment of ghSegments(input.command)) {
46
+ const detected = detectInGhSegment(segment, fileContents)
47
+ if (detected !== null) return detected
48
+ }
49
+ return null
50
+ }
51
+
52
+ // Each segment is the argv of one `gh` invocation found anywhere in the command.
53
+ function detectInGhSegment(args: readonly string[], fileContents: string | null): DetectedReview | null {
32
54
  const sub = args[1]
33
- if (sub === 'api') return detectApiReview(args, input.inputFileContents ?? null)
55
+ if (sub === 'api') return detectApiReview(args, fileContents)
34
56
  if (sub === 'pr' && args[2] === 'review') return detectPrReview(args)
35
57
  return null
36
58
  }
37
59
 
60
+ export type ReviewSubmissionAttempt = { workspace: string; prNumber: number }
61
+
62
+ // Submission INTENT, verdict aside: a `gh api .../pulls/N/reviews` with a POST
63
+ // method, or a `gh pr review N` carrying a verdict flag. Gates the post-execution
64
+ // backstop so it only fires for a command that actually tried to CREATE a review.
65
+ // A bare `gh api .../pulls/N/reviews` is a GET that LISTS existing reviews; its
66
+ // response array can contain `"state":"APPROVED"` and a pulls URL, which would
67
+ // otherwise make the backstop credit a review that never landed this turn. A read
68
+ // is not an attempt and returns null here.
69
+ export function detectReviewSubmissionAttempt(command: string): ReviewSubmissionAttempt | null {
70
+ for (const args of ghSegments(command)) {
71
+ const attempt = attemptInGhSegment(args)
72
+ if (attempt !== null) return attempt
73
+ }
74
+ return null
75
+ }
76
+
77
+ function attemptInGhSegment(args: readonly string[]): ReviewSubmissionAttempt | null {
78
+ const sub = args[1]
79
+ if (sub === 'api') {
80
+ if (!isPostMethod(args)) return null
81
+ const endpoint = args.find((a) => REVIEWS_ENDPOINT.test(a))
82
+ if (endpoint === undefined) return null
83
+ const m = REVIEWS_ENDPOINT.exec(endpoint)
84
+ if (m === null) return null
85
+ const prNumber = Number(m[3])
86
+ if (!Number.isSafeInteger(prNumber)) return null
87
+ return { workspace: `${m[1]}/${m[2]}`, prNumber }
88
+ }
89
+ if (sub === 'pr' && args[2] === 'review') {
90
+ const detected = detectPrReview(args)
91
+ return detected === null ? null : { workspace: detected.workspace, prNumber: detected.prNumber }
92
+ }
93
+ return null
94
+ }
95
+
96
+ // `gh api` defaults to GET; creating a review is a POST. Accept `-X POST` /
97
+ // `--method POST` in both `flag value` and `flag=value` shapes, case-insensitive.
98
+ function isPostMethod(args: readonly string[]): boolean {
99
+ for (let i = 0; i < args.length; i++) {
100
+ const a = args[i]
101
+ if (a === undefined) continue
102
+ if ((a === '-X' || a === '--method') && (args[i + 1] ?? '').toUpperCase() === 'POST') return true
103
+ if ((a.startsWith('-X=') || a.startsWith('--method=')) && a.slice(a.indexOf('=') + 1).toUpperCase() === 'POST') {
104
+ return true
105
+ }
106
+ }
107
+ return false
108
+ }
109
+
38
110
  function detectApiReview(args: readonly string[], fileContents: string | null): DetectedReview | null {
39
111
  const endpoint = args.find((a) => REVIEWS_ENDPOINT.test(a))
40
112
  if (endpoint === undefined) return null
@@ -139,37 +211,84 @@ function isRepoSlug(value: string | undefined): boolean {
139
211
  return owner !== undefined && owner !== '' && name !== undefined && name !== '' && rest.length === 0
140
212
  }
141
213
 
142
- // Quote-aware whitespace split. The interceptor guarantees a single bare `gh`
143
- // command before we record (no pipes/substitution), so this only needs to honor
144
- // quotes, not full shell grammar.
145
- function splitArgs(command: string): string[] {
146
- const out: string[] = []
147
- let cur = ''
214
+ // Yields the argv of every `gh` invocation in the command, one per shell-
215
+ // separated segment. A segment runs from one command separator (`&&`, `||`, `;`,
216
+ // `|`, newline) to the next; within it we strip leading `VAR=value` assignments
217
+ // (so `tmp=$(mktemp) gh …` and a `VAR=…` prefix both still see `gh` first) and
218
+ // recognise `gh` as the segment's command word. Quote-aware so an embedded `;`
219
+ // or `gh` inside a quoted body (e.g. a review `-f body='nit; gh …'`) is not mistaken for
220
+ // a separator or a second invocation.
221
+ function* ghSegments(command: string): Generator<readonly string[]> {
222
+ for (const segment of splitSegments(command)) {
223
+ const args = stripLeadingAssignments(segment)
224
+ if (args[0] === 'gh') yield args
225
+ }
226
+ }
227
+
228
+ // Drop leading `NAME=value` tokens (env-var prefixes) so the command word that
229
+ // follows them is the one we classify. `tmp=$(mktemp)` tokenises to a single
230
+ // `tmp=$(mktemp)` token here (the `$(…)` stays attached), which this skips.
231
+ function stripLeadingAssignments(args: readonly string[]): readonly string[] {
232
+ let i = 0
233
+ while (i < args.length && /^[A-Za-z_][A-Za-z0-9_]*=/.test(args[i] as string)) i++
234
+ return args.slice(i)
235
+ }
236
+
237
+ // Quote-aware split into shell segments AND tokens. Segments break on top-level
238
+ // `&&`, `||`, `;`, `|`, and newlines (outside quotes). Heredoc bodies are NOT
239
+ // modelled — a heredoc writes a payload file consumed by a later `gh … --input`
240
+ // segment, and that file's contents are resolved separately (review-recorder
241
+ // reads it off disk); detection here only needs the `gh` segment itself.
242
+ function splitSegments(command: string): string[][] {
243
+ const segments: string[][] = []
244
+ let cur: string[] = []
245
+ let tok = ''
148
246
  let quote: '"' | "'" | null = null
149
- let has = false
150
- for (const ch of command) {
247
+ let hasTok = false
248
+ const endTok = () => {
249
+ if (hasTok) {
250
+ cur.push(tok)
251
+ tok = ''
252
+ hasTok = false
253
+ }
254
+ }
255
+ const endSeg = () => {
256
+ endTok()
257
+ if (cur.length > 0) {
258
+ segments.push(cur)
259
+ cur = []
260
+ }
261
+ }
262
+ for (let i = 0; i < command.length; i++) {
263
+ const ch = command[i] as string
151
264
  if (quote !== null) {
152
265
  if (ch === quote) quote = null
153
- else cur += ch
154
- has = true
266
+ else tok += ch
267
+ hasTok = true
155
268
  continue
156
269
  }
157
270
  if (ch === '"' || ch === "'") {
158
271
  quote = ch
159
- has = true
272
+ hasTok = true
273
+ continue
274
+ }
275
+ const next = command[i + 1]
276
+ if ((ch === '&' && next === '&') || (ch === '|' && next === '|')) {
277
+ endSeg()
278
+ i++
279
+ continue
280
+ }
281
+ if (ch === ';' || ch === '|' || ch === '\n') {
282
+ endSeg()
160
283
  continue
161
284
  }
162
- if (ch === ' ' || ch === '\t' || ch === '\n') {
163
- if (has) {
164
- out.push(cur)
165
- cur = ''
166
- has = false
167
- }
285
+ if (ch === ' ' || ch === '\t' || ch === '\r') {
286
+ endTok()
168
287
  continue
169
288
  }
170
- cur += ch
171
- has = true
289
+ tok += ch
290
+ hasTok = true
172
291
  }
173
- if (has) out.push(cur)
174
- return out
292
+ endSeg()
293
+ return segments
175
294
  }
@@ -12,12 +12,22 @@ import { dirname, join } from 'node:path'
12
12
  // `insteadOf`/`pushurl` rewrite redirected to) we exit non-zero WITHOUT printing
13
13
  // the token, so a redirect can never exfiltrate it. The analyzer already blocks
14
14
  // the known redirect vectors; this is defense-in-depth at the credential edge.
15
- // The host match is on \`//github.com/\` and \`//github.com'\` (git wraps the URL
16
- // in quotes: \`Password for 'https://github.com': \`) so it cannot be fooled by
17
- // \`evil-github.com\` or \`github.com.evil/\`.
15
+ //
16
+ // Two prompt shapes must match, because git rewrites the host between the two
17
+ // prompts of a single clone/fetch: it first asks `Username for
18
+ // 'https://github.com': `, and AFTER we answer `x-access-token` it folds that
19
+ // userinfo into the host of the SECOND prompt — `Password for
20
+ // 'https://x-access-token@github.com': `. So we accept both bare-host
21
+ // (\`//github.com/\` or \`//github.com'\`) and userinfo-host
22
+ // (\`//<user>@github.com/\` or \`//<user>@github.com'\`). The anchor is the
23
+ // literal \`github.com\` immediately followed by \`/\` or the closing quote git
24
+ // wraps the URL in, so it cannot be fooled by \`evil-github.com\`,
25
+ // \`github.com.evil/\`, or \`x@github.com.evil/\`. Without the userinfo arm the
26
+ // password prompt falls through to \`exit 1\` and every HTTPS clone/fetch fails
27
+ // with "unable to read askpass response".
18
28
  const ASKPASS_SCRIPT = `#!/bin/sh
19
29
  case "$1" in
20
- *//github.com/*|*//github.com\\'*) : ;;
30
+ *//github.com/*|*//github.com\\'*|*//*@github.com/*|*//*@github.com\\'*) : ;;
21
31
  *) exit 1 ;;
22
32
  esac
23
33
  case "$1" in
@@ -1,5 +1,7 @@
1
1
  import { TYPECLAW_INTERNAL_BASH_ENV } from '@/agent/plugin-tools'
2
+ import type { SessionOrigin } from '@/agent/session-origin'
2
3
  import { definePlugin } from '@/plugin'
4
+ import { resolveHiddenPaths } from '@/sandbox'
3
5
 
4
6
  import { createApproveIdempotencyGuard } from './approve-idempotency'
5
7
  import { createGithubEffectiveApprovalResolver, createGithubHeadShaResolver } from './effective-approval'
@@ -14,6 +16,40 @@ export default definePlugin({
14
16
  plugin: async (ctx) => {
15
17
  const resolveTokenForRepo = ctx.github.resolveTokenForRepo
16
18
  const hasAppTokenResolver = ctx.github.hasAppTokenResolver
19
+
20
+ // A .env PAT is broad and long-lived, so it may only reach bash that runs
21
+ // WITHOUT bwrap's --clearenv — otherwise a low-trust, stranger-drivable
22
+ // sandbox could exfiltrate it. We gate on the SAME signal applyBashSandbox
23
+ // uses (resolveHiddenPaths empty => unsandboxed) rather than a role name, so
24
+ // the credential policy can never diverge from the actual sandbox decision
25
+ // and custom roles follow their real fs.see.secrets / security.bypass grant.
26
+ const runsUnsandboxed = (origin: SessionOrigin | undefined): boolean => {
27
+ const { dirs, files } = resolveHiddenPaths(ctx.permissions, origin, ctx.agentDir)
28
+ return dirs.length === 0 && files.length === 0
29
+ }
30
+
31
+ // The PAT is in the container env but stripped by --clearenv for this role,
32
+ // and a PAT is not re-mintable per repo, so there is no token to inject. Tell
33
+ // the AGENT (model-visible block) instead of letting git/gh fail ambiguously
34
+ // — the silent variant of this is exactly what caused a multi-day debugging
35
+ // hunt. App auth is the supported path for low-trust roles.
36
+ const sandboxedPatWithheldReason =
37
+ 'A classic/fine-grained GitHub PAT is configured (via .env GH_TOKEN), but this command runs ' +
38
+ 'in a sandboxed (low-trust) role whose environment is cleared before bash — so the PAT is ' +
39
+ 'withheld here and is NOT available to git/gh. This is a deliberate guard, not missing auth: a ' +
40
+ 'broad, long-lived PAT must not be reachable from a low-trust sandbox. Configure GitHub App auth ' +
41
+ '(channels.github) to grant per-repo, short-lived tokens that DO work for sandboxed roles.'
42
+
43
+ let warnedSandboxedPatWithheld = false
44
+ const warnSandboxedPatWithheldOnce = (): void => {
45
+ if (warnedSandboxedPatWithheld) return
46
+ warnedSandboxedPatWithheld = true
47
+ ctx.logger.warn(
48
+ 'GH_TOKEN (classic/fine-grained PAT) withheld from a sandboxed role: the env is cleared for ' +
49
+ 'low-trust bash, so git/gh have no credential. Configure GitHub App auth (channels.github) ' +
50
+ 'for per-repo tokens that work in sandboxed roles.',
51
+ )
52
+ }
17
53
  // `/user` resolves the caller's USER identity. An App installation token is not
18
54
  // a user, so GitHub rejects it on a token-class basis (403, or no-token error in
19
55
  // the sandbox) no matter how valid the token is. We block-and-guide so the agent
@@ -39,7 +75,7 @@ export default definePlugin({
39
75
  // 'fall-through' means "not a repo-targeting gh command" so the caller can
40
76
  // try the git path on the same command (e.g. `git ... # gh` substrings).
41
77
  const handleGhCommand = async (params: {
42
- event: { callId: string; args: Record<string, unknown> }
78
+ event: { callId: string; args: Record<string, unknown>; origin?: SessionOrigin }
43
79
  command: string
44
80
  }): Promise<HookResult | 'fall-through'> => {
45
81
  const { event, command } = params
@@ -83,19 +119,48 @@ export default definePlugin({
83
119
  }
84
120
 
85
121
  const tokenClass = classifyGhToken(process.env.GH_TOKEN)
86
- // Classic PATs reach every owner; nothing to inject or enforce.
87
- if (tokenClass === 'cross-owner') return
88
122
 
89
- if (decision.kind === 'block') return { block: true, reason: decision.reason }
123
+ // PAT classes (classic = cross-owner, fine-grained) are not re-minted per
124
+ // repo; the seeded GH_TOKEN is the only token we have. App minting, when
125
+ // available, is still preferred for SANDBOXED roles (the PAT can't reach
126
+ // them), so a PAT must NOT suppress minting there — only for unsandboxed
127
+ // execution does the PAT win. Unsandboxed: the PAT already rides inherited
128
+ // process.env, but re-asserting it in the overlay keeps the command-local
129
+ // GH_TOKEN explicit and consistent with the git path. Sandboxed PAT-only:
130
+ // block with guidance instead of failing silently.
131
+ // Set when a sandboxed PAT falls through to App minting: the tail's
132
+ // shouldMintAppToken(process.env.GH_TOKEN) re-check would see the PAT and
133
+ // bail, so this flag forces the mint that the PAT must not suppress.
134
+ let mintForSandboxedPat = false
135
+ if (tokenClass === 'cross-owner' || tokenClass === 'fine-grained-pat') {
136
+ // Unsandboxed: the PAT authenticates directly (it already rides inherited
137
+ // process.env). For a repo-targeting command we re-assert it in the
138
+ // overlay so behavior is explicit and matches the git path; otherwise we
139
+ // pass through. The App-oriented missing-repo / multi-owner BLOCK does
140
+ // NOT apply — a PAT needs no per-repo mint — so we never surface it here.
141
+ if (runsUnsandboxed(event.origin)) {
142
+ if (decision.kind === 'inject') {
143
+ event.args[TYPECLAW_INTERNAL_BASH_ENV] = { GH_TOKEN: process.env.GH_TOKEN as string }
144
+ }
145
+ return
146
+ }
147
+ // Sandboxed: the PAT is stripped by --clearenv. Prefer App minting when
148
+ // available (a PAT must NOT suppress it, or the original silent-failure
149
+ // bug returns); otherwise block with guidance rather than failing mute.
150
+ if (!shouldMintAppToken(undefined, hasAppTokenResolver())) {
151
+ if (decision.kind === 'block') return { block: true, reason: decision.reason }
152
+ warnSandboxedPatWithheldOnce()
153
+ return { block: true, reason: sandboxedPatWithheldReason }
154
+ }
155
+ mintForSandboxedPat = true
156
+ }
90
157
 
91
- // Fine-grained PATs are single-owner but cannot be re-minted per repo;
92
- // the seeded GH_TOKEN is the only token we have. Leave it in place so
93
- // `gh` fails honestly if the named repo is under a different owner.
94
- if (tokenClass === 'fine-grained-pat') return
158
+ if (decision.kind === 'block') return { block: true, reason: decision.reason }
95
159
 
96
160
  // No App auth (no App-class GH_TOKEN and no live minter): leave whatever
97
- // is seeded so `gh` fails honestly rather than us guessing a token.
98
- if (!shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
161
+ // is seeded so `gh` fails honestly rather than us guessing a token. The
162
+ // sandboxed-PAT mint path bypasses this PAT-class re-check via the flag.
163
+ if (!mintForSandboxedPat && !shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
99
164
 
100
165
  const result = await resolveTokenForRepo(decision.repoSlug)
101
166
  if (result.kind === 'unavailable') return { block: true, reason: result.reason }
@@ -107,34 +172,65 @@ export default definePlugin({
107
172
  }
108
173
 
109
174
  const handleGitCommand = async (params: {
110
- event: { args: Record<string, unknown> }
175
+ event: { args: Record<string, unknown>; origin?: SessionOrigin }
111
176
  command: string
112
177
  agentDir: string
113
178
  }): Promise<HookResult> => {
114
179
  const { event, command, agentDir } = params
115
- // Only App auth re-mints per repo. Classic/fine-grained PATs and absent
116
- // tokens are left untouched, exactly as the gh path treats them. App auth
117
- // is detected by the live minter too, not just an App-class GH_TOKEN:
118
- // multi-owner / no-repos App configs never seed GH_TOKEN yet can mint.
119
- if (!shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
180
+ const tokenClass = classifyGhToken(process.env.GH_TOKEN)
181
+ const isPat = tokenClass === 'cross-owner' || tokenClass === 'fine-grained-pat'
182
+
183
+ // A PAT is not re-mintable per repo. For unsandboxed roles it rides the
184
+ // git-askpass path so SSH/scp remotes get rewritten to https and clone
185
+ // works uniformly (matching the gh path). For sandboxed roles the PAT is
186
+ // withheld (env cleared): mint an App token instead if available, else
187
+ // block with guidance rather than letting git fail silently. App auth must
188
+ // still mint for sandboxed roles even when a PAT is present.
189
+ const useEnvPat = isPat && runsUnsandboxed(event.origin)
190
+ // Sandboxed PAT: the env is cleared, so the PAT can't reach git. Mint an
191
+ // App token instead when a minter is live (a PAT must NOT suppress it);
192
+ // otherwise block with guidance below rather than fail silently.
193
+ const mintForSandboxedPat = isPat && !useEnvPat && shouldMintAppToken(undefined, hasAppTokenResolver())
194
+ if (isPat && !useEnvPat && !mintForSandboxedPat) {
195
+ const decision = await analyzeGitCommand(command, { cwd: agentDir, resolvers: defaultGitResolvers })
196
+ if (decision.kind === 'pass-through') return
197
+ if (decision.kind === 'block') return { block: true, reason: decision.reason }
198
+ warnSandboxedPatWithheldOnce()
199
+ return { block: true, reason: sandboxedPatWithheldReason }
200
+ }
201
+
202
+ // Neither a usable PAT nor App auth: leave the command untouched so git
203
+ // fails honestly rather than us guessing a token. App auth is detected by
204
+ // the live minter too, not just an App-class GH_TOKEN: multi-owner /
205
+ // no-repos App configs never seed GH_TOKEN yet can mint. The mintForSandboxedPat
206
+ // flag forces minting past this PAT-class re-check.
207
+ if (!useEnvPat && !mintForSandboxedPat && !shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
120
208
 
121
209
  const decision = await analyzeGitCommand(command, { cwd: agentDir, resolvers: defaultGitResolvers })
122
210
  if (decision.kind === 'pass-through') return
123
211
  if (decision.kind === 'block') return { block: true, reason: decision.reason }
124
212
 
125
- const result = await resolveTokenForRepo(decision.repoSlug)
126
- if (result.kind === 'unavailable') return { block: true, reason: result.reason }
213
+ // The unsandboxed-PAT path uses the PAT directly; otherwise mint a per-repo
214
+ // App token. Both ride TYPECLAW_GIT_TOKEN (read by the askpass helper),
215
+ // never argv/config.
216
+ let gitToken: string
217
+ if (useEnvPat) {
218
+ gitToken = process.env.GH_TOKEN as string
219
+ } else {
220
+ const result = await resolveTokenForRepo(decision.repoSlug)
221
+ if (result.kind === 'unavailable') return { block: true, reason: result.reason }
222
+ gitToken = result.token
223
+ }
127
224
 
128
225
  const askpass = await ensureGitAskPassHelper()
129
226
  const existing = event.args[TYPECLAW_INTERNAL_BASH_ENV]
130
227
  const overlay = existing !== null && typeof existing === 'object' ? (existing as Record<string, string>) : {}
131
- // Token rides in TYPECLAW_GIT_TOKEN (read by the askpass helper), never in
132
- // argv/config. insteadOf rewrites SSH/scp remotes to https so the helper's
133
- // credential applies; GIT_TERMINAL_PROMPT=0 fails fast instead of hanging.
228
+ // insteadOf rewrites SSH/scp remotes to https so the helper's credential
229
+ // applies; GIT_TERMINAL_PROMPT=0 fails fast instead of hanging.
134
230
  event.args[TYPECLAW_INTERNAL_BASH_ENV] = {
135
231
  ...overlay,
136
232
  GIT_ASKPASS: askpass,
137
- TYPECLAW_GIT_TOKEN: result.token,
233
+ TYPECLAW_GIT_TOKEN: gitToken,
138
234
  GIT_TERMINAL_PROMPT: '0',
139
235
  GIT_CONFIG_COUNT: '2',
140
236
  GIT_CONFIG_KEY_0: 'url.https://github.com/.insteadOf',
@@ -165,12 +261,18 @@ export default definePlugin({
165
261
  },
166
262
  'tool.after': async (event) => {
167
263
  checkGraphqlAuthNudge({ tool: event.tool, result: event.result })
168
- const committed = commitReviewIfSucceeded({
264
+ const review = commitReviewIfSucceeded({
169
265
  sessionId: event.sessionId,
170
266
  callId: event.callId,
171
267
  result: event.result,
172
268
  })
173
- await verdictGuard.release({ callId: event.callId, succeeded: committed })
269
+ await verdictGuard.release({ callId: event.callId, succeeded: review.committed })
270
+ // A backstop-recovered verdict had no guard() reservation, so release()
271
+ // could not arm the lag shield — do it explicitly here so the next
272
+ // same-commit submission is deduped.
273
+ if (review.landedFromResult !== null) {
274
+ await verdictGuard.noteLandedReview(review.landedFromResult)
275
+ }
174
276
  },
175
277
  },
176
278
  }
@@ -3,7 +3,12 @@ import { readFile } from 'node:fs/promises'
3
3
  import { recordReview } from '@/channels/github-review-turn-ledger'
4
4
  import type { ContentPart, ToolResult } from '@/plugin'
5
5
 
6
- import { detectReviewSubmission, type DetectedReview } from './gh-review-detect'
6
+ import {
7
+ detectReviewSubmission,
8
+ detectReviewSubmissionAttempt,
9
+ type DetectedReview,
10
+ type ReviewSubmissionAttempt,
11
+ } from './gh-review-detect'
7
12
  import { detectReviewDump, type ReviewDumpDecision } from './gh-review-inline-detect'
8
13
 
9
14
  // Bridges the bash `gh` interceptor to the false-receipt ledger: at tool.before
@@ -12,8 +17,18 @@ import { detectReviewDump, type ReviewDumpDecision } from './gh-review-inline-de
12
17
  // succeeded. Strict success detection is the safe bias here — wrongly crediting a
13
18
  // review that never landed would re-open the false-receipt hole, so an ambiguous
14
19
  // result is treated as "not landed" and left uncredited.
20
+ //
21
+ // A post-execution BACKSTOP runs when pre-detection produced no pending entry: the
22
+ // REST create-review response is authoritative (it echoes the landed review's
23
+ // `state` and the PR url), so we can credit a verdict whose command shape dodged
24
+ // the before-detector. This only arms the dedupe window for the NEXT submission —
25
+ // it cannot un-land a duplicate already posted — but that is precisely what the
26
+ // sequential fan-out incident needed: the first landed APPROVE must arm the shield.
27
+ // The backstop is gated on a tool.before submission-ATTEMPT marker so it never
28
+ // fires for a reviews-list READ whose response happens to carry a decisive state.
15
29
 
16
30
  const pending = new Map<string, DetectedReview>()
31
+ const submissionAttempts = new Map<string, ReviewSubmissionAttempt>()
17
32
 
18
33
  const MAX_INPUT_BYTES = 1_000_000
19
34
 
@@ -26,21 +41,100 @@ export async function noteReviewCommand(args: { callId: string; command: string
26
41
  const inputFileContents = await readInputFile(args.command)
27
42
  const detected = detectReviewSubmission({ command: args.command, inputFileContents })
28
43
  if (detected !== null) pending.set(args.callId, detected)
44
+ // Record submission INTENT even when the verdict could not be extracted (a
45
+ // missed shape): only such a command may later arm the backstop, so a reviews
46
+ // READ — which is not an attempt — can never be miscredited as a landed review.
47
+ else {
48
+ const attempt = detectReviewSubmissionAttempt(args.command)
49
+ if (attempt !== null) submissionAttempts.set(args.callId, attempt)
50
+ }
29
51
  return { dump: detectReviewDump({ command: args.command, inputFileContents }), detected }
30
52
  }
31
53
 
32
- export function commitReviewIfSucceeded(args: { sessionId: string; callId: string; result: ToolResult }): boolean {
54
+ export type CommitReviewResult = {
55
+ // Whether a verdict was credited this turn (drives verdictGuard.release()).
56
+ committed: boolean
57
+ // Set ONLY on the backstop path (pre-detection missed): the caller must arm the
58
+ // idempotency lag shield with this, since no guard() reservation exists to do it
59
+ // via release(). Null on the pending path, where release() arms the shield.
60
+ landedFromResult: DetectedReview | null
61
+ }
62
+
63
+ export function commitReviewIfSucceeded(args: {
64
+ sessionId: string
65
+ callId: string
66
+ result: ToolResult
67
+ }): CommitReviewResult {
68
+ const text = collectText(args.result.content)
33
69
  const detected = pending.get(args.callId)
34
- if (detected === undefined) return false
35
- pending.delete(args.callId)
36
- if (!looksSucceeded(detected, collectText(args.result.content))) return false
70
+ if (detected !== undefined) {
71
+ pending.delete(args.callId)
72
+ if (!looksSucceeded(detected, text)) return { committed: false, landedFromResult: null }
73
+ recordReview({
74
+ sessionId: args.sessionId,
75
+ workspace: detected.workspace,
76
+ prNumber: detected.prNumber,
77
+ verdict: detected.verdict,
78
+ })
79
+ return { committed: true, landedFromResult: null }
80
+ }
81
+
82
+ // The backstop runs ONLY for a command that tool.before saw as a real
83
+ // submission attempt (POST create-review) but whose verdict it could not
84
+ // extract. This excludes a reviews-list READ outright, and the PR-match below
85
+ // rejects a stray pulls URL for a different PR in the output.
86
+ const attempt = submissionAttempts.get(args.callId)
87
+ if (attempt === undefined) return { committed: false, landedFromResult: null }
88
+ submissionAttempts.delete(args.callId)
89
+
90
+ const landed = detectLandedReviewFromResult(text)
91
+ if (landed === null) return { committed: false, landedFromResult: null }
92
+ if (landed.workspace !== attempt.workspace || landed.prNumber !== attempt.prNumber) {
93
+ return { committed: false, landedFromResult: null }
94
+ }
37
95
  recordReview({
38
96
  sessionId: args.sessionId,
39
- workspace: detected.workspace,
40
- prNumber: detected.prNumber,
41
- verdict: detected.verdict,
97
+ workspace: landed.workspace,
98
+ prNumber: landed.prNumber,
99
+ verdict: landed.verdict,
42
100
  })
43
- return true
101
+ return { committed: true, landedFromResult: landed }
102
+ }
103
+
104
+ // Authoritative post-execution credit from a REST create-review response, used
105
+ // only when pre-detection missed (no pending entry). Requires ALL of: a decisive
106
+ // landed `state`, a recoverable PR identity from the echoed `pull_request_url`,
107
+ // and no failure marker — so a partial/garbled capture or an unrelated success
108
+ // line cannot fabricate a verdict. COMMENT and DISMISSED are not decisive and are
109
+ // ignored, matching the before-detector's scope.
110
+ function detectLandedReviewFromResult(text: string): DetectedReview | null {
111
+ if (FAILURE_MARKERS.some((m) => text.includes(m))) return null
112
+ const verdict = landedVerdictFromState(text)
113
+ if (verdict === null) return null
114
+ const pr = prFromPullRequestUrl(text)
115
+ if (pr === null) return null
116
+ return { workspace: pr.workspace, prNumber: pr.prNumber, verdict, source: 'api' }
117
+ }
118
+
119
+ // The create-review response echoes `"state": "APPROVED" | "CHANGES_REQUESTED"`.
120
+ // Tolerant of the spacing both `gh api` (compact) and a piped `jq .` (pretty)
121
+ // produce.
122
+ function landedVerdictFromState(text: string): DetectedReview['verdict'] | null {
123
+ if (/"state"\s*:\s*"APPROVED"/.test(text)) return 'APPROVE'
124
+ if (/"state"\s*:\s*"CHANGES_REQUESTED"/.test(text)) return 'REQUEST_CHANGES'
125
+ return null
126
+ }
127
+
128
+ // The review object carries `"pull_request_url":
129
+ // "https://api.github.com/repos/<owner>/<repo>/pulls/<n>"`, the authoritative PR
130
+ // identity for the landed review. Recovered here so a shape-dodging command is
131
+ // still credited to the right PR.
132
+ function prFromPullRequestUrl(text: string): { workspace: string; prNumber: number } | null {
133
+ const m = /\/repos\/([^/\s"]+)\/([^/\s"]+)\/pulls\/(\d+)\b/.exec(text)
134
+ if (m === null) return null
135
+ const prNumber = Number(m[3])
136
+ if (!Number.isSafeInteger(prNumber)) return null
137
+ return { workspace: `${m[1]}/${m[2]}`, prNumber }
44
138
  }
45
139
 
46
140
  async function readInputFile(command: string): Promise<string | null> {
@@ -88,49 +88,83 @@ function splitRow(row: string): string[] {
88
88
  }
89
89
 
90
90
  function computeWidths(rows: string[][]): number[] {
91
- const widths: number[] = []
92
- for (const row of rows) {
93
- for (let c = 0; c < row.length; c++) {
94
- const cellWidth = displayWidth(row[c]!)
95
- if (widths[c] === undefined || cellWidth > widths[c]!) {
96
- widths[c] = cellWidth
97
- }
98
- }
99
- }
100
- return widths
91
+ const columnCount = Math.max(0, ...rows.map((row) => row.length))
92
+ return Array.from({ length: columnCount }, (_, c) => Math.max(0, ...rows.map((row) => widthTenths(row[c] ?? ''))))
101
93
  }
102
94
 
103
95
  function padRow(cells: string[], widths: number[]): string {
104
- const padded = widths.map((width, c) => padToWidth(cells[c] ?? '', width))
96
+ const pads = computePads(cells, widths)
105
97
  // Two spaces between columns keeps them visually distinct inside the
106
98
  // monospaced span without a vertical-bar separator.
107
- return padded.join(' ')
99
+ return widths.map((_, c) => (cells[c] ?? '') + ' '.repeat(pads[c]!)).join(' ')
108
100
  }
109
101
 
110
- function padToWidth(cell: string, width: number): string {
111
- const pad = width - displayWidth(cell)
112
- return pad > 0 ? cell + ' '.repeat(pad) : cell
102
+ // A CJK glyph is 1.7 cells wide, so column widths are fractional while padding
103
+ // can only insert whole spaces. The naive fix — round each column's deficit on
104
+ // its own — keeps total row widths close but lets the START of column N drift
105
+ // between rows (a row may earn an extra space in column 1 that another row
106
+ // spends in column 3), which is exactly the misalignment a table must avoid.
107
+ //
108
+ // Instead we make every column BOUNDARY prefix-stable: the running deficit up to
109
+ // column c is rounded to whole spaces once, and each cell's pad is the delta of
110
+ // consecutive rounded prefixes. Because the rounding target at boundary c (the
111
+ // column max-widths plus separators before it) is the same constant for every
112
+ // row, each boundary lands within half a cell of the same offset across rows —
113
+ // the tightest column alignment whole-space padding allows.
114
+ //
115
+ // Widths are carried in TENTHS of a cell (latin 10, wide 17) so the arithmetic
116
+ // is exact integers; 1.7 has no finite binary form and would otherwise let
117
+ // rounding flip on float noise.
118
+ function computePads(cells: string[], widths: number[]): number[] {
119
+ const pads: number[] = []
120
+ let prefixDeficit = 0
121
+ let prevPrefixPad = 0
122
+ for (let c = 0; c < widths.length; c++) {
123
+ prefixDeficit += Math.max(0, widths[c]! - widthTenths(cells[c] ?? ''))
124
+ const prefixPad = roundTenthsToSpaces(prefixDeficit)
125
+ pads.push(Math.max(0, prefixPad - prevPrefixPad))
126
+ prevPrefixPad = prefixPad
127
+ }
128
+ return pads
113
129
  }
114
130
 
115
- // Discord's monospaced inline-code font renders CJK ideographs, full-width
116
- // punctuation, and most emoji at two columns, while combining/zero-width marks
117
- // take none. `String.prototype.padEnd` counts UTF-16 code units, so padding by
118
- // `.length` leaves wide-character tables visually ragged. We iterate by code
119
- // point and sum per-glyph column widths so every cell pads to the same VISUAL
120
- // width. The ranges below are the standard East-Asian-Wide / Wide blocks plus
121
- // the common emoji planes; this is the same wcwidth approximation editors use.
122
- export function displayWidth(text: string): number {
123
- let width = 0
131
+ const CELL_TENTHS = 10
132
+
133
+ function roundTenthsToSpaces(tenths: number): number {
134
+ return Math.floor((tenths + CELL_TENTHS / 2) / CELL_TENTHS)
135
+ }
136
+
137
+ function widthTenths(text: string): number {
138
+ let tenths = 0
124
139
  for (const ch of text) {
125
- width += charWidth(ch.codePointAt(0)!)
140
+ tenths += charWidthTenths(ch.codePointAt(0)!)
126
141
  }
127
- return width
142
+ return tenths
128
143
  }
129
144
 
130
- function charWidth(cp: number): number {
145
+ function charWidthTenths(cp: number): number {
131
146
  if (isZeroWidth(cp)) return 0
132
- if (isWide(cp)) return 2
133
- return 1
147
+ if (isWide(cp)) return WIDE_CHAR_TENTHS
148
+ return CELL_TENTHS
149
+ }
150
+
151
+ // Discord's monospaced inline-code font renders CJK ideographs, full-width
152
+ // punctuation, and most emoji WIDER than a latin glyph, while combining/
153
+ // zero-width marks take none. `String.prototype.padEnd` counts UTF-16 code
154
+ // units, so padding by `.length` leaves wide-character tables visually ragged.
155
+ // The ranges below are the standard East-Asian-Wide / Wide blocks plus the
156
+ // common emoji planes.
157
+ //
158
+ // The wide multiplier is 1.7, not the textbook wcwidth value of 2: Discord's
159
+ // inline-code font renders a Hangul/CJK glyph at roughly 1.7x a latin
160
+ // monospace cell, so charging 2 over-pads CJK columns and leaves them visibly
161
+ // too wide. `displayWidth` reports that fractional width in cells; the table
162
+ // padder works in the integer `widthTenths` domain to keep boundary rounding
163
+ // exact — see `computePads`.
164
+ const WIDE_CHAR_TENTHS = 17
165
+
166
+ export function displayWidth(text: string): number {
167
+ return widthTenths(text) / CELL_TENTHS
134
168
  }
135
169
 
136
170
  function isZeroWidth(cp: number): boolean {