npm - typeclaw - Versions diffs - 0.36.6 → 0.36.8 - Mend

typeclaw 0.36.6 → 0.36.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json +2 -2
package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +29 -3
package/src/bundled-plugins/github-cli-auth/gh-review-detect.ts +144 -25
package/src/bundled-plugins/github-cli-auth/git-askpass.ts +14 -4
package/src/bundled-plugins/github-cli-auth/index.ts +126 -24
package/src/bundled-plugins/github-cli-auth/review-recorder.ts +103 -9
package/src/channels/adapters/discord-bot-format.ts +63 -29
package/src/init/dockerfile.ts +43 -17
package/src/init/line-auth.ts +50 -21

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "typeclaw",
-  "version": "0.36.6",
+  "version": "0.36.8",
   "homepage": "https://github.com/typeclaw/typeclaw#readme",
   "bugs": {
     "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -48,7 +48,7 @@
     "@mariozechner/pi-tui": "^0.67.3",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@mozilla/readability": "^0.6.0",
-    "agent-messenger": "2.20.0",
+    "agent-messenger": "2.20.1",
     "cheerio": "^1.2.0",
     "citty": "^0.2.2",
     "cron-parser": "^5.5.0",

package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts CHANGED Viewed

@@ -33,6 +33,13 @@ export type ReviewVerdictGuard = {
     verdict: ReviewVerdict
   }) => Promise<ApproveBlock | null>
   release: (args: { callId: string; succeeded: boolean }) => Promise<void>
+  // Arms the read-after-write lag shield for a verdict that landed WITHOUT a prior
+  // guard() reservation. The pre-execution detector can miss a review-submission
+  // command shape, so the verdict is only recovered post-hoc from the REST result
+  // (review-recorder's backstop). Without this, `release()` has no reservation for
+  // that callId and never writes `recentLandedByPr`, leaving the next same-commit
+  // submission undeduped — the exact gap the backstop was meant to close.
+  noteLandedReview: (args: { workspace: string; prNumber: number; verdict: ReviewVerdict }) => Promise<void>
 }
 // Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
@@ -73,10 +80,15 @@ const LEASE_TTL_MS = 5 * 60_000
 // read-after-write lag rather than a genuine absence. GitHub's `/pulls/<n>/reviews`
 // list lags a write by up to ~10s, so a second engagement turn firing in that
 // window reads NONE and would land a duplicate. Observed duplicates were ~10-18s
-// apart; 60s is a comfortable lag margin without making a legitimate re-verdict
-// wait long. This window only shadows a raw NONE on the SAME verdict (+ same or
+// apart originally; a later fan-out incident spread FOUR sequential APPROVEs over
+// ~15s (one channel session per inline review thread), each ~3-7s after the last —
+// well inside the read lag yet beyond a single turn. 120s gives margin for that
+// thread fan-out plus slow API indexing without turning the shield into a human-
+// facing rate limit (a legitimate re-verdict after a new push carries a new head
+// SHA and bypasses this entirely; staying under ~5min avoids blocking genuine
+// re-reviews). This window only shadows a raw NONE on the SAME verdict (+ same or
 // uncertain head) — a DISMISSED/CHANGES_REQUESTED/flipped-verdict all bypass it.
-const RECENT_LANDED_TTL_MS = 60_000
+const RECENT_LANDED_TTL_MS = 120_000
 type Reservation = {
   key: string
@@ -230,6 +242,20 @@ export function createApproveIdempotencyGuard(deps: {
         releaseReservation(args.callId, reservation)
       }
     },
+    async noteLandedReview(args): Promise<void> {
+      if (args.verdict !== 'APPROVE' && args.verdict !== 'REQUEST_CHANGES') return
+      // No pre-submit head was captured (guard() never ran), so the best pin we
+      // can prove is the CURRENT head. A null resolve becomes the uncertainty
+      // sentinel, which still matches the current head for the lag window — the
+      // same conservative behaviour release() uses for a push-during-review.
+      const headSha = (await deps.resolveHeadSha?.({ workspace: args.workspace, prNumber: args.prNumber })) ?? null
+      recentLandedByPr.set(prKey(args.workspace, args.prNumber), {
+        verdict: args.verdict,
+        headSha,
+        landedAt: now(),
+      })
+    },
   }
 }

package/src/bundled-plugins/github-cli-auth/gh-review-detect.ts CHANGED Viewed

@@ -6,6 +6,15 @@ import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
 // inline `-f/-F event=...`, and the `gh pr review` porcelain. Returns null when
 // the command is not a verdict-bearing review submission (incl. COMMENT reviews,
 // which carry no false-receipt risk and are not tracked).
+//
+// The `gh` invocation does NOT have to lead the command. The observed duplicate-
+// approval incident used four different shapes — `cd /agent && gh api …`,
+// `tmp=$(mktemp); … ; gh api --input "$tmp"`, a heredoc-then-`gh` two-stager, and
+// the canonical bare `gh api …`. Only the bare shape was detected, so the
+// idempotency guard never armed for the other three and the duplicates landed.
+// Detection therefore scans every shell-separated segment for a `gh` invocation,
+// independent of `analyzeGhCommand` (which is a token-injection-safety gate, not a
+// proxy for "will this command execute" — a classic PAT skips that block).
 // `source` drives success detection downstream: the REST endpoints echo the
 // created review JSON, while the `gh pr review` porcelain prints a plain
@@ -24,17 +33,80 @@ export type GhReviewDetectInput = {
   inputFileContents?: string | null
 }
-const REVIEWS_ENDPOINT = /\/repos\/([^/\s]+)\/([^/\s]+)\/pulls\/(\d+)\/reviews\b/
+// `gh api` accepts the endpoint with or without a leading slash
+// (`repos/o/r/pulls/N/reviews` and `/repos/…` both work), so the match is
+// anchored on a `repos/` boundary, not a slash. The observed compound shape
+// `cd /agent && gh api -X POST repos/o/r/pulls/224/reviews …` used the
+// slash-less form and was missed by the slash-anchored pattern.
+const REVIEWS_ENDPOINT = /(?:^|\/)repos\/([^/\s]+)\/([^/\s]+)\/pulls\/(\d+)\/reviews\b/
 export function detectReviewSubmission(input: GhReviewDetectInput): DetectedReview | null {
-  const args = splitArgs(input.command)
-  if (args[0] !== 'gh') return null
+  const fileContents = input.inputFileContents ?? null
+  for (const segment of ghSegments(input.command)) {
+    const detected = detectInGhSegment(segment, fileContents)
+    if (detected !== null) return detected
+  }
+  return null
+}
+// Each segment is the argv of one `gh` invocation found anywhere in the command.
+function detectInGhSegment(args: readonly string[], fileContents: string | null): DetectedReview | null {
   const sub = args[1]
-  if (sub === 'api') return detectApiReview(args, input.inputFileContents ?? null)
+  if (sub === 'api') return detectApiReview(args, fileContents)
   if (sub === 'pr' && args[2] === 'review') return detectPrReview(args)
   return null
 }
+export type ReviewSubmissionAttempt = { workspace: string; prNumber: number }
+// Submission INTENT, verdict aside: a `gh api .../pulls/N/reviews` with a POST
+// method, or a `gh pr review N` carrying a verdict flag. Gates the post-execution
+// backstop so it only fires for a command that actually tried to CREATE a review.
+// A bare `gh api .../pulls/N/reviews` is a GET that LISTS existing reviews; its
+// response array can contain `"state":"APPROVED"` and a pulls URL, which would
+// otherwise make the backstop credit a review that never landed this turn. A read
+// is not an attempt and returns null here.
+export function detectReviewSubmissionAttempt(command: string): ReviewSubmissionAttempt | null {
+  for (const args of ghSegments(command)) {
+    const attempt = attemptInGhSegment(args)
+    if (attempt !== null) return attempt
+  }
+  return null
+}
+function attemptInGhSegment(args: readonly string[]): ReviewSubmissionAttempt | null {
+  const sub = args[1]
+  if (sub === 'api') {
+    if (!isPostMethod(args)) return null
+    const endpoint = args.find((a) => REVIEWS_ENDPOINT.test(a))
+    if (endpoint === undefined) return null
+    const m = REVIEWS_ENDPOINT.exec(endpoint)
+    if (m === null) return null
+    const prNumber = Number(m[3])
+    if (!Number.isSafeInteger(prNumber)) return null
+    return { workspace: `${m[1]}/${m[2]}`, prNumber }
+  }
+  if (sub === 'pr' && args[2] === 'review') {
+    const detected = detectPrReview(args)
+    return detected === null ? null : { workspace: detected.workspace, prNumber: detected.prNumber }
+  }
+  return null
+}
+// `gh api` defaults to GET; creating a review is a POST. Accept `-X POST` /
+// `--method POST` in both `flag value` and `flag=value` shapes, case-insensitive.
+function isPostMethod(args: readonly string[]): boolean {
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i]
+    if (a === undefined) continue
+    if ((a === '-X' || a === '--method') && (args[i + 1] ?? '').toUpperCase() === 'POST') return true
+    if ((a.startsWith('-X=') || a.startsWith('--method=')) && a.slice(a.indexOf('=') + 1).toUpperCase() === 'POST') {
+      return true
+    }
+  }
+  return false
+}
 function detectApiReview(args: readonly string[], fileContents: string | null): DetectedReview | null {
   const endpoint = args.find((a) => REVIEWS_ENDPOINT.test(a))
   if (endpoint === undefined) return null
@@ -139,37 +211,84 @@ function isRepoSlug(value: string | undefined): boolean {
   return owner !== undefined && owner !== '' && name !== undefined && name !== '' && rest.length === 0
 }
-// Quote-aware whitespace split. The interceptor guarantees a single bare `gh`
-// command before we record (no pipes/substitution), so this only needs to honor
-// quotes, not full shell grammar.
-function splitArgs(command: string): string[] {
-  const out: string[] = []
-  let cur = ''
+// Yields the argv of every `gh` invocation in the command, one per shell-
+// separated segment. A segment runs from one command separator (`&&`, `||`, `;`,
+// `|`, newline) to the next; within it we strip leading `VAR=value` assignments
+// (so `tmp=$(mktemp) gh …` and a `VAR=…` prefix both still see `gh` first) and
+// recognise `gh` as the segment's command word. Quote-aware so an embedded `;`
+// or `gh` inside a quoted body (e.g. a review `-f body='…'`) is not mistaken for
+// a separator or a second invocation.
+function* ghSegments(command: string): Generator<readonly string[]> {
+  for (const segment of splitSegments(command)) {
+    const args = stripLeadingAssignments(segment)
+    if (args[0] === 'gh') yield args
+  }
+}
+// Drop leading `NAME=value` tokens (env-var prefixes) so the command word that
+// follows them is the one we classify. `tmp=$(mktemp)` tokenises to a single
+// `tmp=$(mktemp)` token here (the `$(…)` stays attached), which this skips.
+function stripLeadingAssignments(args: readonly string[]): readonly string[] {
+  let i = 0
+  while (i < args.length && /^[A-Za-z_][A-Za-z0-9_]*=/.test(args[i] as string)) i++
+  return args.slice(i)
+}
+// Quote-aware split into shell segments AND tokens. Segments break on top-level
+// `&&`, `||`, `;`, `|`, and newlines (outside quotes). Heredoc bodies are NOT
+// modelled — a heredoc writes a payload file consumed by a later `gh … --input`
+// segment, and that file's contents are resolved separately (review-recorder
+// reads it off disk); detection here only needs the `gh` segment itself.
+function splitSegments(command: string): string[][] {
+  const segments: string[][] = []
+  let cur: string[] = []
+  let tok = ''
   let quote: '"' | "'" | null = null
-  let has = false
-  for (const ch of command) {
+  let hasTok = false
+  const endTok = () => {
+    if (hasTok) {
+      cur.push(tok)
+      tok = ''
+      hasTok = false
+    }
+  }
+  const endSeg = () => {
+    endTok()
+    if (cur.length > 0) {
+      segments.push(cur)
+      cur = []
+    }
+  }
+  for (let i = 0; i < command.length; i++) {
+    const ch = command[i] as string
     if (quote !== null) {
       if (ch === quote) quote = null
-      else cur += ch
-      has = true
+      else tok += ch
+      hasTok = true
       continue
     }
     if (ch === '"' || ch === "'") {
       quote = ch
-      has = true
+      hasTok = true
+      continue
+    }
+    const next = command[i + 1]
+    if ((ch === '&' && next === '&') || (ch === '|' && next === '|')) {
+      endSeg()
+      i++
+      continue
+    }
+    if (ch === ';' || ch === '|' || ch === '\n') {
+      endSeg()
       continue
     }
-    if (ch === ' ' || ch === '\t' || ch === '\n') {
-      if (has) {
-        out.push(cur)
-        cur = ''
-        has = false
-      }
+    if (ch === ' ' || ch === '\t' || ch === '\r') {
+      endTok()
       continue
     }
-    cur += ch
-    has = true
+    tok += ch
+    hasTok = true
   }
-  if (has) out.push(cur)
-  return out
+  endSeg()
+  return segments
 }

package/src/bundled-plugins/github-cli-auth/git-askpass.ts CHANGED Viewed

@@ -12,12 +12,22 @@ import { dirname, join } from 'node:path'
 // `insteadOf`/`pushurl` rewrite redirected to) we exit non-zero WITHOUT printing
 // the token, so a redirect can never exfiltrate it. The analyzer already blocks
 // the known redirect vectors; this is defense-in-depth at the credential edge.
-// The host match is on \`//github.com/\` and \`//github.com'\` (git wraps the URL
-// in quotes: \`Password for 'https://github.com': \`) so it cannot be fooled by
-// \`evil-github.com\` or \`github.com.evil/\`.
+//
+// Two prompt shapes must match, because git rewrites the host between the two
+// prompts of a single clone/fetch: it first asks `Username for
+// 'https://github.com': `, and AFTER we answer `x-access-token` it folds that
+// userinfo into the host of the SECOND prompt — `Password for
+// 'https://x-access-token@github.com': `. So we accept both bare-host
+// (\`//github.com/\` or \`//github.com'\`) and userinfo-host
+// (\`//<user>@github.com/\` or \`//<user>@github.com'\`). The anchor is the
+// literal \`github.com\` immediately followed by \`/\` or the closing quote git
+// wraps the URL in, so it cannot be fooled by \`evil-github.com\`,
+// \`github.com.evil/\`, or \`x@github.com.evil/\`. Without the userinfo arm the
+// password prompt falls through to \`exit 1\` and every HTTPS clone/fetch fails
+// with "unable to read askpass response".
 const ASKPASS_SCRIPT = `#!/bin/sh
 case "$1" in
-  *//github.com/*|*//github.com\\'*) : ;;
+  *//github.com/*|*//github.com\\'*|*//*@github.com/*|*//*@github.com\\'*) : ;;
   *) exit 1 ;;
 esac
 case "$1" in

package/src/bundled-plugins/github-cli-auth/index.ts CHANGED Viewed

@@ -1,5 +1,7 @@
 import { TYPECLAW_INTERNAL_BASH_ENV } from '@/agent/plugin-tools'
+import type { SessionOrigin } from '@/agent/session-origin'
 import { definePlugin } from '@/plugin'
+import { resolveHiddenPaths } from '@/sandbox'
 import { createApproveIdempotencyGuard } from './approve-idempotency'
 import { createGithubEffectiveApprovalResolver, createGithubHeadShaResolver } from './effective-approval'
@@ -14,6 +16,40 @@ export default definePlugin({
   plugin: async (ctx) => {
     const resolveTokenForRepo = ctx.github.resolveTokenForRepo
     const hasAppTokenResolver = ctx.github.hasAppTokenResolver
+    // A .env PAT is broad and long-lived, so it may only reach bash that runs
+    // WITHOUT bwrap's --clearenv — otherwise a low-trust, stranger-drivable
+    // sandbox could exfiltrate it. We gate on the SAME signal applyBashSandbox
+    // uses (resolveHiddenPaths empty => unsandboxed) rather than a role name, so
+    // the credential policy can never diverge from the actual sandbox decision
+    // and custom roles follow their real fs.see.secrets / security.bypass grant.
+    const runsUnsandboxed = (origin: SessionOrigin | undefined): boolean => {
+      const { dirs, files } = resolveHiddenPaths(ctx.permissions, origin, ctx.agentDir)
+      return dirs.length === 0 && files.length === 0
+    }
+    // The PAT is in the container env but stripped by --clearenv for this role,
+    // and a PAT is not re-mintable per repo, so there is no token to inject. Tell
+    // the AGENT (model-visible block) instead of letting git/gh fail ambiguously
+    // — the silent variant of this is exactly what caused a multi-day debugging
+    // hunt. App auth is the supported path for low-trust roles.
+    const sandboxedPatWithheldReason =
+      'A classic/fine-grained GitHub PAT is configured (via .env GH_TOKEN), but this command runs ' +
+      'in a sandboxed (low-trust) role whose environment is cleared before bash — so the PAT is ' +
+      'withheld here and is NOT available to git/gh. This is a deliberate guard, not missing auth: a ' +
+      'broad, long-lived PAT must not be reachable from a low-trust sandbox. Configure GitHub App auth ' +
+      '(channels.github) to grant per-repo, short-lived tokens that DO work for sandboxed roles.'
+    let warnedSandboxedPatWithheld = false
+    const warnSandboxedPatWithheldOnce = (): void => {
+      if (warnedSandboxedPatWithheld) return
+      warnedSandboxedPatWithheld = true
+      ctx.logger.warn(
+        'GH_TOKEN (classic/fine-grained PAT) withheld from a sandboxed role: the env is cleared for ' +
+          'low-trust bash, so git/gh have no credential. Configure GitHub App auth (channels.github) ' +
+          'for per-repo tokens that work in sandboxed roles.',
+      )
+    }
     // `/user` resolves the caller's USER identity. An App installation token is not
     // a user, so GitHub rejects it on a token-class basis (403, or no-token error in
     // the sandbox) no matter how valid the token is. We block-and-guide so the agent
@@ -39,7 +75,7 @@ export default definePlugin({
     // 'fall-through' means "not a repo-targeting gh command" so the caller can
     // try the git path on the same command (e.g. `git ... # gh` substrings).
     const handleGhCommand = async (params: {
-      event: { callId: string; args: Record<string, unknown> }
+      event: { callId: string; args: Record<string, unknown>; origin?: SessionOrigin }
       command: string
     }): Promise<HookResult | 'fall-through'> => {
       const { event, command } = params
@@ -83,19 +119,48 @@ export default definePlugin({
       }
       const tokenClass = classifyGhToken(process.env.GH_TOKEN)
-      // Classic PATs reach every owner; nothing to inject or enforce.
-      if (tokenClass === 'cross-owner') return
-      if (decision.kind === 'block') return { block: true, reason: decision.reason }
+      // PAT classes (classic = cross-owner, fine-grained) are not re-minted per
+      // repo; the seeded GH_TOKEN is the only token we have. App minting, when
+      // available, is still preferred for SANDBOXED roles (the PAT can't reach
+      // them), so a PAT must NOT suppress minting there — only for unsandboxed
+      // execution does the PAT win. Unsandboxed: the PAT already rides inherited
+      // process.env, but re-asserting it in the overlay keeps the command-local
+      // GH_TOKEN explicit and consistent with the git path. Sandboxed PAT-only:
+      // block with guidance instead of failing silently.
+      // Set when a sandboxed PAT falls through to App minting: the tail's
+      // shouldMintAppToken(process.env.GH_TOKEN) re-check would see the PAT and
+      // bail, so this flag forces the mint that the PAT must not suppress.
+      let mintForSandboxedPat = false
+      if (tokenClass === 'cross-owner' || tokenClass === 'fine-grained-pat') {
+        // Unsandboxed: the PAT authenticates directly (it already rides inherited
+        // process.env). For a repo-targeting command we re-assert it in the
+        // overlay so behavior is explicit and matches the git path; otherwise we
+        // pass through. The App-oriented missing-repo / multi-owner BLOCK does
+        // NOT apply — a PAT needs no per-repo mint — so we never surface it here.
+        if (runsUnsandboxed(event.origin)) {
+          if (decision.kind === 'inject') {
+            event.args[TYPECLAW_INTERNAL_BASH_ENV] = { GH_TOKEN: process.env.GH_TOKEN as string }
+          }
+          return
+        }
+        // Sandboxed: the PAT is stripped by --clearenv. Prefer App minting when
+        // available (a PAT must NOT suppress it, or the original silent-failure
+        // bug returns); otherwise block with guidance rather than failing mute.
+        if (!shouldMintAppToken(undefined, hasAppTokenResolver())) {
+          if (decision.kind === 'block') return { block: true, reason: decision.reason }
+          warnSandboxedPatWithheldOnce()
+          return { block: true, reason: sandboxedPatWithheldReason }
+        }
+        mintForSandboxedPat = true
+      }
-      // Fine-grained PATs are single-owner but cannot be re-minted per repo;
-      // the seeded GH_TOKEN is the only token we have. Leave it in place so
-      // `gh` fails honestly if the named repo is under a different owner.
-      if (tokenClass === 'fine-grained-pat') return
+      if (decision.kind === 'block') return { block: true, reason: decision.reason }
       // No App auth (no App-class GH_TOKEN and no live minter): leave whatever
-      // is seeded so `gh` fails honestly rather than us guessing a token.
-      if (!shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
+      // is seeded so `gh` fails honestly rather than us guessing a token. The
+      // sandboxed-PAT mint path bypasses this PAT-class re-check via the flag.
+      if (!mintForSandboxedPat && !shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
       const result = await resolveTokenForRepo(decision.repoSlug)
       if (result.kind === 'unavailable') return { block: true, reason: result.reason }
@@ -107,34 +172,65 @@ export default definePlugin({
     }
     const handleGitCommand = async (params: {
-      event: { args: Record<string, unknown> }
+      event: { args: Record<string, unknown>; origin?: SessionOrigin }
       command: string
       agentDir: string
     }): Promise<HookResult> => {
       const { event, command, agentDir } = params
-      // Only App auth re-mints per repo. Classic/fine-grained PATs and absent
-      // tokens are left untouched, exactly as the gh path treats them. App auth
-      // is detected by the live minter too, not just an App-class GH_TOKEN:
-      // multi-owner / no-repos App configs never seed GH_TOKEN yet can mint.
-      if (!shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
+      const tokenClass = classifyGhToken(process.env.GH_TOKEN)
+      const isPat = tokenClass === 'cross-owner' || tokenClass === 'fine-grained-pat'
+      // A PAT is not re-mintable per repo. For unsandboxed roles it rides the
+      // git-askpass path so SSH/scp remotes get rewritten to https and clone
+      // works uniformly (matching the gh path). For sandboxed roles the PAT is
+      // withheld (env cleared): mint an App token instead if available, else
+      // block with guidance rather than letting git fail silently. App auth must
+      // still mint for sandboxed roles even when a PAT is present.
+      const useEnvPat = isPat && runsUnsandboxed(event.origin)
+      // Sandboxed PAT: the env is cleared, so the PAT can't reach git. Mint an
+      // App token instead when a minter is live (a PAT must NOT suppress it);
+      // otherwise block with guidance below rather than fail silently.
+      const mintForSandboxedPat = isPat && !useEnvPat && shouldMintAppToken(undefined, hasAppTokenResolver())
+      if (isPat && !useEnvPat && !mintForSandboxedPat) {
+        const decision = await analyzeGitCommand(command, { cwd: agentDir, resolvers: defaultGitResolvers })
+        if (decision.kind === 'pass-through') return
+        if (decision.kind === 'block') return { block: true, reason: decision.reason }
+        warnSandboxedPatWithheldOnce()
+        return { block: true, reason: sandboxedPatWithheldReason }
+      }
+      // Neither a usable PAT nor App auth: leave the command untouched so git
+      // fails honestly rather than us guessing a token. App auth is detected by
+      // the live minter too, not just an App-class GH_TOKEN: multi-owner /
+      // no-repos App configs never seed GH_TOKEN yet can mint. The mintForSandboxedPat
+      // flag forces minting past this PAT-class re-check.
+      if (!useEnvPat && !mintForSandboxedPat && !shouldMintAppToken(process.env.GH_TOKEN, hasAppTokenResolver())) return
       const decision = await analyzeGitCommand(command, { cwd: agentDir, resolvers: defaultGitResolvers })
       if (decision.kind === 'pass-through') return
       if (decision.kind === 'block') return { block: true, reason: decision.reason }
-      const result = await resolveTokenForRepo(decision.repoSlug)
-      if (result.kind === 'unavailable') return { block: true, reason: result.reason }
+      // The unsandboxed-PAT path uses the PAT directly; otherwise mint a per-repo
+      // App token. Both ride TYPECLAW_GIT_TOKEN (read by the askpass helper),
+      // never argv/config.
+      let gitToken: string
+      if (useEnvPat) {
+        gitToken = process.env.GH_TOKEN as string
+      } else {
+        const result = await resolveTokenForRepo(decision.repoSlug)
+        if (result.kind === 'unavailable') return { block: true, reason: result.reason }
+        gitToken = result.token
+      }
       const askpass = await ensureGitAskPassHelper()
       const existing = event.args[TYPECLAW_INTERNAL_BASH_ENV]
       const overlay = existing !== null && typeof existing === 'object' ? (existing as Record<string, string>) : {}
-      // Token rides in TYPECLAW_GIT_TOKEN (read by the askpass helper), never in
-      // argv/config. insteadOf rewrites SSH/scp remotes to https so the helper's
-      // credential applies; GIT_TERMINAL_PROMPT=0 fails fast instead of hanging.
+      // insteadOf rewrites SSH/scp remotes to https so the helper's credential
+      // applies; GIT_TERMINAL_PROMPT=0 fails fast instead of hanging.
       event.args[TYPECLAW_INTERNAL_BASH_ENV] = {
         ...overlay,
         GIT_ASKPASS: askpass,
-        TYPECLAW_GIT_TOKEN: result.token,
+        TYPECLAW_GIT_TOKEN: gitToken,
         GIT_TERMINAL_PROMPT: '0',
         GIT_CONFIG_COUNT: '2',
         GIT_CONFIG_KEY_0: 'url.https://github.com/.insteadOf',
@@ -165,12 +261,18 @@ export default definePlugin({
         },
         'tool.after': async (event) => {
           checkGraphqlAuthNudge({ tool: event.tool, result: event.result })
-          const committed = commitReviewIfSucceeded({
+          const review = commitReviewIfSucceeded({
             sessionId: event.sessionId,
             callId: event.callId,
             result: event.result,
           })
-          await verdictGuard.release({ callId: event.callId, succeeded: committed })
+          await verdictGuard.release({ callId: event.callId, succeeded: review.committed })
+          // A backstop-recovered verdict had no guard() reservation, so release()
+          // could not arm the lag shield — do it explicitly here so the next
+          // same-commit submission is deduped.
+          if (review.landedFromResult !== null) {
+            await verdictGuard.noteLandedReview(review.landedFromResult)
+          }
         },
       },
     }

package/src/bundled-plugins/github-cli-auth/review-recorder.ts CHANGED Viewed

@@ -3,7 +3,12 @@ import { readFile } from 'node:fs/promises'
 import { recordReview } from '@/channels/github-review-turn-ledger'
 import type { ContentPart, ToolResult } from '@/plugin'
-import { detectReviewSubmission, type DetectedReview } from './gh-review-detect'
+import {
+  detectReviewSubmission,
+  detectReviewSubmissionAttempt,
+  type DetectedReview,
+  type ReviewSubmissionAttempt,
+} from './gh-review-detect'
 import { detectReviewDump, type ReviewDumpDecision } from './gh-review-inline-detect'
 // Bridges the bash `gh` interceptor to the false-receipt ledger: at tool.before
@@ -12,8 +17,18 @@ import { detectReviewDump, type ReviewDumpDecision } from './gh-review-inline-de
 // succeeded. Strict success detection is the safe bias here — wrongly crediting a
 // review that never landed would re-open the false-receipt hole, so an ambiguous
 // result is treated as "not landed" and left uncredited.
+//
+// A post-execution BACKSTOP runs when pre-detection produced no pending entry: the
+// REST create-review response is authoritative (it echoes the landed review's
+// `state` and the PR url), so we can credit a verdict whose command shape dodged
+// the before-detector. This only arms the dedupe window for the NEXT submission —
+// it cannot un-land a duplicate already posted — but that is precisely what the
+// sequential fan-out incident needed: the first landed APPROVE must arm the shield.
+// The backstop is gated on a tool.before submission-ATTEMPT marker so it never
+// fires for a reviews-list READ whose response happens to carry a decisive state.
 const pending = new Map<string, DetectedReview>()
+const submissionAttempts = new Map<string, ReviewSubmissionAttempt>()
 const MAX_INPUT_BYTES = 1_000_000
@@ -26,21 +41,100 @@ export async function noteReviewCommand(args: { callId: string; command: string
   const inputFileContents = await readInputFile(args.command)
   const detected = detectReviewSubmission({ command: args.command, inputFileContents })
   if (detected !== null) pending.set(args.callId, detected)
+  // Record submission INTENT even when the verdict could not be extracted (a
+  // missed shape): only such a command may later arm the backstop, so a reviews
+  // READ — which is not an attempt — can never be miscredited as a landed review.
+  else {
+    const attempt = detectReviewSubmissionAttempt(args.command)
+    if (attempt !== null) submissionAttempts.set(args.callId, attempt)
+  }
   return { dump: detectReviewDump({ command: args.command, inputFileContents }), detected }
 }
-export function commitReviewIfSucceeded(args: { sessionId: string; callId: string; result: ToolResult }): boolean {
+export type CommitReviewResult = {
+  // Whether a verdict was credited this turn (drives verdictGuard.release()).
+  committed: boolean
+  // Set ONLY on the backstop path (pre-detection missed): the caller must arm the
+  // idempotency lag shield with this, since no guard() reservation exists to do it
+  // via release(). Null on the pending path, where release() arms the shield.
+  landedFromResult: DetectedReview | null
+}
+export function commitReviewIfSucceeded(args: {
+  sessionId: string
+  callId: string
+  result: ToolResult
+}): CommitReviewResult {
+  const text = collectText(args.result.content)
   const detected = pending.get(args.callId)
-  if (detected === undefined) return false
-  pending.delete(args.callId)
-  if (!looksSucceeded(detected, collectText(args.result.content))) return false
+  if (detected !== undefined) {
+    pending.delete(args.callId)
+    if (!looksSucceeded(detected, text)) return { committed: false, landedFromResult: null }
+    recordReview({
+      sessionId: args.sessionId,
+      workspace: detected.workspace,
+      prNumber: detected.prNumber,
+      verdict: detected.verdict,
+    })
+    return { committed: true, landedFromResult: null }
+  }
+  // The backstop runs ONLY for a command that tool.before saw as a real
+  // submission attempt (POST create-review) but whose verdict it could not
+  // extract. This excludes a reviews-list READ outright, and the PR-match below
+  // rejects a stray pulls URL for a different PR in the output.
+  const attempt = submissionAttempts.get(args.callId)
+  if (attempt === undefined) return { committed: false, landedFromResult: null }
+  submissionAttempts.delete(args.callId)
+  const landed = detectLandedReviewFromResult(text)
+  if (landed === null) return { committed: false, landedFromResult: null }
+  if (landed.workspace !== attempt.workspace || landed.prNumber !== attempt.prNumber) {
+    return { committed: false, landedFromResult: null }
+  }
   recordReview({
     sessionId: args.sessionId,
-    workspace: detected.workspace,
-    prNumber: detected.prNumber,
-    verdict: detected.verdict,
+    workspace: landed.workspace,
+    prNumber: landed.prNumber,
+    verdict: landed.verdict,
   })
-  return true
+  return { committed: true, landedFromResult: landed }
+}
+// Authoritative post-execution credit from a REST create-review response, used
+// only when pre-detection missed (no pending entry). Requires ALL of: a decisive
+// landed `state`, a recoverable PR identity from the echoed `pull_request_url`,
+// and no failure marker — so a partial/garbled capture or an unrelated success
+// line cannot fabricate a verdict. COMMENT and DISMISSED are not decisive and are
+// ignored, matching the before-detector's scope.
+function detectLandedReviewFromResult(text: string): DetectedReview | null {
+  if (FAILURE_MARKERS.some((m) => text.includes(m))) return null
+  const verdict = landedVerdictFromState(text)
+  if (verdict === null) return null
+  const pr = prFromPullRequestUrl(text)
+  if (pr === null) return null
+  return { workspace: pr.workspace, prNumber: pr.prNumber, verdict, source: 'api' }
+}
+// The create-review response echoes `"state": "APPROVED" | "CHANGES_REQUESTED"`.
+// Tolerant of the spacing both `gh api` (compact) and a piped `jq .` (pretty)
+// produce.
+function landedVerdictFromState(text: string): DetectedReview['verdict'] | null {
+  if (/"state"\s*:\s*"APPROVED"/.test(text)) return 'APPROVE'
+  if (/"state"\s*:\s*"CHANGES_REQUESTED"/.test(text)) return 'REQUEST_CHANGES'
+  return null
+}
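+// The review object carries `"pull_request_url":
+// "https://api.github.com/repos/<owner>/<repo>/pulls/<n>"`, the authoritative PR
+// identity for the landed review. Recovered here so a shape-dodging command is
+// still credited to the right PR. The match is NOT anchored to the
+// `pull_request_url` key: it takes the first `/repos/<owner>/<repo>/pulls/<n>`
+// path found anywhere in the output, so the result must still be checked
+// against the PR the command was attempting to review.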
+function prFromPullRequestUrl(text: string): { workspace: string; prNumber: number } | null {
+  const m = /\/repos\/([^/\s"]+)\/([^/\s"]+)\/pulls\/(\d+)\b/.exec(text)
+  if (m === null) return null
+  const prNumber = Number(m[3])
+  if (!Number.isSafeInteger(prNumber)) return null
+  return { workspace: `${m[1]}/${m[2]}`, prNumber }
 }
 async function readInputFile(command: string): Promise<string | null> {

package/src/channels/adapters/discord-bot-format.ts CHANGED Viewed

@@ -88,49 +88,83 @@ function splitRow(row: string): string[] {
 }
 function computeWidths(rows: string[][]): number[] {
-  const widths: number[] = []
-  for (const row of rows) {
-    for (let c = 0; c < row.length; c++) {
-      const cellWidth = displayWidth(row[c]!)
-      if (widths[c] === undefined || cellWidth > widths[c]!) {
-        widths[c] = cellWidth
-      }
-    }
-  }
-  return widths
+  const columnCount = Math.max(0, ...rows.map((row) => row.length))
+  return Array.from({ length: columnCount }, (_, c) => Math.max(0, ...rows.map((row) => widthTenths(row[c] ?? ''))))
 }
 function padRow(cells: string[], widths: number[]): string {
-  const padded = widths.map((width, c) => padToWidth(cells[c] ?? '', width))
+  const pads = computePads(cells, widths)
   // Two spaces between columns keeps them visually distinct inside the
   // monospaced span without a vertical-bar separator.
-  return padded.join('  ')
+  return widths.map((_, c) => (cells[c] ?? '') + ' '.repeat(pads[c]!)).join('  ')
 }
-function padToWidth(cell: string, width: number): string {
-  const pad = width - displayWidth(cell)
-  return pad > 0 ? cell + ' '.repeat(pad) : cell
+// A CJK glyph is 1.7 cells wide, so column widths are fractional while padding
+// can only insert whole spaces. The naive fix — round each column's deficit on
+// its own — keeps total row widths close but lets the START of column N drift
+// between rows (a row may earn an extra space in column 1 that another row
+// spends in column 3), which is exactly the misalignment a table must avoid.
+//
+// Instead we make every column BOUNDARY prefix-stable: the running deficit up to
+// column c is rounded to whole spaces once, and each cell's pad is the delta of
+// consecutive rounded prefixes. Because the rounding target at boundary c (the
+// column max-widths plus separators before it) is the same constant for every
+// row, each boundary lands within half a cell of the same offset across rows —
+// the tightest column alignment whole-space padding allows.
+//
+// Widths are carried in TENTHS of a cell (latin 10, wide 17) so the arithmetic
+// is exact integers; 1.7 has no finite binary form and would otherwise let
+// rounding flip on float noise.
+function computePads(cells: string[], widths: number[]): number[] {
+  const pads: number[] = []
+  let prefixDeficit = 0
+  let prevPrefixPad = 0
+  for (let c = 0; c < widths.length; c++) {
+    prefixDeficit += Math.max(0, widths[c]! - widthTenths(cells[c] ?? ''))
+    const prefixPad = roundTenthsToSpaces(prefixDeficit)
+    pads.push(Math.max(0, prefixPad - prevPrefixPad))
+    prevPrefixPad = prefixPad
+  }
+  return pads
 }
-// Discord's monospaced inline-code font renders CJK ideographs, full-width
-// punctuation, and most emoji at two columns, while combining/zero-width marks
-// take none. `String.prototype.padEnd` counts UTF-16 code units, so padding by
-// `.length` leaves wide-character tables visually ragged. We iterate by code
-// point and sum per-glyph column widths so every cell pads to the same VISUAL
-// width. The ranges below are the standard East-Asian-Wide / Wide blocks plus
-// the common emoji planes; this is the same wcwidth approximation editors use.
-export function displayWidth(text: string): number {
-  let width = 0
+const CELL_TENTHS = 10
+function roundTenthsToSpaces(tenths: number): number {
+  return Math.floor((tenths + CELL_TENTHS / 2) / CELL_TENTHS)
+}
+function widthTenths(text: string): number {
+  let tenths = 0
   for (const ch of text) {
-    width += charWidth(ch.codePointAt(0)!)
+    tenths += charWidthTenths(ch.codePointAt(0)!)
   }
-  return width
+  return tenths
 }
-function charWidth(cp: number): number {
+function charWidthTenths(cp: number): number {
   if (isZeroWidth(cp)) return 0
-  if (isWide(cp)) return 2
-  return 1
+  if (isWide(cp)) return WIDE_CHAR_TENTHS
+  return CELL_TENTHS
+}
+// Discord's monospaced inline-code font renders CJK ideographs, full-width
+// punctuation, and most emoji WIDER than a latin glyph, while combining/
+// zero-width marks take none. `String.prototype.padEnd` counts UTF-16 code
+// units, so padding by `.length` leaves wide-character tables visually ragged.
+// The ranges below are the standard East-Asian-Wide / Wide blocks plus the
+// common emoji planes.
+//
+// The wide multiplier is 1.7, not the textbook wcwidth value of 2: wide glyphs
+// in Discord's code font do not occupy a full two cells — a Hangul/CJK glyph
+// renders at roughly 1.7x the width of a latin cell — so charging 2 over-pads
+// CJK columns and leaves them visibly too wide. `displayWidth` reports that
+// fractional width in cells; the table padder works in the integer
+// `widthTenths` domain to keep boundary rounding exact — see `computePads`.
+const WIDE_CHAR_TENTHS = 17
+export function displayWidth(text: string): number {
+  return widthTenths(text) / CELL_TENTHS
 }
 function isZeroWidth(cp: number): boolean {

package/src/init/dockerfile.ts CHANGED Viewed

@@ -293,9 +293,20 @@ set -eu
 #    (missing library, port conflict, malformed args). Without the
 #    explicit liveness probe below, the shim would then export DISPLAY
 #    and exec bun, agent-browser launches would die with "cannot open
-#    display", and the operator would chase a phantom bug. We capture
-#    $! and \`kill -0\` it on every poll iteration so an early exit
-#    becomes a clear stderr line and a non-zero shim exit.
+#    display", and the operator would chase a phantom bug. A monitor
+#    subshell owns Xvfb, \`wait\`s for it, and drops a status file the
+#    instant it exits; the poll loop checks that file (before the socket
+#    check) so an early exit becomes a clear stderr line and a non-zero
+#    shim exit.
+#
+#    We do NOT probe liveness with \`kill -0 "\$xvfb_pid"\`. A backgrounded
+#    child that exits before the shell \`wait\`s for it becomes a zombie,
+#    and \`kill -0\` returns success on a zombie PID (it still exists in
+#    the process table). Under load the shell reaps the zombie lazily, so
+#    \`kill -0\` reported the dead Xvfb as alive for up to the full 3s
+#    window — the loop then timed out and printed the misleading "did not
+#    create socket within 3s" diagnostic instead of "exited immediately".
+#    The status-file handshake sidesteps zombie semantics entirely.
 #
 # We DO NOT use \`xvfb-run\`. xvfb-run hangs forever when it runs as
 # PID 1 inside a container: its SIGUSR1-based ready handshake races
@@ -451,30 +462,45 @@ start_xvfb() {
   if ! command -v Xvfb >/dev/null 2>&1; then
     return 0
   fi
-  setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin \\
-    -- Xvfb :99 -screen 0 1920x1080x24 -ac +extension RANDR -nolisten tcp \\
-    >/dev/null 2>&1 &
+  # A monitor subshell owns Xvfb and \`wait\`s for it (the bare command
+  # blocks until exit), then writes Xvfb's exit code to a status file.
+  # The poll loop below reads that file instead of probing \`kill -0\` —
+  # see invariant 2 above for why zombie semantics make \`kill -0\`
+  # unreliable. \`set +e\` inside the subshell keeps the outer \`set -e\`
+  # from killing the monitor before it records a non-zero Xvfb exit.
+  xvfb_status="/tmp/typeclaw-xvfb-status.$$"
+  rm -f "$xvfb_status"
+  (
+    set +e
+    setpriv --bounding-set -net_admin --inh-caps -net_admin --ambient-caps -net_admin \\
+      -- Xvfb :99 -screen 0 1920x1080x24 -ac +extension RANDR -nolisten tcp \\
+      >/dev/null 2>&1
+    printf '%s\\n' "$?" > "$xvfb_status"
+  ) &
   xvfb_pid=$!
   export DISPLAY=:99
-  # Poll the socket every 10ms up to ~3s. Xvfb cold start is typically
-  # ~20-50ms on a modern host; 3s covers slow Docker Desktop VMs,
-  # Rosetta/QEMU emulation, and loaded CI runners. We also \`kill -0\`
-  # the pid each iteration so an Xvfb that died immediately surfaces
-  # as a clear error instead of a 3-second hang followed by silent
-  # "cannot open display" downstream.
+  # Poll every 10ms up to ~3s. Xvfb cold start is typically ~20-50ms on
+  # a modern host; 3s covers slow Docker Desktop VMs, Rosetta/QEMU
+  # emulation, and loaded CI runners. The status-file check comes FIRST
+  # so an Xvfb that creates the socket and then immediately dies is still
+  # treated as a startup failure.
   i=0
   while [ $i -lt 300 ]; do
-    if [ -S /tmp/.X11-unix/X99 ]; then
-      unset i xvfb_pid
-      return 0
-    fi
-    if ! kill -0 "$xvfb_pid" 2>/dev/null; then
+    if [ -f "$xvfb_status" ]; then
+      wait "$xvfb_pid" 2>/dev/null || true
+      rm -f "$xvfb_status"
       echo "typeclaw-entrypoint: Xvfb exited immediately; cannot start headed display (docker.file.xvfb=true)" >&2
       exit 1
     fi
+    if [ -S /tmp/.X11-unix/X99 ]; then
+      rm -f "$xvfb_status"
+      unset i xvfb_pid xvfb_status
+      return 0
+    fi
     sleep 0.01
     i=$((i + 1))
   done
+  rm -f "$xvfb_status"
   echo "typeclaw-entrypoint: Xvfb did not create /tmp/.X11-unix/X99 within 3s; refusing to continue (docker.file.xvfb=true)" >&2
   exit 1
 }

package/src/init/line-auth.ts CHANGED Viewed

@@ -50,30 +50,49 @@ export function lineSecretsPath(agentDir: string): string {
   return join(agentDir, 'secrets.json')
 }
+// The SDK persists E2EE (Letter-Sealing) key material under
+// `<AGENT_MESSENGER_CONFIG_DIR>/line-storage/`. The container sets that env to
+// the agent workspace (src/init/dockerfile.ts), but a host-stage login (init /
+// `channel reauth line`) would otherwise fall back to `~/.config/agent-messenger`
+// — so the E2EE key gets written somewhere the container never reads, and inbound
+// Letter-Sealing messages stay undecryptable. Point the host login at the same
+// per-agent dir the container uses so the key lands where the runtime reads it.
+export function lineConfigDir(agentDir: string): string {
+  return join(agentDir, 'workspace', '.agent-messenger')
+}
 export async function runLineBootstrap(input: LineLoginInput): Promise<LineBootstrapStatus> {
   try {
     const store = new SecretsLineCredentialStore({ mode: 'host', secretsPath: lineSecretsPath(input.agentDir) })
-    // The LINE SDK persists the minted auth_token + certificate by calling
-    // setAccount() on whatever credential manager the client was built with.
-    // Wiring our secrets.json-backed store in here means a successful login
-    // writes straight to secrets.json#channels.line — no second copy in
-    // ~/.config/agent-messenger to keep in sync.
-    const client = input.client ?? buildLineClient(store)
-    const result = await suppressLineTokenInfoDump(() =>
-      input.method === 'qr'
-        ? client.loginWithQR({
-            onQRUrl: async (url) => {
-              await input.callbacks.onQRUrl?.(url)
-            },
-            onPincode: input.callbacks.onPincode,
-          })
-        : client.loginWithEmail({
-            email: input.email,
-            password: input.password,
-            onPincode: input.callbacks.onPincode,
-          }),
-    )
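+    // The env is set only for the duration of client construction + login (when
+    // the SDK reads it to locate line-storage) and restored after, so a LATER
+    // bootstrap for a different agent in the same process can't inherit the
+    // first agent's path. An already-set value (the container's Dockerfile env)
+    // is left untouched. Caveat: process.env is process-global, so two bootstraps
+    // that OVERLAP in time are not isolated — the second sees the first's value
+    // as "already set" and logs in against the first agent's dir.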
+    const result = await withLineConfigDir(lineConfigDir(input.agentDir), () => {
+      // The LINE SDK persists the minted auth_token + certificate by calling
+      // setAccount() on whatever credential manager the client was built with.
+      // Wiring our secrets.json-backed store in here means a successful login
+      // writes straight to secrets.json#channels.line — no second copy in
+      // ~/.config/agent-messenger to keep in sync.
+      const client = input.client ?? buildLineClient(store)
+      return suppressLineTokenInfoDump(() =>
+        input.method === 'qr'
+          ? client.loginWithQR({
+              onQRUrl: async (url) => {
+                await input.callbacks.onQRUrl?.(url)
+              },
+              onPincode: input.callbacks.onPincode,
+            })
+          : client.loginWithEmail({
+              email: input.email,
+              password: input.password,
+              onPincode: input.callbacks.onPincode,
+            }),
+      )
+    })
     if (!result.authenticated || result.account_id === undefined) {
       const reason = result.message ?? result.error ?? 'LINE login did not authenticate'
@@ -105,6 +124,16 @@ function buildLineClient(store: SecretsLineCredentialStore): LineLoginClient {
   return new RealLineClient(credManager) as unknown as LineLoginClient
 }
+async function withLineConfigDir<T>(dir: string, fn: () => Promise<T>): Promise<T> {
+  const previous = process.env.AGENT_MESSENGER_CONFIG_DIR
+  if (previous === undefined) process.env.AGENT_MESSENGER_CONFIG_DIR = dir
+  try {
+    return await fn()
+  } finally {
+    if (previous === undefined) delete process.env.AGENT_MESSENGER_CONFIG_DIR
+  }
+}
 async function suppressLineTokenInfoDump<T>(fn: () => Promise<T>): Promise<T> {
   const previous = lineTokenInfoSuppressionQueue
   let release: () => void = () => {}