typeclaw 0.30.0 → 0.30.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.30.0",
3
+ "version": "0.30.1",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
@@ -0,0 +1,58 @@
1
#!/usr/bin/env bash
# Manual acceptance check for the sandbox.realProc strategy (src/sandbox/build.ts).
# Not a unit test: it needs a Linux container with CAP_SYS_ADMIN, which the macOS
# dev host and standard CI runners cannot provide, so it lives here as an
# operator-runnable script instead of a skipIf-everywhere test.
#
# Proves two properties of the two-phase `unshare --mount-proc -- bwrap` sandbox:
#   1. An external package runner (bunx) runs to completion (no Bun "NotDir").
#   2. A secret in a sibling process's environment NEVER appears in any
#      /proc/*/environ the sandbox can read (PID-namespace scoping holds).
#
# Exit status: non-zero when either property fails (bunx exits non-zero, or the
# canary is found in a readable environ).
#
# Usage: scripts/verify-realproc-sandbox.sh [image]
#   image defaults to ghcr.io/typeclaw/typeclaw-base:<version-from-package.json>
set -euo pipefail

IMAGE="${1:-}"
if [ -z "$IMAGE" ]; then
  version="$(node -p "require('./package.json').version" 2>/dev/null || echo latest)"
  IMAGE="ghcr.io/typeclaw/typeclaw-base:${version}"
fi

# Unique per run so a stale value from an earlier run can never match.
secret="TYPECLAW_REALPROC_LEAK_CANARY_$$"

# Script executed INSIDE the sandbox. It is spliced into a single-quoted
# `bash -c` argument below, so it must not contain a single-quote character.
# PIPESTATUS[0] is used because a plain $? after the pipeline would report the
# exit status of tail, not of bunx, and property 1 would never be checked.
inner='
echo "=== bunx via real-proc sandbox ==="
bunx cowsay "real-proc ok" 2>&1 | tail -6
bunx_rc=${PIPESTATUS[0]}
echo "bunx exit=$bunx_rc"
echo "=== visible pids (sandbox should NOT see the canary holder) ==="
ls /proc | grep -E "^[0-9]+$" | tr "\n" " "; echo
echo "=== leak scan ==="
found=0
for f in /proc/[0-9]*/environ; do
  if tr "\0" "\n" < "$f" 2>/dev/null | grep -q "CANARY_TOKEN"; then
    echo "LEAK:$f"; found=1
  fi
done
if [ $found -eq 0 ]; then echo "NO_LEAK_CONFIRMED"; else echo "LEAK_DETECTED"; exit 1; fi
if [ "$bunx_rc" -ne 0 ]; then echo "BUNX_FAILED"; exit 1; fi
'
inner="${inner//CANARY_TOKEN/$secret}"

# The real-proc argv shape mirrors buildArgv() in src/sandbox/build.ts. Keep in
# sync if that helper changes.
runner="
${secret}_holder() { :; }
env CANARY=${secret} sleep 120 &
unshare --pid --fork --mount --mount-proc -- \
  bwrap --unshare-user --unshare-ipc --unshare-uts --unshare-cgroup \
  --new-session --die-with-parent --clearenv \
  --setenv PATH /usr/local/bin:/usr/bin:/bin --setenv HOME /tmp --setenv LANG C.UTF-8 \
  --ro-bind /usr /usr --ro-bind /etc /etc --dev /dev --tmpfs /tmp \
  --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \
  --ro-bind /proc /proc \
  bash -c '$inner'
"

echo "Image: $IMAGE"
docker run --rm --security-opt seccomp=unconfined --cap-add SYS_ADMIN \
  -e "CANARY=${secret}" "$IMAGE" bash -c "$runner"
@@ -23,6 +23,7 @@ import {
23
23
  checkNonWorkspaceWriteGuard,
24
24
  checkSkillAuthoringGuard,
25
25
  } from '@/bundled-plugins/guard/policy'
26
+ import { config } from '@/config/config'
26
27
  import type { PermissionService } from '@/permissions/permissions'
27
28
  import type {
28
29
  BuiltinToolRef,
@@ -582,6 +583,17 @@ async function applyBashSandbox(
582
583
  // bwrap does --clearenv, so the overlay must be re-introduced via env.set or
583
584
  // it would never reach the sandboxed process (the non-sandboxed spawnHook
584
585
  // path does not run when the command is rewritten to a bwrap invocation).
586
+ // 'real-proc' gives a sandboxed JS package runner a working /proc/self/{fd,
587
+ // maps} so `bunx`/`bun add`/`bun run <pkg>` stop aborting with Bun's NotDir.
588
+ // Opt-in (default 'tmpfs') because it makes start.ts grant the container
589
+ // CAP_SYS_ADMIN at boot. Read from the boot-time `config` snapshot, NOT live
590
+ // getConfig(): sandbox.realProc is restart-required, and the strategy MUST
591
+ // track the boot-time capability. A `typeclaw reload` that flips realProc to
592
+ // true would otherwise make this emit `unshare --mount-proc` in a container
593
+ // booted WITHOUT CAP_SYS_ADMIN, so the mount fails instead of the old tmpfs
594
+ // strategy holding until restart. `config` never changes on reload.
595
+ // procSelfExe is only consumed by the 'tmpfs' branch.
596
+ const realProc = config.sandbox.realProc
585
597
  const { commandString } = buildSandboxedCommand(command, {
586
598
  mounts: [
587
599
  { type: 'ro-bind', source: agentDir, dest: agentDir },
@@ -592,6 +604,7 @@ async function applyBashSandbox(
592
604
  protected: protectedZones,
593
605
  network: 'inherit',
594
606
  cwd: agentDir,
607
+ proc: realProc ? 'real-proc' : 'tmpfs',
595
608
  procSelfExe: resolveProcSelfExe(),
596
609
  ...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
597
610
  })
@@ -93,7 +93,7 @@ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`,
93
93
 
94
94
  There are three delegation modes. Pick deliberately.
95
95
 
96
- **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
96
+ **Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself). When the user *explicitly* says "research"/"investigate" (or equivalent), you MUST spawn \`researcher\` — answering from training memory or a single inline \`web_search\` does not satisfy the request, even if you think you know the answer. (Fanning out \`scout\`/\`explorer\` underneath is fine, but it does not replace \`researcher\`.)
97
97
 
98
98
  **Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
99
99
 
@@ -1,13 +1,17 @@
1
1
  import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
2
2
 
3
+ // `NONE` covers "never reviewed" and "last decisive review was DISMISSED" — both
4
+ // mean a fresh verdict is legitimate (not a duplicate).
5
+ export type EffectiveVerdict = 'APPROVED' | 'CHANGES_REQUESTED' | 'NONE'
6
+
3
7
  export type EffectiveApprovalResolver = (target: {
4
8
  workspace: string
5
9
  prNumber: number
6
- }) => Promise<{ ok: true; alreadyApproved: boolean } | { ok: false }>
10
+ }) => Promise<{ ok: true; effective: EffectiveVerdict } | { ok: false }>
7
11
 
8
12
  export type ApproveBlock = { block: true; reason: string }
9
13
 
10
- export type ApproveIdempotencyGuard = {
14
+ export type ReviewVerdictGuard = {
11
15
  guard: (args: {
12
16
  callId: string
13
17
  workspace: string
@@ -17,78 +21,135 @@ export type ApproveIdempotencyGuard = {
17
21
  release: (args: { callId: string; succeeded: boolean }) => void
18
22
  }
19
23
 
20
- const DUPLICATE_REASON =
21
- 'This bot has already approved this pull request. A second APPROVE would post a redundant review. ' +
22
- 'If you intended to change your verdict, request changes or dismiss the prior review instead of re-approving.'
24
+ // Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
25
+ export type ApproveIdempotencyGuard = ReviewVerdictGuard
26
+
27
// Human-readable block reason for a verdict that would repeat the bot's own
// standing review. APPROVE gets the "already approved" wording; every other
// verdict routed here gets the CHANGES_REQUESTED wording.
function duplicateReason(verdict: ReviewVerdict): string {
  const sentences =
    verdict === 'APPROVE'
      ? [
          'This bot already holds a standing APPROVED review on this pull request.',
          'A second APPROVE would post a redundant review.',
          'If you intended to change your verdict, request changes or dismiss the prior review instead of re-approving.',
        ]
      : [
          'This bot already holds a standing CHANGES_REQUESTED review on this pull request.',
          'A second REQUEST_CHANGES would post a redundant blocking review.',
          'The prior review is still live — push a fix and APPROVE, or reply in the existing thread, instead of re-requesting changes.',
        ]
  return sentences.join(' ')
}
41
+
42
+ const CONCURRENT_REASON =
43
+ 'Another session in this agent is already submitting a formal review verdict for this pull request. ' +
44
+ 'Only one verdict may land per PR — do not submit a second review; the in-flight one will post.'
45
+
46
+ // The standing verdict a fresh attempt would duplicate. APPROVE duplicates a
47
+ // standing APPROVED; REQUEST_CHANGES duplicates a standing CHANGES_REQUESTED.
48
// True when `verdict` would merely repeat the standing review: APPROVE against
// APPROVED, anything else against CHANGES_REQUESTED.
function duplicatesStanding(verdict: ReviewVerdict, effective: EffectiveVerdict): boolean {
  if (verdict === 'APPROVE') return effective === 'APPROVED'
  return effective === 'CHANGES_REQUESTED'
}
51
+
52
+ // How long a reservation may sit before it is treated as abandoned. A normal
53
+ // `gh` review submit completes in seconds; this only guards against a tool.after
54
+ // that never fires (crash mid-command), so it must outlast a slow command yet
55
+ // never strand a PR for long.
56
+ const LEASE_TTL_MS = 5 * 60_000
57
+
58
+ type Reservation = { key: string; token: number; createdAt: number }
23
59
 
24
- // Makes formal `gh ... event=APPROVE` idempotent per PR across turns, sessions,
25
- // and restarts. Two layers, each with a single job:
60
+ // MODULE-LEVEL singletons, shared by every plugin instance in this process. The
61
+ // github-cli-auth plugin's `plugin: async (ctx) => ...` factory may run once per
62
+ // session, giving each its own closure — but all of those closures import THIS
63
+ // module, so they coordinate through one Map. A closure-local Set (the prior
64
+ // design) could not see a concurrent session's in-flight verdict, which is how
65
+ // three sessions each landed an APPROVE on the same PR within ten seconds.
66
+ const inFlightByPr = new Map<string, Reservation>()
67
+ const reservationByCall = new Map<string, Reservation>()
68
+ let tokenSeq = 0
69
+
70
+ // Makes a formal `gh ... event=APPROVE|REQUEST_CHANGES` idempotent per PR across
71
+ // turns, sessions, and (in-process) concurrent fan-out. Two layers:
72
+ //
73
+ // 1. A process-wide in-flight lease keyed by `workspace#prNumber`, held from
74
+ // tool.before through tool.after. While one verdict is mid-flight, every
75
+ // other session's verdict for the same PR is blocked — even though GitHub
76
+ // has not yet recorded the in-flight review. This is the layer the old
77
+ // closure-local Set could not provide: separate plugin instances meant
78
+ // separate Sets, so concurrent sessions never saw each other.
26
79
  //
27
- // 1. An in-process set of *in-flight* reservations (`pendingApprovals`) that
28
- // blocks a second APPROVE while a first is still mid-flight in the same
29
- // container — the concurrent-double-approve case the remote read can't see
30
- // yet (GitHub hasn't recorded the in-flight review).
31
- // 2. The authoritative GitHub effective-state read, the SOLE source of truth
32
- // for "the bot already holds a standing APPROVED review." It understands
33
- // supersession: a later CHANGES_REQUESTED / DISMISSED demotes an earlier
34
- // APPROVED, so the bot may legitimately re-approve.
80
+ // 2. The authoritative GitHub effective-state read, consulted AFTER the lease
81
+ // is acquired. It catches the cross-restart case (lease lost) and tracks
82
+ // supersession: a later CHANGES_REQUESTED/DISMISSED demotes an earlier
83
+ // APPROVED, so a genuine re-verdict is allowed. Reads fail OPEN — a
84
+ // transient error must never strand a genuine first verdict; the lease
85
+ // still covers the concurrent case while the command runs.
35
86
  //
36
- // The set is strictly an in-flight lock — never a persistent "already approved"
37
- // memory. A completed APPROVE drops its reservation in release(), so the next
38
- // APPROVE re-consults GitHub instead of being shadowed by a stale local entry.
39
- // That separation fixes the strand bug: once a standing approval is superseded
40
- // (PR back to CHANGES_REQUESTED), a stale local lock must not keep blocking a
41
- // genuine re-approve — only the remote read decides, and it now reports
42
- // alreadyApproved=false. Reads fail OPEN: a transient GitHub error must never
43
- // permanently strand a first approval; the in-flight reservation still covers
44
- // the concurrent case.
87
+ // The lease is released only in release() (tool.after) or on a terminal block,
88
+ // never after the remote read — releasing early reopens the TOCTOU the lease
89
+ // exists to close. Release is keyed by a per-call token so a late/stale
90
+ // tool.after for a superseded reservation cannot drop a newer session's lease.
45
91
  export function createApproveIdempotencyGuard(deps: {
46
92
  resolveEffectiveApproval: EffectiveApprovalResolver
47
- }): ApproveIdempotencyGuard {
48
- const pendingApprovals = new Set<string>()
49
- const reservedByCall = new Map<string, string>()
93
+ now?: () => number
94
+ }): ReviewVerdictGuard {
95
+ const now = deps.now ?? Date.now
50
96
 
51
97
  return {
52
98
  async guard(args): Promise<ApproveBlock | null> {
53
- if (args.verdict !== 'APPROVE') return null
99
+ if (args.verdict !== 'APPROVE' && args.verdict !== 'REQUEST_CHANGES') return null
54
100
  const key = prKey(args.workspace, args.prNumber)
55
101
 
56
- // Reserve BEFORE the await so two calls racing into guard() for the same
57
- // PR cannot both observe an empty set: the loser sees the winner's
58
- // in-flight reservation and is blocked. The reservation is provisional
59
- // and is always cleared on a terminal path (block below or release()).
60
- if (pendingApprovals.has(key)) return { block: true, reason: DUPLICATE_REASON }
61
- pendingApprovals.add(key)
62
- reservedByCall.set(args.callId, key)
102
+ // Reserve BEFORE the await so two calls racing into guard() for the same PR
103
+ // cannot both observe an empty map: the loser sees the winner's in-flight
104
+ // lease and is blocked. An expired lease (tool.after never fired) is
105
+ // reclaimable so a crash cannot permanently strand the PR.
106
+ const held = inFlightByPr.get(key)
107
+ if (held !== undefined && now() - held.createdAt < LEASE_TTL_MS) {
108
+ return { block: true, reason: CONCURRENT_REASON }
109
+ }
110
+ const reservation: Reservation = { key, token: ++tokenSeq, createdAt: now() }
111
+ inFlightByPr.set(key, reservation)
112
+ reservationByCall.set(args.callId, reservation)
63
113
 
64
114
  const remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
65
- if (remote.ok && remote.alreadyApproved) {
66
- // Standing approval upstream. Block, and release the in-flight lock now:
67
- // a blocked command never reaches tool.after, so release() won't run for
68
- // this callId. Leaving the key set would resurrect the strand bug — the
69
- // GitHub read is authoritative for the standing-approval case, not a
70
- // lingering local entry.
71
- reservedByCall.delete(args.callId)
72
- pendingApprovals.delete(key)
73
- return { block: true, reason: DUPLICATE_REASON }
115
+ if (remote.ok && duplicatesStanding(args.verdict, remote.effective)) {
116
+ // Standing verdict upstream already matches. Block, and release the lease
117
+ // now: a blocked command never reaches tool.after, so release() won't run
118
+ // for this callId. Leaving the lease set would resurrect the strand bug —
119
+ // the GitHub read is authoritative for the standing case.
120
+ releaseReservation(args.callId, reservation)
121
+ return { block: true, reason: duplicateReason(args.verdict) }
74
122
  }
75
123
 
76
124
  return null
77
125
  },
78
126
 
79
127
  release(args): void {
80
- const key = reservedByCall.get(args.callId)
81
- if (key === undefined) return
82
- reservedByCall.delete(args.callId)
83
- // Always drop the in-flight lock, success or fail. On success the standing
84
- // approval now lives on GitHub, so future APPROVEs are caught by the remote
85
- // read (which tracks supersession); the local lock must not outlive the
86
- // in-flight window and shadow that read.
87
- pendingApprovals.delete(key)
128
+ const reservation = reservationByCall.get(args.callId)
129
+ if (reservation === undefined) return
130
+ releaseReservation(args.callId, reservation)
88
131
  },
89
132
  }
90
133
  }
91
134
 
135
+ // Drop the lease only if THIS reservation still owns the key. A stale tool.after
136
+ // for a reservation that was already superseded (e.g. reclaimed after TTL by a
137
+ // newer session) must not yank the live session's lease.
138
// Forgets the call's reservation, then frees the PR lease only when the lease
// currently stored for that PR carries this reservation's token.
function releaseReservation(callId: string, reservation: Reservation): void {
  reservationByCall.delete(callId)
  const owner = inFlightByPr.get(reservation.key)
  if (owner?.token !== reservation.token) return
  inFlightByPr.delete(reservation.key)
}
145
+
92
146
  function prKey(workspace: string, prNumber: number): string {
93
147
  return `${workspace}#${prNumber}`
94
148
  }
149
+
150
+ // Test-only: clear the process-wide lease state between cases.
151
// Wipes both lease maps and rewinds the token counter to zero.
export function __resetReviewVerdictGuardForTest(): void {
  tokenSeq = 0
  reservationByCall.clear()
  inFlightByPr.clear()
}
@@ -1,13 +1,12 @@
1
1
  import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
2
2
 
3
- import type { EffectiveApprovalResolver } from './approve-idempotency'
4
-
5
- // Resolves whether THIS bot already has a standing APPROVED review on a PR, used
6
- // by the approve-idempotency guard to stop a second formal APPROVE after a
7
- // restart (the in-process pending set covers the same-container case but is lost
8
- // when the container bounces). Every failure returns { ok: false } so the guard
9
- // fails open — a transient read error must never permanently block a genuine
10
- // first approval.
3
+ import type { EffectiveApprovalResolver, EffectiveVerdict } from './approve-idempotency'
4
+
5
+ // Resolves THIS bot's standing decisive review on a PR, used by the review
6
+ // verdict guard to stop a second formal verdict after a restart (the in-process
7
+ // lease covers the same-container case but is lost when the container bounces).
8
+ // Every failure returns { ok: false } so the guard fails open — a transient read
9
+ // error must never permanently block a genuine first verdict.
11
10
  export function createGithubEffectiveApprovalResolver(deps: {
12
11
  resolveToken: (workspace: string) => Promise<string | null>
13
12
  fetchImpl?: typeof fetch
@@ -27,10 +26,16 @@ export function createGithubEffectiveApprovalResolver(deps: {
27
26
  if (reviews === null) return { ok: false }
28
27
 
29
28
  const lastDecisive = reviews.filter((r) => isSelf(r.login, r.isBot, self) && isDecisive(r.state)).at(-1)
30
- return { ok: true, alreadyApproved: lastDecisive?.state === 'APPROVED' }
29
+ return { ok: true, effective: toEffective(lastDecisive?.state) }
31
30
  }
32
31
  }
33
32
 
33
// Maps a raw review state onto the guard's verdict vocabulary; anything other
// than the two decisive standing states (including undefined) becomes 'NONE'.
function toEffective(state: string | undefined): EffectiveVerdict {
  switch (state) {
    case 'APPROVED':
    case 'CHANGES_REQUESTED':
      return state
    default:
      return 'NONE'
  }
}
38
+
34
39
  // A bot's effective review is its LATEST decisive one. COMMENTED/PENDING are
35
40
  // non-deciding noise that must not clear an earlier APPROVED/CHANGES_REQUESTED;
36
41
  // a later CHANGES_REQUESTED or DISMISSED supersedes an earlier APPROVED. The
@@ -11,7 +11,7 @@ import { classifyGhToken } from './token-class'
11
11
  export default definePlugin({
12
12
  plugin: async (ctx) => {
13
13
  const resolveTokenForRepo = ctx.github.resolveTokenForRepo
14
- const approveGuard = createApproveIdempotencyGuard({
14
+ const verdictGuard = createApproveIdempotencyGuard({
15
15
  resolveEffectiveApproval: createGithubEffectiveApprovalResolver({
16
16
  resolveToken: async (workspace) => {
17
17
  const result = await resolveTokenForRepo(workspace)
@@ -28,7 +28,7 @@ export default definePlugin({
28
28
 
29
29
  const review = await noteReviewCommand({ callId: event.callId, command })
30
30
  if (review.detected !== null) {
31
- const block = await approveGuard.guard({
31
+ const block = await verdictGuard.guard({
32
32
  callId: event.callId,
33
33
  workspace: review.detected.workspace,
34
34
  prNumber: review.detected.prNumber,
@@ -70,7 +70,7 @@ export default definePlugin({
70
70
  callId: event.callId,
71
71
  result: event.result,
72
72
  })
73
- approveGuard.release({ callId: event.callId, succeeded: committed })
73
+ verdictGuard.release({ callId: event.callId, succeeded: committed })
74
74
  },
75
75
  },
76
76
  }
@@ -0,0 +1,191 @@
1
+ // Discord renders no GitHub-flavored Markdown tables — a `| a | b |` block
2
+ // shows up as literal pipes and dashes, so an agent reply that leans on a table
3
+ // (very common) becomes unreadable. Discord DOES preserve whitespace verbatim
4
+ // inside inline code spans, so we re-emit each table row as a single
5
+ // backtick-wrapped line with columns padded to a fixed width. Columns line up
6
+ // because every row is the same monospaced inline-code span. The header row is
7
+ // additionally wrapped in `**...**` so it reads as a bold caption above the body.
8
+ //
9
+ // This is a line-walker, not a Markdown parser: it only touches blocks that
10
+ // match the pipe-table shape (a `|`-bearing line followed by a `|---|` alignment
11
+ // row) and leaves every other byte — prose, code fences, lists — untouched.
12
+
13
+ const TABLE_SEP_RE = /^\s*\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/
14
+ const FENCE_RE = /^(\s*)(```+|~~~+)(.*)$/
15
+
16
// Rewrites every pipe-table block in `input` into padded inline-code rows and
// returns everything else unchanged. Input with no `|` is returned as-is.
export function convertDiscordTables(input: string): string {
  if (input === '' || !input.includes('|')) return input

  const lines = input.split('\n')
  const rendered: string[] = []
  let fenceMarker: string | null = null
  let cursor = 0

  while (cursor < lines.length) {
    const current = lines[cursor]!
    const fenceMatch = FENCE_RE.exec(current)

    // Fence lines and everything inside an open fence are copied verbatim. A
    // fence line opens a block when none is open, and closes the open one only
    // when it uses the same fence character and is at least as long.
    if (fenceMatch !== null || fenceMarker !== null) {
      if (fenceMatch !== null) {
        const marker = fenceMatch[2]!
        if (fenceMarker === null) {
          fenceMarker = marker
        } else if (marker[0] === fenceMarker[0] && marker.length >= fenceMarker.length) {
          fenceMarker = null
        }
      }
      rendered.push(current)
      cursor += 1
      continue
    }

    // A table starts at a `|`-bearing line whose next line is an alignment row.
    const startsTable =
      current.includes('|') && cursor + 1 < lines.length && TABLE_SEP_RE.test(lines[cursor + 1]!)
    if (!startsTable) {
      rendered.push(current)
      cursor += 1
      continue
    }

    // Header + alignment row, then every following non-blank `|`-bearing line.
    let end = cursor + 2
    while (end < lines.length && lines[end]!.includes('|') && lines[end]!.trim() !== '') {
      end += 1
    }
    rendered.push(renderTable(lines.slice(cursor, end)))
    cursor = end
  }

  return rendered.join('\n')
}
69
+
70
// Renders one table block (header line, alignment line, body lines) as
// newline-joined inline-code rows; the header row is wrapped in `**`.
function renderTable(tableLines: string[]): string {
  // Index 1 is the alignment row; it only marks the table and is not rendered.
  const [headerLine, , ...bodyLines] = tableLines
  const headerCells = splitRow(headerLine!)
  const bodyRows = bodyLines.map(splitRow)
  const widths = computeWidths([headerCells, ...bodyRows])

  const lines = [`**${wrapCode(padRow(headerCells, widths))}**`]
  for (const cells of bodyRows) {
    lines.push(wrapCode(padRow(cells, widths)))
  }
  return lines.join('\n')
}
79
+
80
// Splits one table row into trimmed cell strings. At most one leading and one
// trailing pipe are dropped first; escaped pipes (`\|`) are NOT honoured and
// split like any other pipe.
function splitRow(row: string): string[] {
  const inner = row.trim().replace(/^\|/, '').replace(/\|$/, '')
  return inner.split('|').map((cell) => cell.trim())
}
89
+
90
// Per-column maximum display width across all rows. The result has as many
// entries as the longest row.
function computeWidths(rows: string[][]): number[] {
  const widths: number[] = []
  for (const row of rows) {
    row.forEach((cell, col) => {
      widths[col] = Math.max(widths[col] ?? 0, displayWidth(cell))
    })
  }
  return widths
}
102
+
103
// Pads each cell of a row to its column width and joins the columns. A row
// with fewer cells than `widths` is filled out with empty cells.
function padRow(cells: string[], widths: number[]): string {
  // Two spaces between columns keeps them visually distinct inside the
  // monospaced span without a vertical-bar separator. A single space would be
  // indistinguishable from a space inside a cell such as "hello world".
  const columnGap = '  '
  return widths.map((width, c) => padToWidth(cells[c] ?? '', width)).join(columnGap)
}
109
+
110
// Right-pads `cell` with spaces up to `width` display columns; a cell already
// at or beyond that width is returned unchanged.
function padToWidth(cell: string, width: number): string {
  const missing = width - displayWidth(cell)
  if (missing <= 0) return cell
  return cell + ' '.repeat(missing)
}
114
+
115
+ // Discord's monospaced inline-code font renders CJK ideographs, full-width
116
+ // punctuation, and most emoji at two columns, while combining/zero-width marks
117
+ // take none. `String.prototype.padEnd` counts UTF-16 code units, so padding by
118
+ // `.length` leaves wide-character tables visually ragged. We iterate by code
119
+ // point and sum per-glyph column widths so every cell pads to the same VISUAL
120
+ // width. The ranges below are the standard East-Asian-Wide / Wide blocks plus
121
+ // the common emoji planes; this is the same wcwidth approximation editors use.
122
// Sums the column width of every code point in `text` (iteration is by code
// point, not by UTF-16 code unit).
export function displayWidth(text: string): number {
  return Array.from(text).reduce((total, ch) => total + charWidth(ch.codePointAt(0)!), 0)
}
129
+
130
// Column width of one code point: 0 for zero-width marks, 2 for wide glyphs,
// 1 otherwise. The zero-width check takes precedence.
function charWidth(cp: number): number {
  return isZeroWidth(cp) ? 0 : isWide(cp) ? 2 : 1
}
135
+
136
// True for code points counted as occupying no column.
function isZeroWidth(cp: number): boolean {
  const zeroWidthRanges: ReadonlyArray<readonly [number, number]> = [
    [0x0300, 0x036f], // combining diacritical marks
    [0x200b, 0x200f], // zero-width space, ZWNJ/ZWJ, directional marks
    [0xfe00, 0xfe0f], // variation selectors
  ]
  return zeroWidthRanges.some(([lo, hi]) => cp >= lo && cp <= hi)
}
144
+
145
// True for code points counted as two columns wide. Bounds are inclusive.
function isWide(cp: number): boolean {
  const wideRanges: ReadonlyArray<readonly [number, number]> = [
    [0x1100, 0x115f], // Hangul Jamo
    [0x2e80, 0x303e], // CJK radicals, Kangxi
    [0x3041, 0x33ff], // Hiragana, Katakana, CJK symbols
    [0x3400, 0x4dbf], // CJK Ext A
    [0x4e00, 0x9fff], // CJK Unified Ideographs
    [0xa000, 0xa4cf], // Yi
    [0xac00, 0xd7a3], // Hangul Syllables
    [0xf900, 0xfaff], // CJK Compatibility Ideographs
    [0xfe30, 0xfe4f], // CJK Compatibility Forms
    [0xff00, 0xff60], // Fullwidth Forms
    [0xffe0, 0xffe6], // Fullwidth signs
    [0x2600, 0x26ff], // Miscellaneous Symbols
    [0x2700, 0x27bf], // Dingbats
    [0x2b00, 0x2bff], // Misc Symbols and Arrows
    [0x1f300, 0x1faff], // emoji, symbols, pictographs
    [0x20000, 0x3fffd], // CJK Ext B+ (supplementary ideographic)
  ]
  return wideRanges.some(([lo, hi]) => cp >= lo && cp <= hi)
}
165
+
166
+ // CommonMark inline code: the delimiter must be a backtick run LONGER than any
167
+ // run inside the content, otherwise an embedded `` ` `` (e.g. a cell holding
168
+ // `bun test`) closes the span early and corrupts the row. When the content
169
+ // begins or ends with a backtick, one space of padding is inserted on each side
170
+ // so the delimiter is not adjacent to a content backtick; CommonMark strips that
171
+ // single padding space on render, leaving our column widths intact.
172
// Wraps `text` in a backtick delimiter one longer than its longest internal
// backtick run. When the text itself starts or ends with a backtick, one space
// is inserted on each side so the delimiter never touches a content backtick.
function wrapCode(text: string): string {
  const delimiter = '`'.repeat(longestBacktickRun(text) + 1)
  const touchesBacktick = text.startsWith('`') || text.endsWith('`')
  const content = touchesBacktick ? ` ${text} ` : text
  return delimiter + content + delimiter
}
178
+
179
// Length of the longest run of consecutive backticks in `text` (0 when none).
function longestBacktickRun(text: string): number {
  const runs = text.match(/`+/g) ?? []
  return runs.reduce((longest, run) => Math.max(longest, run.length), 0)
}
@@ -39,6 +39,7 @@ import {
39
39
  type InboundDropReason,
40
40
  renderPlaceholder,
41
41
  } from './discord-bot-classify'
42
+ import { convertDiscordTables } from './discord-bot-format'
42
43
  import { createDiscordReactionCallback, createDiscordRemoveReactionCallback } from './discord-bot-reactions'
43
44
  import { enrichDiscordMessageReferences } from './discord-bot-reference'
44
45
  import {
@@ -647,7 +648,7 @@ export function createOutboundCallback(deps: {
647
648
  if (msg.adapter !== 'discord-bot') {
648
649
  return { ok: false, error: `unknown adapter: ${msg.adapter}` }
649
650
  }
650
- const text = msg.text ?? ''
651
+ const text = convertDiscordTables(msg.text ?? '')
651
652
  const attachments = msg.attachments ?? []
652
653
  if (text === '' && attachments.length === 0) {
653
654
  return { ok: false, error: 'message has neither text nor attachments' }
@@ -9,6 +9,7 @@ import { removeRequestedReviewer } from './decoy-reviewer'
9
9
  import type { DeliveryDedup } from './dedup'
10
10
  import { isGithubEventAllowed } from './event-allowlist'
11
11
  import { encodeGithubReactionRef, type GithubReactionTarget } from './reactions'
12
+ import { fetchSelfReviewBlocking } from './review-state'
12
13
  import { listUnresolvedSelfReviewThreads } from './review-thread-resolver'
13
14
 
14
15
  export type GithubInboundLogger = { info: (m: string) => void; warn: (m: string) => void; error: (m: string) => void }
@@ -83,14 +84,16 @@ export function createGithubWebhookHandler(options: GithubWebhookHandlerOptions)
83
84
  }
84
85
 
85
86
  // A push to an open PR (`synchronize`) is not a message to react to — it is
86
- // a trigger to re-check whether the new commits addressed the bot's own
87
- // still-open review threads. The check needs a GraphQL round-trip, so it
88
- // runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
89
- // session when there is at least one such thread. Returning here also keeps
87
+ // a trigger to re-evaluate the bot's own outstanding review obligations on
88
+ // this PR: unresolved review threads it authored AND a sticky
89
+ // CHANGES_REQUESTED block (which leaves no threads when filed as a top-level
90
+ // verdict — the black hole this path closes). Both need an API round-trip,
91
+ // so it runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
92
+ // session when an obligation is outstanding. Returning here also keeps
90
93
  // synchronize out of the generic awareness-only fallthrough below.
91
94
  if (event === 'pull_request' && action === 'synchronize') {
92
95
  if (delivery !== '') options.dedup.add(delivery)
93
- scheduleReviewThreadRecheck({ payload, selfLogin, options })
96
+ scheduleReviewFollowup({ payload, selfLogin, options })
94
97
  return ok()
95
98
  }
96
99
 
@@ -187,7 +190,7 @@ function defaultScheduleBackgroundTask(task: () => Promise<void>): void {
187
190
  void task().catch(() => {})
188
191
  }
189
192
 
190
- function scheduleReviewThreadRecheck(input: {
193
+ function scheduleReviewFollowup(input: {
191
194
  payload: Record<string, unknown>
192
195
  selfLogin: string | null
193
196
  options: GithubWebhookHandlerOptions
@@ -203,13 +206,27 @@ function scheduleReviewThreadRecheck(input: {
203
206
  if (repository === null || pullNumber === null) return
204
207
  const headSha = readString(readRecord(pr?.head), 'sha')
205
208
 
209
+ // Same webhook head SHA can arrive on several deliveries (a multi-commit push
210
+ // emits one synchronize per ref update). Dedup the follow-up on the head SHA
211
+ // so a single push wakes at most one re-review, distinct from the per-delivery
212
+ // dedup above. When headSha is absent we cannot dedup, so we skip the followup
213
+ // rather than risk a re-review storm.
214
+ if (headSha === null) {
215
+ options.logger.warn(`[github] synchronize for ${repository.owner}/${repository.name}#${pullNumber} has no head sha`)
216
+ return
217
+ }
218
+ const followupKey = `synchronize-followup:${repository.owner}/${repository.name}#${pullNumber}:${headSha}`
219
+ if (options.dedup.has(followupKey)) return
220
+ options.dedup.add(followupKey)
221
+
222
+ const reviewOn = options.reviewOn?.() ?? 'review_requested'
206
223
  const fetchImpl = options.fetchImpl ?? fetch
207
224
  const schedule = options.scheduleBackgroundTask ?? defaultScheduleBackgroundTask
208
225
  const target = `${repository.owner}/${repository.name}#${pullNumber}`
209
226
  schedule(async () => {
210
227
  try {
211
228
  const token = await authToken({ repoSlug: `${repository.owner}/${repository.name}` })
212
- const result = await listUnresolvedSelfReviewThreads({
229
+ const threads = await listUnresolvedSelfReviewThreads({
213
230
  token,
214
231
  selfLogin,
215
232
  owner: repository.owner,
@@ -217,46 +234,63 @@ function scheduleReviewThreadRecheck(input: {
217
234
  prNumber: pullNumber,
218
235
  fetchImpl,
219
236
  })
220
- if (!result.ok) {
221
- options.logger.warn(`[github] review-thread recheck failed for ${target}: ${result.error}`)
237
+ if (!threads.ok) {
238
+ options.logger.warn(`[github] review-thread recheck failed for ${target}: ${threads.error}`)
222
239
  return
223
240
  }
224
- if (result.threads.length === 0) return
241
+
242
+ // A held CHANGES_REQUESTED is the bot's own obligation regardless of how
243
+ // reviews are triggered, so re-evaluate it on push unless review is off.
244
+ let selfBlocking = false
245
+ if (reviewOn !== 'off') {
246
+ const blocking = await fetchSelfReviewBlocking({
247
+ token,
248
+ selfLogin,
249
+ owner: repository.owner,
250
+ repo: repository.name,
251
+ prNumber: pullNumber,
252
+ fetchImpl,
253
+ })
254
+ if (blocking.ok) selfBlocking = blocking.selfBlocking
255
+ else options.logger.warn(`[github] review-state recheck failed for ${target}: ${blocking.error}`)
256
+ }
257
+
258
+ const rootCommentIds = threads.threads.map((t) => t.rootCommentId)
259
+ if (rootCommentIds.length === 0 && !selfBlocking) return
225
260
  options.route(
226
- buildRecheckInbound({
227
- repository,
228
- pullNumber,
229
- headSha,
230
- rootCommentIds: result.threads.map((t) => t.rootCommentId),
231
- title: readString(pr, 'title'),
232
- }),
261
+ withApprovalPolicy(
262
+ buildReviewFollowupInbound({
263
+ repository,
264
+ pullNumber,
265
+ headSha,
266
+ rootCommentIds,
267
+ selfBlocking,
268
+ title: readString(pr, 'title'),
269
+ }),
270
+ options.allowApprove?.() ?? true,
271
+ ),
233
272
  )
234
273
  } catch (err) {
235
274
  options.logger.warn(
236
- `[github] review-thread recheck failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
275
+ `[github] review followup failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
237
276
  )
238
277
  }
239
278
  })
240
279
  }
241
280
 
242
- function buildRecheckInbound(input: {
281
+ function buildReviewFollowupInbound(input: {
243
282
  repository: { owner: string; name: string }
244
283
  pullNumber: number
245
- headSha: string | null
284
+ headSha: string
246
285
  rootCommentIds: readonly number[]
286
+ selfBlocking: boolean
247
287
  title: string | null
248
288
  }): InboundMessage {
249
- const { repository, pullNumber, headSha, rootCommentIds, title } = input
289
+ const { repository, pullNumber, headSha, rootCommentIds, selfBlocking, title } = input
250
290
  const titleSegment = title !== null && title.trim() !== '' ? `: "${title}"` : ''
251
- const shaSegment = headSha !== null ? ` (now at ${headSha.slice(0, 7)})` : ''
252
- const idList = rootCommentIds.join(', ')
253
291
  const text =
254
- `PR #${pullNumber}${titleSegment} received new commits${shaSegment}. ` +
255
- `You have ${rootCommentIds.length} unresolved review thread(s) you authored on this PR ` +
256
- `(root comment id(s): ${idList}). For each, check whether the new commits addressed your ` +
257
- `concern. If addressed, reply on that thread via channel_send with a short acknowledgement ` +
258
- `and resolve_review_thread: true (the thread id is the root comment id). If not addressed, ` +
259
- `leave it open. If none are addressed, end your turn without replying.`
292
+ `PR #${pullNumber}${titleSegment} received new commits (now at ${headSha.slice(0, 7)}). ` +
293
+ followupInstruction(rootCommentIds, selfBlocking)
260
294
 
261
295
  return {
262
296
  adapter: 'github',
@@ -264,7 +298,7 @@ function buildRecheckInbound(input: {
264
298
  chat: `pr:${pullNumber}`,
265
299
  thread: null,
266
300
  text,
267
- externalMessageId: `pr-${pullNumber}-recheck-${headSha ?? 'unknown'}`,
301
+ externalMessageId: `pr-${pullNumber}-recheck-${headSha}`,
268
302
  authorId: 'github-system',
269
303
  authorName: 'github',
270
304
  authorIsBot: false,
@@ -277,6 +311,30 @@ function buildRecheckInbound(input: {
277
311
  }
278
312
  }
279
313
 
314
+ function followupInstruction(rootCommentIds: readonly number[], selfBlocking: boolean): string {
315
+ const threadPart =
316
+ rootCommentIds.length > 0
317
+ ? `You have ${rootCommentIds.length} unresolved review thread(s) you authored on this PR ` +
318
+ `(root comment id(s): ${rootCommentIds.join(', ')}). For each, check whether the new commits ` +
319
+ `addressed your concern. If addressed, reply on that thread via channel_send with a short ` +
320
+ `acknowledgement and resolve_review_thread: true (the thread id is the root comment id); ` +
321
+ `if not, leave it open. `
322
+ : ''
323
+ // A held CHANGES_REQUESTED never clears itself: GitHub keeps the block until a
324
+ // fresh APPROVE/COMMENT/dismiss, so a blocking follow-up must always end with a
325
+ // submitted verdict — the "end without replying" escape hatch is reserved for
326
+ // the thread-only path, where leaving every thread open is a valid no-op.
327
+ const blockingPart = selfBlocking
328
+ ? `Your latest review on this PR is still CHANGES_REQUESTED, which keeps the PR blocked until you ` +
329
+ `submit a fresh review. Re-review the current head against the concerns from that blocking review ` +
330
+ `and always end with a new verdict: if the commits resolve your concerns, submit an APPROVE ` +
331
+ `(or COMMENT if approval is disabled) to clear the block; if concerns remain, submit a new ` +
332
+ `CHANGES_REQUESTED explaining what is still blocking. `
333
+ : ''
334
+ const tail = selfBlocking ? '' : 'If none are addressed, end your turn without replying.'
335
+ return `${threadPart}${blockingPart}${tail}`
336
+ }
337
+
280
338
  export async function verifySignature(body: string, secret: string, sigHeader: string): Promise<boolean> {
281
339
  const expected = `sha256=${createHmac('sha256', secret).update(body).digest('hex')}`
282
340
  const a = Buffer.from(expected)
@@ -48,6 +48,33 @@ export function createGithubReviewStateResolver(deps: {
48
48
  }
49
49
  }
50
50
 
51
+ export type SelfReviewBlockingResult =
52
+ | { ok: true; selfBlocking: boolean }
53
+ | { ok: false; error: string; code: 'not-found' | 'permission-denied' | 'transient' }
54
+
55
+ // Last DECISIVE self review == CHANGES_REQUESTED? (COMMENTED/PENDING ignored, as
56
+ // in createGithubReviewStateResolver.) Standalone so the synchronize follow-up
57
+ // skips the reviewDecision round-trip the stranding guard needs but this doesn't.
58
+ export async function fetchSelfReviewBlocking(deps: {
59
+ token: string
60
+ selfLogin: string
61
+ owner: string
62
+ repo: string
63
+ prNumber: number
64
+ fetchImpl?: typeof fetch
65
+ }): Promise<SelfReviewBlockingResult> {
66
+ const fetchImpl = deps.fetchImpl ?? fetch
67
+ const reviews = await fetchSelfReviews(
68
+ fetchImpl,
69
+ deps.token,
70
+ { owner: deps.owner, repo: deps.repo, prNumber: deps.prNumber },
71
+ deps.selfLogin,
72
+ )
73
+ if (!reviews.ok) return { ok: false, error: reviews.error, code: reviews.code }
74
+ const lastDecisive = reviews.states.filter(isDecisive).at(-1) ?? null
75
+ return { ok: true, selfBlocking: lastDecisive === 'CHANGES_REQUESTED' }
76
+ }
77
+
51
78
  type Target = { owner: string; repo: string; prNumber: number }
52
79
 
53
80
  function parseTarget(workspace: string, chat: string): Target | null {
@@ -3,9 +3,34 @@ export type OutboundFloodCheckResult = { ok: true } | { ok: false; reason: strin
3
3
  const MIN_LENGTH = 40
4
4
  const MAX_RUN = 30
5
5
  const MIN_LONG_LENGTH = 80
6
- const MIN_UNIQUE_RATIO = 0.05
7
6
  const MAX_DOMINANCE = 0.9
8
7
 
8
+ // Contiguous-span detector for multi-character floods ("lollol...", "ababab...",
9
+ // repeated emoji pairs) — including a flood body buried inside otherwise-varied
10
+ // text, which a whole-message periodicity test misses. Strict equality (no
11
+ // mismatch budget) and a large span floor keep it clear of incidental prose
12
+ // repetition ("---", "....", "hahaha", code indentation, table separators).
13
+ const MAX_REPEATING_PERIOD = 32
14
+ // Span floor is deliberately a flood boundary, not a "never-deny" guarantee: it
15
+ // catches obvious short-period floods like "ab".repeat(300) (600 chars) and
16
+ // "lol".repeat(300) (900). Hundreds of byte-identical rows or box-art lines also
17
+ // trip it — that output is information-poor and flood-like, and raising the floor
18
+ // to clear it would let those real floods through. Tables/diagrams with varying
19
+ // cells break periodicity and pass.
20
+ const MIN_PERIODIC_SPAN = 384
21
+ const MIN_PERIODIC_REPETITIONS = 24
22
+
23
+ // Narrow last resort: structured text (code, tables, logs) is often lower-
24
+ // entropy than prose, so this only fires on a tiny alphabet at real length.
25
+ const MIN_ENTROPY_LENGTH = 200
26
+ const MAX_TINY_ALPHABET_SIZE = 4
27
+ const VERY_LOW_ENTROPY_BITS = 1.25
28
+
29
+ // Replaces the old `uniqueRatio = distinctChars / length` gate, which was
30
+ // length-coupled: natural language draws from a fixed alphabet, so any reply
31
+ // past ~(alphabet/0.05) chars failed it regardless of variety — a 2.9KB
32
+ // markdown report was silently dropped. Every check below is bounded-run or
33
+ // length-independent, so length alone never makes a reply look like a flood.
9
34
  export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
10
35
  if (text.length < MIN_LENGTH) return { ok: true }
11
36
 
@@ -18,12 +43,18 @@ export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
18
43
  if (graphemes.length < MIN_LONG_LENGTH) return { ok: true }
19
44
 
20
45
  const counts = countGraphemes(graphemes)
21
- const uniqueRatio = counts.size / graphemes.length
22
- if (uniqueRatio < MIN_UNIQUE_RATIO) return { ok: false, reason: `low-unique-ratio:${uniqueRatio.toFixed(3)}` }
23
46
 
24
47
  const dominance = maxValue(counts) / graphemes.length
25
48
  if (dominance > MAX_DOMINANCE) return { ok: false, reason: `char-dominance:${dominance.toFixed(2)}` }
26
49
 
50
+ const span = findLongestPeriodicSpan(graphemes)
51
+ if (span !== undefined) return { ok: false, reason: `repeated-pattern-span:${span.period}:${span.spanLength}` }
52
+
53
+ if (graphemes.length >= MIN_ENTROPY_LENGTH && counts.size <= MAX_TINY_ALPHABET_SIZE) {
54
+ const entropy = shannonEntropyBitsPerGrapheme(counts, graphemes.length)
55
+ if (entropy < VERY_LOW_ENTROPY_BITS) return { ok: false, reason: `low-entropy:${entropy.toFixed(2)}` }
56
+ }
57
+
27
58
  return { ok: true }
28
59
  }
29
60
 
@@ -42,6 +73,42 @@ function findLongestRun(graphemes: readonly string[]): number {
42
73
  return longest
43
74
  }
44
75
 
76
+ // Longest contiguous span (in graphemes) that is exactly periodic at some
77
+ // period 2..32, or undefined when no span clears the flood floor. Period 1 is
78
+ // left to the run check above. A span must reach MIN_PERIODIC_SPAN graphemes
79
+ // AND repeat its unit MIN_PERIODIC_REPETITIONS times — the larger bound wins,
80
+ // so a 32-period unit needs 768 graphemes, not three echoes of a 32-char line.
81
+ function findLongestPeriodicSpan(graphemes: readonly string[]): { period: number; spanLength: number } | undefined {
82
+ const maxPeriod = Math.min(MAX_REPEATING_PERIOD, Math.floor(graphemes.length / MIN_PERIODIC_REPETITIONS))
83
+ let best: { period: number; spanLength: number } | undefined
84
+ for (let period = 2; period <= maxPeriod; period++) {
85
+ let matches = 0
86
+ let longestForPeriod = 0
87
+ for (let i = period; i < graphemes.length; i++) {
88
+ if (graphemes[i] === graphemes[i - period]) {
89
+ matches++
90
+ const spanLength = matches + period
91
+ if (spanLength > longestForPeriod) longestForPeriod = spanLength
92
+ } else {
93
+ matches = 0
94
+ }
95
+ }
96
+ const requiredSpan = Math.max(MIN_PERIODIC_SPAN, period * MIN_PERIODIC_REPETITIONS)
97
+ if (longestForPeriod < requiredSpan) continue
98
+ if (best === undefined || longestForPeriod > best.spanLength) best = { period, spanLength: longestForPeriod }
99
+ }
100
+ return best
101
+ }
102
+
103
+ function shannonEntropyBitsPerGrapheme(counts: Map<string, number>, length: number): number {
104
+ let entropy = 0
105
+ for (const count of counts.values()) {
106
+ const probability = count / length
107
+ entropy -= probability * Math.log2(probability)
108
+ }
109
+ return entropy
110
+ }
111
+
45
112
  function countGraphemes(graphemes: readonly string[]): Map<string, number> {
46
113
  const counts = new Map<string, number>()
47
114
  for (const grapheme of graphemes) counts.set(grapheme, (counts.get(grapheme) ?? 0) + 1)
@@ -1,6 +1,7 @@
1
1
  import { readdirSync } from 'node:fs'
2
2
  import { join, resolve } from 'node:path'
3
3
 
4
+ import { loadConfigSyncOrDefaults } from '@/config'
4
5
  import { containerNameFromCwd } from '@/container'
5
6
  import { isInitialized } from '@/init'
6
7
 
@@ -17,7 +18,9 @@ export type AgentEntry = {
17
18
  //
18
19
  // Underscore-prefixed names are also skipped so operators can park a disabled
19
20
  // or in-progress agent next to live ones (e.g. `_archived-coder/`) without
20
- // compose touching it.
21
+ // compose touching it. Agents with `compose.exclude: true` in typeclaw.json
22
+ // are skipped too — the in-config opt-out for operators who don't want to rename
23
+ // the folder.
21
24
  //
22
25
  // Returns an empty array when rootCwd doesn't exist or is empty — discovery is
23
26
  // not the place to fail; the caller decides what to do with zero agents.
@@ -40,6 +43,7 @@ export function discoverAgents(rootCwd: string): AgentEntry[] {
40
43
  if (entry.name.startsWith('_')) continue
41
44
  const cwd = join(root, entry.name)
42
45
  if (!isInitialized(cwd)) continue
46
+ if (loadConfigSyncOrDefaults(cwd).compose.exclude) continue
43
47
  agents.push({ name: entry.name, cwd, containerName: containerNameFromCwd(cwd) })
44
48
  }
45
49
 
@@ -338,6 +338,39 @@ export const networkSchema = z
338
338
 
339
339
  export type NetworkConfig = z.infer<typeof networkSchema>
340
340
 
341
+ // `realProc` opts the per-tool bwrap sandbox into the 'real-proc' strategy
342
+ // (src/sandbox/build.ts): a fresh procfs scoped to a new PID namespace so
343
+ // external-package runners (`bunx`, `bun add <pkg>`, `bun run <pkg-bin>`) get a
344
+ // working /proc/self/{fd,maps} and stop aborting with Bun's "NotDir". Default
345
+ // `false` keeps the universally-portable '--tmpfs /proc' profile, under which
346
+ // sandboxed external-package execution is unsupported by design. Turning it on
347
+ // makes `typeclaw start` grant the container CAP_SYS_ADMIN (required to mount
348
+ // proc for the new PID namespace), which is a deliberate posture change on the
349
+ // single-tenant outer boundary — see docs/internals/sandbox.mdx. PID isolation
350
+ // and the /proc/N/environ leak guard are both preserved; the trade is the
351
+ // CAP_SYS_ADMIN grant, not sandbox strength.
352
+ export const sandboxSchema = z
353
+ .object({
354
+ realProc: z.boolean().default(false),
355
+ })
356
+ .default({ realProc: false })
357
+
358
+ export type SandboxConfig = z.infer<typeof sandboxSchema>
359
+
360
+ // Host-stage `typeclaw compose` knobs. `exclude: true` skips this agent during
361
+ // compose discovery (same effect as parking it under an `_`-prefixed dir, but
362
+ // without renaming the folder). The container never reads this block — it's a
363
+ // pure compose CLI hint, so omitting it keeps the agent in every compose
364
+ // operation. Namespaced under `compose` so future compose-only settings have a
365
+ // home without crowding the top level.
366
+ export const composeSchema = z
367
+ .object({
368
+ exclude: z.boolean().default(false),
369
+ })
370
+ .default({ exclude: false })
371
+
372
+ export type ComposeConfig = z.infer<typeof composeSchema>
373
+
341
374
  // Reverse-proxy tunnels expose a container-private port to the public internet
342
375
  // via a managed subprocess (cloudflared) or a user-supplied external URL.
343
376
  // See AGENTS.md `## Tunnels`. Keeping the enum scoped to what's implemented
@@ -490,9 +523,11 @@ export const configSchema = z
490
523
  // time. Defaults to `[]`. Hatching appends the agent's chosen name
491
524
  // here, so a freshly-hatched bot already has its identity wired up.
492
525
  alias: z.array(z.string().trim().min(1)).default([]),
526
+ compose: composeSchema,
493
527
  channels: channelsSchema,
494
528
  portForward: portForwardSchema,
495
529
  network: networkSchema,
530
+ sandbox: sandboxSchema,
496
531
  docker: dockerSchema,
497
532
  git: gitSchema,
498
533
  roles: rolesConfigSchema.optional(),
@@ -632,9 +667,11 @@ export const FIELD_EFFECTS: Record<string, FieldEffect> = {
632
667
  mcpServers: 'restart-required',
633
668
  plugins: 'restart-required',
634
669
  alias: 'applied',
670
+ compose: 'ignored',
635
671
  channels: 'applied',
636
672
  portForward: 'restart-required',
637
673
  network: 'restart-required',
674
+ sandbox: 'restart-required',
638
675
  tunnels: 'restart-required',
639
676
  'docker.file': 'restart-required',
640
677
  'git.ignore': 'restart-required',
@@ -723,6 +760,7 @@ export function extractPluginConfigs(raw: unknown): Record<string, unknown> {
723
760
  'mounts',
724
761
  'plugins',
725
762
  'alias',
763
+ 'compose',
726
764
  'channels',
727
765
  'portForward',
728
766
  'network',
@@ -514,6 +514,20 @@ export async function planStart({
514
514
  }
515
515
  }
516
516
 
517
+ // sandbox.realProc opts the per-tool bwrap sandbox into the 'real-proc'
518
+ // strategy (src/sandbox/build.ts), which prefixes the sandbox with
519
+ // `unshare --pid --fork --mount --mount-proc`. Mounting a fresh procfs for the
520
+ // new PID namespace needs real CAP_SYS_ADMIN — seccomp=unconfined alone is not
521
+ // enough (it only unblocks the unshare/clone SYSCALLS; the kernel still
522
+ // rejects mount(2) of proc without the capability). This is the deliberate
523
+ // posture change documented in docs/internals/sandbox.mdx: the default keeps
524
+ // the narrower seccomp-only profile, and the operator grants the broad
525
+ // "new root" capability ONLY by opting into real-proc. Placed before the
526
+ // image tag (like --cap-add=NET_ADMIN) so docker applies it at run time.
527
+ if (cfg.sandbox.realProc) {
528
+ runArgs.push('--cap-add=SYS_ADMIN')
529
+ }
530
+
517
531
  if (hostdControl) {
518
532
  runArgs.push('--add-host', HOST_GATEWAY_ALIAS)
519
533
  }
@@ -36,14 +36,35 @@ export function buildSandboxedCommand(command: string, policy: SandboxPolicy = {
36
36
 
37
37
  function buildArgv(command: string, policy: SandboxPolicy): string[] {
38
38
  const bwrap = policy.bwrapPath ?? 'bwrap'
39
- const argv: string[] = [bwrap, '--unshare-all']
40
-
41
- if (policy.network === 'inherit') {
42
- // --unshare-all already unshared the net namespace; --share-net rejoins
43
- // the outer container's network. Other namespaces (user/pid/mount/ipc/
44
- // uts/cgroup) stay unshared. Default ('none' / undefined) leaves the net
45
- // namespace isolated — prompt-injected bash cannot exfiltrate over the
46
- // network without the consumer explicitly opting in.
39
+ const procStrategy = policy.proc ?? 'tmpfs'
40
+ const realProc = procStrategy === 'real-proc'
41
+
42
+ // 'real-proc' splits PID-namespace ownership from bwrap. `unshare --pid
43
+ // --fork --mount --mount-proc` (util-linux, baseline) creates the new PID +
44
+ // mount namespaces as REAL root and mounts a fresh procfs scoped to that PID
45
+ // namespace — which OrbStack permits only with CAP_SYS_ADMIN and NOT from
46
+ // bwrap's user namespace (bwrap's --proc is blocked there). bwrap then runs
47
+ // INSIDE that namespace and must NOT re-unshare pid (it would create a second
48
+ // PID ns with no matching procfs and reintroduce the ENOTDIR crash), so we
49
+ // unshare each namespace EXCEPT pid explicitly instead of --unshare-all. The
50
+ // freshly mounted /proc contains only the sandbox subtree, so --ro-bind /proc
51
+ // (below) binds that scoped procfs, never the agent runtime's /proc/N/environ.
52
+ const argv: string[] = realProc
53
+ ? ['unshare', '--pid', '--fork', '--mount', '--mount-proc', '--', bwrap]
54
+ : [bwrap, '--unshare-all']
55
+ if (realProc) {
56
+ argv.push('--unshare-user', '--unshare-ipc', '--unshare-uts', '--unshare-cgroup')
57
+ }
58
+
59
+ if (policy.network !== 'inherit') {
60
+ // Default ('none' / undefined) isolates the net namespace — prompt-injected
61
+ // bash cannot exfiltrate over the network unless the consumer opts in.
62
+ // --unshare-all already covers this in the non-real-proc path; under
63
+ // real-proc the explicit unshares above omit net, so add it here.
64
+ if (realProc) argv.push('--unshare-net')
65
+ } else if (!realProc) {
66
+ // --unshare-all unshared the net namespace; --share-net rejoins the outer
67
+ // container's network. Under real-proc we simply never add --unshare-net.
47
68
  argv.push('--share-net')
48
69
  }
49
70
 
@@ -97,7 +118,15 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
97
118
  '/lib64',
98
119
  )
99
120
 
100
- if ((policy.proc ?? 'tmpfs') === 'tmpfs') {
121
+ if (realProc) {
122
+ // The outer `unshare --mount-proc` already mounted a fresh procfs scoped to
123
+ // the new PID namespace. --ro-bind /proc /proc binds THAT procfs (not the
124
+ // outer container's), so the child gets real /proc/self/{fd,maps} and the
125
+ // agent runtime's pids — and their /proc/N/environ secrets — are simply
126
+ // absent from this namespace. No /proc/self/exe symlink is needed: a real
127
+ // /proc/self/exe already resolves correctly.
128
+ argv.push('--ro-bind', '/proc', '/proc')
129
+ } else if (procStrategy === 'tmpfs') {
101
130
  // --tmpfs /proc, never --proc /proc (OrbStack's kernel blocks
102
131
  // mount("proc",...) from user namespaces) and never --dev-bind /proc /proc
103
132
  // (leaks the outer container's /proc/N/environ — including
@@ -111,6 +140,9 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
111
140
  // /proc/self/exe. --symlink (not --ro-bind /proc/self/exe): /proc/self at
112
141
  // setup time is bwrap's pid, so a bind would capture bwrap's own binary.
113
142
  // Must come AFTER --tmpfs /proc (last-op-wins) or the tmpfs erases it.
143
+ // This restores only the runner's SELF-location; a spawned child still
144
+ // reads /proc/self/fd + /proc/self/maps, which the empty tmpfs lacks, so
145
+ // external-package execution requires the 'real-proc' strategy above.
114
146
  if (policy.procSelfExe !== undefined) {
115
147
  argv.push('--ro-bind', policy.procSelfExe, policy.procSelfExe)
116
148
  argv.push('--symlink', policy.procSelfExe, '/proc/self/exe')
@@ -6,7 +6,15 @@ export type SandboxMount =
6
6
 
7
7
  export type SandboxNetwork = 'none' | 'inherit'
8
8
 
9
- export type SandboxProcStrategy = 'tmpfs' | 'none'
9
+ // 'tmpfs' (default): empty /proc + a single /proc/self/exe symlink. Works on
10
+ // every host but gives no /proc/self/{fd,maps}, so a JS package runner's CHILD
11
+ // (the spawned bin) crashes with ENOTDIR reading /proc/self/fd. 'none': no
12
+ // /proc at all. 'real-proc': mount a fresh procfs scoped to a NEW pid namespace
13
+ // so the child gets a real /proc/self/{fd,maps} WITHOUT seeing the agent
14
+ // runtime's pids (no /proc/<agent>/environ leak). 'real-proc' requires the
15
+ // outer container to hold CAP_SYS_ADMIN (mount(2) of proc); start.ts only grants
16
+ // it when the operator opts in via typeclaw.json#sandbox.realProc.
17
+ export type SandboxProcStrategy = 'tmpfs' | 'none' | 'real-proc'
10
18
 
11
19
  export type SandboxEnvPolicy = {
12
20
  set?: Record<string, string>
@@ -190,6 +190,18 @@
190
190
  "minLength": 1
191
191
  }
192
192
  },
193
+ "compose": {
194
+ "default": {
195
+ "exclude": false
196
+ },
197
+ "type": "object",
198
+ "properties": {
199
+ "exclude": {
200
+ "default": false,
201
+ "type": "boolean"
202
+ }
203
+ }
204
+ },
193
205
  "channels": {
194
206
  "default": {},
195
207
  "type": "object",
@@ -1114,6 +1126,18 @@
1114
1126
  }
1115
1127
  }
1116
1128
  },
1129
+ "sandbox": {
1130
+ "default": {
1131
+ "realProc": false
1132
+ },
1133
+ "type": "object",
1134
+ "properties": {
1135
+ "realProc": {
1136
+ "default": false,
1137
+ "type": "boolean"
1138
+ }
1139
+ }
1140
+ },
1117
1141
  "docker": {
1118
1142
  "default": {
1119
1143
  "file": {