typeclaw 0.30.0 → 0.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/verify-realproc-sandbox.sh +58 -0
- package/src/agent/plugin-tools.ts +13 -0
- package/src/agent/system-prompt.ts +1 -1
- package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +113 -52
- package/src/bundled-plugins/github-cli-auth/effective-approval.ts +14 -9
- package/src/bundled-plugins/github-cli-auth/index.ts +3 -3
- package/src/channels/adapters/discord-bot-format.ts +191 -0
- package/src/channels/adapters/discord-bot.ts +2 -1
- package/src/channels/adapters/github/inbound.ts +88 -30
- package/src/channels/adapters/github/review-state.ts +27 -0
- package/src/channels/outbound-flood-filter.ts +70 -3
- package/src/compose/discover.ts +5 -1
- package/src/config/config.ts +38 -0
- package/src/container/start.ts +14 -0
- package/src/sandbox/build.ts +41 -9
- package/src/sandbox/policy.ts +9 -1
- package/typeclaw.schema.json +24 -0
package/package.json
CHANGED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Manual acceptance check for the sandbox.realProc strategy (src/sandbox/build.ts).
# Not a unit test: it needs a Linux container with CAP_SYS_ADMIN, which the macOS
# dev host and standard CI runners cannot provide, so it lives here as an
# operator-runnable script instead of a skipIf-everywhere test.
#
# Proves two properties of the two-phase `unshare --mount-proc -- bwrap` sandbox:
#   1. An external package runner (bunx) runs to completion (no Bun "NotDir").
#   2. A secret in a sibling process's environment NEVER appears in any
#      /proc/*/environ the sandbox can read (PID-namespace scoping holds).
#
# Usage: scripts/verify-realproc-sandbox.sh [image]
#   image defaults to ghcr.io/typeclaw/typeclaw-base:<version-from-package.json>
#
# Exit status: non-zero when the canary leaks OR when bunx itself fails.
set -euo pipefail

IMAGE="${1:-}"
if [ -z "$IMAGE" ]; then
  # package.json is resolved relative to the current directory; fall back to
  # the `latest` tag when node or the file is unavailable.
  version="$(node -p "require('./package.json').version" 2>/dev/null || echo latest)"
  IMAGE="ghcr.io/typeclaw/typeclaw-base:${version}"
fi

# Per-invocation canary value ($$ is this shell's PID).
secret="TYPECLAW_REALPROC_LEAK_CANARY_$$"

# Script executed INSIDE the sandbox. It is single-quoted here so nothing
# expands on the host; CANARY_TOKEN is substituted below. It must not contain
# single quotes because it is embedded as bash -c '...' in the runner.
inner='
echo "=== bunx via real-proc sandbox ==="
bunx cowsay "real-proc ok" 2>&1 | tail -6
# $? after a pipeline is the status of the LAST command (tail); PIPESTATUS[0]
# is the status of bunx itself, which is what property 1 is about.
bunx_rc=${PIPESTATUS[0]}
echo "bunx exit=$bunx_rc"
echo "=== visible pids (sandbox should NOT see the canary holder) ==="
ls /proc | grep -E "^[0-9]+$" | tr "\n" " "; echo
echo "=== leak scan ==="
found=0
for f in /proc/[0-9]*/environ; do
  if tr "\0" "\n" < "$f" 2>/dev/null | grep -q "CANARY_TOKEN"; then
    echo "LEAK:$f"; found=1
  fi
done
if [ $found -eq 0 ]; then echo "NO_LEAK_CONFIRMED"; else echo "LEAK_DETECTED"; exit 1; fi
if [ "$bunx_rc" -ne 0 ]; then echo "BUNX_FAILED"; exit 1; fi
'
inner="${inner//CANARY_TOKEN/$secret}"

# The real-proc argv shape mirrors buildArgv() in src/sandbox/build.ts. Keep in
# sync if that helper changes.
# NOTE(review): the `${secret}_holder` function below is defined but never
# called; its purpose is not evident from this script — confirm before removing.
runner="
${secret}_holder() { :; }
env CANARY=${secret} sleep 120 &
unshare --pid --fork --mount --mount-proc -- \
  bwrap --unshare-user --unshare-ipc --unshare-uts --unshare-cgroup \
  --new-session --die-with-parent --clearenv \
  --setenv PATH /usr/local/bin:/usr/bin:/bin --setenv HOME /tmp --setenv LANG C.UTF-8 \
  --ro-bind /usr /usr --ro-bind /etc /etc --dev /dev --tmpfs /tmp \
  --ro-bind-try /bin /bin --ro-bind-try /sbin /sbin --ro-bind-try /lib /lib --ro-bind-try /lib64 /lib64 \
  --ro-bind /proc /proc \
  bash -c '$inner'
"

echo "Image: $IMAGE"
docker run --rm --security-opt seccomp=unconfined --cap-add SYS_ADMIN \
  -e "CANARY=${secret}" "$IMAGE" bash -c "$runner"
|
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
checkNonWorkspaceWriteGuard,
|
|
24
24
|
checkSkillAuthoringGuard,
|
|
25
25
|
} from '@/bundled-plugins/guard/policy'
|
|
26
|
+
import { config } from '@/config/config'
|
|
26
27
|
import type { PermissionService } from '@/permissions/permissions'
|
|
27
28
|
import type {
|
|
28
29
|
BuiltinToolRef,
|
|
@@ -582,6 +583,17 @@ async function applyBashSandbox(
|
|
|
582
583
|
// bwrap does --clearenv, so the overlay must be re-introduced via env.set or
|
|
583
584
|
// it would never reach the sandboxed process (the non-sandboxed spawnHook
|
|
584
585
|
// path does not run when the command is rewritten to a bwrap invocation).
|
|
586
|
+
// 'real-proc' gives a sandboxed JS package runner a working /proc/self/{fd,
|
|
587
|
+
// maps} so `bunx`/`bun add`/`bun run <pkg>` stop aborting with Bun's NotDir.
|
|
588
|
+
// Opt-in (default 'tmpfs') because it makes start.ts grant the container
|
|
589
|
+
// CAP_SYS_ADMIN at boot. Read from the boot-time `config` snapshot, NOT live
|
|
590
|
+
// getConfig(): sandbox.realProc is restart-required, and the strategy MUST
|
|
591
|
+
// track the boot-time capability. A `typeclaw reload` that flips realProc to
|
|
592
|
+
// true would otherwise make this emit `unshare --mount-proc` in a container
|
|
593
|
+
// booted WITHOUT CAP_SYS_ADMIN, so the mount fails instead of the old tmpfs
|
|
594
|
+
// strategy holding until restart. `config` never changes on reload.
|
|
595
|
+
// procSelfExe is only consumed by the 'tmpfs' branch.
|
|
596
|
+
const realProc = config.sandbox.realProc
|
|
585
597
|
const { commandString } = buildSandboxedCommand(command, {
|
|
586
598
|
mounts: [
|
|
587
599
|
{ type: 'ro-bind', source: agentDir, dest: agentDir },
|
|
@@ -592,6 +604,7 @@ async function applyBashSandbox(
|
|
|
592
604
|
protected: protectedZones,
|
|
593
605
|
network: 'inherit',
|
|
594
606
|
cwd: agentDir,
|
|
607
|
+
proc: realProc ? 'real-proc' : 'tmpfs',
|
|
595
608
|
procSelfExe: resolveProcSelfExe(),
|
|
596
609
|
...(envOverlay !== undefined ? { env: { set: envOverlay } } : {}),
|
|
597
610
|
})
|
|
@@ -93,7 +93,7 @@ Delegate focused work to subagents via \`spawn_subagent\`, \`subagent_output\`,
|
|
|
93
93
|
|
|
94
94
|
There are three delegation modes. Pick deliberately.
|
|
95
95
|
|
|
96
|
-
**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself).
|
|
96
|
+
**Mode A — Research fan-out.** Need information and the search is broad? Fire 2-5 subagents (usually \`explorer\`/\`scout\`) in parallel with \`run_in_background: true\`, then end your response. A \`<system-reminder>\` lands per completion; call \`subagent_output\` once per task_id to collect (it never blocks) and answer. Match the worker to the depth: a fast or narrow web lookup goes to \`scout\`; a fuzzy question that needs decomposition, many sources, cross-validation, and a synthesized verdict goes to \`researcher\` (don't do that grind inline with \`web_search\` yourself). When the user *explicitly* says "research"/"investigate" (or equivalent), you MUST spawn \`researcher\` — answering from training memory or a single inline \`web_search\` does not satisfy the request, even if you think you know the answer. (Fanning out \`scout\`/\`explorer\` underneath is fine, but it does not replace \`researcher\`.)
|
|
97
97
|
|
|
98
98
|
**Mode B — Delegate-and-converse.** Asked to DO something long-running (>~30s: installs, builds, \`docker\`, scrapes, long test suites, multi-host loops, any noisy "fetch N and synthesize" chain)? Don't run it inline — blocking your own \`bash\` freezes the conversation and stalls the channel typing heartbeat (\`MAX_TYPING_HEARTBEAT_MS\`). Spawn one subagent (\`operator\` for side effects, \`scout\` for a quick web lookup, \`researcher\` for a deep multi-source "fetch N and synthesize" investigation, \`planner\` when a multi-step goal needs a sequenced, risk-aware plan before anyone acts) with \`run_in_background: true\`, acknowledge, and KEEP TALKING. Single fast calls (\`git status\`, one known-endpoint \`curl\`) stay inline. When the completion reminder lands, weave the result in; in a channel session, the completion \`<system-reminder>\` is NOT a user message but plain text is still invisible — Surface the result via \`channel_reply\` (or \`channel_send\`). If you already posted the substantive answer in the spawn turn, prefer \`skip_response({ reason: "result confirms prior reply" })\` over going silent.
|
|
99
99
|
|
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
|
|
2
2
|
|
|
3
|
+
// `NONE` covers "never reviewed" and "last decisive review was DISMISSED" — both
|
|
4
|
+
// mean a fresh verdict is legitimate (not a duplicate).
|
|
5
|
+
export type EffectiveVerdict = 'APPROVED' | 'CHANGES_REQUESTED' | 'NONE'
|
|
6
|
+
|
|
3
7
|
export type EffectiveApprovalResolver = (target: {
|
|
4
8
|
workspace: string
|
|
5
9
|
prNumber: number
|
|
6
|
-
}) => Promise<{ ok: true;
|
|
10
|
+
}) => Promise<{ ok: true; effective: EffectiveVerdict } | { ok: false }>
|
|
7
11
|
|
|
8
12
|
export type ApproveBlock = { block: true; reason: string }
|
|
9
13
|
|
|
10
|
-
export type
|
|
14
|
+
export type ReviewVerdictGuard = {
|
|
11
15
|
guard: (args: {
|
|
12
16
|
callId: string
|
|
13
17
|
workspace: string
|
|
@@ -17,78 +21,135 @@ export type ApproveIdempotencyGuard = {
|
|
|
17
21
|
release: (args: { callId: string; succeeded: boolean }) => void
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
24
|
+
// Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
|
|
25
|
+
export type ApproveIdempotencyGuard = ReviewVerdictGuard
|
|
26
|
+
|
|
27
|
+
// Picks the block message for a verdict that would repeat the bot's standing
// review: one wording for APPROVE, another for every other verdict value.
function duplicateReason(verdict: ReviewVerdict): string {
  const alreadyApproved =
    'This bot already holds a standing APPROVED review on this pull request. A second APPROVE would ' +
    'post a redundant review. If you intended to change your verdict, request changes or dismiss the ' +
    'prior review instead of re-approving.'
  const alreadyRequestedChanges =
    'This bot already holds a standing CHANGES_REQUESTED review on this pull request. A second ' +
    'REQUEST_CHANGES would post a redundant blocking review. The prior review is still live — push a fix ' +
    'and APPROVE, or reply in the existing thread, instead of re-requesting changes.'

  return verdict === 'APPROVE' ? alreadyApproved : alreadyRequestedChanges
}
|
|
41
|
+
|
|
42
|
+
const CONCURRENT_REASON =
|
|
43
|
+
'Another session in this agent is already submitting a formal review verdict for this pull request. ' +
|
|
44
|
+
'Only one verdict may land per PR — do not submit a second review; the in-flight one will post.'
|
|
45
|
+
|
|
46
|
+
// True when `verdict` would merely repeat the review state already standing:
// APPROVE on top of APPROVED, or any other verdict on top of CHANGES_REQUESTED.
function duplicatesStanding(verdict: ReviewVerdict, effective: EffectiveVerdict): boolean {
  if (verdict === 'APPROVE') {
    return effective === 'APPROVED'
  }
  return effective === 'CHANGES_REQUESTED'
}
|
|
51
|
+
|
|
52
|
+
// How long a reservation may sit before it is treated as abandoned. A normal
|
|
53
|
+
// `gh` review submit completes in seconds; this only guards against a tool.after
|
|
54
|
+
// that never fires (crash mid-command), so it must outlast a slow command yet
|
|
55
|
+
// never strand a PR for long.
|
|
56
|
+
const LEASE_TTL_MS = 5 * 60_000
|
|
57
|
+
|
|
58
|
+
type Reservation = { key: string; token: number; createdAt: number }
|
|
23
59
|
|
|
24
|
-
//
|
|
25
|
-
//
|
|
60
|
+
// MODULE-LEVEL singletons, shared by every plugin instance in this process. The
|
|
61
|
+
// github-cli-auth plugin's `plugin: async (ctx) => ...` factory may run once per
|
|
62
|
+
// session, giving each its own closure — but all of those closures import THIS
|
|
63
|
+
// module, so they coordinate through one Map. A closure-local Set (the prior
|
|
64
|
+
// design) could not see a concurrent session's in-flight verdict, which is how
|
|
65
|
+
// three sessions each landed an APPROVE on the same PR within ten seconds.
|
|
66
|
+
const inFlightByPr = new Map<string, Reservation>()
|
|
67
|
+
const reservationByCall = new Map<string, Reservation>()
|
|
68
|
+
let tokenSeq = 0
|
|
69
|
+
|
|
70
|
+
// Makes a formal `gh ... event=APPROVE|REQUEST_CHANGES` idempotent per PR across
|
|
71
|
+
// turns, sessions, and (in-process) concurrent fan-out. Two layers:
|
|
72
|
+
//
|
|
73
|
+
// 1. A process-wide in-flight lease keyed by `workspace#prNumber`, held from
|
|
74
|
+
// tool.before through tool.after. While one verdict is mid-flight, every
|
|
75
|
+
// other session's verdict for the same PR is blocked — even though GitHub
|
|
76
|
+
// has not yet recorded the in-flight review. This is the layer the old
|
|
77
|
+
// closure-local Set could not provide: separate plugin instances meant
|
|
78
|
+
// separate Sets, so concurrent sessions never saw each other.
|
|
26
79
|
//
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
31
|
-
//
|
|
32
|
-
//
|
|
33
|
-
// supersession: a later CHANGES_REQUESTED / DISMISSED demotes an earlier
|
|
34
|
-
// APPROVED, so the bot may legitimately re-approve.
|
|
80
|
+
// 2. The authoritative GitHub effective-state read, consulted AFTER the lease
|
|
81
|
+
// is acquired. It catches the cross-restart case (lease lost) and tracks
|
|
82
|
+
// supersession: a later CHANGES_REQUESTED/DISMISSED demotes an earlier
|
|
83
|
+
// APPROVED, so a genuine re-verdict is allowed. Reads fail OPEN — a
|
|
84
|
+
// transient error must never strand a genuine first verdict; the lease
|
|
85
|
+
// still covers the concurrent case while the command runs.
|
|
35
86
|
//
|
|
36
|
-
// The
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
// (PR back to CHANGES_REQUESTED), a stale local lock must not keep blocking a
|
|
41
|
-
// genuine re-approve — only the remote read decides, and it now reports
|
|
42
|
-
// alreadyApproved=false. Reads fail OPEN: a transient GitHub error must never
|
|
43
|
-
// permanently strand a first approval; the in-flight reservation still covers
|
|
44
|
-
// the concurrent case.
|
|
87
|
+
// The lease is released only in release() (tool.after) or on a terminal block,
|
|
88
|
+
// never after the remote read — releasing early reopens the TOCTOU the lease
|
|
89
|
+
// exists to close. Release is keyed by a per-call token so a late/stale
|
|
90
|
+
// tool.after for a superseded reservation cannot drop a newer session's lease.
|
|
45
91
|
// Builds the review-verdict guard.
//
// deps.resolveEffectiveApproval — reads the bot's standing decisive review.
// deps.now — clock override (defaults to Date.now) used for lease expiry.
//
// guard() returns a block object when the verdict must not be submitted, or
// null when it may proceed (the lease then stays held until release()).
// release() drops the lease recorded for the given callId, if any.
export function createApproveIdempotencyGuard(deps: {
  resolveEffectiveApproval: EffectiveApprovalResolver
  now?: () => number
}): ReviewVerdictGuard {
  const now = deps.now ?? Date.now

  return {
    async guard(args): Promise<ApproveBlock | null> {
      if (args.verdict !== 'APPROVE' && args.verdict !== 'REQUEST_CHANGES') return null
      const key = prKey(args.workspace, args.prNumber)

      // Reserve BEFORE the await so two calls racing into guard() for the same PR
      // cannot both observe an empty map: the loser sees the winner's in-flight
      // lease and is blocked. An expired lease is reclaimable.
      const held = inFlightByPr.get(key)
      if (held !== undefined) {
        if (now() - held.createdAt < LEASE_TTL_MS) {
          return { block: true, reason: CONCURRENT_REASON }
        }
        // The expired lease is about to be overwritten. Drop its call-id index
        // entries too, otherwise they stay in reservationByCall forever when
        // release() never runs for that call.
        for (const [staleCallId, stale] of reservationByCall) {
          if (stale === held) reservationByCall.delete(staleCallId)
        }
      }
      const reservation: Reservation = { key, token: ++tokenSeq, createdAt: now() }
      inFlightByPr.set(key, reservation)
      reservationByCall.set(args.callId, reservation)

      // Fail OPEN on a rejected read as well as on { ok: false }: letting the
      // rejection escape would leave the lease held with no release() to follow,
      // blocking every other verdict on this PR until the TTL lapses.
      let remote: Awaited<ReturnType<EffectiveApprovalResolver>>
      try {
        remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
      } catch {
        remote = { ok: false }
      }

      if (remote.ok && duplicatesStanding(args.verdict, remote.effective)) {
        // The standing verdict already matches. Release the lease here because
        // this call is being blocked and release() will not be reached for it.
        releaseReservation(args.callId, reservation)
        return { block: true, reason: duplicateReason(args.verdict) }
      }

      return null
    },

    release(args): void {
      const reservation = reservationByCall.get(args.callId)
      if (reservation === undefined) return
      releaseReservation(args.callId, reservation)
    },
  }
}
|
|
91
134
|
|
|
135
|
+
// Forgets the call-id mapping, then removes the per-PR lease only when the
// lease currently stored under that key carries this reservation's token. A
// reservation that has since been replaced therefore cannot remove its
// successor's lease.
function releaseReservation(callId: string, reservation: Reservation): void {
  reservationByCall.delete(callId)

  const ownsLease = inFlightByPr.get(reservation.key)?.token === reservation.token
  if (!ownsLease) return

  inFlightByPr.delete(reservation.key)
}
|
|
145
|
+
|
|
92
146
|
// Lease-map key for a pull request: the workspace and PR number joined by `#`.
function prKey(workspace: string, prNumber: number): string {
  return [workspace, prNumber].join('#')
}
|
|
149
|
+
|
|
150
|
+
// Test-only helper: restarts the token counter and empties both module-level
// lease maps so each test case begins from a clean slate.
export function __resetReviewVerdictGuardForTest(): void {
  tokenSeq = 0
  for (const registry of [inFlightByPr, reservationByCall]) {
    registry.clear()
  }
}
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
|
|
2
2
|
|
|
3
|
-
import type { EffectiveApprovalResolver } from './approve-idempotency'
|
|
4
|
-
|
|
5
|
-
// Resolves
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
// first approval.
|
|
3
|
+
import type { EffectiveApprovalResolver, EffectiveVerdict } from './approve-idempotency'
|
|
4
|
+
|
|
5
|
+
// Resolves THIS bot's standing decisive review on a PR, used by the review
|
|
6
|
+
// verdict guard to stop a second formal verdict after a restart (the in-process
|
|
7
|
+
// lease covers the same-container case but is lost when the container bounces).
|
|
8
|
+
// Every failure returns { ok: false } so the guard fails open — a transient read
|
|
9
|
+
// error must never permanently block a genuine first verdict.
|
|
11
10
|
export function createGithubEffectiveApprovalResolver(deps: {
|
|
12
11
|
resolveToken: (workspace: string) => Promise<string | null>
|
|
13
12
|
fetchImpl?: typeof fetch
|
|
@@ -27,10 +26,16 @@ export function createGithubEffectiveApprovalResolver(deps: {
|
|
|
27
26
|
if (reviews === null) return { ok: false }
|
|
28
27
|
|
|
29
28
|
const lastDecisive = reviews.filter((r) => isSelf(r.login, r.isBot, self) && isDecisive(r.state)).at(-1)
|
|
30
|
-
return { ok: true,
|
|
29
|
+
return { ok: true, effective: toEffective(lastDecisive?.state) }
|
|
31
30
|
}
|
|
32
31
|
}
|
|
33
32
|
|
|
33
|
+
// Maps a raw review state onto the guard's vocabulary: APPROVED and
// CHANGES_REQUESTED pass through; anything else (including undefined) is NONE.
function toEffective(state: string | undefined): EffectiveVerdict {
  switch (state) {
    case 'APPROVED':
      return 'APPROVED'
    case 'CHANGES_REQUESTED':
      return 'CHANGES_REQUESTED'
    default:
      return 'NONE'
  }
}
|
|
38
|
+
|
|
34
39
|
// A bot's effective review is its LATEST decisive one. COMMENTED/PENDING are
|
|
35
40
|
// non-deciding noise that must not clear an earlier APPROVED/CHANGES_REQUESTED;
|
|
36
41
|
// a later CHANGES_REQUESTED or DISMISSED supersedes an earlier APPROVED. The
|
|
@@ -11,7 +11,7 @@ import { classifyGhToken } from './token-class'
|
|
|
11
11
|
export default definePlugin({
|
|
12
12
|
plugin: async (ctx) => {
|
|
13
13
|
const resolveTokenForRepo = ctx.github.resolveTokenForRepo
|
|
14
|
-
const
|
|
14
|
+
const verdictGuard = createApproveIdempotencyGuard({
|
|
15
15
|
resolveEffectiveApproval: createGithubEffectiveApprovalResolver({
|
|
16
16
|
resolveToken: async (workspace) => {
|
|
17
17
|
const result = await resolveTokenForRepo(workspace)
|
|
@@ -28,7 +28,7 @@ export default definePlugin({
|
|
|
28
28
|
|
|
29
29
|
const review = await noteReviewCommand({ callId: event.callId, command })
|
|
30
30
|
if (review.detected !== null) {
|
|
31
|
-
const block = await
|
|
31
|
+
const block = await verdictGuard.guard({
|
|
32
32
|
callId: event.callId,
|
|
33
33
|
workspace: review.detected.workspace,
|
|
34
34
|
prNumber: review.detected.prNumber,
|
|
@@ -70,7 +70,7 @@ export default definePlugin({
|
|
|
70
70
|
callId: event.callId,
|
|
71
71
|
result: event.result,
|
|
72
72
|
})
|
|
73
|
-
|
|
73
|
+
verdictGuard.release({ callId: event.callId, succeeded: committed })
|
|
74
74
|
},
|
|
75
75
|
},
|
|
76
76
|
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// Discord renders no GitHub-flavored Markdown tables — a `| a | b |` block
|
|
2
|
+
// shows up as literal pipes and dashes, so an agent reply that leans on a table
|
|
3
|
+
// (very common) becomes unreadable. Discord DOES preserve whitespace verbatim
|
|
4
|
+
// inside inline code spans, so we re-emit each table row as a single
|
|
5
|
+
// backtick-wrapped line with columns padded to a fixed width. Columns line up
|
|
6
|
+
// because every row is the same monospaced inline-code span. The header row is
|
|
7
|
+
// additionally wrapped in `**...**` so it reads as a bold caption above the body.
|
|
8
|
+
//
|
|
9
|
+
// This is a line-walker, not a Markdown parser: it only touches blocks that
|
|
10
|
+
// match the pipe-table shape (a `|`-bearing line followed by a `|---|` alignment
|
|
11
|
+
// row) and leaves every other byte — prose, code fences, lists — untouched.
|
|
12
|
+
|
|
13
|
+
const TABLE_SEP_RE = /^\s*\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/
|
|
14
|
+
const FENCE_RE = /^(\s*)(```+|~~~+)(.*)$/
|
|
15
|
+
|
|
16
|
+
// Rewrites every pipe table in `input` via renderTable and returns the result;
// all other lines are copied through unchanged. Lines matching FENCE_RE toggle
// a "inside code fence" state during which no table detection happens.
export function convertDiscordTables(input: string): string {
  // Without a pipe there can be no table (this also covers the empty string).
  if (input === '' || !input.includes('|')) return input

  const lines = input.split('\n')
  const rendered: string[] = []
  let activeFence: string | null = null
  let cursor = 0

  while (cursor < lines.length) {
    const current = lines[cursor]!
    const fenceMatch = FENCE_RE.exec(current)

    if (fenceMatch !== null) {
      const marker = fenceMatch[2]!
      if (activeFence === null) {
        activeFence = marker
      } else if (marker[0] === activeFence[0] && marker.length >= activeFence.length) {
        // Same fence character and at least as long as the opener: this closes it.
        activeFence = null
      }
    }

    // Fence delimiter lines, and every line while a fence is open, pass through verbatim.
    if (fenceMatch !== null || activeFence !== null) {
      rendered.push(current)
      cursor += 1
      continue
    }

    // A table is a `|`-bearing header line immediately followed by an alignment row.
    const following = lines[cursor + 1]
    const startsTable = current.includes('|') && following !== undefined && TABLE_SEP_RE.test(following)
    if (!startsTable) {
      rendered.push(current)
      cursor += 1
      continue
    }

    // Body rows run until the first line that has no pipe (or is blank).
    let end = cursor + 2
    while (end < lines.length && lines[end]!.includes('|') && lines[end]!.trim() !== '') {
      end += 1
    }
    rendered.push(renderTable(lines.slice(cursor, end)))
    cursor = end
  }

  return rendered.join('\n')
}
|
|
69
|
+
|
|
70
|
+
// Renders one table (header line, alignment line, body lines) as newline-joined
// inline-code rows. The alignment line at index 1 is discarded; the header row
// is additionally wrapped in `**…**`.
function renderTable(tableLines: string[]): string {
  const header = splitRow(tableLines[0]!)
  const body = tableLines.slice(2).map((line) => splitRow(line))
  const widths = computeWidths([header, ...body])

  const rendered = [`**${wrapCode(padRow(header, widths))}**`]
  for (const cells of body) {
    rendered.push(wrapCode(padRow(cells, widths)))
  }
  return rendered.join('\n')
}
|
|
79
|
+
|
|
80
|
+
// Splits one table row into trimmed cell strings. At most one leading and one
// trailing pipe are dropped first. Backslash-escaped pipes are NOT honoured —
// every bare `|` is treated as a column boundary.
function splitRow(row: string): string[] {
  const inner = row
    .trim()
    .replace(/^\|/, '')
    .replace(/\|$/, '')
  const cells: string[] = []
  for (const piece of inner.split('|')) {
    cells.push(piece.trim())
  }
  return cells
}
|
|
89
|
+
|
|
90
|
+
// Returns, per column index, the largest displayWidth found in that column
// across all rows. Rows may have differing cell counts.
function computeWidths(rows: string[][]): number[] {
  const widths: number[] = []
  for (const row of rows) {
    row.forEach((cell, col) => {
      widths[col] = Math.max(widths[col] ?? 0, displayWidth(cell))
    })
  }
  return widths
}
|
|
102
|
+
|
|
103
|
+
// Pads each cell to its column width and joins the columns into one line.
// `widths` drives the column count: a row with fewer cells gets empty padded
// cells, and cells beyond widths.length are dropped.
function padRow(cells: string[], widths: number[]): string {
  const padded = widths.map((width, c) => padToWidth(cells[c] ?? '', width))
  // Two spaces between columns keeps them visually distinct inside the
  // monospaced span without a vertical-bar separator.
  return padded.join('  ')
}
|
|
109
|
+
|
|
110
|
+
// Right-pads `cell` with spaces until its displayWidth reaches `width`; a cell
// that is already at least that wide is returned unchanged.
function padToWidth(cell: string, width: number): string {
  const missing = width - displayWidth(cell)
  if (missing <= 0) return cell
  return `${cell}${' '.repeat(missing)}`
}
|
|
114
|
+
|
|
115
|
+
// Visual column width of `text`: the sum of charWidth over its code points
// (iteration is per code point, not per UTF-16 unit, so astral characters count
// once). Padding by `.length` would count UTF-16 units instead and leave tables
// containing wide or zero-width characters misaligned.
export function displayWidth(text: string): number {
  return Array.from(text).reduce((total, glyph) => total + charWidth(glyph.codePointAt(0)!), 0)
}
|
|
129
|
+
|
|
130
|
+
// Column width of a single code point: 0 for zero-width, 2 for wide, else 1.
// The zero-width check takes precedence over the wide check.
function charWidth(cp: number): number {
  return isZeroWidth(cp) ? 0 : isWide(cp) ? 2 : 1
}
|
|
135
|
+
|
|
136
|
+
// True for code points counted as occupying no column.
function isZeroWidth(cp: number): boolean {
  // combining diacritical marks
  if (cp >= 0x0300 && cp <= 0x036f) return true
  // zero-width space (0x200b) followed by ZWNJ/ZWJ/directional marks (0x200c–0x200f)
  if (cp >= 0x200b && cp <= 0x200f) return true
  // variation selectors
  return cp >= 0xfe00 && cp <= 0xfe0f
}
|
|
144
|
+
|
|
145
|
+
// Inclusive [first, last] code-point ranges counted as two columns wide.
const WIDE_RANGES: ReadonlyArray<readonly [number, number]> = [
  [0x1100, 0x115f], // Hangul Jamo
  [0x2e80, 0x303e], // CJK radicals, Kangxi
  [0x3041, 0x33ff], // Hiragana, Katakana, CJK symbols
  [0x3400, 0x4dbf], // CJK Ext A
  [0x4e00, 0x9fff], // CJK Unified Ideographs
  [0xa000, 0xa4cf], // Yi
  [0xac00, 0xd7a3], // Hangul Syllables
  [0xf900, 0xfaff], // CJK Compatibility Ideographs
  [0xfe30, 0xfe4f], // CJK Compatibility Forms
  [0xff00, 0xff60], // Fullwidth Forms
  [0xffe0, 0xffe6], // Fullwidth signs
  [0x2600, 0x26ff], // Miscellaneous Symbols (☀ ♻ ⚠ …)
  [0x2700, 0x27bf], // Dingbats (✅ ✔ ✨ ➡ …)
  [0x2b00, 0x2bff], // Misc Symbols and Arrows (⭐ …)
  [0x1f300, 0x1faff], // emoji, symbols, pictographs
  [0x20000, 0x3fffd], // CJK Ext B+ (supplementary ideographic)
]

// True when `cp` falls inside any of the WIDE_RANGES.
function isWide(cp: number): boolean {
  return WIDE_RANGES.some(([first, last]) => cp >= first && cp <= last)
}
|
|
165
|
+
|
|
166
|
+
// Wraps `text` in an inline-code span. The delimiter is a backtick run one
// longer than the longest run inside `text`, so an embedded backtick cannot
// close the span early. If `text` itself starts or ends with a backtick, a
// single space is placed on both sides so the delimiter never touches a
// content backtick (CommonMark removes that padding space when rendering).
function wrapCode(text: string): string {
  const delimiter = '`'.repeat(longestBacktickRun(text) + 1)
  const touchesBacktick = text.startsWith('`') || text.endsWith('`')
  const inner = touchesBacktick ? ` ${text} ` : text
  return delimiter + inner + delimiter
}
|
|
178
|
+
|
|
179
|
+
// Length of the longest run of consecutive backticks in `text` (0 if none).
function longestBacktickRun(text: string): number {
  const runs = text.match(/`+/g) ?? []
  return runs.reduce((longest, run) => Math.max(longest, run.length), 0)
}
|
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
type InboundDropReason,
|
|
40
40
|
renderPlaceholder,
|
|
41
41
|
} from './discord-bot-classify'
|
|
42
|
+
import { convertDiscordTables } from './discord-bot-format'
|
|
42
43
|
import { createDiscordReactionCallback, createDiscordRemoveReactionCallback } from './discord-bot-reactions'
|
|
43
44
|
import { enrichDiscordMessageReferences } from './discord-bot-reference'
|
|
44
45
|
import {
|
|
@@ -647,7 +648,7 @@ export function createOutboundCallback(deps: {
|
|
|
647
648
|
if (msg.adapter !== 'discord-bot') {
|
|
648
649
|
return { ok: false, error: `unknown adapter: ${msg.adapter}` }
|
|
649
650
|
}
|
|
650
|
-
const text = msg.text ?? ''
|
|
651
|
+
const text = convertDiscordTables(msg.text ?? '')
|
|
651
652
|
const attachments = msg.attachments ?? []
|
|
652
653
|
if (text === '' && attachments.length === 0) {
|
|
653
654
|
return { ok: false, error: 'message has neither text nor attachments' }
|
|
@@ -9,6 +9,7 @@ import { removeRequestedReviewer } from './decoy-reviewer'
|
|
|
9
9
|
import type { DeliveryDedup } from './dedup'
|
|
10
10
|
import { isGithubEventAllowed } from './event-allowlist'
|
|
11
11
|
import { encodeGithubReactionRef, type GithubReactionTarget } from './reactions'
|
|
12
|
+
import { fetchSelfReviewBlocking } from './review-state'
|
|
12
13
|
import { listUnresolvedSelfReviewThreads } from './review-thread-resolver'
|
|
13
14
|
|
|
14
15
|
export type GithubInboundLogger = { info: (m: string) => void; warn: (m: string) => void; error: (m: string) => void }
|
|
@@ -83,14 +84,16 @@ export function createGithubWebhookHandler(options: GithubWebhookHandlerOptions)
|
|
|
83
84
|
}
|
|
84
85
|
|
|
85
86
|
// A push to an open PR (`synchronize`) is not a message to react to — it is
|
|
86
|
-
// a trigger to re-
|
|
87
|
-
//
|
|
88
|
-
//
|
|
89
|
-
//
|
|
87
|
+
// a trigger to re-evaluate the bot's own outstanding review obligations on
|
|
88
|
+
// this PR: unresolved review threads it authored AND a sticky
|
|
89
|
+
// CHANGES_REQUESTED block (which leaves no threads when filed as a top-level
|
|
90
|
+
// verdict — the black hole this path closes). Both need an API round-trip,
|
|
91
|
+
// so it runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
|
|
92
|
+
// session when an obligation is outstanding. Returning here also keeps
|
|
90
93
|
// synchronize out of the generic awareness-only fallthrough below.
|
|
91
94
|
if (event === 'pull_request' && action === 'synchronize') {
|
|
92
95
|
if (delivery !== '') options.dedup.add(delivery)
|
|
93
|
-
|
|
96
|
+
scheduleReviewFollowup({ payload, selfLogin, options })
|
|
94
97
|
return ok()
|
|
95
98
|
}
|
|
96
99
|
|
|
@@ -187,7 +190,7 @@ function defaultScheduleBackgroundTask(task: () => Promise<void>): void {
|
|
|
187
190
|
void task().catch(() => {})
|
|
188
191
|
}
|
|
189
192
|
|
|
190
|
-
function
|
|
193
|
+
function scheduleReviewFollowup(input: {
|
|
191
194
|
payload: Record<string, unknown>
|
|
192
195
|
selfLogin: string | null
|
|
193
196
|
options: GithubWebhookHandlerOptions
|
|
@@ -203,13 +206,27 @@ function scheduleReviewThreadRecheck(input: {
|
|
|
203
206
|
if (repository === null || pullNumber === null) return
|
|
204
207
|
const headSha = readString(readRecord(pr?.head), 'sha')
|
|
205
208
|
|
|
209
|
+
// Same webhook head SHA can arrive on several deliveries (a multi-commit push
|
|
210
|
+
// emits one synchronize per ref update). Dedup the follow-up on the head SHA
|
|
211
|
+
// so a single push wakes at most one re-review, distinct from the per-delivery
|
|
212
|
+
// dedup above. When headSha is absent we cannot dedup, so we skip the followup
|
|
213
|
+
// rather than risk a re-review storm.
|
|
214
|
+
if (headSha === null) {
|
|
215
|
+
options.logger.warn(`[github] synchronize for ${repository.owner}/${repository.name}#${pullNumber} has no head sha`)
|
|
216
|
+
return
|
|
217
|
+
}
|
|
218
|
+
const followupKey = `synchronize-followup:${repository.owner}/${repository.name}#${pullNumber}:${headSha}`
|
|
219
|
+
if (options.dedup.has(followupKey)) return
|
|
220
|
+
options.dedup.add(followupKey)
|
|
221
|
+
|
|
222
|
+
const reviewOn = options.reviewOn?.() ?? 'review_requested'
|
|
206
223
|
const fetchImpl = options.fetchImpl ?? fetch
|
|
207
224
|
const schedule = options.scheduleBackgroundTask ?? defaultScheduleBackgroundTask
|
|
208
225
|
const target = `${repository.owner}/${repository.name}#${pullNumber}`
|
|
209
226
|
schedule(async () => {
|
|
210
227
|
try {
|
|
211
228
|
const token = await authToken({ repoSlug: `${repository.owner}/${repository.name}` })
|
|
212
|
-
const
|
|
229
|
+
const threads = await listUnresolvedSelfReviewThreads({
|
|
213
230
|
token,
|
|
214
231
|
selfLogin,
|
|
215
232
|
owner: repository.owner,
|
|
@@ -217,46 +234,63 @@ function scheduleReviewThreadRecheck(input: {
|
|
|
217
234
|
prNumber: pullNumber,
|
|
218
235
|
fetchImpl,
|
|
219
236
|
})
|
|
220
|
-
if (!
|
|
221
|
-
options.logger.warn(`[github] review-thread recheck failed for ${target}: ${
|
|
237
|
+
if (!threads.ok) {
|
|
238
|
+
options.logger.warn(`[github] review-thread recheck failed for ${target}: ${threads.error}`)
|
|
222
239
|
return
|
|
223
240
|
}
|
|
224
|
-
|
|
241
|
+
|
|
242
|
+
// A held CHANGES_REQUESTED is the bot's own obligation regardless of how
|
|
243
|
+
// reviews are triggered, so re-evaluate it on push unless review is off.
|
|
244
|
+
let selfBlocking = false
|
|
245
|
+
if (reviewOn !== 'off') {
|
|
246
|
+
const blocking = await fetchSelfReviewBlocking({
|
|
247
|
+
token,
|
|
248
|
+
selfLogin,
|
|
249
|
+
owner: repository.owner,
|
|
250
|
+
repo: repository.name,
|
|
251
|
+
prNumber: pullNumber,
|
|
252
|
+
fetchImpl,
|
|
253
|
+
})
|
|
254
|
+
if (blocking.ok) selfBlocking = blocking.selfBlocking
|
|
255
|
+
else options.logger.warn(`[github] review-state recheck failed for ${target}: ${blocking.error}`)
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
const rootCommentIds = threads.threads.map((t) => t.rootCommentId)
|
|
259
|
+
if (rootCommentIds.length === 0 && !selfBlocking) return
|
|
225
260
|
options.route(
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
261
|
+
withApprovalPolicy(
|
|
262
|
+
buildReviewFollowupInbound({
|
|
263
|
+
repository,
|
|
264
|
+
pullNumber,
|
|
265
|
+
headSha,
|
|
266
|
+
rootCommentIds,
|
|
267
|
+
selfBlocking,
|
|
268
|
+
title: readString(pr, 'title'),
|
|
269
|
+
}),
|
|
270
|
+
options.allowApprove?.() ?? true,
|
|
271
|
+
),
|
|
233
272
|
)
|
|
234
273
|
} catch (err) {
|
|
235
274
|
options.logger.warn(
|
|
236
|
-
`[github] review
|
|
275
|
+
`[github] review followup failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
|
|
237
276
|
)
|
|
238
277
|
}
|
|
239
278
|
})
|
|
240
279
|
}
|
|
241
280
|
|
|
242
|
-
function
|
|
281
|
+
function buildReviewFollowupInbound(input: {
|
|
243
282
|
repository: { owner: string; name: string }
|
|
244
283
|
pullNumber: number
|
|
245
|
-
headSha: string
|
|
284
|
+
headSha: string
|
|
246
285
|
rootCommentIds: readonly number[]
|
|
286
|
+
selfBlocking: boolean
|
|
247
287
|
title: string | null
|
|
248
288
|
}): InboundMessage {
|
|
249
|
-
const { repository, pullNumber, headSha, rootCommentIds, title } = input
|
|
289
|
+
const { repository, pullNumber, headSha, rootCommentIds, selfBlocking, title } = input
|
|
250
290
|
const titleSegment = title !== null && title.trim() !== '' ? `: "${title}"` : ''
|
|
251
|
-
const shaSegment = headSha !== null ? ` (now at ${headSha.slice(0, 7)})` : ''
|
|
252
|
-
const idList = rootCommentIds.join(', ')
|
|
253
291
|
const text =
|
|
254
|
-
`PR #${pullNumber}${titleSegment} received new commits${
|
|
255
|
-
|
|
256
|
-
`(root comment id(s): ${idList}). For each, check whether the new commits addressed your ` +
|
|
257
|
-
`concern. If addressed, reply on that thread via channel_send with a short acknowledgement ` +
|
|
258
|
-
`and resolve_review_thread: true (the thread id is the root comment id). If not addressed, ` +
|
|
259
|
-
`leave it open. If none are addressed, end your turn without replying.`
|
|
292
|
+
`PR #${pullNumber}${titleSegment} received new commits (now at ${headSha.slice(0, 7)}). ` +
|
|
293
|
+
followupInstruction(rootCommentIds, selfBlocking)
|
|
260
294
|
|
|
261
295
|
return {
|
|
262
296
|
adapter: 'github',
|
|
@@ -264,7 +298,7 @@ function buildRecheckInbound(input: {
|
|
|
264
298
|
chat: `pr:${pullNumber}`,
|
|
265
299
|
thread: null,
|
|
266
300
|
text,
|
|
267
|
-
externalMessageId: `pr-${pullNumber}-recheck-${headSha
|
|
301
|
+
externalMessageId: `pr-${pullNumber}-recheck-${headSha}`,
|
|
268
302
|
authorId: 'github-system',
|
|
269
303
|
authorName: 'github',
|
|
270
304
|
authorIsBot: false,
|
|
@@ -277,6 +311,30 @@ function buildRecheckInbound(input: {
|
|
|
277
311
|
}
|
|
278
312
|
}
|
|
279
313
|
|
|
314
|
+
// Builds the instruction portion of a synchronize follow-up prompt.
//
//   rootCommentIds - root comment ids of the unresolved review threads listed
//                    in the prompt; an empty list omits the thread section.
//   selfBlocking   - true when the prompt must also cover a still-standing
//                    CHANGES_REQUESTED review.
//
// Returns the thread section (if any), followed by either the blocking section
// or the "end your turn without replying" sentence, concatenated with no
// separator beyond the trailing space each section already carries.
function followupInstruction(rootCommentIds: readonly number[], selfBlocking: boolean): string {
  const sections: string[] = []

  if (rootCommentIds.length > 0) {
    const threadCount = rootCommentIds.length
    const idList = rootCommentIds.join(', ')
    sections.push(
      `You have ${threadCount} unresolved review thread(s) you authored on this PR ` +
        `(root comment id(s): ${idList}). For each, check whether the new commits ` +
        `addressed your concern. If addressed, reply on that thread via channel_send with a short ` +
        `acknowledgement and resolve_review_thread: true (the thread id is the root comment id); ` +
        `if not, leave it open. `,
    )
  }

  if (selfBlocking) {
    // A held CHANGES_REQUESTED never clears itself: GitHub keeps the block until
    // a fresh APPROVE/COMMENT/dismiss, so a blocking follow-up must always end
    // with a submitted verdict and gets no "end without replying" sentence.
    sections.push(
      `Your latest review on this PR is still CHANGES_REQUESTED, which keeps the PR blocked until you ` +
        `submit a fresh review. Re-review the current head against the concerns from that blocking review ` +
        `and always end with a new verdict: if the commits resolve your concerns, submit an APPROVE ` +
        `(or COMMENT if approval is disabled) to clear the block; if concerns remain, submit a new ` +
        `CHANGES_REQUESTED explaining what is still blocking. `,
    )
  } else {
    // Thread-only path: leaving every thread open is a valid no-op.
    sections.push('If none are addressed, end your turn without replying.')
  }

  return sections.join('')
}
|
|
337
|
+
|
|
280
338
|
export async function verifySignature(body: string, secret: string, sigHeader: string): Promise<boolean> {
|
|
281
339
|
const expected = `sha256=${createHmac('sha256', secret).update(body).digest('hex')}`
|
|
282
340
|
const a = Buffer.from(expected)
|
|
@@ -48,6 +48,33 @@ export function createGithubReviewStateResolver(deps: {
|
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
export type SelfReviewBlockingResult =
|
|
52
|
+
| { ok: true; selfBlocking: boolean }
|
|
53
|
+
| { ok: false; error: string; code: 'not-found' | 'permission-denied' | 'transient' }
|
|
54
|
+
|
|
55
|
+
// Reports whether the LAST decisive review returned for `selfLogin` on the PR
// is CHANGES_REQUESTED. Only states accepted by `isDecisive` are considered
// (COMMENTED/PENDING are ignored, as in createGithubReviewStateResolver).
// Kept standalone so the synchronize follow-up skips the reviewDecision
// round-trip the stranding guard needs but this check doesn't.
//
// Resolves to `{ ok: true, selfBlocking }` on success, or forwards the
// `error`/`code` pair from the underlying review fetch on failure.
export async function fetchSelfReviewBlocking(deps: {
  token: string
  selfLogin: string
  owner: string
  repo: string
  prNumber: number
  fetchImpl?: typeof fetch
}): Promise<SelfReviewBlockingResult> {
  const { token, selfLogin, owner, repo, prNumber } = deps
  // Fall back to the global fetch when no implementation was supplied.
  const reviews = await fetchSelfReviews(deps.fetchImpl ?? fetch, token, { owner, repo, prNumber }, selfLogin)
  if (!reviews.ok) {
    const { error, code } = reviews
    return { ok: false, error, code }
  }

  // No decisive review at all yields `undefined`, which is simply "not blocking".
  const latestDecisive = reviews.states.filter(isDecisive).at(-1)
  return { ok: true, selfBlocking: latestDecisive === 'CHANGES_REQUESTED' }
}
|
|
77
|
+
|
|
51
78
|
type Target = { owner: string; repo: string; prNumber: number }
|
|
52
79
|
|
|
53
80
|
function parseTarget(workspace: string, chat: string): Target | null {
|
|
@@ -3,9 +3,34 @@ export type OutboundFloodCheckResult = { ok: true } | { ok: false; reason: strin
|
|
|
3
3
|
const MIN_LENGTH = 40
|
|
4
4
|
const MAX_RUN = 30
|
|
5
5
|
const MIN_LONG_LENGTH = 80
|
|
6
|
-
const MIN_UNIQUE_RATIO = 0.05
|
|
7
6
|
const MAX_DOMINANCE = 0.9
|
|
8
7
|
|
|
8
|
+
// Contiguous-span detector for multi-character floods ("lollol...", "ababab...",
|
|
9
|
+
// repeated emoji pairs) — including a flood body buried inside otherwise-varied
|
|
10
|
+
// text, which a whole-message periodicity test misses. Strict equality (no
|
|
11
|
+
// mismatch budget) and a large span floor keep it clear of incidental prose
|
|
12
|
+
// repetition ("---", "....", "hahaha", code indentation, table separators).
|
|
13
|
+
const MAX_REPEATING_PERIOD = 32
|
|
14
|
+
// Span floor is deliberately a flood boundary, not a "never-deny" guarantee: it
|
|
15
|
+
// catches obvious short-period floods like "ab".repeat(300) (600 chars) and
|
|
16
|
+
// "lol".repeat(300) (900). Hundreds of byte-identical rows or box-art lines also
|
|
17
|
+
// trip it — that output is information-poor and flood-like, and raising the floor
|
|
18
|
+
// to clear it would let those real floods through. Tables/diagrams with varying
|
|
19
|
+
// cells break periodicity and pass.
|
|
20
|
+
const MIN_PERIODIC_SPAN = 384
|
|
21
|
+
const MIN_PERIODIC_REPETITIONS = 24
|
|
22
|
+
|
|
23
|
+
// Narrow last resort: structured text (code, tables, logs) is often lower-
|
|
24
|
+
// entropy than prose, so this only fires on a tiny alphabet at real length.
|
|
25
|
+
const MIN_ENTROPY_LENGTH = 200
|
|
26
|
+
const MAX_TINY_ALPHABET_SIZE = 4
|
|
27
|
+
const VERY_LOW_ENTROPY_BITS = 1.25
|
|
28
|
+
|
|
29
|
+
// Replaces the old `uniqueRatio = distinctChars / length` gate, which was
|
|
30
|
+
// length-coupled: natural language draws from a fixed alphabet, so any reply
|
|
31
|
+
// past ~(alphabet/0.05) chars failed it regardless of variety — a 2.9KB
|
|
32
|
+
// markdown report was silently dropped. Every check below is bounded-run or
|
|
33
|
+
// length-independent, so length alone never makes a reply look like a flood.
|
|
9
34
|
export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
|
|
10
35
|
if (text.length < MIN_LENGTH) return { ok: true }
|
|
11
36
|
|
|
@@ -18,12 +43,18 @@ export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
|
|
|
18
43
|
if (graphemes.length < MIN_LONG_LENGTH) return { ok: true }
|
|
19
44
|
|
|
20
45
|
const counts = countGraphemes(graphemes)
|
|
21
|
-
const uniqueRatio = counts.size / graphemes.length
|
|
22
|
-
if (uniqueRatio < MIN_UNIQUE_RATIO) return { ok: false, reason: `low-unique-ratio:${uniqueRatio.toFixed(3)}` }
|
|
23
46
|
|
|
24
47
|
const dominance = maxValue(counts) / graphemes.length
|
|
25
48
|
if (dominance > MAX_DOMINANCE) return { ok: false, reason: `char-dominance:${dominance.toFixed(2)}` }
|
|
26
49
|
|
|
50
|
+
const span = findLongestPeriodicSpan(graphemes)
|
|
51
|
+
if (span !== undefined) return { ok: false, reason: `repeated-pattern-span:${span.period}:${span.spanLength}` }
|
|
52
|
+
|
|
53
|
+
if (graphemes.length >= MIN_ENTROPY_LENGTH && counts.size <= MAX_TINY_ALPHABET_SIZE) {
|
|
54
|
+
const entropy = shannonEntropyBitsPerGrapheme(counts, graphemes.length)
|
|
55
|
+
if (entropy < VERY_LOW_ENTROPY_BITS) return { ok: false, reason: `low-entropy:${entropy.toFixed(2)}` }
|
|
56
|
+
}
|
|
57
|
+
|
|
27
58
|
return { ok: true }
|
|
28
59
|
}
|
|
29
60
|
|
|
@@ -42,6 +73,42 @@ function findLongestRun(graphemes: readonly string[]): number {
|
|
|
42
73
|
return longest
|
|
43
74
|
}
|
|
44
75
|
|
|
76
|
+
// Finds the longest contiguous stretch of `graphemes` that repeats EXACTLY with
// some period in 2..MAX_REPEATING_PERIOD. Period 1 is not examined here.
//
// A stretch only qualifies when it is at least MIN_PERIODIC_SPAN graphemes long
// AND contains at least MIN_PERIODIC_REPETITIONS copies of its unit; whichever
// bound is larger applies. Returns `{ period, spanLength }` for the longest
// qualifying stretch (on a tie the smaller period is kept), or `undefined`
// when nothing clears the floor.
function findLongestPeriodicSpan(graphemes: readonly string[]): { period: number; spanLength: number } | undefined {
  const total = graphemes.length
  // A period can only repeat often enough if `total` holds that many copies.
  const periodCeiling = Math.min(MAX_REPEATING_PERIOD, Math.floor(total / MIN_PERIODIC_REPETITIONS))
  let winner: { period: number; spanLength: number } | undefined

  for (let period = 2; period <= periodCeiling; period += 1) {
    // Longest streak of positions that equal the grapheme one period earlier.
    let streak = 0
    let peak = 0
    for (let i = period; i < total; i += 1) {
      streak = graphemes[i] === graphemes[i - period] ? streak + 1 : 0
      if (streak > peak) peak = streak
    }

    // A streak of k matches covers k graphemes plus the leading unit.
    const spanLength = peak === 0 ? 0 : peak + period
    const floor = Math.max(MIN_PERIODIC_SPAN, period * MIN_PERIODIC_REPETITIONS)
    if (spanLength < floor) continue
    if (winner === undefined || spanLength > winner.spanLength) winner = { period, spanLength }
  }

  return winner
}
|
|
102
|
+
|
|
103
|
+
// Shannon entropy, in bits per grapheme, of the frequency table `counts`
// (grapheme -> occurrences) over a message of `length` graphemes.
//
// Returns 0 when `length` is not positive. Entries with a non-positive count
// are skipped: they contribute nothing to the sum, but evaluating
// 0 * log2(0) directly yields NaN, and a NaN entropy makes a caller's
// `entropy < threshold` comparison silently false.
function shannonEntropyBitsPerGrapheme(counts: Map<string, number>, length: number): number {
  if (length <= 0) return 0
  let entropy = 0
  for (const count of counts.values()) {
    if (count <= 0) continue
    const probability = count / length
    entropy -= probability * Math.log2(probability)
  }
  return entropy
}
|
|
111
|
+
|
|
45
112
|
function countGraphemes(graphemes: readonly string[]): Map<string, number> {
|
|
46
113
|
const counts = new Map<string, number>()
|
|
47
114
|
for (const grapheme of graphemes) counts.set(grapheme, (counts.get(grapheme) ?? 0) + 1)
|
package/src/compose/discover.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readdirSync } from 'node:fs'
|
|
2
2
|
import { join, resolve } from 'node:path'
|
|
3
3
|
|
|
4
|
+
import { loadConfigSyncOrDefaults } from '@/config'
|
|
4
5
|
import { containerNameFromCwd } from '@/container'
|
|
5
6
|
import { isInitialized } from '@/init'
|
|
6
7
|
|
|
@@ -17,7 +18,9 @@ export type AgentEntry = {
|
|
|
17
18
|
//
|
|
18
19
|
// Underscore-prefixed names are also skipped so operators can park a disabled
|
|
19
20
|
// or in-progress agent next to live ones (e.g. `_archived-coder/`) without
|
|
20
|
-
// compose touching it.
|
|
21
|
+
// compose touching it. Agents with `compose.exclude: true` in typeclaw.json
|
|
22
|
+
// are skipped too — the in-config opt-out for operators who don't want to rename
|
|
23
|
+
// the folder.
|
|
21
24
|
//
|
|
22
25
|
// Returns an empty array when rootCwd doesn't exist or is empty — discovery is
|
|
23
26
|
// not the place to fail; the caller decides what to do with zero agents.
|
|
@@ -40,6 +43,7 @@ export function discoverAgents(rootCwd: string): AgentEntry[] {
|
|
|
40
43
|
if (entry.name.startsWith('_')) continue
|
|
41
44
|
const cwd = join(root, entry.name)
|
|
42
45
|
if (!isInitialized(cwd)) continue
|
|
46
|
+
if (loadConfigSyncOrDefaults(cwd).compose.exclude) continue
|
|
43
47
|
agents.push({ name: entry.name, cwd, containerName: containerNameFromCwd(cwd) })
|
|
44
48
|
}
|
|
45
49
|
|
package/src/config/config.ts
CHANGED
|
@@ -338,6 +338,39 @@ export const networkSchema = z
|
|
|
338
338
|
|
|
339
339
|
export type NetworkConfig = z.infer<typeof networkSchema>
|
|
340
340
|
|
|
341
|
+
// `realProc` opts the per-tool bwrap sandbox into the 'real-proc' strategy
|
|
342
|
+
// (src/sandbox/build.ts): a fresh procfs scoped to a new PID namespace so
|
|
343
|
+
// external-package runners (`bunx`, `bun add <pkg>`, `bun run <pkg-bin>`) get a
|
|
344
|
+
// working /proc/self/{fd,maps} and stop aborting with Bun's "NotDir". Default
|
|
345
|
+
// `false` keeps the universally-portable '--tmpfs /proc' profile, under which
|
|
346
|
+
// sandboxed external-package execution is unsupported by design. Turning it on
|
|
347
|
+
// makes `typeclaw start` grant the container CAP_SYS_ADMIN (required to mount
|
|
348
|
+
// proc for the new PID namespace), which is a deliberate posture change on the
|
|
349
|
+
// single-tenant outer boundary — see docs/internals/sandbox.mdx. PID isolation
|
|
350
|
+
// and the /proc/N/environ leak guard are both preserved; the trade is the
|
|
351
|
+
// CAP_SYS_ADMIN grant, not sandbox strength.
|
|
352
|
+
export const sandboxSchema = z
|
|
353
|
+
.object({
|
|
354
|
+
realProc: z.boolean().default(false),
|
|
355
|
+
})
|
|
356
|
+
.default({ realProc: false })
|
|
357
|
+
|
|
358
|
+
export type SandboxConfig = z.infer<typeof sandboxSchema>
|
|
359
|
+
|
|
360
|
+
// Host-stage `typeclaw compose` knobs. `exclude: true` skips this agent during
|
|
361
|
+
// compose discovery (same effect as parking it under an `_`-prefixed dir, but
|
|
362
|
+
// without renaming the folder). The container never reads this block — it's a
|
|
363
|
+
// pure compose CLI hint, so omitting it keeps the agent in every compose
|
|
364
|
+
// operation. Namespaced under `compose` so future compose-only settings have a
|
|
365
|
+
// home without crowding the top level.
|
|
366
|
+
export const composeSchema = z
|
|
367
|
+
.object({
|
|
368
|
+
exclude: z.boolean().default(false),
|
|
369
|
+
})
|
|
370
|
+
.default({ exclude: false })
|
|
371
|
+
|
|
372
|
+
export type ComposeConfig = z.infer<typeof composeSchema>
|
|
373
|
+
|
|
341
374
|
// Reverse-proxy tunnels expose a container-private port to the public internet
|
|
342
375
|
// via a managed subprocess (cloudflared) or a user-supplied external URL.
|
|
343
376
|
// See AGENTS.md `## Tunnels`. Keeping the enum scoped to what's implemented
|
|
@@ -490,9 +523,11 @@ export const configSchema = z
|
|
|
490
523
|
// time. Defaults to `[]`. Hatching appends the agent's chosen name
|
|
491
524
|
// here, so a freshly-hatched bot already has its identity wired up.
|
|
492
525
|
alias: z.array(z.string().trim().min(1)).default([]),
|
|
526
|
+
compose: composeSchema,
|
|
493
527
|
channels: channelsSchema,
|
|
494
528
|
portForward: portForwardSchema,
|
|
495
529
|
network: networkSchema,
|
|
530
|
+
sandbox: sandboxSchema,
|
|
496
531
|
docker: dockerSchema,
|
|
497
532
|
git: gitSchema,
|
|
498
533
|
roles: rolesConfigSchema.optional(),
|
|
@@ -632,9 +667,11 @@ export const FIELD_EFFECTS: Record<string, FieldEffect> = {
|
|
|
632
667
|
mcpServers: 'restart-required',
|
|
633
668
|
plugins: 'restart-required',
|
|
634
669
|
alias: 'applied',
|
|
670
|
+
compose: 'ignored',
|
|
635
671
|
channels: 'applied',
|
|
636
672
|
portForward: 'restart-required',
|
|
637
673
|
network: 'restart-required',
|
|
674
|
+
sandbox: 'restart-required',
|
|
638
675
|
tunnels: 'restart-required',
|
|
639
676
|
'docker.file': 'restart-required',
|
|
640
677
|
'git.ignore': 'restart-required',
|
|
@@ -723,6 +760,7 @@ export function extractPluginConfigs(raw: unknown): Record<string, unknown> {
|
|
|
723
760
|
'mounts',
|
|
724
761
|
'plugins',
|
|
725
762
|
'alias',
|
|
763
|
+
'compose',
|
|
726
764
|
'channels',
|
|
727
765
|
'portForward',
|
|
728
766
|
'network',
|
package/src/container/start.ts
CHANGED
|
@@ -514,6 +514,20 @@ export async function planStart({
|
|
|
514
514
|
}
|
|
515
515
|
}
|
|
516
516
|
|
|
517
|
+
// sandbox.realProc opts the per-tool bwrap sandbox into the 'real-proc'
|
|
518
|
+
// strategy (src/sandbox/build.ts), which prefixes the sandbox with
|
|
519
|
+
// `unshare --pid --fork --mount --mount-proc`. Mounting a fresh procfs for the
|
|
520
|
+
// new PID namespace needs real CAP_SYS_ADMIN — seccomp=unconfined alone is not
|
|
521
|
+
// enough (it only unblocks the unshare/clone SYSCALLS; the kernel still
|
|
522
|
+
// rejects mount(2) of proc without the capability). This is the deliberate
|
|
523
|
+
// posture change documented in docs/internals/sandbox.mdx: the default keeps
|
|
524
|
+
// the narrower seccomp-only profile, and the operator grants the broad
|
|
525
|
+
// "new root" capability ONLY by opting into real-proc. Placed before the
|
|
526
|
+
// image tag (like --cap-add=NET_ADMIN) so docker applies it at run time.
|
|
527
|
+
if (cfg.sandbox.realProc) {
|
|
528
|
+
runArgs.push('--cap-add=SYS_ADMIN')
|
|
529
|
+
}
|
|
530
|
+
|
|
517
531
|
if (hostdControl) {
|
|
518
532
|
runArgs.push('--add-host', HOST_GATEWAY_ALIAS)
|
|
519
533
|
}
|
package/src/sandbox/build.ts
CHANGED
|
@@ -36,14 +36,35 @@ export function buildSandboxedCommand(command: string, policy: SandboxPolicy = {
|
|
|
36
36
|
|
|
37
37
|
function buildArgv(command: string, policy: SandboxPolicy): string[] {
|
|
38
38
|
const bwrap = policy.bwrapPath ?? 'bwrap'
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
const procStrategy = policy.proc ?? 'tmpfs'
|
|
40
|
+
const realProc = procStrategy === 'real-proc'
|
|
41
|
+
|
|
42
|
+
// 'real-proc' splits PID-namespace ownership from bwrap. `unshare --pid
|
|
43
|
+
// --fork --mount --mount-proc` (util-linux, baseline) creates the new PID +
|
|
44
|
+
// mount namespaces as REAL root and mounts a fresh procfs scoped to that PID
|
|
45
|
+
// namespace — which OrbStack permits only with CAP_SYS_ADMIN and NOT from
|
|
46
|
+
// bwrap's user namespace (bwrap's --proc is blocked there). bwrap then runs
|
|
47
|
+
// INSIDE that namespace and must NOT re-unshare pid (it would create a second
|
|
48
|
+
// PID ns with no matching procfs and reintroduce the ENOTDIR crash), so we
|
|
49
|
+
// unshare each namespace EXCEPT pid explicitly instead of --unshare-all. The
|
|
50
|
+
// freshly mounted /proc contains only the sandbox subtree, so --ro-bind /proc
|
|
51
|
+
// (below) binds that scoped procfs, never the agent runtime's /proc/N/environ.
|
|
52
|
+
const argv: string[] = realProc
|
|
53
|
+
? ['unshare', '--pid', '--fork', '--mount', '--mount-proc', '--', bwrap]
|
|
54
|
+
: [bwrap, '--unshare-all']
|
|
55
|
+
if (realProc) {
|
|
56
|
+
argv.push('--unshare-user', '--unshare-ipc', '--unshare-uts', '--unshare-cgroup')
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
if (policy.network !== 'inherit') {
|
|
60
|
+
// Default ('none' / undefined) isolates the net namespace — prompt-injected
|
|
61
|
+
// bash cannot exfiltrate over the network unless the consumer opts in.
|
|
62
|
+
// --unshare-all already covers this in the non-real-proc path; under
|
|
63
|
+
// real-proc the explicit unshares above omit net, so add it here.
|
|
64
|
+
if (realProc) argv.push('--unshare-net')
|
|
65
|
+
} else if (!realProc) {
|
|
66
|
+
// --unshare-all unshared the net namespace; --share-net rejoins the outer
|
|
67
|
+
// container's network. Under real-proc we simply never add --unshare-net.
|
|
47
68
|
argv.push('--share-net')
|
|
48
69
|
}
|
|
49
70
|
|
|
@@ -97,7 +118,15 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
|
|
|
97
118
|
'/lib64',
|
|
98
119
|
)
|
|
99
120
|
|
|
100
|
-
if (
|
|
121
|
+
if (realProc) {
|
|
122
|
+
// The outer `unshare --mount-proc` already mounted a fresh procfs scoped to
|
|
123
|
+
// the new PID namespace. --ro-bind /proc /proc binds THAT procfs (not the
|
|
124
|
+
// outer container's), so the child gets real /proc/self/{fd,maps} and the
|
|
125
|
+
// agent runtime's pids — and their /proc/N/environ secrets — are simply
|
|
126
|
+
// absent from this namespace. No /proc/self/exe symlink is needed: a real
|
|
127
|
+
// /proc/self/exe already resolves correctly.
|
|
128
|
+
argv.push('--ro-bind', '/proc', '/proc')
|
|
129
|
+
} else if (procStrategy === 'tmpfs') {
|
|
101
130
|
// --tmpfs /proc, never --proc /proc (OrbStack's kernel blocks
|
|
102
131
|
// mount("proc",...) from user namespaces) and never --dev-bind /proc /proc
|
|
103
132
|
// (leaks the outer container's /proc/N/environ — including
|
|
@@ -111,6 +140,9 @@ function buildArgv(command: string, policy: SandboxPolicy): string[] {
|
|
|
111
140
|
// /proc/self/exe. --symlink (not --ro-bind /proc/self/exe): /proc/self at
|
|
112
141
|
// setup time is bwrap's pid, so a bind would capture bwrap's own binary.
|
|
113
142
|
// Must come AFTER --tmpfs /proc (last-op-wins) or the tmpfs erases it.
|
|
143
|
+
// This restores only the runner's SELF-location; a spawned child still
|
|
144
|
+
// reads /proc/self/fd + /proc/self/maps, which the empty tmpfs lacks, so
|
|
145
|
+
// external-package execution requires the 'real-proc' strategy above.
|
|
114
146
|
if (policy.procSelfExe !== undefined) {
|
|
115
147
|
argv.push('--ro-bind', policy.procSelfExe, policy.procSelfExe)
|
|
116
148
|
argv.push('--symlink', policy.procSelfExe, '/proc/self/exe')
|
package/src/sandbox/policy.ts
CHANGED
|
@@ -6,7 +6,15 @@ export type SandboxMount =
|
|
|
6
6
|
|
|
7
7
|
export type SandboxNetwork = 'none' | 'inherit'
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
// 'tmpfs' (default): empty /proc + a single /proc/self/exe symlink. Works on
|
|
10
|
+
// every host but gives no /proc/self/{fd,maps}, so a JS package runner's CHILD
|
|
11
|
+
// (the spawned bin) crashes with ENOTDIR reading /proc/self/fd. 'none': no
|
|
12
|
+
// /proc at all. 'real-proc': mount a fresh procfs scoped to a NEW pid namespace
|
|
13
|
+
// so the child gets a real /proc/self/{fd,maps} WITHOUT seeing the agent
|
|
14
|
+
// runtime's pids (no /proc/<agent>/environ leak). 'real-proc' requires the
|
|
15
|
+
// outer container to hold CAP_SYS_ADMIN (mount(2) of proc); start.ts only grants
|
|
16
|
+
// it when the operator opts in via typeclaw.json#sandbox.realProc.
|
|
17
|
+
export type SandboxProcStrategy = 'tmpfs' | 'none' | 'real-proc'
|
|
10
18
|
|
|
11
19
|
export type SandboxEnvPolicy = {
|
|
12
20
|
set?: Record<string, string>
|
package/typeclaw.schema.json
CHANGED
|
@@ -190,6 +190,18 @@
|
|
|
190
190
|
"minLength": 1
|
|
191
191
|
}
|
|
192
192
|
},
|
|
193
|
+
"compose": {
|
|
194
|
+
"default": {
|
|
195
|
+
"exclude": false
|
|
196
|
+
},
|
|
197
|
+
"type": "object",
|
|
198
|
+
"properties": {
|
|
199
|
+
"exclude": {
|
|
200
|
+
"default": false,
|
|
201
|
+
"type": "boolean"
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
},
|
|
193
205
|
"channels": {
|
|
194
206
|
"default": {},
|
|
195
207
|
"type": "object",
|
|
@@ -1114,6 +1126,18 @@
|
|
|
1114
1126
|
}
|
|
1115
1127
|
}
|
|
1116
1128
|
},
|
|
1129
|
+
"sandbox": {
|
|
1130
|
+
"default": {
|
|
1131
|
+
"realProc": false
|
|
1132
|
+
},
|
|
1133
|
+
"type": "object",
|
|
1134
|
+
"properties": {
|
|
1135
|
+
"realProc": {
|
|
1136
|
+
"default": false,
|
|
1137
|
+
"type": "boolean"
|
|
1138
|
+
}
|
|
1139
|
+
}
|
|
1140
|
+
},
|
|
1117
1141
|
"docker": {
|
|
1118
1142
|
"default": {
|
|
1119
1143
|
"file": {
|