typeclaw 0.31.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent/index.ts +7 -0
- package/src/agent/plugin-tools.ts +16 -0
- package/src/agent/reviewer-bash-policy.ts +572 -0
- package/src/agent/session-origin.ts +50 -5
- package/src/agent/subagents.ts +9 -0
- package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +132 -15
- package/src/bundled-plugins/github-cli-auth/effective-approval.ts +32 -1
- package/src/bundled-plugins/github-cli-auth/index.ts +8 -8
- package/src/bundled-plugins/researcher/write-report.ts +8 -6
- package/src/bundled-plugins/reviewer/reviewer.ts +14 -7
- package/src/bundled-plugins/reviewer/skills/code-review.ts +30 -1
- package/src/run/index.ts +1 -0
- package/src/skills/typeclaw-markdown-pdf/SKILL.md +21 -7
package/package.json
CHANGED
package/src/agent/index.ts
CHANGED
|
@@ -49,6 +49,7 @@ import {
|
|
|
49
49
|
} from './plugin-tools'
|
|
50
50
|
import { createReloadTool } from './reload-tool'
|
|
51
51
|
import type { RestartHandoffOrigin } from './restart-handoff'
|
|
52
|
+
import type { SubagentBashPolicy } from './reviewer-bash-policy'
|
|
52
53
|
import { loadSelf } from './self'
|
|
53
54
|
import { SESSION_META_CUSTOM_TYPE, sessionMetaPayload } from './session-meta'
|
|
54
55
|
import { renderSessionOrigin, type SessionOrigin, type SessionRoleContext } from './session-origin'
|
|
@@ -147,6 +148,11 @@ export type CreateSessionOptions = {
|
|
|
147
148
|
// wider plugin registry's tools are NOT injected. Used by plugin subagent
|
|
148
149
|
// session creation so subagents see exactly what they declared.
|
|
149
150
|
pluginSubagent?: PluginSubagentSelection
|
|
151
|
+
// Per-subagent bash capability restriction. Threaded to the bash-tool wrapper
|
|
152
|
+
// and enforced before the role-derived sandbox, so a read-only subagent's
|
|
153
|
+
// bash stays read-only regardless of the spawning role. See
|
|
154
|
+
// `src/agent/reviewer-bash-policy.ts`.
|
|
155
|
+
bashPolicy?: SubagentBashPolicy
|
|
150
156
|
// Enables the `restart` tool. Set when the agent is running inside a
|
|
151
157
|
// typeclaw-managed container. Read from TYPECLAW_CONTAINER_NAME at the call site.
|
|
152
158
|
containerName?: string
|
|
@@ -411,6 +417,7 @@ export async function createSessionWithDispose(options: CreateSessionOptions = {
|
|
|
411
417
|
getOrigin,
|
|
412
418
|
getAbort,
|
|
413
419
|
...(options.permissions ? { permissions: options.permissions } : {}),
|
|
420
|
+
...(options.bashPolicy !== undefined ? { bashPolicy: options.bashPolicy } : {}),
|
|
414
421
|
})
|
|
415
422
|
: []
|
|
416
423
|
const wrappedCustomSystemTools = wrapSystemTools(customSystemTools, options.plugins, getOrigin, getAbort)
|
|
@@ -49,6 +49,7 @@ import {
|
|
|
49
49
|
|
|
50
50
|
import { createLoopGuard, type LoopGuard, type LoopGuardDecision } from './loop-guard'
|
|
51
51
|
import { checkImageReadRedirect } from './multimodal/read-redirect'
|
|
52
|
+
import { enforceSubagentBashPolicy, type SubagentBashPolicy } from './reviewer-bash-policy'
|
|
52
53
|
import type { SessionOrigin } from './session-origin'
|
|
53
54
|
import { SUBAGENT_OUTPUT_TOOL_NAME, type SubagentOutputToolDetails } from './tools/subagent-output'
|
|
54
55
|
import { webFetchTool } from './tools/webfetch'
|
|
@@ -193,6 +194,11 @@ export type WrapSystemToolOptions = {
|
|
|
193
194
|
// runs bash unchanged — preserving today's behavior for trusted+ and for
|
|
194
195
|
// sessions wired without a permission service (e.g. tests).
|
|
195
196
|
permissions?: PermissionService
|
|
197
|
+
// Per-subagent bash capability policy, enforced as a hard pre-check BEFORE
|
|
198
|
+
// the role-derived sandbox (which returns early for trusted/owner). Lets a
|
|
199
|
+
// read-only subagent keep its bash read-only no matter who spawned it. See
|
|
200
|
+
// `src/agent/reviewer-bash-policy.ts`.
|
|
201
|
+
bashPolicy?: SubagentBashPolicy
|
|
196
202
|
}
|
|
197
203
|
|
|
198
204
|
// Zod 4 emits a top-level `"$schema": "https://json-schema.org/draft/2020-12/schema"`
|
|
@@ -461,6 +467,16 @@ export function wrapAgentToolAsCustomToolDefinition<TParams extends TSchema, TDe
|
|
|
461
467
|
}
|
|
462
468
|
stripGuardAcknowledgements(mutableArgs)
|
|
463
469
|
|
|
470
|
+
// Per-subagent capability fence: runs BEFORE the role-derived sandbox so
|
|
471
|
+
// a read-only subagent's bash stays read-only even for a trusted/owner
|
|
472
|
+
// caller (for whom applyBashSandbox returns early with no masks). Throws
|
|
473
|
+
// SubagentBashPolicyError on a disallowed command, surfaced to the model
|
|
474
|
+
// as a tool error.
|
|
475
|
+
if (tool.name === 'bash' && opts.bashPolicy !== undefined) {
|
|
476
|
+
const command = mutableArgs.command
|
|
477
|
+
if (typeof command === 'string') enforceSubagentBashPolicy(opts.bashPolicy, command)
|
|
478
|
+
}
|
|
479
|
+
|
|
464
480
|
if (tool.name === 'bash' && opts.permissions !== undefined) {
|
|
465
481
|
await applyBashSandbox(mutableArgs, opts.permissions, liveOrigin, opts.agentDir, opts.sessionId, bashEnvOverlay)
|
|
466
482
|
}
|
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
// Per-subagent bash capability policy. This is NOT the bwrap filesystem
|
|
2
|
+
// sandbox (src/sandbox/) — that is role-derived and returns early for
|
|
3
|
+
// trusted/owner callers. This is a subagent-capability boundary that must hold
|
|
4
|
+
// regardless of who spawned the subagent, so it is enforced as a standalone
|
|
5
|
+
// pre-check at the bash-wrap site before applyBashSandbox runs.
|
|
6
|
+
//
|
|
7
|
+
// Design (issue #452): the `reviewer` subagent is read-only by contract, but
|
|
8
|
+
// its legitimate workflows use pipes (`gh api … | base64 -d | nl -ba`), `&&`
|
|
9
|
+
// chains, and writes to a throwaway `/tmp` scratch checkout. A prefix
|
|
10
|
+
// allowlist plus a metacharacter ban (the SandboxCommandFilter primitive)
|
|
11
|
+
// cannot express "a pipeline of read-only commands", so this policy instead:
|
|
12
|
+
// 1. fails closed on shell constructs that defeat static analysis
|
|
13
|
+
// (command/process substitution, heredocs, `eval`/`sh -c` wrappers,
|
|
14
|
+
// redirects to non-/tmp paths, unbalanced quotes);
|
|
15
|
+
// 2. splits the remaining command on top-level `|` `&&` `||` `;` with a
|
|
16
|
+
// quote/escape-aware scanner;
|
|
17
|
+
// 3. classifies each segment's leading verb against a read-only allowlist and
|
|
18
|
+
// a mutating-subcommand denylist, with path-sensitive handling for the few
|
|
19
|
+
// verbs (git checkout/clone, file writers) that are safe only under /tmp.
|
|
20
|
+
// It is defense-in-depth layered on top of the global exfil guards, not the
|
|
21
|
+
// sole fence — so "deny what we cannot prove safe" is the correct bias.
|
|
22
|
+
|
|
23
|
+
/** Bash capability policy attached to a subagent session; only the read-only reviewer policy exists. */
export type SubagentBashPolicy = { kind: 'readonly-reviewer' }

/**
 * Raised when a bash command violates the subagent's bash policy. `name` is set
 * explicitly so the error can be recognised by name as well as by `instanceof`.
 */
export class SubagentBashPolicyError extends Error {
  constructor(message: string) {
    super(message)
    const errorName = 'SubagentBashPolicyError'
    this.name = errorName
  }
}
|
|
31
|
+
|
|
32
|
+
// Shell constructs that can hide an arbitrary command from the segment/verb
// analysis, each paired with the wording used in the rejection message. The
// whole command is rejected when any pattern matches anywhere in the raw string
// (quoted text included). `$(` also covers `$((` arithmetic; a plain `${…}`
// expansion is not matched.
const FAIL_CLOSED_CONSTRUCTS: { pattern: RegExp; reason: string }[] = (
  [
    [/\$\(/, 'command substitution `$(…)`'],
    [/`/, 'backtick command substitution'],
    [/<\(/, 'process substitution `<(…)`'],
    [/>\(/, 'process substitution `>(…)`'],
    [/<</, 'heredoc `<<`'],
  ] as [RegExp, string][]
).map(([pattern, reason]) => ({ pattern, reason }))
|
|
45
|
+
|
|
46
|
+
// Leading verbs that re-enter a shell or hand execution to another program
// (`bash -c "git push"`, `xargs rm`, `find … -exec`), which would defeat the
// per-verb analysis. A segment whose first word is one of these is rejected.
const FORBIDDEN_WRAPPER_VERBS = new Set<string>(
  'eval exec source . sh bash zsh dash env command xargs find parallel time nohup sudo doas ssh'.split(' '),
)
|
|
69
|
+
|
|
70
|
+
// Leading verbs accepted without further inspection. Two entries get extra
// handling in classifySegment: `sed` is rejected when an in-place flag is
// present, and `tee` has its operands path-checked.
// NOTE(review): several entries can still write or execute through their own
// options or scripts (`awk` system()/print > file, `sed` w/e commands,
// `sort -o`, `rg --pre`, `yq -i`) — confirm whether those need explicit checks.
const READONLY_VERBS = new Set<string>([
  ...['cat', 'head', 'tail', 'wc', 'sort', 'uniq', 'cut', 'tr', 'nl'],
  ...['base64', 'jq', 'yq', 'grep', 'rg', 'egrep', 'fgrep'],
  ...['ls', 'pwd', 'echo', 'printf', 'true', 'false', 'test'],
  ...['dirname', 'basename', 'realpath', 'date'],
  'sed', // read-only as used (no -i); -i is denied in classifySegment
  ...['awk', 'diff', 'comm', 'column', 'fold', 'rev'],
  'tee', // path-checked in classifySegment
])
|
|
108
|
+
|
|
109
|
+
// `git` subcommands treated as read-only. `config`, `remote`, `branch` and
// `tag` also have mutating forms; classifyGit screens their arguments through
// isGitWriteForm before accepting them.
const GIT_READONLY_SUBCOMMANDS = new Set<string>([
  ...['log', 'diff', 'show', 'blame', 'status', 'grep'],
  ...['rev-parse', 'rev-list', 'ls-files', 'ls-tree', 'cat-file', 'describe', 'shortlog'],
  'config', // read form only
  'remote', // `git remote -v` is a read
  'branch', // `git branch` (list) is a read
  'tag', // `git tag` (list) is a read
  ...['name-rev', 'merge-base', 'symbolic-ref', 'for-each-ref', 'show-ref', 'reflog', 'whatchanged'],
])
|
|
136
|
+
|
|
137
|
+
// `git` subcommands that change the working tree, index, refs or a remote.
// These are rejected unconditionally — even inside a /tmp scratch clone —
// because the reviewer has no legitimate use for them. (The acquisition
// subcommands that ARE allowed under /tmp live in GIT_TMP_SCOPED.)
const GIT_MUTATING_ALWAYS_DENIED = new Set<string>(
  [
    'add commit push rebase reset merge cherry-pick revert am apply stash clean',
    'rm mv restore switch gc prune update-ref update-index write-tree commit-tree hash-object',
  ]
    .join(' ')
    .split(' '),
)
|
|
167
|
+
|
|
168
|
+
// `git` subcommands accepted only when assertGitTmpScoped can tie what they
// write to a /tmp location.
const GIT_TMP_SCOPED = new Set<string>('clone fetch checkout init sparse-checkout worktree'.split(' '))
|
|
170
|
+
|
|
171
|
+
// Read-only `gh` surface, keyed by object (`pr`, `issue`, …) with the verbs
// allowed on it. Anything absent is rejected by classifyGh: `gh` can approve,
// merge, comment and edit, none of which the reviewer may do (the parent owns
// posting). The `__any__` marker means "no verb check here"; `api` is instead
// checked by HTTP method in assertGhApiReadOnly.
const GH_READONLY_BY_OBJECT: Record<string, Set<string>> = Object.fromEntries(
  (
    [
      ['pr', ['view', 'diff', 'list', 'checks', 'status']],
      ['issue', ['view', 'list', 'status']],
      ['repo', ['view', 'list']],
      ['release', ['view', 'list']],
      ['run', ['view', 'list']],
      ['api', ['__any__']],
      ['search', ['__any__']],
      ['browse', ['__any__']],
    ] as [string, string[]][]
  ).map(([object, verbs]): [string, Set<string>] => [object, new Set(verbs)]),
)
|
|
186
|
+
|
|
187
|
+
// Filesystem-mutating verbs. classifySegment routes these to classifyFsWriter,
// which accepts them only when their path operands are under /tmp.
const FS_WRITERS = new Set<string>('rm mv cp mkdir touch chmod chown ln rmdir truncate'.split(' '))
|
|
189
|
+
|
|
190
|
+
// Directory prefixes that count as the throwaway scratch area.
const TMP_PREFIXES = ['/tmp/', '/private/tmp/']

/**
 * Reports whether a shell token names a path inside the /tmp scratch area.
 *
 * A textual prefix match alone is not enough: `/tmp/../agent/x` starts with
 * `/tmp/` yet resolves outside it. Any `..` path segment therefore disqualifies
 * the token. Segments are inspected with quotes and backslashes removed, since
 * the shell drops those before the program sees the path (`/tmp/".."/x`).
 *
 * @param token a raw token; one pair of wrapping quotes is tolerated
 * @returns true only for `/tmp` itself or a traversal-free path under a TMP_PREFIXES entry
 */
function isTmpPath(token: string): boolean {
  const unquoted = stripQuotes(token)
  const underTmp = unquoted === '/tmp' || TMP_PREFIXES.some((p) => unquoted.startsWith(p))
  if (!underTmp) return false
  const asSeenByProgram = unquoted.replace(/["'\\]/g, '')
  return !asSeenByProgram.split('/').includes('..')
}

/**
 * Removes one pair of matching wrapping quotes (`"…"` or `'…'`) from a token.
 * Tokens that are shorter than two characters, or whose first and last
 * characters are not the same quote, are returned unchanged.
 */
function stripQuotes(token: string): string {
  if (token.length < 2) return token
  const first = token[0]
  const last = token[token.length - 1]
  const wrapped = (first === '"' && last === '"') || (first === "'" && last === "'")
  return wrapped ? token.slice(1, -1) : token
}
|
|
207
|
+
|
|
208
|
+
// One top-level command of a pipeline/list: its words (quotes and backslashes
// kept verbatim) plus the words that followed a redirect operator.
type Segment = { tokens: string[]; redirectTargets: string[] }

/**
 * Quote/escape-aware scanner that splits a command line into top-level
 * segments and separates ordinary words from redirect targets.
 *
 * - `|`, `||`, `&&`, `;`, a lone `&`, and unquoted newlines/carriage returns
 *   each end the current segment.
 * - The word after `>`, `>>`, `>|`, `<`, `<>` or `>&` is recorded in
 *   `redirectTargets` (input redirects included) so the caller can path-check it.
 * - Descriptor duplication/closing (`2>&1`, `>&2`, `<&0`, `>&-`) opens no file,
 *   so it yields neither a target nor a segment break. Previously the `&` was
 *   read as a separator and the digit became a bogus redirect target.
 * - A run of digits glued to a redirect operator (`2>`) is the descriptor
 *   number, not an argument, and is dropped.
 * - Fails closed on an unbalanced quote and on a redirect operator with no
 *   following word. Previously the "expecting a target" state leaked into the
 *   next segment and swallowed its leading verb.
 *
 * @throws SubagentBashPolicyError when the command cannot be split reliably
 * @returns the non-empty segments in source order
 */
function splitIntoSegments(command: string): Segment[] {
  const segments: Segment[] = []
  let tokens: string[] = []
  let redirectTargets: string[] = []
  let current = ''
  let quote: '"' | "'" | null = null
  let expectingRedirectTarget = false

  const failUnparseable = (what: string): never => {
    throw new SubagentBashPolicyError(`command has ${what}; refusing to run what cannot be parsed safely.`)
  }
  const isDigit = (c: string | undefined): boolean => c !== undefined && c >= '0' && c <= '9'
  const endsWord = (c: string | undefined): boolean => c === undefined || ' \t\n\r|&;<>'.includes(c)

  const pushToken = () => {
    if (current.length === 0) return
    if (expectingRedirectTarget) {
      redirectTargets.push(current)
      expectingRedirectTarget = false
    } else {
      tokens.push(current)
    }
    current = ''
  }
  const pushSegment = () => {
    pushToken()
    // A redirect operator must be followed by its target inside the same segment.
    if (expectingRedirectTarget) failUnparseable('a redirect with no target')
    segments.push({ tokens, redirectTargets })
    tokens = []
    redirectTargets = []
  }

  for (let i = 0; i < command.length; i++) {
    const ch = command[i]!
    if (quote !== null) {
      current += ch
      if (ch === quote) quote = null
      continue
    }
    if (ch === '"' || ch === "'") {
      quote = ch
      current += ch
      continue
    }
    if (ch === '\\') {
      // Keep the escape and the escaped character together in the current word.
      current += ch
      if (i + 1 < command.length) {
        current += command[i + 1]
        i++
      }
      continue
    }
    if (ch === '|' || ch === '&' || ch === ';' || ch === '\n' || ch === '\r') {
      const next = command[i + 1]
      // Two-character operators (`||`, `&&`) are consumed as one separator.
      if ((ch === '|' && next === '|') || (ch === '&' && next === '&')) i++
      pushSegment()
      continue
    }
    if (ch === '>' || ch === '<') {
      // Digits glued to the operator select the descriptor (`2>`); drop them.
      if (current.length > 0 && [...current].every(isDigit)) current = ''
      pushToken()
      if (expectingRedirectTarget) failUnparseable('a redirect with no target')
      // Second operator character: `>>`, `<>`, or the `>|` clobber form.
      const second = command[i + 1]
      if (second === '>' || (ch === '>' && second === '|')) i++
      if (command[i + 1] === '&') {
        // `>&N`, `<&N`, `>&N-`, `>&-`: duplicate/close a descriptor, no file involved.
        let j = i + 2
        while (isDigit(command[j])) j++
        if (command[j] === '-') j++
        if (j > i + 2 && endsWord(command[j])) {
          i = j - 1
          continue
        }
        // `>& file`: the `&` belongs to the operator and the next word is a target.
        i++
      }
      expectingRedirectTarget = true
      continue
    }
    if (ch === ' ' || ch === '\t') {
      pushToken()
      continue
    }
    current += ch
  }
  if (quote !== null) failUnparseable('an unbalanced quote')
  pushSegment()
  return segments.filter((s) => s.tokens.length > 0 || s.redirectTargets.length > 0)
}
|
|
292
|
+
|
|
293
|
+
/**
 * Reports whether any token is a flag that makes the command write in place.
 *
 * Recognised forms (quotes and backslashes are removed first, because the shell
 * strips them before the program sees the flag, so `"-i"` and `-'i'` count):
 * - `--set` / `--add`;
 * - `--in-place`, `--in-place=SUFFIX`, and getopt-style abbreviations of it
 *   (`--in-pl`, `--i`);
 * - a short-option cluster containing `i` (or BSD sed's `-I`), e.g. `-i`,
 *   `-i.bak`, `-ni`. Scanning stops at `e`, `f` or `l`, which take a value, so
 *   the attached script in `-es/i/j/` is not mistaken for the flag.
 *
 * @param tokens the raw words of one command segment
 * @returns true when an in-place/write flag is present
 */
function hasWriteFlag(tokens: string[]): boolean {
  return tokens.some((raw) => {
    const flag = raw.replace(/["'\\]/g, '')
    if (flag === '--set' || flag === '--add') return true
    if (flag.startsWith('--')) {
      const name = flag.split('=')[0]!
      return name.length > 2 && '--in-place'.startsWith(name)
    }
    if (!flag.startsWith('-')) return false
    for (const ch of flag.slice(1)) {
      if (ch === 'i' || ch === 'I') return true
      // The rest of the cluster is this option's value, not more flags.
      if (ch === 'e' || ch === 'f' || ch === 'l') return false
    }
    return false
  })
}
|
|
296
|
+
|
|
297
|
+
// `git -c key=value` keys that make git run an external command. The first
// three are the original set; the rest are other settings that git's
// configuration documentation describes as commands to execute.
const GIT_EXEC_CONFIG_KEYS =
  /hookspath|core\.editor|alias\.|sshcommand|fsmonitor|pager|askpass|credential\.helper|diff\.external|textconv/i

// `git config` flags that select an explicit read action.
const GIT_CONFIG_READ_FLAGS = new Set([
  '--get',
  '--get-all',
  '--get-regexp',
  '--get-urlmatch',
  '--get-color',
  '--get-colorbool',
  '--list',
  '-l',
])

// `git branch` / `git tag` flags that put the command in list mode, where a
// positional word is a pattern or commit rather than a name to create.
const GIT_LIST_MODE_FLAGS = new Set([
  '-l',
  '--list',
  '--contains',
  '--no-contains',
  '--merged',
  '--no-merged',
  '--points-at',
])

// Detects the flagless write forms: `git config <name> <value>` sets a value,
// and `git branch <name>` / `git tag <name>` create a ref.
function isPositionalGitWrite(sub: string, args: string[]): boolean {
  const positionals = args.filter((a) => !a.startsWith('-'))
  if (sub === 'config') {
    if (args.some((a) => GIT_CONFIG_READ_FLAGS.has(a))) return false
    const action = positionals[0]
    if (action === 'get' || action === 'list') return false
    // One word reads a key; two or more is "<name> <value>" (or set/unset …).
    return positionals.length >= 2
  }
  if (sub === 'branch' || sub === 'tag') {
    if (positionals.length === 0) return false
    return !args.some((a) => GIT_LIST_MODE_FLAGS.has(a.split('=')[0]!))
  }
  return false
}

/**
 * Validates one `git …` segment against the reviewer policy.
 *
 * Global options are walked first: `-C <dir>` records the effective working
 * directory, `-c <key=value>` is rejected when the key names a
 * command-executing setting, and any other dash-word is skipped. The first
 * remaining word is the subcommand, which is then:
 * - rejected if it is in GIT_MUTATING_ALWAYS_DENIED;
 * - delegated to assertGitTmpScoped if it is in GIT_TMP_SCOPED;
 * - accepted if it is in GIT_READONLY_SUBCOMMANDS — except that `config`,
 *   `remote`, `branch` and `tag` are rejected when used in a mutating form,
 *   either a write flag/subcommand (isGitWriteForm) or a flagless write such
 *   as `git config k v`, `git branch name`, `git tag name`;
 * - rejected otherwise (default deny).
 *
 * @param tokens the segment's words, with `tokens[0]` being `git`
 * @throws SubagentBashPolicyError when the invocation is not provably read-only
 */
function classifyGit(tokens: string[]): void {
  let workdir: string | null = null
  let idx = 1
  while (idx < tokens.length) {
    const t = tokens[idx]!
    if (t === '-C') {
      workdir = tokens[idx + 1] ?? null
      idx += 2
      continue
    }
    if (t === '-c') {
      const kv = tokens[idx + 1] ?? ''
      if (GIT_EXEC_CONFIG_KEYS.test(stripQuotes(kv))) {
        // Message kept verbatim for callers that match on it.
        throw new SubagentBashPolicyError('git -c override of hooks/editor/alias is not permitted for the reviewer.')
      }
      idx += 2
      continue
    }
    if (t.startsWith('-')) {
      idx++
      continue
    }
    break
  }
  const sub = tokens[idx]
  if (sub === undefined) return // bare `git` — harmless
  const subcommand = stripQuotes(sub)

  if (GIT_MUTATING_ALWAYS_DENIED.has(subcommand)) {
    throw new SubagentBashPolicyError(
      `git ${subcommand} mutates repository state, which the read-only reviewer may not do.`,
    )
  }
  if (GIT_TMP_SCOPED.has(subcommand)) {
    assertGitTmpScoped(subcommand, workdir, tokens.slice(idx + 1))
    return
  }
  if (GIT_READONLY_SUBCOMMANDS.has(subcommand)) {
    if (subcommand === 'config' || subcommand === 'remote' || subcommand === 'branch' || subcommand === 'tag') {
      const args = tokens.slice(idx + 1).map((t) => stripQuotes(t))
      const mutating = args.some((a) => isGitWriteForm(subcommand, a)) || isPositionalGitWrite(subcommand, args)
      if (mutating) {
        throw new SubagentBashPolicyError(`git ${subcommand} is being used in a mutating form, which is not permitted.`)
      }
    }
    return
  }
  throw new SubagentBashPolicyError(`git ${subcommand} is not on the reviewer's read-only allowlist.`)
}
|
|
349
|
+
|
|
350
|
+
/**
 * Accepts a GIT_TMP_SCOPED subcommand only when every path it WRITES is under
 * /tmp — not merely when some operand mentions /tmp.
 *
 * - `--separate-git-dir` (attached or separated) must name a /tmp directory,
 *   because git writes the repository there regardless of the destination.
 * - With `-C <dir>`: the directory must be under /tmp. `-C` only relocates
 *   RELATIVE paths, so for `clone` the destination operand — and for
 *   `init`/`worktree` any path operand — is also rejected when it is absolute
 *   outside /tmp, starts with `~`, or contains a `..` segment.
 * - `clone` without `-C`: requires an explicit destination under /tmp. When the
 *   destination is omitted git derives a directory under the current working
 *   directory, which cannot be proven to be /tmp.
 * - `clone` with more than two operands is rejected: surplus words mean an
 *   unrecognised value-taking flag shifted the operands, so the word checked as
 *   the destination may not be the one git writes to.
 * - Any other subcommand without `-C` operates on the ambient repository and is
 *   rejected.
 *
 * @param subcommand the git subcommand (already unquoted)
 * @param workdir the raw `-C` argument, or null when `-C` was not given
 * @param rest the raw words after the subcommand
 * @throws SubagentBashPolicyError when the write target is not provably under /tmp
 */
function assertGitTmpScoped(subcommand: string, workdir: string | null, rest: string[]): void {
  const deny = (detail: string): never => {
    throw new SubagentBashPolicyError(`git ${subcommand} is permitted only against a /tmp scratch checkout; ${detail}.`)
  }
  // The word as the program receives it, after the shell removes quoting.
  const literal = (token: string): string => token.replace(/["'\\]/g, '')
  const hasParentSegment = (token: string): boolean => literal(token).split('/').includes('..')
  // True when a path can resolve outside a /tmp working directory.
  const escapesTmp = (token: string): boolean => {
    const word = literal(token)
    if (hasParentSegment(token) || word.startsWith('~')) return true
    return word.startsWith('/') && !isTmpPath(token)
  }

  rest.forEach((token, i) => {
    const word = literal(token)
    const gitDir =
      word === '--separate-git-dir'
        ? rest[i + 1]
        : word.startsWith('--separate-git-dir=')
          ? word.slice('--separate-git-dir='.length)
          : undefined
    if (gitDir !== undefined && (!isTmpPath(gitDir) || hasParentSegment(gitDir))) {
      deny('--separate-git-dir must name a directory under /tmp')
    }
  })

  if (workdir !== null) {
    if (!isTmpPath(workdir) || hasParentSegment(workdir)) deny('the -C working directory is not under /tmp')
    if (subcommand === 'clone') {
      const operands = cloneOperands(rest)
      if (operands.length > 2) deny('clone takes only a repository and a destination; extra operands are ambiguous')
      const dest = operands[1]
      if (dest !== undefined && escapesTmp(dest)) deny('the clone destination is not under /tmp')
    } else if (subcommand === 'init' || subcommand === 'worktree') {
      if (rest.some((t) => !literal(t).startsWith('-') && escapesTmp(t))) {
        deny('a path operand resolves outside the /tmp working directory')
      }
    }
    return
  }
  if (subcommand === 'clone') {
    // `git clone [flags] <repo> [<dir>]`: the write target is <dir>.
    const operands = cloneOperands(rest)
    if (operands.length > 2) deny('clone takes only a repository and a destination; extra operands are ambiguous')
    const dest = operands[1]
    if (dest === undefined) deny('clone needs an explicit /tmp destination directory')
    if (!isTmpPath(dest!) || hasParentSegment(dest!)) deny('the clone destination is not under /tmp')
    return
  }
  // fetch/checkout/init/sparse-checkout/worktree without -C operate on the
  // ambient repo (the agent checkout), which is never /tmp.
  deny(`${subcommand} without -C operates on the ambient repo; scope it with -C /tmp/review-*`)
}
|
|
383
|
+
|
|
384
|
+
// `git clone` flags that consume the NEXT word as their value (separated form,
// e.g. `--depth 1`). The value must be skipped when counting the positional
// operands (<repo> [<dir>]). Attached forms (`--depth=1`, `-bmain`) carry their
// own value. Missing a value-taking flag is NOT harmless: its value is then
// counted as an operand and shifts the real ones, so the word examined as the
// destination may differ from the one git writes to. Callers should reject
// surplus operands for that reason.
const GIT_CLONE_VALUE_FLAGS = new Set([
  '--depth',
  '-b',
  '--branch',
  '-o',
  '--origin',
  '-u',
  '--upload-pack',
  '--reference',
  '--reference-if-able',
  '--separate-git-dir',
  '-c',
  '--config',
  '--shallow-since',
  '--shallow-exclude',
  '-j',
  '--jobs',
  '--filter',
  '--template',
  // Further value-taking options listed in the git-clone documentation.
  '--server-option',
  '--bundle-uri',
  '--revision',
  '--ref-format',
])

// Short options of `git clone` that take a value (-b, -o, -c, -u, -j).
const GIT_CLONE_SHORT_VALUE_FLAGS = 'bocuj'

/**
 * Extracts the positional operands of `git clone` from the words after the
 * subcommand, skipping flags and the values of value-taking flags.
 *
 * Whether a word is a flag is decided on the word as git receives it (quotes
 * and backslashes removed): a quoted `"-q"` is still a flag, and counting it as
 * an operand would shift the destination. In a bundled short cluster such as
 * `-qu`, a value-taking flag in last position consumes the next word.
 *
 * @param rest the raw words after `clone`
 * @returns the operands in order, as the original raw tokens
 */
function cloneOperands(rest: string[]): string[] {
  const operands: string[] = []
  for (let i = 0; i < rest.length; i++) {
    const raw = rest[i]!
    const word = raw.replace(/["'\\]/g, '')
    if (!word.startsWith('-')) {
      operands.push(raw)
      continue
    }
    if (GIT_CLONE_VALUE_FLAGS.has(word)) {
      i++
      continue
    }
    if (!word.startsWith('--')) {
      const cluster = [...word.slice(1)]
      const valueFlagAt = cluster.findIndex((ch) => GIT_CLONE_SHORT_VALUE_FLAGS.includes(ch))
      // Last in the cluster: the value is the next word. Earlier: it is attached.
      if (valueFlagAt !== -1 && valueFlagAt === cluster.length - 1) i++
    }
  }
  return operands
}
|
|
423
|
+
|
|
424
|
+
/**
 * Reports whether a single argument turns an otherwise read-only `git config`,
 * `git remote`, `git branch` or `git tag` invocation into a write.
 *
 * Beyond the exact flags originally recognised, this also matches:
 * - the remaining documented mutating flags and subcommand words
 *   (`--unset-all`, `--edit`, `set`, `prune`, `--force`, `--copy`, …);
 * - attached long forms, by comparing only the part before `=`
 *   (`--set-upstream-to=origin/main`);
 * - short options: a dash-word whose first letter is a write flag (`-D`,
 *   `-uorigin/x`), or a bundle made solely of known short flags that contains
 *   one (`-vD`). A word such as `-authordate` (a `--sort` value) is not a
 *   bundle and is left alone.
 *
 * @param sub the git subcommand
 * @param arg one argument, already unquoted by the caller
 * @returns true when the argument selects a mutating form
 */
function isGitWriteForm(sub: string, arg: string): boolean {
  const name = arg.startsWith('--') ? arg.split('=')[0]! : arg
  const shortWrite = (writeLetters: string, knownLetters: string): boolean => {
    if (!arg.startsWith('-') || arg.startsWith('--') || arg.length < 2) return false
    const cluster = [...arg.slice(1)]
    if (writeLetters.includes(cluster[0]!)) return true
    return cluster.every((ch) => knownLetters.includes(ch)) && cluster.some((ch) => writeLetters.includes(ch))
  }
  switch (sub) {
    case 'config':
      return [
        '--add',
        '--unset',
        '--unset-all',
        '--replace-all',
        '--set',
        '--rename-section',
        '--remove-section',
        '--edit',
        '-e',
        'set',
        'unset',
        'rename-section',
        'remove-section',
        'edit',
      ].includes(name)
    case 'remote':
      return ['add', 'remove', 'rm', 'set-url', 'rename', 'set-head', 'set-branches', 'prune', 'update'].includes(arg)
    case 'branch':
      return (
        [
          '--delete',
          '--move',
          '--copy',
          '--force',
          '--set-upstream-to',
          '--unset-upstream',
          '--edit-description',
          '--track',
          '--create-reflog',
        ].includes(name) || shortWrite('dDmMcCfut', 'arvlqidDmMcCfut')
      )
    case 'tag':
      return (
        ['--delete', '--annotate', '--sign', '--force', '--message', '--file', '--local-user', '--edit'].includes(
          name,
        ) || shortWrite('dasfmFue', 'lnvidasufmFe')
      )
    default:
      return false
  }
}
|
|
432
|
+
|
|
433
|
+
/**
 * Validates one `gh …` segment against the reviewer's read-only `gh` surface.
 *
 * Leading dash-words are skipped to find the object (`pr`, `issue`, `api`, …).
 * An object that is not an own key of GH_READONLY_BY_OBJECT is rejected; `api`
 * is delegated to assertGhApiReadOnly; objects marked `__any__` are accepted;
 * otherwise the first non-flag word after the object must be one of the
 * object's allowed verbs.
 *
 * The table lookup is restricted to own properties: a plain object also
 * resolves inherited keys such as `constructor` or `__proto__`, which
 * previously slipped past the "unknown object" check and failed later with a
 * TypeError instead of the policy error.
 *
 * @param tokens the segment's words, with `tokens[0]` being `gh`
 * @throws SubagentBashPolicyError when the object or verb is not read-only
 */
function classifyGh(tokens: string[]): void {
  // Find the object (pr/issue/repo/api/…) skipping global flags.
  let idx = 1
  while (idx < tokens.length && tokens[idx]!.startsWith('-')) idx++
  const objRaw = tokens[idx]
  if (objRaw === undefined) return
  const obj = stripQuotes(objRaw)
  const allowed = Object.prototype.hasOwnProperty.call(GH_READONLY_BY_OBJECT, obj)
    ? GH_READONLY_BY_OBJECT[obj]
    : undefined
  if (allowed === undefined) {
    throw new SubagentBashPolicyError(
      `gh ${obj} is not on the reviewer's read-only allowlist (it may mutate remote state).`,
    )
  }
  if (obj === 'api') {
    assertGhApiReadOnly(tokens.slice(idx + 1))
    return
  }
  if (allowed.has('__any__')) return
  // Find the verb after the object.
  let vIdx = idx + 1
  while (vIdx < tokens.length && tokens[vIdx]!.startsWith('-')) vIdx++
  const verbRaw = tokens[vIdx]
  if (verbRaw === undefined) return // bare `gh pr` — no verb to run
  const verb = stripQuotes(verbRaw)
  if (!allowed.has(verb)) {
    throw new SubagentBashPolicyError(`gh ${obj} ${verb} is not a read-only operation; the reviewer may not run it.`)
  }
}
|
|
461
|
+
|
|
462
|
+
// `gh api` does NOT always default to GET. Per `gh api --help`: "adding request
// parameters will automatically switch the request method to POST". So a field
// flag (`-f/--raw-field`, `-F/--field`) or `--input` flips the call to POST
// unless an explicit `--method GET/HEAD` overrides it. This code mirrors that
// inference and rejects the `graphql` endpoint outright.
// Separated forms (flag and value are two words, e.g. `--field body=x`).
const GH_API_BODY_FLAGS = new Set(['-f', '--field', '-F', '--raw-field', '--input', '-d', '--data'])
// Attached long forms (`--field=body=x`, `--input=/tmp/x`), matched with their
// trailing `=` so an unrelated flag sharing the same leading letters is not
// misread.
const GH_API_BODY_FLAG_PREFIXES = ['--field=', '--raw-field=', '--input=', '--data=']
// Short options that take a value; inside a bundle the remainder is that value.
const GH_API_SHORT_VALUE_FLAGS = 'HXqtp'

/**
 * Reports whether a word is a `gh api` request-body/field flag in any spelling:
 * separated, attached long, attached short (`-fbody=x`), or bundled after
 * boolean short flags (`-if`). Quotes and backslashes are removed first, since
 * the shell strips them before `gh` sees the flag (`"-f"` is still `-f`).
 */
function isGhApiBodyParam(token: string): boolean {
  const flag = token.replace(/["'\\]/g, '')
  if (GH_API_BODY_FLAGS.has(flag)) return true
  if (flag.startsWith('-f') || flag.startsWith('-F')) return true
  if (GH_API_BODY_FLAG_PREFIXES.some((p) => flag.startsWith(p))) return true
  if (flag.startsWith('-') && !flag.startsWith('--')) {
    for (const ch of flag.slice(1)) {
      if (ch === 'f' || ch === 'F') return true
      if (GH_API_SHORT_VALUE_FLAGS.includes(ch)) return false
    }
  }
  return false
}

/**
 * Accepts a `gh api` invocation only when it resolves to GET or HEAD and does
 * not target the `graphql` endpoint.
 *
 * The effective method is the last explicit `-X/--method` value if present,
 * otherwise POST when a body/field flag is present, otherwise GET. All words
 * are examined as `gh` receives them (quotes and backslashes removed).
 * Previously a quoted `"-X" POST` or a bundled `-iX DELETE` was not recognised
 * as a method flag, so the call was inferred to be a GET.
 * NOTE(review): bundling assumes `gh` parses short flags pflag-style — confirm.
 *
 * @param rest the raw words after `api`
 * @throws SubagentBashPolicyError for graphql or any non-GET/HEAD method
 */
function assertGhApiReadOnly(rest: string[]): void {
  const literal = (token: string): string => token.replace(/["'\\]/g, '')
  // Index of `X` within a short bundle, or -1 if a value-taking flag precedes it.
  const methodFlagIndex = (flag: string): number => {
    for (let k = 1; k < flag.length; k++) {
      const ch = flag[k]!
      if (ch === 'X') return k
      if (ch === 'f' || ch === 'F' || GH_API_SHORT_VALUE_FLAGS.includes(ch)) return -1
    }
    return -1
  }

  let explicitMethod: string | null = null
  let hasBodyParam = false
  let isGraphql = false
  for (let i = 0; i < rest.length; i++) {
    const t = literal(rest[i]!)
    const nextValue = literal(rest[i + 1] ?? '')
    if (t === '-X' || t === '--method') {
      explicitMethod = nextValue.toUpperCase()
      continue
    }
    if (t.startsWith('--method=')) {
      explicitMethod = t.slice('--method='.length).toUpperCase()
      continue
    }
    if (t.startsWith('-') && !t.startsWith('--')) {
      const at = methodFlagIndex(t)
      if (at !== -1) {
        // `-XPOST`, `-X=POST`, or bundled `-iX POST` (value in the next word).
        const attached = t.slice(at + 1).replace(/^=/, '')
        explicitMethod = (attached !== '' ? attached : nextValue).toUpperCase()
        continue
      }
    }
    if (isGhApiBodyParam(t)) hasBodyParam = true
    if (t === 'graphql') isGraphql = true
  }
  if (isGraphql) {
    throw new SubagentBashPolicyError(
      'gh api graphql can mutate (a `mutation` operation is a write, and a GET-shaped call can still mutate); the reviewer may not use the graphql endpoint.',
    )
  }
  const method = explicitMethod ?? (hasBodyParam ? 'POST' : 'GET')
  if (method !== 'GET' && method !== 'HEAD') {
    throw new SubagentBashPolicyError(
      `gh api resolves to ${method} (explicit or inferred from request parameters), which mutates remote state; the reviewer may only GET/HEAD.`,
    )
  }
}
|
|
517
|
+
|
|
518
|
+
/**
 * Accepts a filesystem-writing verb (`rm`, `cp`, `tee`, …) only when every
 * path it can write is under /tmp.
 *
 * Checked paths are: every non-flag operand (at least one is required), every
 * redirect target, and any value hidden inside a flag — the part after `=` in
 * a long flag (`--target-directory=/agent`) and the text attached to `t` in a
 * short cluster (`-t/agent`). Flags used to be skipped wholesale, so
 * `cp --target-directory=/agent /tmp/a` passed while writing outside /tmp.
 * Paths containing a `..` segment are rejected even when they start with /tmp.
 * This deliberately over-rejects harmless flag values (`--mode=755`).
 *
 * @param verb the command name, used in the error message
 * @param tokens the segment's words, with `tokens[0]` being the verb
 * @param redirectTargets the segment's redirect targets
 * @throws SubagentBashPolicyError when a write target is not provably under /tmp
 */
function classifyFsWriter(verb: string, tokens: string[], redirectTargets: string[]): void {
  // The word as the program receives it, after the shell removes quoting.
  const literal = (token: string): string => token.replace(/["'\\]/g, '')
  const staysUnderTmp = (path: string): boolean => isTmpPath(path) && !literal(path).split('/').includes('..')

  const writePaths: string[] = [...redirectTargets]
  let operandCount = 0
  for (const token of tokens.slice(1)) {
    if (!token.startsWith('-')) {
      operandCount++
      writePaths.push(token)
      continue
    }
    const flag = literal(token)
    if (flag.startsWith('--')) {
      const eq = flag.indexOf('=')
      if (eq !== -1) writePaths.push(flag.slice(eq + 1))
      continue
    }
    // `-t<dir>` (target directory) attached inside a short cluster.
    const t = flag.indexOf('t', 1)
    if (t !== -1 && t < flag.length - 1) writePaths.push(flag.slice(t + 1))
  }
  if (operandCount === 0 || !writePaths.every(staysUnderTmp)) {
    throw new SubagentBashPolicyError(
      `${verb} may only write under /tmp for the reviewer; a non-/tmp path operand is not permitted.`,
    )
  }
}
|
|
527
|
+
|
|
528
|
+
/**
 * Applies the reviewer policy to one command segment.
 *
 * Order of checks: every redirect target must be under /tmp; an empty segment
 * is accepted; a forbidden wrapper verb is rejected; `git` and `gh` go to their
 * own classifiers; filesystem writers and `tee` are path-checked; `sed` with a
 * write flag is rejected; verbs in READONLY_VERBS are accepted; everything
 * else is rejected (default deny).
 *
 * @throws SubagentBashPolicyError when the segment is not permitted
 */
function classifySegment(segment: Segment): void {
  // A redirect to any non-/tmp path is a write to the persistent tree.
  const escaping = segment.redirectTargets.find((target) => !isTmpPath(target))
  if (escaping !== undefined) {
    throw new SubagentBashPolicyError(
      `redirect to ${stripQuotes(escaping)} writes outside /tmp, which the read-only reviewer may not do.`,
    )
  }

  const [head] = segment.tokens
  if (head === undefined) return
  const verb = stripQuotes(head)

  if (FORBIDDEN_WRAPPER_VERBS.has(verb)) {
    throw new SubagentBashPolicyError(`\`${verb}\` can re-enter a shell or hand off execution; it is not permitted.`)
  }
  switch (verb) {
    case 'git':
      classifyGit(segment.tokens)
      return
    case 'gh':
      classifyGh(segment.tokens)
      return
  }
  if (FS_WRITERS.has(verb) || verb === 'tee') {
    classifyFsWriter(verb, segment.tokens, segment.redirectTargets)
    return
  }
  if (verb === 'sed' && hasWriteFlag(segment.tokens)) {
    throw new SubagentBashPolicyError('sed -i edits files in place; the reviewer is read-only.')
  }
  if (!READONLY_VERBS.has(verb)) {
    // Package managers and anything unknown: unknown verbs are the most likely
    // bypass channel, so fail closed.
    throw new SubagentBashPolicyError(
      `\`${verb}\` is not on the reviewer's read-only command allowlist; refusing to run it.`,
    )
  }
}
|
|
558
|
+
|
|
559
|
+
/**
 * Enforces the read-only reviewer policy on a full bash command line.
 *
 * A non-string or blank command is accepted as-is. Otherwise the command is
 * rejected if it contains any fail-closed shell construct; if not, it is split
 * into segments and each segment is classified in order.
 *
 * @throws SubagentBashPolicyError on the first violation found
 */
export function enforceReviewerReadonlyBashPolicy(command: string): void {
  const isBlank = typeof command !== 'string' || command.trim().length === 0
  if (isBlank) return

  const offending = FAIL_CLOSED_CONSTRUCTS.find(({ pattern }) => pattern.test(command))
  if (offending !== undefined) {
    throw new SubagentBashPolicyError(
      `command uses ${offending.reason}, which the reviewer policy cannot analyze safely.`,
    )
  }

  splitIntoSegments(command).forEach((segment) => classifySegment(segment))
}
|
|
569
|
+
|
|
570
|
+
/**
 * Enforces a subagent's bash policy on a command, dispatching on `policy.kind`.
 *
 * An unrecognised kind now fails closed. Previously it fell through without
 * enforcing anything, so a policy object carrying an unexpected `kind` at
 * runtime (e.g. from untyped configuration) left bash unrestricted.
 *
 * @param policy the policy attached to the subagent session
 * @param command the raw bash command line
 * @throws SubagentBashPolicyError when the command violates the policy or the
 *   policy kind is unknown
 */
export function enforceSubagentBashPolicy(policy: SubagentBashPolicy, command: string): void {
  switch (policy.kind) {
    case 'readonly-reviewer':
      enforceReviewerReadonlyBashPolicy(command)
      return
    default: {
      // Compile-time exhaustiveness check: adding a kind without a case is a type error.
      const unknownKind: never = policy.kind
      throw new SubagentBashPolicyError(
        `unknown subagent bash policy kind ${JSON.stringify(unknownKind)}; refusing to run bash without a recognized policy.`,
      )
    }
  }
}
|
|
@@ -129,14 +129,34 @@ type PlatformInfo = {
|
|
|
129
129
|
// the call would no-op. Keep in sync with the adapters that call
|
|
130
130
|
// `router.registerReaction` (github, slack-bot, discord-bot today).
|
|
131
131
|
supportsReactions: boolean
|
|
132
|
+
// Whether this adapter's OutboundCallback accepts file attachments. Gates the
|
|
133
|
+
// "ship a researcher report as a PDF by default" prompt guidance: a report is
|
|
134
|
+
// only worth converting to a downloadable file on channels that can actually
|
|
135
|
+
// receive one. GitHub's outbound callback hard-rejects attachments
|
|
136
|
+
// (`github-bot-does-not-support-attachments` in adapters/github/outbound.ts),
|
|
137
|
+
// so a PDF nudge there would train the model toward a call that always fails;
|
|
138
|
+
// the other four upload files (Slack `uploadFile`, Discord `uploadFile`,
|
|
139
|
+
// Telegram `sendDocument`, KakaoTalk `sendAttachment`). Keep in sync with the
|
|
140
|
+
// adapters' outbound callbacks.
|
|
141
|
+
supportsAttachments: boolean
|
|
132
142
|
}
|
|
133
143
|
|
|
134
144
|
const PLATFORM_INFO: Record<AdapterId, PlatformInfo> = {
|
|
135
|
-
'slack-bot': { displayName: 'Slack', mentionMode: 'angle-id', supportsReactions: true },
|
|
136
|
-
'discord-bot': {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
145
|
+
'slack-bot': { displayName: 'Slack', mentionMode: 'angle-id', supportsReactions: true, supportsAttachments: true },
|
|
146
|
+
'discord-bot': {
|
|
147
|
+
displayName: 'Discord',
|
|
148
|
+
mentionMode: 'angle-id',
|
|
149
|
+
supportsReactions: true,
|
|
150
|
+
supportsAttachments: true,
|
|
151
|
+
},
|
|
152
|
+
github: { displayName: 'GitHub', mentionMode: 'at-username', supportsReactions: true, supportsAttachments: false },
|
|
153
|
+
'telegram-bot': {
|
|
154
|
+
displayName: 'Telegram',
|
|
155
|
+
mentionMode: 'at-username',
|
|
156
|
+
supportsReactions: false,
|
|
157
|
+
supportsAttachments: true,
|
|
158
|
+
},
|
|
159
|
+
kakaotalk: { displayName: 'KakaoTalk', mentionMode: 'alias', supportsReactions: false, supportsAttachments: true },
|
|
140
160
|
}
|
|
141
161
|
|
|
142
162
|
function getPlatformInfo(adapter: AdapterId): PlatformInfo {
|
|
@@ -461,6 +481,7 @@ function renderChannelOrigin(
|
|
|
461
481
|
"matching the channel's `allow` rules are accepted (the tool returns",
|
|
462
482
|
'`{ ok: false }` otherwise).',
|
|
463
483
|
'',
|
|
484
|
+
...renderResearchReportDeliveryGuidance(platformInfo),
|
|
464
485
|
...renderMentionGuidance(platformInfo, origin.participants ?? [], now, origin.self),
|
|
465
486
|
)
|
|
466
487
|
|
|
@@ -496,6 +517,30 @@ function renderMembershipSummary(
|
|
|
496
517
|
return `This channel has approximately ${total} members (about ${membership.humans} humans, ${membership.bots} bots — the bot count is approximate, the full member list was not enumerated because it exceeds the 50-member cap). The 10 most recent speakers are listed below.`
|
|
497
518
|
}
|
|
498
519
|
|
|
520
|
+
// The `researcher` subagent always hands back a markdown report file
|
|
521
|
+
// (`research-<slug>.md`) and is itself read-only — it cannot produce the PDF.
|
|
522
|
+
// Whoever delivers that report to a channel is the one who decides the format,
|
|
523
|
+
// and on a channel that accepts file uploads the right default for a multi-page
|
|
524
|
+
// research report is a downloadable PDF, not a wall of raw markdown dumped into
|
|
525
|
+
// chat. This block makes that the default ONLY where it is actionable: gated on
|
|
526
|
+
// `supportsAttachments` so GitHub (whose outbound callback rejects attachments)
|
|
527
|
+
// never gets a nudge toward a `channel_send` call that would fail.
|
|
528
|
+
function renderResearchReportDeliveryGuidance(platformInfo: PlatformInfo): string[] {
|
|
529
|
+
if (!platformInfo.supportsAttachments) return []
|
|
530
|
+
return [
|
|
531
|
+
`**Ship \`researcher\` reports as a PDF by default.** ${platformInfo.displayName} accepts file`,
|
|
532
|
+
'attachments. When you receive a `researcher` subagent result — a',
|
|
533
|
+
'`research-<slug>.md` report file path in its `<report>` block — convert that',
|
|
534
|
+
'markdown to a PDF with the `typeclaw-markdown-pdf` skill and deliver it with',
|
|
535
|
+
'`channel_send({ ..., attachments: [{ path, filename }] })`, with a one- or',
|
|
536
|
+
'two-line summary as the message text. A downloadable file is what a human',
|
|
537
|
+
'wants for a multi-page report; do not paste the full markdown into chat. Send',
|
|
538
|
+
'the report inline as plain text only if the caller explicitly asked for it in',
|
|
539
|
+
'the message, or the report is short enough that a file would be overkill.',
|
|
540
|
+
'',
|
|
541
|
+
]
|
|
542
|
+
}
|
|
543
|
+
|
|
499
544
|
function renderMentionGuidance(
|
|
500
545
|
platformInfo: PlatformInfo,
|
|
501
546
|
participants: readonly ChannelParticipant[],
|
package/src/agent/subagents.ts
CHANGED
|
@@ -6,6 +6,7 @@ import type { Stream, Unsubscribe } from '@/stream'
|
|
|
6
6
|
|
|
7
7
|
import { type AgentSession, createSession } from './index'
|
|
8
8
|
import { subscribeProviderErrors } from './provider-error'
|
|
9
|
+
import type { SubagentBashPolicy } from './reviewer-bash-policy'
|
|
9
10
|
import type { SessionOrigin } from './session-origin'
|
|
10
11
|
import {
|
|
11
12
|
beginSubagentDrainWatch,
|
|
@@ -88,6 +89,13 @@ export type SubagentShared<P = unknown> = {
|
|
|
88
89
|
// hangs" symptom. Omit for no ceiling (legacy behavior; the spawn waits
|
|
89
90
|
// as long as the provider takes).
|
|
90
91
|
timeoutMs?: number
|
|
92
|
+
// Per-subagent bash capability restriction, enforced at the bash-wrap site
|
|
93
|
+
// INDEPENDENT of the caller's role (unlike the role-derived bwrap sandbox,
|
|
94
|
+
// which returns early for trusted/owner). A read-only subagent declares this
|
|
95
|
+
// to fence its `bash` to read-only commands even when spawned by a privileged
|
|
96
|
+
// caller. See `src/agent/reviewer-bash-policy.ts`. Omit for no restriction
|
|
97
|
+
// (the historical contract — prompt-only enforcement).
|
|
98
|
+
bashPolicy?: SubagentBashPolicy
|
|
91
99
|
}
|
|
92
100
|
|
|
93
101
|
export type Subagent<P = unknown> = SubagentShared<P> & {
|
|
@@ -155,6 +163,7 @@ export const defaultCreateSessionForSubagent: CreateSessionForSubagent = (subage
|
|
|
155
163
|
customTools: subagent.customTools ?? [],
|
|
156
164
|
...(subagent.profile !== undefined ? { profile: subagent.profile } : {}),
|
|
157
165
|
...(subagent.toolResultBudget !== undefined ? { toolResultBudget: subagent.toolResultBudget } : {}),
|
|
166
|
+
...(subagent.bashPolicy !== undefined ? { bashPolicy: subagent.bashPolicy } : {}),
|
|
158
167
|
})
|
|
159
168
|
|
|
160
169
|
type NormalizedSubagentSession = {
|
|
@@ -1,14 +1,28 @@
|
|
|
1
1
|
import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
|
|
2
2
|
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
|
|
3
|
+
// Raw latest-decisive state. DISMISSED is kept DISTINCT from NONE on purpose: a
|
|
4
|
+
// genuine dismissal means a fresh same-verdict re-review is legitimate and must
|
|
5
|
+
// NOT be shadowed by the read-after-write-lag cache (which only overrides a bare
|
|
6
|
+
// NONE — "GitHub shows no decisive review, but we just landed one"). Collapsing
|
|
7
|
+
// DISMISSED into NONE would let the lag cache re-strand a dismiss-then-reapprove,
|
|
8
|
+
// the exact failure 35287f99 removed.
|
|
9
|
+
export type EffectiveVerdict = 'APPROVED' | 'CHANGES_REQUESTED' | 'DISMISSED' | 'NONE'
|
|
6
10
|
|
|
7
11
|
export type EffectiveApprovalResolver = (target: {
|
|
8
12
|
workspace: string
|
|
9
13
|
prNumber: number
|
|
10
14
|
}) => Promise<{ ok: true; effective: EffectiveVerdict } | { ok: false }>
|
|
11
15
|
|
|
16
|
+
// Resolves the PR's current head commit SHA. Called twice: once in guard() (the
|
|
17
|
+
// pre-submit head, resolved AFTER the in-flight lease so the await cannot widen the
|
|
18
|
+
// reserve-before-await race) and once in release() (the post-submit head, to detect
|
|
19
|
+
// a push that landed during the review). Fails soft (null). A null PRE-submit head
|
|
20
|
+
// skips the cache write entirely — the guard falls open to GitHub rather than ever
|
|
21
|
+
// stranding a genuine verdict on local memory. A null POST-submit head (or one that
|
|
22
|
+
// differs from the pre-submit head) is recorded as the uncertainty sentinel so a
|
|
23
|
+
// push-during-review still blocks a same-verdict duplicate for the lag window.
|
|
24
|
+
export type HeadShaResolver = (target: { workspace: string; prNumber: number }) => Promise<string | null>
|
|
25
|
+
|
|
12
26
|
export type ApproveBlock = { block: true; reason: string }
|
|
13
27
|
|
|
14
28
|
export type ReviewVerdictGuard = {
|
|
@@ -18,7 +32,7 @@ export type ReviewVerdictGuard = {
|
|
|
18
32
|
prNumber: number
|
|
19
33
|
verdict: ReviewVerdict
|
|
20
34
|
}) => Promise<ApproveBlock | null>
|
|
21
|
-
release: (args: { callId: string; succeeded: boolean }) => void
|
|
35
|
+
release: (args: { callId: string; succeeded: boolean }) => Promise<void>
|
|
22
36
|
}
|
|
23
37
|
|
|
24
38
|
// Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
|
|
@@ -55,7 +69,32 @@ function duplicatesStanding(verdict: ReviewVerdict, effective: EffectiveVerdict)
|
|
|
55
69
|
// never strand a PR for long.
|
|
56
70
|
const LEASE_TTL_MS = 5 * 60_000
|
|
57
71
|
|
|
58
|
-
|
|
72
|
+
// How long a just-landed verdict is trusted to explain a GitHub `NONE` as
|
|
73
|
+
// read-after-write lag rather than a genuine absence. GitHub's `/pulls/<n>/reviews`
|
|
74
|
+
// list lags a write by up to ~10s, so a second engagement turn firing in that
|
|
75
|
+
// window reads NONE and would land a duplicate. Observed duplicates were ~10-18s
|
|
76
|
+
// apart; 60s is a comfortable lag margin without making a legitimate re-verdict
|
|
77
|
+
// wait long. This window only shadows a raw NONE on the SAME verdict (+ same or
|
|
78
|
+
// uncertain head) — a DISMISSED/CHANGES_REQUESTED/flipped-verdict all bypass it.
|
|
79
|
+
const RECENT_LANDED_TTL_MS = 60_000
|
|
80
|
+
|
|
81
|
+
type Reservation = {
|
|
82
|
+
key: string
|
|
83
|
+
token: number
|
|
84
|
+
createdAt: number
|
|
85
|
+
headSha: string | null
|
|
86
|
+
verdict: ReviewVerdict
|
|
87
|
+
workspace: string
|
|
88
|
+
prNumber: number
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// headSha === null is the UNCERTAINTY sentinel: the command succeeded but the head
|
|
92
|
+
// the review actually attached to is unknown (the PR head advanced between the
|
|
93
|
+
// pre-submit capture and the write, or the post-submit re-resolve failed). A null
|
|
94
|
+
// record matches any current head for the window — same verdict + raw NONE only —
|
|
95
|
+
// so a push-during-review cannot let a same-verdict duplicate slip past on the new
|
|
96
|
+
// head. A resolved string keys precise same-head matching for the normal case.
|
|
97
|
+
type LandedVerdict = { verdict: ReviewVerdict; headSha: string | null; landedAt: number }
|
|
59
98
|
|
|
60
99
|
// MODULE-LEVEL singletons, shared by every plugin instance in this process. The
|
|
61
100
|
// github-cli-auth plugin's `plugin: async (ctx) => ...` factory may run once per
|
|
@@ -65,10 +104,11 @@ type Reservation = { key: string; token: number; createdAt: number }
|
|
|
65
104
|
// three sessions each landed an APPROVE on the same PR within ten seconds.
|
|
66
105
|
const inFlightByPr = new Map<string, Reservation>()
|
|
67
106
|
const reservationByCall = new Map<string, Reservation>()
|
|
107
|
+
const recentLandedByPr = new Map<string, LandedVerdict>()
|
|
68
108
|
let tokenSeq = 0
|
|
69
109
|
|
|
70
110
|
// Makes a formal `gh ... event=APPROVE|REQUEST_CHANGES` idempotent per PR across
|
|
71
|
-
// turns, sessions, and (in-process) concurrent fan-out.
|
|
111
|
+
// turns, sessions, and (in-process) concurrent fan-out. Three layers, in order:
|
|
72
112
|
//
|
|
73
113
|
// 1. A process-wide in-flight lease keyed by `workspace#prNumber`, held from
|
|
74
114
|
// tool.before through tool.after. While one verdict is mid-flight, every
|
|
@@ -77,12 +117,25 @@ let tokenSeq = 0
|
|
|
77
117
|
// closure-local Set could not provide: separate plugin instances meant
|
|
78
118
|
// separate Sets, so concurrent sessions never saw each other.
|
|
79
119
|
//
|
|
80
|
-
// 2. The authoritative GitHub effective-state read, consulted AFTER the lease
|
|
81
|
-
// is
|
|
82
|
-
//
|
|
83
|
-
//
|
|
84
|
-
//
|
|
85
|
-
//
|
|
120
|
+
// 2. The authoritative GitHub effective-state read, consulted AFTER the lease.
|
|
121
|
+
// It is the SOLE source of truth for a standing verdict and for supersession:
|
|
122
|
+
// a later CHANGES_REQUESTED/DISMISSED demotes an earlier APPROVED, so a
|
|
123
|
+
// genuine re-verdict is allowed (the 35287f99 invariant — never block a
|
|
124
|
+
// re-verdict on stale LOCAL memory). A standing same verdict blocks; DISMISSED
|
|
125
|
+
// and the opposite decisive verdict pass. Reads fail OPEN.
|
|
126
|
+
//
|
|
127
|
+
// 3. A read-after-write-lag shield, consulted ONLY when layer 2 returns a raw
|
|
128
|
+
// NONE. The lease (layer 1) covers two OVERLAPPING in-flight commands, but a
|
|
129
|
+
// second engagement turn ~10s later starts after the first's lease released,
|
|
130
|
+
// and GitHub's reviews list still lags the write (reports NONE). A short-lived
|
|
131
|
+
// `recentLandedByPr` record — same verdict + (same OR uncertain head), written
|
|
132
|
+
// on a succeeded release, RECENT_LANDED_TTL_MS — disambiguates "NONE because
|
|
133
|
+
// lag" from "NONE because genuinely absent": only the former blocks. The head
|
|
134
|
+
// is re-resolved at release time; if the PR head advanced during the submit the
|
|
135
|
+
// record stores a null head (uncertainty), which matches the current head so a
|
|
136
|
+
// push-during-review cannot leak a duplicate. Because it fires after a raw
|
|
137
|
+
// NONE, a real DISMISSED/CHANGES_REQUESTED already allowed the re-verdict at
|
|
138
|
+
// layer 2, so this cannot re-strand a supersession.
|
|
86
139
|
//
|
|
87
140
|
// The lease is released only in release() (tool.after) or on a terminal block,
|
|
88
141
|
// never after the remote read — releasing early reopens the TOCTOU the lease
|
|
@@ -90,6 +143,7 @@ let tokenSeq = 0
|
|
|
90
143
|
// tool.after for a superseded reservation cannot drop a newer session's lease.
|
|
91
144
|
export function createApproveIdempotencyGuard(deps: {
|
|
92
145
|
resolveEffectiveApproval: EffectiveApprovalResolver
|
|
146
|
+
resolveHeadSha?: HeadShaResolver
|
|
93
147
|
now?: () => number
|
|
94
148
|
}): ReviewVerdictGuard {
|
|
95
149
|
const now = deps.now ?? Date.now
|
|
@@ -107,10 +161,27 @@ export function createApproveIdempotencyGuard(deps: {
|
|
|
107
161
|
if (held !== undefined && now() - held.createdAt < LEASE_TTL_MS) {
|
|
108
162
|
return { block: true, reason: CONCURRENT_REASON }
|
|
109
163
|
}
|
|
110
|
-
const reservation: Reservation = {
|
|
164
|
+
const reservation: Reservation = {
|
|
165
|
+
key,
|
|
166
|
+
token: ++tokenSeq,
|
|
167
|
+
createdAt: now(),
|
|
168
|
+
headSha: null,
|
|
169
|
+
verdict: args.verdict,
|
|
170
|
+
workspace: args.workspace,
|
|
171
|
+
prNumber: args.prNumber,
|
|
172
|
+
}
|
|
111
173
|
inFlightByPr.set(key, reservation)
|
|
112
174
|
reservationByCall.set(args.callId, reservation)
|
|
113
175
|
|
|
176
|
+
// Resolve the head SHA only AFTER the lease is held, so this await cannot
|
|
177
|
+
// widen the reserve-before-await race the lease closes above.
|
|
178
|
+
const headSha = (await deps.resolveHeadSha?.({ workspace: args.workspace, prNumber: args.prNumber })) ?? null
|
|
179
|
+
reservation.headSha = headSha
|
|
180
|
+
|
|
181
|
+
// Layer 2: GitHub is the authoritative, sole source of truth for a standing
|
|
182
|
+
// verdict. A standing same verdict is a real duplicate; DISMISSED and the
|
|
183
|
+
// opposite decisive verdict are genuine supersessions that must pass here
|
|
184
|
+
// (the 35287f99 invariant). A read error fails OPEN.
|
|
114
185
|
const remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
|
|
115
186
|
if (remote.ok && duplicatesStanding(args.verdict, remote.effective)) {
|
|
116
187
|
// Standing verdict upstream already matches. Block, and release the lease
|
|
@@ -121,17 +192,62 @@ export function createApproveIdempotencyGuard(deps: {
|
|
|
121
192
|
return { block: true, reason: duplicateReason(args.verdict) }
|
|
122
193
|
}
|
|
123
194
|
|
|
195
|
+
// Layer 3: only a raw NONE from a successful read is ambiguous — it can mean
|
|
196
|
+
// "no review" or "our just-landed review not yet indexed". A recent same
|
|
197
|
+
// verdict on the same (or uncertain) head resolves it to lag and blocks the duplicate. Any
|
|
198
|
+
// non-NONE state was already decided above, so this never overrides a supersession.
|
|
199
|
+
if (remote.ok && remote.effective === 'NONE' && recentlyLandedSame(key, args.verdict, headSha, now)) {
|
|
200
|
+
releaseReservation(args.callId, reservation)
|
|
201
|
+
return { block: true, reason: duplicateReason(args.verdict) }
|
|
202
|
+
}
|
|
203
|
+
|
|
124
204
|
return null
|
|
125
205
|
},
|
|
126
206
|
|
|
127
|
-
release(args): void {
|
|
207
|
+
async release(args): Promise<void> {
|
|
128
208
|
const reservation = reservationByCall.get(args.callId)
|
|
129
209
|
if (reservation === undefined) return
|
|
130
|
-
|
|
210
|
+
try {
|
|
211
|
+
// The pre-submit head can go stale: if the PR head advanced between the
|
|
212
|
+
// guard() capture and the review landing, GitHub attaches the review to the
|
|
213
|
+
// NEWER head while reservation.headSha holds the older one. Re-resolve the
|
|
214
|
+
// head AFTER a successful submit and store what we can prove: the resolved
|
|
215
|
+
// head only when pre==post, else the null uncertainty sentinel (matches any
|
|
216
|
+
// current head for the lag window) so a push-during-review cannot let a
|
|
217
|
+
// same-verdict duplicate slip past on the new head. The lease stays held
|
|
218
|
+
// across this await (finally below), so the window is not reopened.
|
|
219
|
+
if (args.succeeded && reservation.headSha !== null) {
|
|
220
|
+
const postHeadSha =
|
|
221
|
+
(await deps.resolveHeadSha?.({ workspace: reservation.workspace, prNumber: reservation.prNumber })) ?? null
|
|
222
|
+
const landedHeadSha = postHeadSha !== null && postHeadSha === reservation.headSha ? postHeadSha : null
|
|
223
|
+
recentLandedByPr.set(reservation.key, {
|
|
224
|
+
verdict: reservation.verdict,
|
|
225
|
+
headSha: landedHeadSha,
|
|
226
|
+
landedAt: now(),
|
|
227
|
+
})
|
|
228
|
+
}
|
|
229
|
+
} finally {
|
|
230
|
+
releaseReservation(args.callId, reservation)
|
|
231
|
+
}
|
|
131
232
|
},
|
|
132
233
|
}
|
|
133
234
|
}
|
|
134
235
|
|
|
236
|
+
// True only when a recently-landed record proves the GitHub NONE is read lag: same
|
|
237
|
+
// verdict, within the window, AND the heads agree. Head agreement holds when the
|
|
238
|
+
// stored head equals the current head, OR the stored head is the null uncertainty
|
|
239
|
+
// sentinel (the landed commit could not be pinned, so it conservatively matches the
|
|
240
|
+
// current head for the window). A flipped verdict or an expired/absent record
|
|
241
|
+
// returns false so the genuine re-verdict passes; a different KNOWN head also
|
|
242
|
+
// returns false so a real new push is never blocked.
|
|
243
|
+
function recentlyLandedSame(key: string, verdict: ReviewVerdict, headSha: string | null, now: () => number): boolean {
|
|
244
|
+
const landed = recentLandedByPr.get(key)
|
|
245
|
+
if (landed === undefined) return false
|
|
246
|
+
if (now() - landed.landedAt >= RECENT_LANDED_TTL_MS) return false
|
|
247
|
+
if (verdict !== landed.verdict) return false
|
|
248
|
+
return landed.headSha === null || landed.headSha === headSha
|
|
249
|
+
}
|
|
250
|
+
|
|
135
251
|
// Drop the lease only if THIS reservation still owns the key. A stale tool.after
|
|
136
252
|
// for a reservation that was already superseded (e.g. reclaimed after TTL by a
|
|
137
253
|
// newer session) must not yank the live session's lease.
|
|
@@ -151,5 +267,6 @@ function prKey(workspace: string, prNumber: number): string {
|
|
|
151
267
|
export function __resetReviewVerdictGuardForTest(): void {
|
|
152
268
|
inFlightByPr.clear()
|
|
153
269
|
reservationByCall.clear()
|
|
270
|
+
recentLandedByPr.clear()
|
|
154
271
|
tokenSeq = 0
|
|
155
272
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
|
|
2
2
|
|
|
3
|
-
import type { EffectiveApprovalResolver, EffectiveVerdict } from './approve-idempotency'
|
|
3
|
+
import type { EffectiveApprovalResolver, EffectiveVerdict, HeadShaResolver } from './approve-idempotency'
|
|
4
4
|
|
|
5
5
|
// Resolves THIS bot's standing decisive review on a PR, used by the review
|
|
6
6
|
// verdict guard to stop a second formal verdict after a restart (the in-process
|
|
@@ -30,9 +30,40 @@ export function createGithubEffectiveApprovalResolver(deps: {
|
|
|
30
30
|
}
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
// Reads the PR's current head commit SHA from `GET /pulls/<n>` (`head.sha`), the
|
|
34
|
+
// strongly-consistent single-object endpoint — NOT the eventually-consistent
|
|
35
|
+
// reviews list the duplicate bug rode in on. Returns null on any failure so the
|
|
36
|
+
// landed-verdict cache degrades safely rather than stranding: a null pre-submit head skips the cache write; a null post-submit head stores the match-any-head sentinel.
|
|
37
|
+
export function createGithubHeadShaResolver(deps: {
|
|
38
|
+
resolveToken: (workspace: string) => Promise<string | null>
|
|
39
|
+
fetchImpl?: typeof fetch
|
|
40
|
+
}): HeadShaResolver {
|
|
41
|
+
const fetchImpl = deps.fetchImpl ?? fetch
|
|
42
|
+
return async ({ workspace, prNumber }) => {
|
|
43
|
+
const [owner, repo] = workspace.split('/')
|
|
44
|
+
if (owner === undefined || owner === '' || repo === undefined || repo === '') return null
|
|
45
|
+
const token = await deps.resolveToken(workspace).catch(() => null)
|
|
46
|
+
if (token === null || token === '') return null
|
|
47
|
+
try {
|
|
48
|
+
const url = `${GITHUB_API_BASE}/repos/${owner}/${repo}/pulls/${prNumber}`
|
|
49
|
+
const response = await fetchImpl(url, { headers: githubJsonHeaders(token) })
|
|
50
|
+
if (!response.ok) return null
|
|
51
|
+
const raw = (await response.json().catch(() => null)) as { head?: { sha?: unknown } } | null
|
|
52
|
+
const sha = raw?.head?.sha
|
|
53
|
+
return typeof sha === 'string' && sha !== '' ? sha : null
|
|
54
|
+
} catch {
|
|
55
|
+
return null
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// DISMISSED is surfaced distinctly (not collapsed to NONE) so the verdict guard's
|
|
61
|
+
// lag shield can tell a genuine dismissal — which legitimately allows a same-verdict
|
|
62
|
+
// re-review — apart from a bare NONE that may just be an unindexed just-landed write.
|
|
33
63
|
function toEffective(state: string | undefined): EffectiveVerdict {
|
|
34
64
|
if (state === 'APPROVED') return 'APPROVED'
|
|
35
65
|
if (state === 'CHANGES_REQUESTED') return 'CHANGES_REQUESTED'
|
|
66
|
+
if (state === 'DISMISSED') return 'DISMISSED'
|
|
36
67
|
return 'NONE'
|
|
37
68
|
}
|
|
38
69
|
|
|
@@ -2,7 +2,7 @@ import { TYPECLAW_INTERNAL_BASH_ENV } from '@/agent/plugin-tools'
|
|
|
2
2
|
import { definePlugin } from '@/plugin'
|
|
3
3
|
|
|
4
4
|
import { createApproveIdempotencyGuard } from './approve-idempotency'
|
|
5
|
-
import { createGithubEffectiveApprovalResolver } from './effective-approval'
|
|
5
|
+
import { createGithubEffectiveApprovalResolver, createGithubHeadShaResolver } from './effective-approval'
|
|
6
6
|
import { analyzeGhCommand } from './gh-command'
|
|
7
7
|
import { checkGraphqlAuthNudge } from './graphql-auth-nudge'
|
|
8
8
|
import { commitReviewIfSucceeded, noteReviewCommand } from './review-recorder'
|
|
@@ -11,13 +11,13 @@ import { classifyGhToken } from './token-class'
|
|
|
11
11
|
export default definePlugin({
|
|
12
12
|
plugin: async (ctx) => {
|
|
13
13
|
const resolveTokenForRepo = ctx.github.resolveTokenForRepo
|
|
14
|
+
const resolveToken = async (workspace: string) => {
|
|
15
|
+
const result = await resolveTokenForRepo(workspace)
|
|
16
|
+
return result.kind === 'token' ? result.token : null
|
|
17
|
+
}
|
|
14
18
|
const verdictGuard = createApproveIdempotencyGuard({
|
|
15
|
-
resolveEffectiveApproval: createGithubEffectiveApprovalResolver({
|
|
16
|
-
|
|
17
|
-
const result = await resolveTokenForRepo(workspace)
|
|
18
|
-
return result.kind === 'token' ? result.token : null
|
|
19
|
-
},
|
|
20
|
-
}),
|
|
19
|
+
resolveEffectiveApproval: createGithubEffectiveApprovalResolver({ resolveToken }),
|
|
20
|
+
resolveHeadSha: createGithubHeadShaResolver({ resolveToken }),
|
|
21
21
|
})
|
|
22
22
|
return {
|
|
23
23
|
hooks: {
|
|
@@ -70,7 +70,7 @@ export default definePlugin({
|
|
|
70
70
|
callId: event.callId,
|
|
71
71
|
result: event.result,
|
|
72
72
|
})
|
|
73
|
-
verdictGuard.release({ callId: event.callId, succeeded: committed })
|
|
73
|
+
await verdictGuard.release({ callId: event.callId, succeeded: committed })
|
|
74
74
|
},
|
|
75
75
|
},
|
|
76
76
|
}
|
|
@@ -75,12 +75,14 @@ Write to \`public/\` instead of \`workspace/\` when your resolved role lacks \`f
|
|
|
75
75
|
)
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
// Resolve ONLY the canonical dir `parent` lexically matched above. `public/`
|
|
79
|
+
// is optional (created only for guest-readable output), so an unconditional
|
|
80
|
+
// `realpath('<agent>/public')` throws ENOENT on agents that never made it,
|
|
81
|
+
// which would reject every valid write to `workspace/`. The symlink-escape
|
|
82
|
+
// defense is unchanged — the parent actually written to is still canonicalized.
|
|
83
|
+
const canonicalDir = parent === workspaceDir ? workspaceDir : publicDir
|
|
84
|
+
const [realParent, realCanonical] = await Promise.all([realpath(parent), realpath(canonicalDir)])
|
|
85
|
+
if (realParent !== realCanonical) {
|
|
84
86
|
throw new Error(`Report parent directory resolves outside the allowed report directories: ${parent}.`)
|
|
85
87
|
}
|
|
86
88
|
|
|
@@ -53,12 +53,13 @@ export const REVIEWER_SKILLS: readonly LoadableSkill[] = [
|
|
|
53
53
|
// src/agent/subagents.ts `timeoutMs`.
|
|
54
54
|
export const REVIEWER_SPAWN_TIMEOUT_MS = 600_000
|
|
55
55
|
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
//
|
|
59
|
-
//
|
|
60
|
-
//
|
|
61
|
-
//
|
|
56
|
+
// The reviewer's read-only contract is enforced in depth: this system prompt
|
|
57
|
+
// states it, the global bash guards (`secret-exfil-bash`, `git-exfil`) catch
|
|
58
|
+
// exfil, AND `bashPolicy: { kind: 'readonly-reviewer' }` (set on the subagent
|
|
59
|
+
// below) hard-blocks any mutating `bash` command at the wrap site regardless of
|
|
60
|
+
// the spawning role — git commit/push/add, gh pr merge/review/comment, writes
|
|
61
|
+
// outside /tmp, package installs, and shell constructs that defeat static
|
|
62
|
+
// analysis. See `src/agent/reviewer-bash-policy.ts` (issue #452).
|
|
62
63
|
export const REVIEWER_SYSTEM_PROMPT = `You are a review specialist running inside TypeClaw. Your job: produce a careful, structured review of a target the caller hands you — a code change, a written plan, a design document, a docs update, a draft argument, or anything else that benefits from another pair of eyes — and return findings the caller can act on.
|
|
63
64
|
|
|
64
65
|
You exist to do what \`explorer\` and \`scout\` cannot: deep, model-heavy analysis. Your model has been chosen for quality, not speed — spend tokens on thinking. Read carefully. Cross-check. Form a real opinion.
|
|
@@ -70,6 +71,8 @@ You are STRICTLY PROHIBITED from:
|
|
|
70
71
|
- Pushing, merging, rebasing, or otherwise mutating remote state
|
|
71
72
|
- Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, git push, git rebase, git reset, npm install, pip install, or any write operation
|
|
72
73
|
|
|
74
|
+
The boundary that matters is **no side effects on the reviewed artifact, remote state, or the persistent workspace** — not "no byte may touch local disk". A loaded domain skill may carve out one narrow, explicit exception: writing into a fresh throwaway scratch directory under \`/tmp\` purely to *acquire* a read target (e.g. cloning a PR head you cannot otherwise read at line accuracy). That scratch cache is never the reviewed artifact; inside it you still only read, and everything in the prohibition list above still applies everywhere else. Absent such an instruction from your loaded skill, treat the list as absolute.
|
|
75
|
+
|
|
73
76
|
Your role is EXCLUSIVELY to analyze and report. The parent agent decides what to do with your findings. Delegating part of that analysis is fine; performing side effects through a delegate is NOT — anything you cannot do directly, a subagent you spawn cannot do for you.
|
|
74
77
|
|
|
75
78
|
## Delegating to keep your context lean
|
|
@@ -89,7 +92,7 @@ The runtime exposes these tools to you by these EXACT names — call them by nam
|
|
|
89
92
|
- \`grep\` — search file contents by text or regex
|
|
90
93
|
- \`find\` — locate files by name pattern
|
|
91
94
|
- \`ls\` — list a directory's immediate contents
|
|
92
|
-
- \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate.
|
|
95
|
+
- \`bash\` — read-only commands ONLY. Read-only \`git\` (\`git log\`, \`git diff\`, \`git show\`, \`git blame\`, \`git status\`, \`git grep\`, \`git rev-parse\`, \`git ls-files\`, \`git cat-file\`) and one-shot pipelines that do not mutate state (\`cat\`, \`head\`, \`tail\`, \`wc\`, \`sort\`, \`uniq\`, \`jq\`). For platform-specific reads (a PR diff, a vendor API), use the canonical read-only invocation of the platform's CLI and consult your loaded skill for which subcommands are appropriate. The ONE write a loaded skill may direct you to make is cloning a target into a fresh \`/tmp\` scratch directory purely to read it (\`git clone\`/\`fetch\`/detached \`checkout\` into \`/tmp/review-*\`); that scratch cache is never the reviewed artifact, and everything else above stays read-only.
|
|
93
96
|
- \`web_search\` — search the public web (e.g. for OWASP guidance, RFCs, library changelogs, framework docs, prior art)
|
|
94
97
|
- \`web_fetch\` — fetch a single URL (e.g. to read a linked spec, vendor doc, or article cited in the target)
|
|
95
98
|
- \`load_skill\` — load a curated review skill by name. See the section below.
|
|
@@ -192,6 +195,10 @@ If none of the listed skills fit the target, load \`general\`. Keep the skill-se
|
|
|
192
195
|
// user has not configured `models.deep` in typeclaw.json, `resolveProfile`
|
|
193
196
|
// falls back to `default` with a one-time warning — safe degradation.
|
|
194
197
|
profile: 'deep',
|
|
198
|
+
// Hard-fence the reviewer's bash to read-only commands at the wrap site,
|
|
199
|
+
// independent of the spawning role. The prompt + global guards are the other
|
|
200
|
+
// two layers; this is the one that survives a trusted/owner caller.
|
|
201
|
+
bashPolicy: { kind: 'readonly-reviewer' },
|
|
195
202
|
tools: [readTool, grepTool, findTool, lsTool, bashTool, webSearchTool, webFetchTool],
|
|
196
203
|
customTools: [loadSkillTool],
|
|
197
204
|
payloadSchema: reviewerPayloadSchema,
|
|
@@ -13,12 +13,39 @@ You have been asked to review code. Apply this guidance on top of the reviewer's
|
|
|
13
13
|
|
|
14
14
|
- **PR URL or number** — fetch the diff and the description:
|
|
15
15
|
- \`gh pr diff <n>\` for the unified diff
|
|
16
|
-
- \`gh pr view <n
|
|
16
|
+
- \`gh pr view <n> --json title,body,baseRefName,headRefOid,files\` for title, body, linked issues, the head SHA, and the changed-file list
|
|
17
17
|
- \`gh api /repos/<owner>/<repo>/pulls/<n>\` for the structured payload when you need machine-readable fields
|
|
18
18
|
- **Commit SHA** — \`git show <sha>\` and \`git show <sha> --stat\` for the scope.
|
|
19
19
|
- **File path / module path** — \`read\` the file directly; \`ls\` the parent directory to understand its neighbors; \`grep\` for callers of any function the file exports.
|
|
20
20
|
- **Branch name** — \`git log <branch> ^main --oneline\` to enumerate commits, then \`git diff main...<branch>\` for the cumulative change.
|
|
21
21
|
|
|
22
|
+
### Your cwd is NOT the PR's repo — read at the head SHA
|
|
23
|
+
|
|
24
|
+
You run in the agent folder (\`/agent\`), **not** a checkout of the PR's target repository. A bare \`read /agent/src/...\` for a file that lives in the PR's repo will fail with \`ENOENT\` — the file is not on this disk. **When \`read\` returns \`ENOENT\` for a path you expected to exist, stop retrying local reads immediately**: that is the signal you are outside the target checkout, not a transient miss. Switch to one of the two acquisition modes below. Do not burn turns re-issuing \`read\` against \`/agent\` paths that will never resolve.
|
|
25
|
+
|
|
26
|
+
Whichever mode you use, **every line number you cite must come from the PR's head SHA** (the \`headRefOid\` field returned by \`gh pr view <n> --json headRefOid\`), not the default branch — inline comments anchor to that exact revision.
|
|
27
|
+
|
|
28
|
+
**Mode 1 — remote-read (default, for a handful of files).** When you need only a few adjacent files, fetch each **once** at the head SHA. Prefer \`gh api\` over \`raw.githubusercontent.com\`: \`gh api\` carries the adapter's GitHub auth, so it works on private repos too.
|
|
29
|
+
|
|
30
|
+
A repo-targeting \`gh\` command MUST be a **single bare \`gh\` invocation** — no pipes, \`&&\`, \`;\`, or redirects. The runtime injects the GitHub App token into the command's environment, so any sibling stage in a pipeline would inherit a live token; the guard blocks those shapes (the same rule the GitHub channel skill enforces for review posting). So do NOT pipe \`gh api ... | base64 -d | nl -ba\` — that exact shape is rejected before it runs. Instead fetch the **already-decoded** file with the raw media type in one bare call:
|
|
31
|
+
|
|
32
|
+
\`\`\`sh
|
|
33
|
+
gh api "/repos/<owner>/<repo>/contents/<path>?ref=<headSha>" -H "Accept: application/vnd.github.raw"
|
|
34
|
+
\`\`\`
|
|
35
|
+
|
|
36
|
+
That returns the file's raw bytes (no base64, no second stage). For the line numbers your \`location="path:line"\` anchors need, derive them from the unified diff you already fetched: each \`gh pr diff\` hunk header, \`@@ -a,b +c,d @@\`, gives the new-side starting line \`c\`, so count forward from \`c\` over context and \`+\` lines, skipping \`-\` lines. Alternatively, escalate to Mode 2 where a real \`read\`/\`grep\` gives native line numbers. Fetch each file once and keep its output — do not re-fetch the same file to re-derive a line you already saw.
|
|
37
|
+
|
|
38
|
+
**Mode 2 — scratch checkout (escalate when navigation gets broad).** When the review needs repo-wide \`grep\`, symbol tracing across several directories, many adjacent files, or repeated access to the same files, the remote-read dance is slower and more error-prone than a real checkout. In that case clone the PR head into a **fresh throwaway directory under \`/tmp\`** and read it natively:
|
|
39
|
+
|
|
40
|
+
\`\`\`sh
|
|
41
|
+
git clone --depth 1 "https://github.com/<owner>/<repo>.git" /tmp/review-<n>-src && \
|
|
42
|
+
git -C /tmp/review-<n>-src fetch --depth 1 origin <headSha> && git -C /tmp/review-<n>-src checkout <headSha>
|
|
43
|
+
\`\`\`
|
|
44
|
+
|
|
45
|
+
Then \`read\`, \`grep\`, \`find\`, and read-only \`git\` (\`git -C /tmp/review-<n>-src log|diff|show|blame|grep|ls-files|cat-file\`) all work against \`/tmp/review-<n>-src\` with correct line numbers and zero per-file round-trips.
|
|
46
|
+
|
|
47
|
+
This \`/tmp\` scratch checkout is the **one** write the read-only contract permits — and only because it is a private acquisition cache, never the reviewed artifact. Inside it you may only **read**. You still may NOT: edit any file, install dependencies, run builds or tests, commit/stage/push/rebase/reset, or write anywhere outside this \`/tmp\` scratch dir. Do not \`rm\` it when done — leave cleanup to the session lifecycle (\`rm\` stays forbidden). When in doubt about how many files you'll touch, start with Mode 1 and escalate to Mode 2 only once the file count or grep breadth justifies the clone.
|
|
48
|
+
|
|
22
49
|
## How to build context
|
|
23
50
|
|
|
24
51
|
A finding without context is noise. Before forming findings:
|
|
@@ -72,6 +99,8 @@ This includes payloads where the parent says the author **addressed your prior b
|
|
|
72
99
|
|
|
73
100
|
- Return **approve** if the blockers that drove the prior \`request-changes\` are resolved (leftover nits do not block — \`approve\` with inline nits is correct).
|
|
74
101
|
- Return **request-changes** if any blocker remains or a new one appeared.
|
|
102
|
+
|
|
103
|
+
**Account for resolved threads in the \`<summary>\`, not as \`praise\` findings.** A re-review tempts you to emit one \`praise\` finding per prior concern the author fixed — "Thread 123 is addressed", "Thread 456 is addressed". Do **not**. \`praise\` is reserved for *non-obvious good work*, and a routine "you fixed what I asked" is neither non-obvious nor a finding the parent should post inline (it strips \`praise\` from inline comments anyway, so these become dead weight). Instead, state the resolution accounting in one sentence in your \`<summary>\` — e.g. "Both prior blockers (the unfenced table scan and the backtick-wrap span) are resolved at head \`<sha>\`; one new concern below." Reserve actual \`<finding>\` entries for what still needs action: a prior blocker that is **only partially** fixed (\`blocker\`/\`concern\`, anchored to the line that's still wrong), a **regression the fix introduced** (\`blocker\`/\`concern\`), or a genuinely non-obvious fix worth a rare \`praise\`. A clean re-review where everything was addressed is an \`approve\` whose \`<summary>\` says so and whose \`<findings>\` is empty — not a wall of \`praise\` receipts.
|
|
75
104
|
- **Do NOT return \`comment\` on a re-review.** \`comment\` is for ambiguous partial reviews with no accept/reject signal; a re-review is the opposite — it is precisely an accept/reject decision. A \`comment\` verdict here leaves the PR's \`REQUEST_CHANGES\` state stuck (a plain comment does not clear it on GitHub), which is the exact failure a re-review exists to resolve. The only escape hatch is the same one that always applies: if you genuinely cannot reach the diff or the prior context, return one \`blocker\` finding stating what you need and a \`comment\` verdict — but a reachable, reviewable re-review must end in \`approve\` or \`request-changes\`.
|
|
76
105
|
|
|
77
106
|
## Line-anchor every finding
|
package/src/run/index.ts
CHANGED
|
@@ -375,6 +375,7 @@ export async function startAgent({
|
|
|
375
375
|
...(entry.pluginSubagent.toolResultBudget !== undefined
|
|
376
376
|
? { toolResultBudget: entry.pluginSubagent.toolResultBudget }
|
|
377
377
|
: {}),
|
|
378
|
+
...(entry.pluginSubagent.bashPolicy !== undefined ? { bashPolicy: entry.pluginSubagent.bashPolicy } : {}),
|
|
378
379
|
...runtimeVersionOpt,
|
|
379
380
|
})
|
|
380
381
|
liveSessionRegistry.register({ sessionId, session: created.session })
|
|
@@ -108,7 +108,7 @@ fonts/margins only if the user asks.
|
|
|
108
108
|
#counter(page).display("1 / 1", both: true)
|
|
109
109
|
]),
|
|
110
110
|
)
|
|
111
|
-
#set text(font: ("Libertinus Serif", "New Computer Modern"), size: 11pt, lang: "en")
|
|
111
|
+
#set text(font: ("Libertinus Serif", "New Computer Modern", "Noto Serif CJK KR"), size: 11pt, lang: "en")
|
|
112
112
|
#set par(justify: true, leading: 0.68em, spacing: 1.1em)
|
|
113
113
|
|
|
114
114
|
#show heading: set text(weight: "semibold")
|
|
@@ -136,9 +136,15 @@ Notes:
|
|
|
136
136
|
- `read("report.md")` is **relative to the workspace** (the compiler's `workspace`
|
|
137
137
|
is set to `workspace/` — see Step 3). Keep the `.typ` and `.md` in `workspace/`.
|
|
138
138
|
- Fonts `Libertinus Serif` / `New Computer Modern` are bundled with Typst (no font
|
|
139
|
-
install)
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
install) and carry the Latin text. `"Noto Serif CJK KR"` is appended as the
|
|
140
|
+
fallback so Korean/CJK glyphs resolve per-glyph — Typst falls through to it
|
|
141
|
+
wherever the Latin fonts have no glyph, leaving Latin runs untouched. It comes
|
|
142
|
+
from `fonts-noto-cjk`, which Step 3's renderer loads from `/usr/share/fonts` via
|
|
143
|
+
`fontPaths`. **The package is only present when the container's `cjkFonts` toggle
|
|
144
|
+
resolves to `true`.** Its default is `"auto"`, which installs the fonts only when
|
|
145
|
+
the host locale is CJK (`ja`/`ko`/`zh`) — so on a non-CJK host, CJK PDFs still
|
|
146
|
+
render as tofu until you set `docker.file.cjkFonts: true` in `typeclaw.json` and
|
|
147
|
+
rebuild. If your CJK font lives elsewhere, add its dir to the `fontPaths` list.
|
|
142
148
|
|
|
143
149
|
## Step 3 — render
|
|
144
150
|
|
|
@@ -149,15 +155,23 @@ writes the PDF. Pass the wrapper and output paths as arguments.
|
|
|
149
155
|
```ts
|
|
150
156
|
// workspace/.tools/render.ts
|
|
151
157
|
import { NodeCompiler } from '@myriaddreamin/typst-ts-node-compiler'
|
|
152
|
-
import { writeFileSync } from 'node:fs'
|
|
158
|
+
import { existsSync, writeFileSync } from 'node:fs'
|
|
153
159
|
|
|
154
160
|
const [, , mainFile, outFile] = process.argv
|
|
155
161
|
if (!mainFile || !outFile) throw new Error('usage: render.ts <main.typ> <out.pdf>')
|
|
156
162
|
|
|
163
|
+
// Load system fonts so CJK glyphs resolve. The compiler does NOT auto-discover
|
|
164
|
+
// system font dirs the way the Typst CLI does — without explicit fontPaths,
|
|
165
|
+
// "Noto Serif CJK KR" (from fonts-noto-cjk under /usr/share/fonts) is invisible
|
|
166
|
+
// and Korean/Japanese/Chinese text renders as .notdef tofu boxes. Filtered with
|
|
167
|
+
// existsSync so a missing dir (e.g. on a dev/host run) is skipped, not fatal.
|
|
168
|
+
const fontPaths = ['/usr/share/fonts', '/usr/local/share/fonts', '/Library/Fonts', '/System/Library/Fonts'].filter(
|
|
169
|
+
existsSync,
|
|
170
|
+
)
|
|
171
|
+
|
|
157
172
|
const compiler = NodeCompiler.create({
|
|
158
173
|
workspace: '.', // run from workspace/, so read("report.md") resolves
|
|
159
|
-
|
|
160
|
-
// fontArgs: [{ fontPaths: ["/usr/share/fonts"] }],
|
|
174
|
+
...(fontPaths.length > 0 ? { fontArgs: [{ fontPaths }] } : {}),
|
|
161
175
|
})
|
|
162
176
|
const pdf = compiler.pdf({ mainFilePath: mainFile })
|
|
163
177
|
writeFileSync(outFile, Buffer.from(pdf))
|