typeclaw 0.18.0 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/package.json +1 -1
  2. package/src/agent/index.ts +2 -1
  3. package/src/agent/model-overrides.ts +77 -0
  4. package/src/agent/plugin-tools.ts +53 -4
  5. package/src/agent/tools/grant-role.ts +102 -8
  6. package/src/bundled-plugins/github-cli-auth/gh-command.ts +372 -0
  7. package/src/bundled-plugins/github-cli-auth/index.ts +42 -0
  8. package/src/bundled-plugins/github-cli-auth/token-class.ts +11 -0
  9. package/src/bundled-plugins/reviewer/skills/code-review.ts +18 -1
  10. package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +9 -2
  11. package/src/channels/adapters/discord-bot.ts +21 -4
  12. package/src/channels/adapters/github/inbound.ts +30 -55
  13. package/src/channels/adapters/github/index.ts +80 -18
  14. package/src/channels/adapters/github/membership.ts +4 -0
  15. package/src/channels/adapters/slack-bot-slash-commands.ts +3 -1
  16. package/src/channels/adapters/slack-bot.ts +4 -4
  17. package/src/channels/commands.ts +10 -0
  18. package/src/channels/engagement.ts +34 -3
  19. package/src/channels/github-token-bridge.ts +42 -0
  20. package/src/channels/index.ts +6 -0
  21. package/src/channels/manager.ts +6 -0
  22. package/src/channels/membership.ts +9 -0
  23. package/src/channels/router.ts +155 -37
  24. package/src/cli/ui.ts +6 -0
  25. package/src/commands/index.ts +54 -4
  26. package/src/init/dockerfile.ts +60 -0
  27. package/src/init/validate-api-key.ts +15 -1
  28. package/src/plugin/context.ts +8 -0
  29. package/src/plugin/manager.ts +3 -0
  30. package/src/plugin/types.ts +6 -0
  31. package/src/run/bundled-plugins.ts +9 -0
  32. package/src/run/index.ts +4 -0
  33. package/src/skills/typeclaw-channel-github/SKILL.md +70 -43
@@ -0,0 +1,372 @@
1
/**
 * Verdict for a whole bash command:
 * - `pass-through`: leave the command untouched;
 * - `block`: refuse it, with a human-readable `reason`;
 * - `inject`: run it with a token resolved for `repoSlug`.
 */
export type GhCommandDecision =
  | { kind: 'pass-through' }
  | { kind: 'block'; reason: string }
  | { kind: 'inject'; repoSlug: string }

/** Block reason: a repo-scoped `gh` command did not name its repo explicitly. */
const MISSING_REPO_REASON = [
  'This GitHub App spans multiple owners, so `gh` has no single correct token. ',
  'Re-run with an explicit repo: `gh <cmd> -R owner/repo` (or `gh api /repos/owner/repo/...`) ',
  'so the right installation token can be injected.',
].join('')

/** Block reason: the command's targets belong to more than one owner. */
const MULTI_OWNER_REASON = [
  'This command targets repos under more than one owner; a single GH_TOKEN cannot ',
  'authenticate all of them. Split it into separate commands, one owner each.',
].join('')

/** Block reason: a `gh api` endpoint path and its `-R/--repo` flag name different repos. */
const API_REPO_CONFLICT_REASON = [
  'This `gh api` call names a repo in its endpoint path that differs from its ',
  '`-R/--repo` flag. `gh api` ignores `-R` for a literal `/repos/{owner}/{repo}` ',
  'endpoint — the path is where the request actually goes — so the flag cannot be ',
  'used to mint a token for one repo while hitting another. Drop the mismatched ',
  '`-R`, or target the repo named in the path.',
].join('')
21
+
22
// Per-invocation verdict. Unlike the command-level decision, `inject` carries
// EVERY repo the segment effectively targets (a `gh api` compare endpoint can
// reference both a base repo and a cross-fork head), so the caller can check
// all of them rather than only the first.
type GhSegmentDecision =
  | { kind: 'pass-through' }
  | { kind: 'block'; reason: string }
  | { kind: 'inject'; repoSlugs: readonly string[] }

/** Block reason: a token-receiving `gh` command was not a single bare command. */
const COMPOSITION_REASON = [
  'A repo-targeting `gh` command receives a minted GitHub App token in its process ',
  'environment, so it must run as a single bare `gh` command — no pipes, `;`, `&&`, ',
  '`||`, `&`, newlines, redirections, command/process substitution, subshells, heredocs, ',
  'or unquoted `$` expansion (any sibling process or expansion would inherit the token ',
  'and could exfiltrate it). jq/JSON metacharacters are fine INSIDE single quotes, e.g. ',
  "`gh api repos/o/r --jq '.[] | {id}'`. To feed JSON to `gh api`, write it to a temp ",
  'file and use `gh api --input <file>`.',
].join('')
40
+
41
// Shell-active metacharacters that, OUTSIDE single quotes, either spawn another
// process sharing the shell env (where the minted GH_TOKEN lives) or expand
// shell state into an argument. `|;&` = pipeline/sequence/background; newline/CR
// = command separators; `()` `{}` = subshell/group; `<>` = redirection
// (incl. bash /dev/tcp networking and heredocs); backtick + `$` = command/
// parameter/arithmetic substitution (covers `$(`, `${`, `$((`, and a bare
// `$GH_TOKEN`). Single quotes make all of these literal, so jq pipes and JSON
// braces are allowed when single-quoted. Double quotes do NOT neutralize `$`
// or backticks, so they are treated as active.
const SHELL_ACTIVE_METACHARS = new Set(['|', ';', '&', '\n', '\r', '(', ')', '{', '}', '<', '>', '`', '$'])

/**
 * Returns true iff `command` is a single simple `gh ...` command: the first
 * non-whitespace word is `gh`, and no shell-active metacharacter appears
 * outside single quotes. This is the gate for token injection.
 *
 * Quoting is tracked the way the shell reads it:
 * - inside single quotes everything is literal and only `'` closes;
 * - inside double quotes `$` and backtick are still active;
 * - a backslash outside single quotes escapes the next character, so that
 *   character is neither a quote delimiter nor a metacharacter.
 *
 * The backslash rule is what stops `gh ... \' ; curl evil \'` from being read
 * as one single-quoted span: the shell sees two literal quotes around an
 * active `;`, and the sibling command would inherit the injected token.
 *
 * @param command Raw bash command string.
 * @returns `true` only for a lone `gh` command with balanced quotes.
 */
function isSingleBareGhCommand(command: string): boolean {
  const trimmed = command.trimStart()
  if (!/^gh(\s|$)/.test(trimmed)) return false

  let quote: '"' | "'" | null = null
  for (let i = 0; i < trimmed.length; i++) {
    const ch = trimmed[i]
    if (ch === undefined) continue
    if (quote === "'") {
      // No escapes exist inside single quotes; only `'` ends the span.
      if (ch === "'") quote = null
      continue
    }
    if (ch === '\\') {
      // Escaped character: skip it so it cannot open/close a quote or count
      // as a metacharacter. A trailing backslash simply ends the scan.
      i += 1
      continue
    }
    if (quote === '"') {
      // Inside double quotes `$` and backtick still expand; only `"` closes.
      if (ch === '"') quote = null
      else if (ch === '$' || ch === '`') return false
      continue
    }
    if (ch === "'" || ch === '"') {
      quote = ch
      continue
    }
    if (SHELL_ACTIVE_METACHARS.has(ch)) return false
  }
  // An unterminated quote is not a well-formed single command.
  return quote === null
}
81
+
82
// Subcommands that operate on the account or globally rather than on a repo.
// Without an explicit repo flag they pass through with no token injection.
// The list is deliberately short: any subcommand NOT named here is treated as
// repo-scoped and is blocked unless it names its repo explicitly. `gh api` is
// not listed because it is classified separately from its endpoint path.
const REPO_LESS_SUBCOMMANDS = new Set<string>(
  (
    'auth config extension alias completion gpg-key ssh-key status org gist ' +
    'codespace search preview accessibility attestation'
  ).split(' '),
)
107
+
108
/**
 * Decides what to do with a bash command that may contain `gh` invocations.
 *
 * One GH_TOKEN is injected for the whole command, so every `gh` invocation in
 * it is classified, not only the first:
 * - no `gh` invocation, or none that targets a repo → `pass-through`;
 * - any invocation that classifies as `block` → that block verdict is returned;
 * - targets spanning more than one owner → `block` (multi-owner);
 * - otherwise the command must be a single bare `gh` command, else `block`
 *   (composition); if it is, `inject` for the first collected repo slug.
 *
 * @param command Raw bash command string.
 * @returns The command-level decision.
 */
export function analyzeGhCommand(command: string): GhCommandDecision {
  const tokens = tokenize(command)
  const invocationStarts = findGhInvocations(tokens)
  if (invocationStarts.length === 0) return { kind: 'pass-through' }

  // Each invocation's args run from just after its `gh` token up to the next
  // `gh` invocation (or the end of the command).
  const targets: string[] = []
  for (const [position, start] of invocationStarts.entries()) {
    const nextStart = invocationStarts[position + 1] ?? tokens.length
    const verdict = classifyGhSegment(tokens.slice(start + 1, nextStart))
    if (verdict.kind === 'block') return verdict
    if (verdict.kind === 'inject') targets.push(...verdict.repoSlugs)
  }

  const [firstTarget] = targets
  if (firstTarget === undefined) return { kind: 'pass-through' }

  const distinctOwners = new Set<string | undefined>()
  for (const slug of targets) distinctOwners.add(slug.split('/')[0])
  if (distinctOwners.size > 1) return { kind: 'block', reason: MULTI_OWNER_REASON }

  // A token would be injected from here on. It lands in the shell's env, so
  // any sibling process or shell expansion would inherit it.
  if (!isSingleBareGhCommand(command)) return { kind: 'block', reason: COMPOSITION_REASON }

  return { kind: 'inject', repoSlug: firstTarget }
}
140
+
141
/**
 * Classifies one `gh` invocation from the tokens that follow `gh`.
 *
 * The subcommand is the first token that does not start with `-`. `gh api` is
 * dispatched to its own classifier BEFORE the generic repo-flag lookup, because
 * for `api` the endpoint path, not the flag, decides which repo is hit.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns `inject` when a repo flag is present, `pass-through` for repo-less
 *          subcommands or when no subcommand is given, otherwise `block`.
 */
function classifyGhSegment(args: readonly string[]): GhSegmentDecision {
  const subcommand = args.find((token) => !token.startsWith('-'))
  if (subcommand === undefined) return { kind: 'pass-through' }
  if (subcommand === 'api') return classifyGhApiSegment(args)

  const repoFromFlag = extractRepoFlag(args)
  if (repoFromFlag !== null) return { kind: 'inject', repoSlugs: [repoFromFlag] }

  return REPO_LESS_SUBCOMMANDS.has(subcommand)
    ? { kind: 'pass-through' }
    : { kind: 'block', reason: MISSING_REPO_REASON }
}
158
+
159
/**
 * Classifies a `gh api` invocation. The literal endpoint path is authoritative:
 * - the path names repo(s): inject for those, unless a repo flag names a repo
 *   that is not among them, which blocks (mint-for-X-but-hit-Y);
 * - the path names no literal repo but uses `{owner}`/`{repo}` placeholders and
 *   a repo flag is present: inject for the flag's repo;
 * - anything else (e.g. `graphql`, `/user`): pass through, no token minted.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns The segment-level decision.
 */
function classifyGhApiSegment(args: readonly string[]): GhSegmentDecision {
  const flagRepo = extractRepoFlag(args)
  const pathRepos = extractReposFromApiPath(args)

  if (pathRepos.length === 0) {
    if (flagRepo !== null && apiEndpointHasOwnerRepoPlaceholder(args)) {
      return { kind: 'inject', repoSlugs: [flagRepo] }
    }
    return { kind: 'pass-through' }
  }

  const flagContradictsPath = flagRepo !== null && !pathRepos.includes(flagRepo)
  return flagContradictsPath
    ? { kind: 'block', reason: API_REPO_CONFLICT_REASON }
    : { kind: 'inject', repoSlugs: pathRepos }
}
181
+
182
/**
 * Returns the index of every `gh` token that sits in command position, i.e. at
 * the start of a simple command (optionally preceded by `FOO=bar` assignments).
 *
 * @param tokens Output of the tokenizer: words plus standalone operator tokens.
 * @returns Ascending token indices of `gh` invocations; empty if there are none.
 */
function findGhInvocations(tokens: readonly string[]): number[] {
  const starts: number[] = []
  for (let i = 0; i < tokens.length; i++) {
    if (tokens[i] !== 'gh') continue
    if (isCommandBoundaryBefore(tokens, i)) starts.push(i)
  }
  return starts
}

/**
 * True when the token at `index` starts a simple command: walking backwards,
 * only `NAME=value` assignments separate it from either the start of the token
 * list or a control operator.
 *
 * The backgrounding operator `&` counts as a boundary alongside `&&`, `||`,
 * `|` and `;`: the tokenizer emits it as a standalone token, and the word after
 * `cmd &` is a new command. Without it, `sleep 1 & gh label list` would never
 * be classified.
 *
 * @param tokens Tokenized command.
 * @param index  Position of the candidate command word.
 */
function isCommandBoundaryBefore(tokens: readonly string[], index: number): boolean {
  for (let cursor = index - 1; cursor >= 0; cursor--) {
    const prev = tokens[cursor]
    if (prev === undefined) return false
    if (prev === '&&' || prev === '||' || prev === '|' || prev === ';' || prev === '&') return true
    // Leading env assignments are skipped; any other word means `gh` is an
    // argument of an earlier command, not an invocation.
    if (!/^[A-Za-z_][A-Za-z0-9_]*=/.test(prev)) return false
  }
  // Ran off the front: nothing but assignments (or nothing at all) precedes it.
  return true
}
207
+
208
/**
 * Finds the first well-formed `owner/repo` value passed via the repo flag.
 *
 * Recognised spellings: `-R value`, `--repo value`, `--repo=value`, `-R=value`
 * and the attached shorthand `-Rvalue`. A flag whose value is not a valid slug
 * is ignored and the scan continues.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns The `owner/repo` slug, or `null` when no valid repo flag is present.
 */
function extractRepoFlag(args: readonly string[]): string | null {
  for (let i = 0; i < args.length; i++) {
    const arg = args[i]
    if (arg === undefined) continue

    let candidate: string | undefined
    if (arg === '-R' || arg === '--repo') {
      candidate = args[i + 1]
    } else if (arg.startsWith('--repo=')) {
      candidate = arg.slice('--repo='.length)
    } else if (arg.startsWith('-R=')) {
      candidate = arg.slice('-R='.length)
    } else if (arg.startsWith('-R')) {
      // Attached shorthand value, e.g. `-Rowner/repo`.
      candidate = arg.slice('-R'.length)
    }

    if (candidate !== undefined && isRepoSlug(candidate)) return candidate
  }
  return null
}
227
+
228
// `gh api` flags that consume the FOLLOWING token as their value. The endpoint
// is the first positional arg that is neither a flag nor a flag's value; only
// THAT arg is parsed for owner/repo, so a `-f q=/repos/a/b` field value or a
// `--jq` expression cannot masquerade as the target.
//
// Boolean flags (`-i/--include`, `--paginate`, `--silent`, `--slurp`,
// `--verbose`) must NOT be listed: they take no value, and listing one would
// make the scan swallow the endpoint that follows it. `-p/--preview` DOES take
// a value and must be listed, or its value would be mistaken for the endpoint.
const GH_API_VALUE_FLAGS = new Set([
  '-X',
  '--method',
  '-f',
  '--raw-field',
  '-F',
  '--field',
  '-H',
  '--header',
  '-q',
  '--jq',
  '-t',
  '--template',
  '-p',
  '--preview',
  '--input',
  '--cache',
  '--hostname',
])

/**
 * Locates the `gh api` endpoint: the first positional argument after the `api`
 * token, skipping flags and the tokens that bare value-flags consume.
 *
 * A flag written as `--flag=value` carries its value inline and consumes
 * nothing further; it never matches the set, which holds bare flag names only.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns The endpoint token, or `null` if `api` or an endpoint is absent.
 */
function findApiEndpoint(args: readonly string[]): string | null {
  const apiIndex = args.indexOf('api')
  if (apiIndex === -1) return null
  for (let i = apiIndex + 1; i < args.length; i++) {
    const arg = args[i]
    if (arg === undefined) continue
    if (!arg.startsWith('-')) return arg
    // Bare value-flag: the next token is its value, not the endpoint.
    if (GH_API_VALUE_FLAGS.has(arg)) i += 1
  }
  return null
}
267
+
268
/**
 * Lists every LITERAL repo the `gh api` endpoint path targets.
 *
 * Usually that is the single `owner/name` after `repos/`. A compare endpoint
 * `.../compare/{base}...{headOwner}:{branch}` additionally reaches the head
 * owner's repo of the same name, so that slug is appended when the head owner
 * differs from the base owner. Placeholder segments (anything containing `{`
 * or `}`) are not literal targets and yield an empty result.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns Zero, one or two `owner/repo` slugs, base repo first.
 */
function extractReposFromApiPath(args: readonly string[]): string[] {
  const endpoint = findApiEndpoint(args)
  if (endpoint === null) return []

  // A single leading slash is optional in gh endpoints.
  const [root, owner, name, ...tail] = endpoint.replace(/^\//, '').split('/')
  if (root !== 'repos' || owner === undefined || name === undefined) return []
  if (isPlaceholderSegment(owner) || isPlaceholderSegment(name)) return []

  const baseSlug = `${owner}/${name}`
  if (!isRepoSlug(baseSlug)) return []
  const repos = [baseSlug]

  const compareAt = tail.indexOf('compare')
  if (compareAt === -1) return repos

  // The compare spec may itself contain slashes (branch names), so rejoin it.
  const head = tail.slice(compareAt + 1).join('/').split('...')[1]
  if (head === undefined || !head.includes(':')) return repos

  const headOwner = head.slice(0, head.indexOf(':'))
  if (headOwner === '' || headOwner === owner) return repos

  const headSlug = `${headOwner}/${name}`
  if (isRepoSlug(headSlug)) repos.push(headSlug)
  return repos
}
301
+
302
/**
 * Reports whether the `gh api` endpoint contains the `{owner}` or `{repo}`
 * template placeholder. For such endpoints the repo flag, not the path, names
 * the target.
 *
 * @param args Tokens following the `gh` word of one invocation.
 * @returns `false` when there is no endpoint or it has neither placeholder.
 */
function apiEndpointHasOwnerRepoPlaceholder(args: readonly string[]): boolean {
  const endpoint = findApiEndpoint(args)
  return endpoint !== null && ['{owner}', '{repo}'].some((placeholder) => endpoint.includes(placeholder))
}
310
+
311
/**
 * True when `value` has exactly the shape `owner/name`: two non-empty parts
 * separated by a single `/`.
 */
function isRepoSlug(value: string): boolean {
  const parts = value.split('/')
  return parts.length === 2 && parts.every((part) => part !== '')
}
315
+
316
/** True when a path segment contains a `{` or `}` and so is a template placeholder, not a literal. */
function isPlaceholderSegment(segment: string): boolean {
  return /[{}]/.test(segment)
}
319
+
320
/**
 * Splits a command into words and standalone control-operator tokens.
 *
 * - Whitespace (space, tab, carriage return) separates words.
 * - `;`, `|`, `&`, `&&`, `||` become their own tokens even with no surrounding
 *   spaces, so `true;gh ...` yields a boundary before `gh`.
 * - An unquoted newline is a command separator in the shell, so it is emitted
 *   as a `;` token; a `gh` on a following line is then seen in command position
 *   instead of looking like an argument of the previous line.
 * - Quote-aware: inside `'...'` or `"..."` every character (operators and
 *   newlines included) is literal; the quotes themselves are dropped. An empty
 *   quoted string still produces an (empty) token.
 *
 * This is a command-position detector, not a full shell parser: it does not
 * interpret backslash escapes, redirections, subshells or heredocs. A heredoc
 * body line that begins with `gh` is therefore reported as a command.
 *
 * @param command Raw bash command string.
 * @returns Tokens in source order.
 */
function tokenize(command: string): string[] {
  const tokens: string[] = []
  let current = ''
  let quote: '"' | "'" | null = null
  // Tracked separately from `current` so that `""` yields an empty token.
  let hasContent = false

  const flush = (): void => {
    if (hasContent) {
      tokens.push(current)
      current = ''
      hasContent = false
    }
  }

  for (let i = 0; i < command.length; i++) {
    const ch = command[i]
    if (ch === undefined) continue
    if (quote !== null) {
      if (ch === quote) quote = null
      else current += ch
      continue
    }
    if (ch === '"' || ch === "'") {
      quote = ch
      hasContent = true
      continue
    }
    if (ch === ' ' || ch === '\t' || ch === '\r') {
      flush()
      continue
    }
    if (ch === '\n') {
      // Newline ends the current simple command, exactly like `;`.
      flush()
      tokens.push(';')
      continue
    }
    if (ch === ';' || ch === '|' || ch === '&') {
      flush()
      const next = command[i + 1]
      if ((ch === '|' && next === '|') || (ch === '&' && next === '&')) {
        tokens.push(ch + ch)
        i += 1
      } else {
        tokens.push(ch)
      }
      continue
    }
    current += ch
    hasContent = true
  }
  flush()
  return tokens
}
@@ -0,0 +1,42 @@
1
+ import { TYPECLAW_INTERNAL_BASH_ENV } from '@/agent/plugin-tools'
2
+ import { definePlugin } from '@/plugin'
3
+
4
+ import { analyzeGhCommand } from './gh-command'
5
+ import { classifyGhToken } from './token-class'
6
+
7
/**
 * Bundled plugin that intercepts `bash` tool calls containing `gh` and, when
 * the command targets a repo, supplies a per-repo GH_TOKEN through the internal
 * bash env overlay.
 */
export default definePlugin({
  plugin: async (ctx) => {
    const { resolveTokenForRepo } = ctx.github
    return {
      hooks: {
        'tool.before': async (event) => {
          if (event.tool !== 'bash') return

          const command = event.args.command
          if (typeof command !== 'string') return
          // Cheap pre-filter before the full analysis.
          if (!command.includes('gh')) return

          const decision = analyzeGhCommand(command)
          if (decision.kind === 'pass-through') return

          // Classic PATs reach every owner; nothing to inject or enforce.
          const tokenClass = classifyGhToken(process.env.GH_TOKEN)
          if (tokenClass === 'cross-owner') return

          if (decision.kind === 'block') {
            return { block: true, reason: decision.reason }
          }

          // Fine-grained PATs are single-owner but cannot be re-minted per repo;
          // the seeded GH_TOKEN is the only token available. It is left in place
          // so `gh` fails honestly if the named repo is under a different owner.
          if (tokenClass === 'fine-grained-pat') return

          const resolved = await resolveTokenForRepo(decision.repoSlug)
          if (resolved.kind === 'unavailable') {
            return { block: true, reason: resolved.reason }
          }

          // The token goes into the internal env overlay rather than the command
          // string, where it could leak through logs or later hooks.
          event.args[TYPECLAW_INTERNAL_BASH_ENV] = { GH_TOKEN: resolved.token }
        },
      },
    }
  },
})
@@ -0,0 +1,11 @@
1
/** Coarse class of the seeded GH_TOKEN, derived from its prefix. */
export type GhTokenClass = 'cross-owner' | 'fine-grained-pat' | 'app' | 'none'

/**
 * Classifies a GitHub token by its prefix.
 *
 * @param token The token string, or `undefined` when none is set.
 * @returns `'none'` for a missing or empty token; `'cross-owner'` for `ghp_`;
 *          `'fine-grained-pat'` for `github_pat_`; `'app'` for `ghs_` and for
 *          every unrecognised format, so a repo-targeting call still resolves
 *          a per-repo token rather than silently using a possibly-wrong
 *          global one.
 */
export function classifyGhToken(token: string | undefined): GhTokenClass {
  if (!token) return 'none'

  const prefixClasses: ReadonlyArray<readonly [string, GhTokenClass]> = [
    ['ghp_', 'cross-owner'],
    ['github_pat_', 'fine-grained-pat'],
    ['ghs_', 'app'],
  ]
  for (const [prefix, tokenClass] of prefixClasses) {
    if (token.startsWith(prefix)) return tokenClass
  }
  return 'app'
}
@@ -35,11 +35,12 @@ Prioritize in this order:
35
35
  1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
36
36
  2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
37
37
  3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
38
- 4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason.
38
+ 4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason. Look past the raw test count, but only flag a redundant case when you can show the *inputs themselves* reach the same path — same branch, same validation rule, same boundary — not merely that the assertion shape is identical. Table-driven and parametrized tests legitimately share one assertion across many inputs while each input exercises a distinct branch, parser, or edge case; that is coverage, not duplication. The finding is "these inputs are indistinguishable to the code under test," and you must name the path they collapse onto — never "the assertions look the same."
39
39
  5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
40
40
  6. **Performance.** Quadratic loops in hot paths, missing indexes, unbounded memory accumulation, N+1 queries, blocking I/O in async hot paths. Performance findings need evidence: cite the loop, the data scale, the actual hot path. "Could be slow" without evidence is not a finding.
41
41
  7. **API surface.** Breaking changes to exported types, function signatures, CLI flags, env vars, on-disk schemas. Are they documented? Versioned? Migration noted in CHANGELOG / release notes?
42
42
  8. **Naming.** Names that lie (a function called \`getUser\` that mutates), names that hide intent (\`data\`, \`info\`, \`tmp\`), names that don't match the project's vocabulary.
43
+ 9. **Change hygiene.** Temporary scaffolding that escaped into the change: \`wip\`/\`fixup!\`/\`squash!\` commits left in the history, debug logging, commented-out code, leftover \`TODO\` markers for work the PR claims to finish. When you flag a stray commit, name the commit it should fold into so the author can squash it — don't just say "this looks temporary".
43
44
 
44
45
  ## What NOT to find
45
46
 
@@ -47,6 +48,8 @@ Prioritize in this order:
47
48
  - **Settled convention objections.** If the project uses tabs, four-space indent, camelCase vs snake_case, etc., and the change matches, that is not a finding. Only the deviation is.
48
49
  - **Generic best-practice essays.** "Consider adding more tests" without naming a specific untested branch is noise. "Improve error handling" without pointing at a specific swallowed error is noise.
49
50
  - **Restating the code.** "This function reads the file and returns its contents" is not a finding.
51
+ - **Restating the change description.** Summarizing what the PR does back to its author — "this PR adds caching to the user lookup" — is not a review. They wrote the description; they know.
52
+ - **Already-acknowledged gaps.** A weakness the author already flagged with a \`TODO\`/\`FIXME\` in the diff, or named in the PR body as out of scope, is not a finding — they're already aware. Only raise it if you have new information: the gap is worse than they think, or it's a blocker they've mislabeled as deferrable. Say which.
50
53
 
51
54
  ## Severity hints specific to code
52
55
 
@@ -61,6 +64,20 @@ Prioritize in this order:
61
64
  - **request-changes** — At least one blocker, OR a load-bearing concern that needs an answer before this lands.
62
65
  - **comment** — Mixed signal: useful observations without a clear approve/reject. Common on large refactors where you reviewed part of the change, or on early-draft PRs where the author asked for direction more than approval.
63
66
 
67
+ ## Line-anchor every finding
68
+
69
+ Code review is line-level work, and your findings are meant to land as **inline comments on the exact lines they describe**. The parent agent posts them that way — it reads the \`location\` on each \`<finding>\` and attaches your \`<issue>\`/\`<evidence>\`/\`<suggestion>\` to that line. A finding with no line anchor cannot be posted inline; the parent can only fold it into a top-level summary, which strips the one thing that made it actionable.
70
+
71
+ So:
72
+
73
+ - **Anchor every code finding to \`path:line\`** (or \`path:start-end\` for a span). Use the file's real line number at the revision you reviewed — for a PR, the line in the diff's new (\`RIGHT\`) side, or the old (\`LEFT\`) side when you're flagging a removed line. Cite the path exactly as the diff/repo spells it.
74
+ - **Do not collapse multiple lines into one vague anchor.** One finding, one location. If the same defect recurs at three call sites, that is three findings (or one finding whose \`location\` names the canonical site and whose \`<issue>\` lists the others) — not a single "see throughout" comment.
75
+ - **Reserve \`location="general"\` for findings that genuinely have no single line:** a missing file, an absent test, an architecture concern that spans the whole change. State *why* it can't be anchored in the \`<issue>\` so the parent knows to route it to the summary, not to a line.
76
+ - **State the blast radius.** A line anchor says *where* the defect is; it doesn't say *how far it reaches*. When the effect isn't obvious from the line itself, add one sentence on what the bug touches — which callers break, which inputs trigger it, what data gets corrupted. This is what tells the author whether your \`concern\` is actually a \`blocker\`, and it's the difference between a finding they can triage and one they have to re-investigate.
77
+ - **Pin the evidence when you cite code outside the diff.** A finding often rests on code the change doesn't touch — a caller that will break, an invariant defined elsewhere. The anchor points at the diff; the *evidence* lives in that other file. Cite it as \`path:line\` at the revision you read, and when the review target is a PR, prefer a permalink to the exact commit (\`gh\` exposes the head SHA; a \`blob/<sha>/path#Lline\` URL survives later edits) so the parent — and the author — land on the same line you did, not whatever that file looks like next week.
78
+
79
+ You never post the comments yourself (you are read-only). Your job is to hand the parent findings precise enough to post without guessing where they go.
80
+
64
81
  ## Final output
65
82
 
66
83
  Return findings inside the reviewer's neutral \`<review>\` block. Do NOT invent your own output format. The parent agent parses the structured shape.
@@ -49,10 +49,17 @@ const DANGEROUS_COMMAND_PATTERNS: ReadonlyArray<{ pattern: RegExp; label: string
49
49
  // `set -e` / `set -euo pipefail`) and require the posix-mode opt-in.
50
50
  { pattern: /set\s+-o\s+posix[\s\S]{0,40}(?:^|[\s;|&(`])set(?:[\s;|&)`]|$)/m, label: 'set -o posix; set (env dump)' },
51
51
  {
52
- pattern: /(cat|less|more|head|tail|bat|xxd|od|hexdump|strings)\s+[^\n;|&`]*\.env(\s|$|[;|&`])/,
52
+ // jq/yq read+emit arbitrary files just like cat (e.g. `jq . .env`,
53
+ // `yq '.x' .env`) and both ship in the container baseline, so they are
54
+ // first-class .env exfil vectors and must be gated here, not just the
55
+ // pager/dumper family.
56
+ pattern: /(cat|less|more|head|tail|bat|xxd|od|hexdump|strings|jq|yq)\s+[^\n;|&`]*\.env(\s|$|[;|&`])/,
53
57
  label: 'reading .env file',
54
58
  },
55
- { pattern: /(cat|less|more|head|tail|bat)\s+[^\n;|&`]*\.envrc(\s|$|[;|&`])/, label: 'reading .envrc file' },
59
+ {
60
+ pattern: /(cat|less|more|head|tail|bat|jq|yq)\s+[^\n;|&`]*\.envrc(\s|$|[;|&`])/,
61
+ label: 'reading .envrc file',
62
+ },
56
63
  { pattern: /\.ssh\/(id_[a-z0-9]+|authorized_keys|known_hosts|config)/i, label: '~/.ssh/* private material' },
57
64
  {
58
65
  pattern: /(cat|less|more|head|tail|ls|find|grep|rg|bat)\s+[^\n;|&`]*~?\/?\.ssh(\/|\s|$|[;|&`])/,
@@ -42,6 +42,7 @@ import {
42
42
  // commands here is the documented extension point: declare the entry here,
43
43
  // then add the matching handler in createChannelRouter's command registry.
44
44
  const SLASH_COMMANDS: readonly DiscordCommandDeclaration[] = [
45
+ { name: 'help', description: 'List available commands' },
45
46
  { name: 'stop', description: 'Abort the current turn in this channel' },
46
47
  ]
47
48
  const SLASH_COMMAND_NAMES: ReadonlySet<string> = new Set(SLASH_COMMANDS.map((c) => c.name))
@@ -208,11 +209,25 @@ export function createDiscordMembershipResolver(deps: {
208
209
 
209
210
  let bots = 0
210
211
  let humans = 0
212
+ const humanMemberIds: string[] = []
213
+ let everyHumanIdentified = true
211
214
  for (const member of members.value) {
212
- if (member.user?.bot === true) bots++
213
- else humans++
215
+ if (member.user?.bot === true) {
216
+ bots++
217
+ continue
218
+ }
219
+ humans++
220
+ const userId = member.user?.id
221
+ if (userId === undefined) everyHumanIdentified = false
222
+ else humanMemberIds.push(userId)
214
223
  }
215
- return { humans, bots, fetchedAt: now(), truncated: false }
224
+ // Only attach identities when every human was identifiable; an
225
+ // unidentifiable human must not be silently dropped, or a consumer proving
226
+ // "all humans trusted" would skip an unaccounted member. Falling back to
227
+ // counts-only keeps that consumer fail-closed.
228
+ return everyHumanIdentified
229
+ ? { humans, bots, fetchedAt: now(), truncated: false, humanMemberIds }
230
+ : { humans, bots, fetchedAt: now(), truncated: false }
216
231
  }
217
232
  }
218
233
 
@@ -508,7 +523,9 @@ export function createInteractionHandler(
508
523
  })
509
524
  const replyContent =
510
525
  result.kind === 'handled'
511
- ? STOP_REPLY_ABORTED
526
+ ? // Dynamic commands (e.g. /help) carry their own reply; static
527
+ // control commands (/stop) fall back to the fixed confirmation.
528
+ (result.reply ?? STOP_REPLY_ABORTED)
512
529
  : result.kind === 'no-live-session'
513
530
  ? STOP_REPLY_NO_LIVE_SESSION
514
531
  : result.kind === 'permission-denied'