npm - typeclaw - Versions diffs - 0.18.0 → 0.20.0 - Mend

typeclaw 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/package.json +1 -1
package/src/agent/index.ts +9 -1
package/src/agent/live-subagents.ts +4 -0
package/src/agent/model-overrides.ts +77 -0
package/src/agent/plugin-tools.ts +53 -4
package/src/agent/session-origin.ts +32 -10
package/src/agent/tools/channel-react.ts +79 -0
package/src/agent/tools/grant-role.ts +102 -8
package/src/agent/tools/spawn-subagent.ts +1 -0
package/src/agent/tools/subagent-access.ts +67 -0
package/src/agent/tools/subagent-cancel.ts +11 -6
package/src/agent/tools/subagent-output.ts +10 -2
package/src/bundled-plugins/github-cli-auth/gh-command.ts +372 -0
package/src/bundled-plugins/github-cli-auth/index.ts +42 -0
package/src/bundled-plugins/github-cli-auth/token-class.ts +11 -0
package/src/bundled-plugins/reviewer/skills/code-review.ts +18 -1
package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +9 -2
package/src/channels/adapters/discord-bot.ts +242 -7
package/src/channels/adapters/github/inbound.ts +40 -55
package/src/channels/adapters/github/index.ts +89 -18
package/src/channels/adapters/github/membership.ts +4 -0
package/src/channels/adapters/github/permission-guidance.ts +20 -1
package/src/channels/adapters/github/reactions.ts +142 -0
package/src/channels/adapters/slack-bot-slash-commands.ts +3 -1
package/src/channels/adapters/slack-bot.ts +4 -4
package/src/channels/commands.ts +10 -0
package/src/channels/engagement.ts +30 -2
package/src/channels/github-token-bridge.ts +42 -0
package/src/channels/index.ts +6 -0
package/src/channels/manager.ts +6 -0
package/src/channels/membership.ts +9 -0
package/src/channels/router.ts +295 -42
package/src/channels/types.ts +42 -0
package/src/cli/inspect.ts +3 -0
package/src/cli/ui.ts +6 -0
package/src/commands/index.ts +54 -4
package/src/init/dockerfile.ts +60 -0
package/src/init/validate-api-key.ts +15 -1
package/src/inspect/loop.ts +12 -1
package/src/permissions/permissions.ts +24 -0
package/src/plugin/context.ts +8 -0
package/src/plugin/manager.ts +3 -0
package/src/plugin/types.ts +6 -0
package/src/run/bundled-plugins.ts +9 -0
package/src/run/index.ts +4 -0
package/src/skills/typeclaw-channel-github/SKILL.md +80 -43

package/src/agent/tools/subagent-cancel.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import type { PermissionService } from '@/permissions'
 import type { LiveSubagentRegistry } from '../live-subagents'
 import type { SessionOrigin } from '../session-origin'
+import { authorizeLiveSubagentAccess } from './subagent-access'
 export type SubagentCancelToolDetails =
   | { ok: true; taskId: string; subagent: string; alreadyDone: boolean }
@@ -33,13 +34,17 @@ export function createSubagentCancelTool(options: CreateSubagentCancelToolOption
     }),
     async execute(_toolCallId, params): Promise<ToolReturn> {
-      if (permissions !== undefined && !permissions.has(getOrigin(), 'subagent.cancel')) {
-        return errorResult('subagent.cancel denied: insufficient permissions')
-      }
-      const live = liveRegistry.get(params.task_id)
-      if (live === undefined) {
-        return errorResult(`Unknown task_id: ${params.task_id}.`)
+      const access = authorizeLiveSubagentAccess({
+        permissions,
+        origin: getOrigin(),
+        liveRegistry,
+        taskId: params.task_id,
+        permission: 'subagent.cancel',
+      })
+      if (!access.ok) {
+        return errorResult(access.message)
       }
+      const live = access.live
       if (live.status !== 'running') {
         const details: SubagentCancelToolDetails = {
           ok: true,

package/src/agent/tools/subagent-output.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import type { PermissionService } from '@/permissions'
 import type { LiveSubagentRegistry, StatusSnapshot, SubagentProgressEvent } from '../live-subagents'
 import type { SessionOrigin } from '../session-origin'
+import { authorizeLiveSubagentAccess } from './subagent-access'
 export type SubagentOutputToolDetails =
   | {
@@ -64,8 +65,15 @@ export function createSubagentOutputTool(options: CreateSubagentOutputToolOption
     }),
     async execute(_toolCallId, params) {
-      if (permissions !== undefined && !permissions.has(getOrigin(), 'subagent.output')) {
-        return errorResult('subagent.output denied: insufficient permissions')
+      const access = authorizeLiveSubagentAccess({
+        permissions,
+        origin: getOrigin(),
+        liveRegistry,
+        taskId: params.task_id,
+        permission: 'subagent.output',
+      })
+      if (!access.ok) {
+        return errorResult(access.message)
       }
       const snap = liveRegistry.snapshot(params.task_id, now())
       if (snap === undefined) {

package/src/bundled-plugins/github-cli-auth/gh-command.ts ADDED Viewed

@@ -0,0 +1,372 @@
+// Verdict analyzeGhCommand returns for a whole bash command string:
+// - 'pass-through': no `gh` invocation was found, or none of them targets a repo.
+// - 'block': the command must not run; `reason` is the explanation to surface.
+// - 'inject': a token should be supplied for `repoSlug` (the first collected target).
+export type GhCommandDecision =
+  | { kind: 'pass-through' }
+  | { kind: 'block'; reason: string }
+  | { kind: 'inject'; repoSlug: string }
+// Block reason used by classifyGhSegment when a non-`api` subcommand that is not
+// in REPO_LESS_SUBCOMMANDS carries no usable -R/--repo flag.
+const MISSING_REPO_REASON =
+  'This GitHub App spans multiple owners, so `gh` has no single correct token. ' +
+  'Re-run with an explicit repo: `gh <cmd> -R owner/repo` (or `gh api /repos/owner/repo/...`) ' +
+  'so the right installation token can be injected.'
+// Block reason used by analyzeGhCommand when the collected repo slugs span more
+// than one owner.
+const MULTI_OWNER_REASON =
+  'This command targets repos under more than one owner; a single GH_TOKEN cannot ' +
+  'authenticate all of them. Split it into separate commands, one owner each.'
+// Block reason used by classifyGhApiSegment when a literal endpoint path names
+// repos and the -R/--repo flag names a repo that is not among them.
+const API_REPO_CONFLICT_REASON =
+  'This `gh api` call names a repo in its endpoint path that differs from its ' +
+  '`-R/--repo` flag. `gh api` ignores `-R` for a literal `/repos/{owner}/{repo}` ' +
+  'endpoint — the path is where the request actually goes — so the flag cannot be ' +
+  'used to mint a token for one repo while hitting another. Drop the mismatched ' +
+  '`-R`, or target the repo named in the path.'
+// A gh segment can legitimately touch more than one repo (a `gh api` compare
+// endpoint references both the base repo and a cross-fork head). The classifier
+// returns EVERY effective target so analyzeGhCommand can allowlist-check and
+// same-owner-check all of them — a single-slug return is what let a literal
+// `gh api /repos/x/y` path slip past an `-R`-derived check.
+// Per-invocation verdict; unlike GhCommandDecision, 'inject' carries a list.
+type GhSegmentDecision =
+  | { kind: 'pass-through' }
+  | { kind: 'block'; reason: string }
+  | { kind: 'inject'; repoSlugs: readonly string[] }
+// Block reason used by analyzeGhCommand when a command that would otherwise get
+// a token injected fails the isSingleBareGhCommand shape check.
+const COMPOSITION_REASON =
+  'A repo-targeting `gh` command receives a minted GitHub App token in its process ' +
+  'environment, so it must run as a single bare `gh` command — no pipes, `;`, `&&`, ' +
+  '`||`, `&`, newlines, redirections, command/process substitution, subshells, heredocs, ' +
+  'or unquoted `$` expansion (any sibling process or expansion would inherit the token ' +
+  'and could exfiltrate it). jq/JSON metacharacters are fine INSIDE single quotes, e.g. ' +
+  "`gh api repos/o/r --jq '.[] | {id}'`. To feed JSON to `gh api`, write it to a temp " +
+  'file and use `gh api --input <file>`.'
+// Shell-active metacharacters that, OUTSIDE single quotes, either spawn another
+// process sharing the shell env (where the minted GH_TOKEN lives) or expand
+// shell state into an argument. `|;&` = pipeline/sequence/background; newline/CR
+// = command separators; `()` `{}` = subshell/group; `<>` = redirection
+// (incl. bash /dev/tcp networking and heredocs); backtick + `$` = command/
+// parameter/arithmetic substitution (covers `$(`, `${`, `$((`, and a bare
+// `$GH_TOKEN`). Backslash is included because it changes how the FOLLOWING
+// character is read (`\"` is a literal quote, not a quote opener), which this
+// scanner does not model; rejecting it keeps the quote tracking below honest.
+// Single quotes make all of these literal, so jq pipes and JSON braces are
+// allowed when single-quoted. Double quotes do NOT neutralize `$`, backticks
+// or backslash, so those are treated as active there too.
+const SHELL_ACTIVE_METACHARS = new Set(['|', ';', '&', '\n', '\r', '(', ')', '{', '}', '<', '>', '`', '$', '\\'])
+/**
+ * Returns true iff `command` is a single simple `gh ...` command: the first
+ * non-whitespace word is `gh`, every quote is closed, and no shell-active
+ * metachar appears outside single quotes. This is the gate for token
+ * injection — see COMPOSITION_REASON.
+ *
+ * Backslashes are rejected everywhere except inside single quotes. Without
+ * that, `gh x \"; printenv GH_TOKEN; echo \"` would be scanned as one
+ * double-quoted span (and accepted) while the shell actually runs three
+ * commands with the token in their environment.
+ *
+ * @param command Raw bash command string.
+ * @returns Whether the command is safe to run with an injected token in env.
+ */
+function isSingleBareGhCommand(command: string): boolean {
+  const trimmed = command.trimStart()
+  if (!/^gh(\s|$)/.test(trimmed)) return false
+  let quote: '"' | "'" | null = null
+  for (const ch of trimmed) {
+    if (quote === "'") {
+      // Everything is literal inside single quotes; only `'` closes.
+      if (ch === "'") quote = null
+      continue
+    }
+    if (quote === '"') {
+      // Inside double quotes `$` and backtick still expand, and backslash can
+      // escape the closing quote; only an unescaped `"` closes.
+      if (ch === '"') quote = null
+      else if (ch === '$' || ch === '`' || ch === '\\') return false
+      continue
+    }
+    if (ch === "'" || ch === '"') {
+      quote = ch
+      continue
+    }
+    if (SHELL_ACTIVE_METACHARS.has(ch)) return false
+  }
+  // An unterminated quote means we cannot tell where the command ends.
+  return quote === null
+}
+// GENUINELY repo-less subcommands (account/global, no -R/--repo): they need no
+// token injection and pass through. The set is intentionally minimal —
+// anything not listed (label, ruleset, secret, variable, cache, run, workflow,
+// release, browse, pr, issue, repo, ...) is repo-scoped and falls through to
+// the block-unless-explicit-repo rule, so an App-auth `gh label list` cannot
+// silently run with the wrong installation token. Classification verified
+// against gh source (commands using cmdutil.EnableRepoOverride are repo-scoped).
+// `gh api` is handled separately (path-based repo extraction).
+// classifyGhSegment consults this set only after looking for an explicit
+// -R/--repo flag, so a listed subcommand that does carry one is still injected.
+const REPO_LESS_SUBCOMMANDS = new Set([
+  'auth',
+  'config',
+  'extension',
+  'alias',
+  'completion',
+  'gpg-key',
+  'ssh-key',
+  'status',
+  'org',
+  'gist',
+  'codespace',
+  'search',
+  'preview',
+  'accessibility',
+  'attestation',
+])
+/**
+ * Decides what to do with a bash command that may invoke `gh`.
+ *
+ * One GH_TOKEN is injected into the environment of the whole command, so it is
+ * only correct when every repo-targeting `gh` invocation resolves to the same
+ * owner. Every invocation is therefore classified, not just the first:
+ * - any invocation that classifies as blocked blocks the whole command;
+ * - no repo targets at all means pass-through;
+ * - targets under more than one owner block (MULTI_OWNER_REASON);
+ * - otherwise the command must also be a single bare `gh` command, because
+ *   any sibling process or shell expansion would inherit the token
+ *   (COMPOSITION_REASON).
+ *
+ * @param command Raw bash command string.
+ * @returns The verdict; for 'inject', `repoSlug` is the first collected target.
+ */
+export function analyzeGhCommand(command: string): GhCommandDecision {
+  const tokens = tokenize(command)
+  const starts = findGhInvocations(tokens)
+  if (starts.length === 0) return { kind: 'pass-through' }
+  const targets: string[] = []
+  for (const [position, start] of starts.entries()) {
+    // A segment runs from just after this `gh` up to the next `gh` (or the end).
+    const stop = starts[position + 1] ?? tokens.length
+    const verdict = classifyGhSegment(tokens.slice(start + 1, stop))
+    if (verdict.kind === 'block') return verdict
+    if (verdict.kind === 'inject') targets.push(...verdict.repoSlugs)
+  }
+  const first = targets[0]
+  if (first === undefined) return { kind: 'pass-through' }
+  const distinctOwners = new Set(targets.map((slug) => slug.split('/')[0]))
+  if (distinctOwners.size > 1) return { kind: 'block', reason: MULTI_OWNER_REASON }
+  if (!isSingleBareGhCommand(command)) return { kind: 'block', reason: COMPOSITION_REASON }
+  return { kind: 'inject', repoSlug: first }
+}
+/**
+ * Classifies the argument list of one `gh` invocation.
+ *
+ * The subcommand is the first argument that does not start with `-`; with no
+ * subcommand the segment passes through. `gh api` is dispatched BEFORE the
+ * generic -R extraction: for a literal `/repos/{owner}/{repo}` endpoint the
+ * request goes to the PATH repo, so trusting -R there would mint a token for
+ * one repo while the call hits another. For every other subcommand an explicit
+ * repo flag injects, a repo-less subcommand passes through, and anything else
+ * is blocked for lacking a repo.
+ */
+function classifyGhSegment(args: readonly string[]): GhSegmentDecision {
+  const subcommand = args.find((token) => !token.startsWith('-'))
+  if (subcommand === undefined) return { kind: 'pass-through' }
+  if (subcommand === 'api') return classifyGhApiSegment(args)
+  const flagged = extractRepoFlag(args)
+  if (flagged !== null) return { kind: 'inject', repoSlugs: [flagged] }
+  return REPO_LESS_SUBCOMMANDS.has(subcommand)
+    ? { kind: 'pass-through' }
+    : { kind: 'block', reason: MISSING_REPO_REASON }
+}
+/**
+ * Repo authority for `gh api`: the literal endpoint path wins.
+ *
+ * - Literal path repos present: a -R/--repo naming a repo NOT among them is a
+ *   mint-for-X-but-hit-Y attempt and blocks; otherwise the path repos are the
+ *   injection targets.
+ * - No literal path repo: -R is authoritative only when the endpoint uses the
+ *   `{owner}`/`{repo}` placeholders that the flag fills.
+ * - Anything else (`graphql`, `/user`, ...) passes through; -R does not make a
+ *   non-repo endpoint repo-scoped, so nothing is minted.
+ */
+function classifyGhApiSegment(args: readonly string[]): GhSegmentDecision {
+  const literalTargets = extractReposFromApiPath(args)
+  const flagged = extractRepoFlag(args)
+  if (literalTargets.length === 0) {
+    if (flagged === null || !apiEndpointHasOwnerRepoPlaceholder(args)) return { kind: 'pass-through' }
+    return { kind: 'inject', repoSlugs: [flagged] }
+  }
+  const flagDisagreesWithPath = flagged !== null && !literalTargets.includes(flagged)
+  if (flagDisagreesWithPath) return { kind: 'block', reason: API_REPO_CONFLICT_REASON }
+  return { kind: 'inject', repoSlugs: literalTargets }
+}
+/**
+ * Returns the token indices at which a `gh` invocation starts.
+ *
+ * A `gh` token counts only in command position: at the very start, or where
+ * isCommandBoundaryBefore says so (which also looks through leading `FOO=bar`
+ * env assignments). A `gh` appearing as an argument of another command is
+ * ignored.
+ */
+function findGhInvocations(tokens: readonly string[]): number[] {
+  const starts: number[] = []
+  tokens.forEach((token, index) => {
+    if (token !== 'gh') return
+    if (index === 0 || isCommandBoundaryBefore(tokens, index)) starts.push(index)
+  })
+  return starts
+}
+/**
+ * Reports whether the token at `index` sits in command position.
+ *
+ * Walks backwards from `index - 1`, skipping `NAME=value` env assignments.
+ * The token is in command position when the walk reaches the start of the
+ * list or a control operator. The background operator `&` is a boundary too:
+ * the tokenizer emits it as a standalone token, and `sleep 1 & gh pr list`
+ * starts a new command after it exactly like `;` does.
+ *
+ * @param tokens Tokenized command.
+ * @param index Position of the candidate command word.
+ * @returns True when nothing but env assignments separates the token from a
+ *   command boundary.
+ */
+function isCommandBoundaryBefore(tokens: readonly string[], index: number): boolean {
+  for (let cursor = index - 1; cursor >= 0; cursor--) {
+    const prev = tokens[cursor]
+    if (prev === undefined) return false
+    if (prev === '&&' || prev === '||' || prev === '|' || prev === ';' || prev === '&') return true
+    // Anything other than an env assignment means we are an argument.
+    if (!/^[A-Za-z_][A-Za-z0-9_]*=/.test(prev)) return false
+  }
+  return true
+}
+/**
+ * Returns the repo named by the EFFECTIVE `-R/--repo` flag, or null.
+ *
+ * Recognized spellings: `-R value`, `--repo value`, `--repo=value`,
+ * `-R=value` and the attached shorthand `-Rvalue`.
+ *
+ * When the flag is repeated the LAST occurrence decides, mirroring how the
+ * flag parser behind `gh` resolves repeated string flags. Returning the first
+ * valid value instead would let `-R allowed/x -R other/y` be classified as
+ * `allowed/x` while `gh` actually targets `other/y`. If the deciding
+ * occurrence is not a plain `owner/repo` slug (missing value, bare name,
+ * `host/owner/repo`), null is returned so the caller falls back to its
+ * no-explicit-repo handling rather than trusting an earlier value.
+ *
+ * The token consumed as a flag value is skipped, so it is never re-read as a
+ * flag itself.
+ */
+function extractRepoFlag(args: readonly string[]): string | null {
+  let effective: string | null = null
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i]
+    if (arg === undefined) continue
+    let value: string
+    if (arg === '-R' || arg === '--repo') {
+      value = args[i + 1] ?? ''
+      i += 1
+    } else if (arg.startsWith('--repo=')) {
+      value = arg.slice('--repo='.length)
+    } else if (arg.startsWith('-R=')) {
+      value = arg.slice('-R='.length)
+    } else if (arg.startsWith('-R')) {
+      // Attached shorthand: `-Rowner/repo`.
+      value = arg.slice('-R'.length)
+    } else {
+      continue
+    }
+    effective = isRepoSlug(value) ? value : null
+  }
+  return effective
+}
+// `gh api` flags that consume the FOLLOWING token as their value. The endpoint
+// is the first positional arg that is neither a flag nor a flag's value; only
+// THAT arg is parsed for owner/repo. Scanning every arg would let a
+// `-f q=/repos/a/b` field value or `--jq` expression masquerade as the target.
+//
+// Membership must match the real arity of each flag:
+// - `-i/--include` is a boolean switch and is deliberately NOT listed; treating
+//   it as value-taking would swallow the endpoint in `gh api -i repos/o/r`.
+// - `-p/--preview` takes a value and is listed so its argument is not mistaken
+//   for the endpoint.
+// - `-R/--repo` is listed because this module accepts it on `gh api`
+//   (see extractRepoFlag); without it, `gh api -R o/r repos/{owner}/{repo}`
+//   would report `o/r` as the endpoint.
+const GH_API_VALUE_FLAGS = new Set([
+  '-X',
+  '--method',
+  '-f',
+  '--raw-field',
+  '-F',
+  '--field',
+  '-H',
+  '--header',
+  '-q',
+  '--jq',
+  '-t',
+  '--template',
+  '--input',
+  '--cache',
+  '--hostname',
+  '-p',
+  '--preview',
+  '-R',
+  '--repo',
+])
+/**
+ * Locates the `gh api` endpoint: the first positional argument after `api`.
+ *
+ * Arguments starting with `-` are flags and are skipped. A bare flag listed in
+ * GH_API_VALUE_FLAGS also consumes the token after it; a `--flag=value`
+ * spelling carries its value inline and consumes nothing extra.
+ *
+ * @returns The endpoint token, or null when there is no `api` token or no
+ *   positional argument follows it.
+ */
+function findApiEndpoint(args: readonly string[]): string | null {
+  const apiIndex = args.indexOf('api')
+  if (apiIndex === -1) return null
+  let cursor = apiIndex + 1
+  while (cursor < args.length) {
+    const arg = args[cursor] as string
+    if (!arg.startsWith('-')) return arg
+    const consumesNext = !arg.includes('=') && GH_API_VALUE_FLAGS.has(arg)
+    cursor += consumesNext ? 2 : 1
+  }
+  return null
+}
+/**
+ * Returns every LITERAL repo the `gh api` endpoint path targets.
+ *
+ * Normally that is one repo (`/repos/{o}/{r}/…`), but a compare endpoint
+ * `/repos/{o}/{r}/compare/{base}...{owner}:{branch}` also reaches the
+ * cross-fork head repo `{owner}/{r}`, so both are returned. `{owner}`/`{repo}`
+ * placeholder segments are NOT literal targets and yield nothing here.
+ *
+ * A query string or fragment is removed before the path is split, so
+ * `repos/o/r?per_page=1` resolves to `o/r` rather than to a bogus repo name
+ * that still carries the query.
+ *
+ * @returns The literal `owner/repo` slugs (base first), or an empty array
+ *   when the endpoint is absent, not under `repos/`, templated, or malformed.
+ */
+function extractReposFromApiPath(args: readonly string[]): string[] {
+  const endpoint = findApiEndpoint(args)
+  if (endpoint === null) return []
+  // Only the path component identifies the repo.
+  const pathOnly = endpoint.split(/[?#]/, 1)[0] ?? ''
+  const normalized = pathOnly.startsWith('/') ? pathOnly.slice(1) : pathOnly
+  const segments = normalized.split('/')
+  if (segments[0] !== 'repos') return []
+  const owner = segments[1]
+  const name = segments[2]
+  if (owner === undefined || name === undefined) return []
+  // A `{owner}`/`{repo}` placeholder is not a literal target; -R fills it.
+  if (isPlaceholderSegment(owner) || isPlaceholderSegment(name)) return []
+  const baseSlug = `${owner}/${name}`
+  if (!isRepoSlug(baseSlug)) return []
+  const repos = [baseSlug]
+  // compare/{base}...{headOwner}:{headBranch} reaches headOwner's fork.
+  const compareIndex = segments.indexOf('compare', 3)
+  if (compareIndex !== -1) {
+    // Re-join: branch names in the spec may themselves contain `/`.
+    const spec = segments.slice(compareIndex + 1).join('/')
+    const head = spec.split('...')[1]
+    const headOwner = head?.includes(':') ? head.split(':')[0] : undefined
+    if (headOwner !== undefined && headOwner !== '' && headOwner !== owner) {
+      const headSlug = `${headOwner}/${name}`
+      if (isRepoSlug(headSlug)) repos.push(headSlug)
+    }
+  }
+  return repos
+}
+/**
+ * Reports whether the `gh api` endpoint contains gh's `{owner}` or `{repo}`
+ * template placeholder. `-R/--repo` fills those at runtime, so for such an
+ * endpoint the flag is the authoritative target rather than a conflicting
+ * literal. Returns false when no endpoint is present.
+ */
+function apiEndpointHasOwnerRepoPlaceholder(args: readonly string[]): boolean {
+  const endpoint = findApiEndpoint(args)
+  return endpoint !== null && ['{owner}', '{repo}'].some((placeholder) => endpoint.includes(placeholder))
+}
+/** True when `value` is exactly two non-empty parts separated by one `/`. */
+function isRepoSlug(value: string): boolean {
+  const parts = value.split('/')
+  return parts.length === 2 && parts.every((part) => part !== '')
+}
+/** True when a path segment contains a `{` or `}` template brace. */
+function isPlaceholderSegment(segment: string): boolean {
+  return /[{}]/.test(segment)
+}
+/**
+ * Splits a command into words and standalone control-operator tokens.
+ *
+ * Splits on whitespace AND shell control operators (; | & && ||) so a boundary
+ * like `true; gh ...` (no surrounding spaces) yields a standalone operator
+ * token. Quote-aware: operators inside quotes are literal, and the quote
+ * characters themselves are dropped from the token.
+ *
+ * An unquoted newline or carriage return ends a command just like `;`, so it
+ * is emitted as a `;` token; treating it as plain whitespace would glue a
+ * `gh` on the next line onto the previous command's arguments. A backslash
+ * outside single quotes escapes the next character (so `\"` is a literal
+ * quote, not a quote opener); backslash-newline is a line continuation and
+ * produces nothing. Inside double quotes a backslash only escapes `"`, `\`,
+ * `$` and backtick, and is otherwise kept literally.
+ *
+ * This is a command-position detector, not a full shell parser — it does not
+ * interpret redirections, subshells, or backgrounding semantics beyond
+ * boundary marking.
+ *
+ * @param command Raw bash command string.
+ * @returns Words and operator tokens in source order.
+ */
+function tokenize(command: string): string[] {
+  const tokens: string[] = []
+  let current = ''
+  let quote: '"' | "'" | null = null
+  let hasContent = false
+  const flush = (): void => {
+    if (hasContent) {
+      tokens.push(current)
+      current = ''
+      hasContent = false
+    }
+  }
+  for (let i = 0; i < command.length; i++) {
+    const ch = command[i]
+    if (ch === undefined) continue
+    const next = command[i + 1]
+    if (quote === "'") {
+      // Single quotes: everything up to the closing quote is literal.
+      if (ch === "'") quote = null
+      else current += ch
+      continue
+    }
+    if (ch === '\\') {
+      if (next === undefined) {
+        // Trailing backslash with nothing to escape: keep it.
+        current += ch
+        hasContent = true
+        continue
+      }
+      if (next === '\n') {
+        // Line continuation: both characters vanish.
+        i += 1
+        continue
+      }
+      if (quote === '"' && !'"\\$`'.includes(next)) {
+        // Not an escape inside double quotes; the backslash is literal.
+        current += ch
+        continue
+      }
+      current += next
+      hasContent = true
+      i += 1
+      continue
+    }
+    if (quote === '"') {
+      if (ch === '"') quote = null
+      else current += ch
+      continue
+    }
+    if (ch === '"' || ch === "'") {
+      quote = ch
+      // An empty quoted string is still a word.
+      hasContent = true
+      continue
+    }
+    if (ch === ' ' || ch === '\t') {
+      flush()
+      continue
+    }
+    if (ch === '\n' || ch === '\r') {
+      // A line break separates commands the same way `;` does.
+      flush()
+      tokens.push(';')
+      continue
+    }
+    if (ch === ';' || ch === '|' || ch === '&') {
+      flush()
+      if ((ch === '|' && next === '|') || (ch === '&' && next === '&')) {
+        tokens.push(ch + ch)
+        i += 1
+      } else {
+        tokens.push(ch)
+      }
+      continue
+    }
+    current += ch
+    hasContent = true
+  }
+  flush()
+  return tokens
+}

package/src/bundled-plugins/github-cli-auth/index.ts ADDED Viewed

@@ -0,0 +1,42 @@
+import { TYPECLAW_INTERNAL_BASH_ENV } from '@/agent/plugin-tools'
+import { definePlugin } from '@/plugin'
+import { analyzeGhCommand } from './gh-command'
+import { classifyGhToken } from './token-class'
+// Bundled plugin: intercepts `bash` tool calls that run `gh` and, when the
+// ambient GH_TOKEN is not one that already reaches every owner, either blocks
+// the call or attaches a per-repo token through the internal env overlay.
+export default definePlugin({
+  plugin: async (ctx) => {
+    const { resolveTokenForRepo } = ctx.github
+    return {
+      hooks: {
+        'tool.before': async (event) => {
+          if (event.tool !== 'bash') return
+          const { command } = event.args
+          if (typeof command !== 'string') return
+          // Cheap pre-filter before the real analysis.
+          if (!command.includes('gh')) return
+          const decision = analyzeGhCommand(command)
+          if (decision.kind === 'pass-through') return
+          const tokenClass = classifyGhToken(process.env.GH_TOKEN)
+          // A classic PAT reaches every owner: nothing to inject, nothing to enforce.
+          if (tokenClass === 'cross-owner') return
+          if (decision.kind === 'block') return { block: true, reason: decision.reason }
+          // A fine-grained PAT is single-owner and cannot be re-minted per repo.
+          // The seeded GH_TOKEN stays in place so `gh` fails honestly when the
+          // named repo belongs to a different owner.
+          if (tokenClass === 'fine-grained-pat') return
+          const minted = await resolveTokenForRepo(decision.repoSlug)
+          if (minted.kind === 'unavailable') return { block: true, reason: minted.reason }
+          // Deliver the token through the internal env overlay (handed to the
+          // spawn / bwrap --setenv by the bash wrapper) so it never appears in
+          // the command string, where logs or later hooks could leak it.
+          event.args[TYPECLAW_INTERNAL_BASH_ENV] = { GH_TOKEN: minted.token }
+          return
+        },
+      },
+    }
+  },
+})

package/src/bundled-plugins/github-cli-auth/token-class.ts ADDED Viewed

@@ -0,0 +1,11 @@
+// How far a GH_TOKEN value reaches, judged by its prefix alone.
+export type GhTokenClass = 'cross-owner' | 'fine-grained-pat' | 'app' | 'none'
+/**
+ * Classifies a token by its prefix.
+ *
+ * - missing or empty → 'none'
+ * - `ghp_` → 'cross-owner'
+ * - `github_pat_` → 'fine-grained-pat'
+ * - everything else, `ghs_` included → 'app'. Unknown/legacy formats are
+ *   treated as App so a repo-targeting call still resolves a per-repo token
+ *   rather than silently using a possibly-wrong global one.
+ */
+export function classifyGhToken(token: string | undefined): GhTokenClass {
+  if (!token) return 'none'
+  if (token.startsWith('ghp_')) return 'cross-owner'
+  return token.startsWith('github_pat_') ? 'fine-grained-pat' : 'app'
+}

package/src/bundled-plugins/reviewer/skills/code-review.ts CHANGED Viewed

@@ -35,11 +35,12 @@ Prioritize in this order:
 1. **Correctness.** Does the change do what its description claims? Off-by-one errors, missing null/undefined handling, race conditions, incorrect error propagation, broken invariants.
 2. **Security.** Injection vectors (SQL, shell, HTML), missing authz/authn checks, secret leakage in logs or error messages, unsafe deserialization, SSRF, path traversal, time-of-check-time-of-use. Cite OWASP / CWE / RFC by number when relevant; verify with \`websearch\` or \`webfetch\` before asserting.
 3. **Architecture fit.** Does the change respect existing layering? Does it introduce a new dependency where the existing pattern would have worked? Does it duplicate logic that already exists elsewhere in the repo?
-4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason.
+4. **Test coverage.** New behavior should have new tests. Edge cases the description names should be tested. If existing tests were deleted or skipped, that is a blocker absent a stated reason. Look past the raw test count, but only flag a redundant case when you can show the *inputs themselves* reach the same path — same branch, same validation rule, same boundary — not merely that the assertion shape is identical. Table-driven and parametrized tests legitimately share one assertion across many inputs while each input exercises a distinct branch, parser, or edge case; that is coverage, not duplication. The finding is "these inputs are indistinguishable to the code under test," and you must name the path they collapse onto — never "the assertions look the same."
 5. **Error handling.** Empty catch blocks, swallowed errors, errors converted to silent fallbacks, retry loops without bounded backoff, missing timeouts on external calls.
 6. **Performance.** Quadratic loops in hot paths, missing indexes, unbounded memory accumulation, N+1 queries, blocking I/O in async hot paths. Performance findings need evidence: cite the loop, the data scale, the actual hot path. "Could be slow" without evidence is not a finding.
 7. **API surface.** Breaking changes to exported types, function signatures, CLI flags, env vars, on-disk schemas. Are they documented? Versioned? Migration noted in CHANGELOG / release notes?
 8. **Naming.** Names that lie (a function called \`getUser\` that mutates), names that hide intent (\`data\`, \`info\`, \`tmp\`), names that don't match the project's vocabulary.
+9. **Change hygiene.** Temporary scaffolding that escaped into the change: \`wip\`/\`fixup!\`/\`squash!\` commits left in the history, debug logging, commented-out code, leftover \`TODO\` markers for work the PR claims to finish. When you flag a stray commit, name the commit it should fold into so the author can squash it — don't just say "this looks temporary".
 ## What NOT to find
@@ -47,6 +48,8 @@ Prioritize in this order:
 - **Settled convention objections.** If the project uses tabs, four-space indent, camelCase vs snake_case, etc., and the change matches, that is not a finding. Only the deviation is.
 - **Generic best-practice essays.** "Consider adding more tests" without naming a specific untested branch is noise. "Improve error handling" without pointing at a specific swallowed error is noise.
 - **Restating the code.** "This function reads the file and returns its contents" is not a finding.
+- **Restating the change description.** Summarizing what the PR does back to its author — "this PR adds caching to the user lookup" — is not a review. They wrote the description; they know.
+- **Already-acknowledged gaps.** A weakness the author already flagged with a \`TODO\`/\`FIXME\` in the diff, or named in the PR body as out of scope, is not a finding — they're already aware. Only raise it if you have new information: the gap is worse than they think, or it's a blocker they've mislabeled as deferrable. Say which.
 ## Severity hints specific to code
@@ -61,6 +64,20 @@ Prioritize in this order:
 - **request-changes** — At least one blocker, OR a load-bearing concern that needs an answer before this lands.
 - **comment** — Mixed signal: useful observations without a clear approve/reject. Common on large refactors where you reviewed part of the change, or on early-draft PRs where the author asked for direction more than approval.
+## Line-anchor every finding
+Code review is line-level work, and your findings are meant to land as **inline comments on the exact lines they describe**. The parent agent posts them that way — it reads the \`location\` on each \`<finding>\` and attaches your \`<issue>\`/\`<evidence>\`/\`<suggestion>\` to that line. A finding with no line anchor cannot be posted inline; the parent can only fold it into a top-level summary, which strips the one thing that made it actionable.
+So:
+- **Anchor every code finding to \`path:line\`** (or \`path:start-end\` for a span). Use the file's real line number at the revision you reviewed — for a PR, the line in the diff's new (\`RIGHT\`) side, or the old (\`LEFT\`) side when you're flagging a removed line. Cite the path exactly as the diff/repo spells it.
+- **Do not collapse multiple lines into one vague anchor.** One finding, one location. If the same defect recurs at three call sites, that is three findings (or one finding whose \`location\` names the canonical site and whose \`<issue>\` lists the others) — not a single "see throughout" comment.
+- **Reserve \`location="general"\` for findings that genuinely have no single line:** a missing file, an absent test, an architecture concern that spans the whole change. State *why* it can't be anchored in the \`<issue>\` so the parent knows to route it to the summary, not to a line.
+- **State the blast radius.** A line anchor says *where* the defect is; it doesn't say *how far it reaches*. When the effect isn't obvious from the line itself, add one sentence on what the bug touches — which callers break, which inputs trigger it, what data gets corrupted. This is what tells the author whether your \`concern\` is actually a \`blocker\`, and it's the difference between a finding they can triage and one they have to re-investigate.
+- **Pin the evidence when you cite code outside the diff.** A finding often rests on code the change doesn't touch — a caller that will break, an invariant defined elsewhere. The anchor points at the diff; the *evidence* lives in that other file. Cite it as \`path:line\` at the revision you read, and when the review target is a PR, prefer a permalink to the exact commit (\`gh\` exposes the head SHA; a \`blob/<sha>/path#Lline\` URL survives later edits) so the parent — and the author — land on the same line you did, not whatever that file looks like next week.
+You never post the comments yourself (you are read-only). Your job is to hand the parent findings precise enough to post without guessing where they go.
 ## Final output
 Return findings inside the reviewer's neutral \`<review>\` block. Do NOT invent your own output format. The parent agent parses the structured shape.

package/src/bundled-plugins/security/policies/secret-exfil-bash.ts CHANGED Viewed

@@ -49,10 +49,17 @@ const DANGEROUS_COMMAND_PATTERNS: ReadonlyArray<{ pattern: RegExp; label: string
   // `set -e` / `set -euo pipefail`) and require the posix-mode opt-in.
   { pattern: /set\s+-o\s+posix[\s\S]{0,40}(?:^|[\s;|&(`])set(?:[\s;|&)`]|$)/m, label: 'set -o posix; set (env dump)' },
   {
-    pattern: /(cat|less|more|head|tail|bat|xxd|od|hexdump|strings)\s+[^\n;|&`]*\.env(\s|$|[;|&`])/,
+    // jq/yq read+emit arbitrary files just like cat (e.g. `jq . .env`,
+    // `yq '.x' .env`) and both ship in the container baseline, so they are
+    // first-class .env exfil vectors and must be gated here, not just the
+    // pager/dumper family.
+    pattern: /(cat|less|more|head|tail|bat|xxd|od|hexdump|strings|jq|yq)\s+[^\n;|&`]*\.env(\s|$|[;|&`])/,
     label: 'reading .env file',
   },
-  { pattern: /(cat|less|more|head|tail|bat)\s+[^\n;|&`]*\.envrc(\s|$|[;|&`])/, label: 'reading .envrc file' },
+  {
+    pattern: /(cat|less|more|head|tail|bat|jq|yq)\s+[^\n;|&`]*\.envrc(\s|$|[;|&`])/,
+    label: 'reading .envrc file',
+  },
   { pattern: /\.ssh\/(id_[a-z0-9]+|authorized_keys|known_hosts|config)/i, label: '~/.ssh/* private material' },
   {
     pattern: /(cat|less|more|head|tail|ls|find|grep|rg|bat)\s+[^\n;|&`]*~?\/?\.ssh(\/|\s|$|[;|&`])/,