agent-gov-core 0.4.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +101 -0
- package/README.md +53 -5
- package/dist/action.d.ts +30 -0
- package/dist/action.js +98 -0
- package/dist/exceptions.d.ts +83 -0
- package/dist/exceptions.js +129 -0
- package/dist/finding.js +7 -4
- package/dist/index.d.ts +12 -2
- package/dist/index.js +7 -2
- package/dist/jsonc.js +2 -1
- package/dist/locators.d.ts +3 -1
- package/dist/locators.js +66 -34
- package/dist/mcp.js +61 -2
- package/dist/merge.d.ts +91 -0
- package/dist/merge.js +154 -0
- package/dist/parse-error.d.ts +54 -0
- package/dist/parse-error.js +85 -0
- package/dist/report.d.ts +85 -0
- package/dist/report.js +156 -0
- package/dist/secrets.d.ts +67 -0
- package/dist/secrets.js +81 -0
- package/dist/shell.d.ts +26 -0
- package/dist/shell.js +210 -1
- package/dist/toml.js +28 -7
- package/package.json +3 -2
- package/schemas/report.schema.json +55 -0
package/dist/jsonc.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { toConfigParseError } from './parse-error.js';
|
|
2
3
|
/**
|
|
3
4
|
* Strip `//` line comments, `/* ... *\/` block comments, and trailing commas from JSONC,
|
|
4
5
|
* preserving byte offsets (replacement is space-filled, newlines preserved) so downstream
|
|
@@ -113,7 +114,7 @@ export function readJsonObjectWithSource(path) {
|
|
|
113
114
|
return { value: parsed, json: parsed, text };
|
|
114
115
|
}
|
|
115
116
|
catch (err) {
|
|
116
|
-
return { value: undefined, json: undefined, text, parseError: err };
|
|
117
|
+
return { value: undefined, json: undefined, text, parseError: toConfigParseError(text, err) };
|
|
117
118
|
}
|
|
118
119
|
}
|
|
119
120
|
//# sourceMappingURL=jsonc.js.map
|
package/dist/locators.d.ts
CHANGED
|
@@ -28,7 +28,9 @@ export declare function lineOfJsonKey(text: string, key: string, scope?: ByteRan
|
|
|
28
28
|
* The value is JSON-encoded before matching so values containing backslashes
|
|
29
29
|
* (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
|
|
30
30
|
* located correctly. The scan ignores JSONC comments so a commented-out
|
|
31
|
-
* matching value does not shadow the real one.
|
|
31
|
+
* matching value does not shadow the real one. The negative lookahead skips
|
|
32
|
+
* occurrences in key position (`"command":`) so a value matching a key name
|
|
33
|
+
* elsewhere in the document doesn't return the key's line.
|
|
32
34
|
*/
|
|
33
35
|
export declare function lineOfJsonStringValue(text: string, value: string, scope?: ByteRange): number;
|
|
34
36
|
/**
|
package/dist/locators.js
CHANGED
|
@@ -26,11 +26,13 @@ export function lineOfJsonKey(text, key, scope) {
|
|
|
26
26
|
* The value is JSON-encoded before matching so values containing backslashes
|
|
27
27
|
* (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
|
|
28
28
|
* located correctly. The scan ignores JSONC comments so a commented-out
|
|
29
|
-
* matching value does not shadow the real one.
|
|
29
|
+
* matching value does not shadow the real one. The negative lookahead skips
|
|
30
|
+
* occurrences in key position (`"command":`) so a value matching a key name
|
|
31
|
+
* elsewhere in the document doesn't return the key's line.
|
|
30
32
|
*/
|
|
31
33
|
export function lineOfJsonStringValue(text, value, scope) {
|
|
32
34
|
const encoded = jsonEncodeForRegex(value);
|
|
33
|
-
return findLineByRegex(text, new RegExp(`"${encoded}"`), scope);
|
|
35
|
+
return findLineByRegex(text, new RegExp(`"${encoded}"(?!\\s*:)`), scope);
|
|
34
36
|
}
|
|
35
37
|
/**
|
|
36
38
|
* Convert a string to the form it would appear in JSON source bytes, then
|
|
@@ -56,14 +58,9 @@ export function lineOfTomlKey(text, dottedKey, scope) {
|
|
|
56
58
|
const parts = splitTomlDottedKey(dottedKey);
|
|
57
59
|
if (parts.length === 0)
|
|
58
60
|
return 0;
|
|
59
|
-
const leaf = parts[parts.length - 1];
|
|
60
|
-
const prefix = parts.slice(0, -1);
|
|
61
61
|
const lines = text.split(/\r?\n/);
|
|
62
62
|
const inScope = scopeLineFilter(text, scope);
|
|
63
|
-
// Find header range we're inside of.
|
|
64
|
-
let inTargetTable = prefix.length === 0;
|
|
65
63
|
let currentTable = [];
|
|
66
|
-
const targetHeader = prefix.join('.');
|
|
67
64
|
// Track multi-line basic (`"""`) and literal (`'''`) string state. A leaf-key
|
|
68
65
|
// pattern can otherwise match against decoy text inside a multi-line string
|
|
69
66
|
// value — see lineOfTomlKey regression tests.
|
|
@@ -81,40 +78,85 @@ export function lineOfTomlKey(text, dottedKey, scope) {
|
|
|
81
78
|
const headerMatch = /^\[\[?\s*([^\]]+?)\s*\]\]?\s*(#.*)?$/.exec(trimmed);
|
|
82
79
|
if (headerMatch) {
|
|
83
80
|
currentTable = splitTomlDottedKey(headerMatch[1]);
|
|
84
|
-
inTargetTable = currentTable.join('.') === targetHeader;
|
|
85
81
|
continue;
|
|
86
82
|
}
|
|
87
|
-
if (!inTargetTable)
|
|
88
|
-
continue;
|
|
89
83
|
if (trimmed === '' || trimmed.startsWith('#'))
|
|
90
84
|
continue;
|
|
91
85
|
if (!inScope(lineNumber))
|
|
92
86
|
continue;
|
|
93
|
-
//
|
|
94
|
-
|
|
95
|
-
|
|
87
|
+
// Generalized dotted-key matching: if the current table is a strict
|
|
88
|
+
// prefix of (or equal to) the target dotted key, try matching the
|
|
89
|
+
// REMAINING dotted segments on this line. Covers all three cases:
|
|
90
|
+
// - Top-level (`a.b.c = 1` at root): currentTable=[] → match `a.b.c`
|
|
91
|
+
// - Inside a parent (`[a]\nb.c = 1`): currentTable=['a'] → match `b.c`
|
|
92
|
+
// - Inside the exact table (`[a.b]\nc = 1`): currentTable=['a','b'] → match `c`
|
|
93
|
+
const tableIsPrefix = currentTable.length <= parts.length &&
|
|
94
|
+
currentTable.every((seg, idx) => seg === parts[idx]);
|
|
95
|
+
if (!tableIsPrefix)
|
|
96
|
+
continue;
|
|
97
|
+
const remaining = parts.slice(currentTable.length);
|
|
98
|
+
if (remaining.length === 0)
|
|
99
|
+
continue;
|
|
100
|
+
// Remaining-as-dotted-key match (covers any depth ≥ 1). Build the
|
|
101
|
+
// regex from individual segments joined by `\s*\.\s*` so spaced dotted
|
|
102
|
+
// keys (`a . b . c = 1` — valid TOML) match as well as compact ones.
|
|
103
|
+
const segmentsPattern = remaining.map(escapeForRegex).join('\\s*\\.\\s*');
|
|
104
|
+
const dottedPattern = new RegExp(`^\\s*${segmentsPattern}\\s*=`);
|
|
105
|
+
if (dottedPattern.test(raw))
|
|
96
106
|
return lineNumber;
|
|
97
|
-
//
|
|
98
|
-
if (
|
|
99
|
-
const
|
|
100
|
-
|
|
107
|
+
// If remaining is exactly the leaf, also try the quoted-leaf forms
|
|
108
|
+
if (remaining.length === 1) {
|
|
109
|
+
const leafKey = remaining[0];
|
|
110
|
+
const leafPattern = new RegExp(`^\\s*(?:${escapeForRegex(leafKey)}|"${escapeForRegex(leafKey)}"|'${escapeForRegex(leafKey)}')\\s*(?:\\.|=)`);
|
|
111
|
+
if (leafPattern.test(raw))
|
|
101
112
|
return lineNumber;
|
|
102
113
|
}
|
|
103
114
|
}
|
|
104
115
|
return 0;
|
|
105
116
|
}
|
|
106
117
|
/**
|
|
107
|
-
* Walk a line and update multi-line string state.
|
|
108
|
-
*
|
|
109
|
-
*
|
|
110
|
-
*
|
|
118
|
+
* Walk a line and update multi-line string state.
|
|
119
|
+
*
|
|
120
|
+
* Inside a basic multi-line string (`"""…"""`), a backslash escapes the next
|
|
121
|
+
* character — so `\"""` is a literal `"""` inside the value, NOT the string's
|
|
122
|
+
* closing delimiter. The walker must skip the next character after each `\`
|
|
123
|
+
* or it'll terminate the string state early and start matching key patterns
|
|
124
|
+
* against text that's still inside the value.
|
|
125
|
+
*
|
|
126
|
+
* Literal multi-line strings (`'''…'''`) do not process escapes per TOML spec,
|
|
127
|
+
* so backslash is inert there.
|
|
111
128
|
*/
|
|
112
129
|
function updateMultilineStringState(line, current) {
|
|
113
130
|
let state = current;
|
|
114
131
|
let pos = 0;
|
|
115
|
-
while (pos
|
|
116
|
-
|
|
117
|
-
|
|
132
|
+
while (pos < line.length) {
|
|
133
|
+
if (state === '"""') {
|
|
134
|
+
// Inside a basic multi-line string — honor backslash escapes
|
|
135
|
+
if (line[pos] === '\\') {
|
|
136
|
+
pos += 2; // skip the backslash AND the next character
|
|
137
|
+
continue;
|
|
138
|
+
}
|
|
139
|
+
if (pos <= line.length - 3 && line.substr(pos, 3) === '"""') {
|
|
140
|
+
state = null;
|
|
141
|
+
pos += 3;
|
|
142
|
+
continue;
|
|
143
|
+
}
|
|
144
|
+
pos++;
|
|
145
|
+
continue;
|
|
146
|
+
}
|
|
147
|
+
if (state === "'''") {
|
|
148
|
+
// Literal multi-line — no escapes per spec
|
|
149
|
+
if (pos <= line.length - 3 && line.substr(pos, 3) === "'''") {
|
|
150
|
+
state = null;
|
|
151
|
+
pos += 3;
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
pos++;
|
|
155
|
+
continue;
|
|
156
|
+
}
|
|
157
|
+
// state === null
|
|
158
|
+
if (pos <= line.length - 3) {
|
|
159
|
+
const window = line.substr(pos, 3);
|
|
118
160
|
if (window === '"""') {
|
|
119
161
|
state = '"""';
|
|
120
162
|
pos += 3;
|
|
@@ -126,16 +168,6 @@ function updateMultilineStringState(line, current) {
|
|
|
126
168
|
continue;
|
|
127
169
|
}
|
|
128
170
|
}
|
|
129
|
-
else if (state === '"""' && window === '"""') {
|
|
130
|
-
state = null;
|
|
131
|
-
pos += 3;
|
|
132
|
-
continue;
|
|
133
|
-
}
|
|
134
|
-
else if (state === "'''" && window === "'''") {
|
|
135
|
-
state = null;
|
|
136
|
-
pos += 3;
|
|
137
|
-
continue;
|
|
138
|
-
}
|
|
139
171
|
pos++;
|
|
140
172
|
}
|
|
141
173
|
return state;
|
package/dist/mcp.js
CHANGED
|
@@ -41,13 +41,47 @@ export function normalizeMcpCommand(spec) {
|
|
|
41
41
|
}
|
|
42
42
|
return parts.join('\n');
|
|
43
43
|
}
|
|
44
|
-
/**
|
|
44
|
+
/**
 * Normalize an executable reference into a stable identity string.
 *
 * Steps, in order:
 *  1. Trim and convert `\` separators to `/`.
 *  2. Strip a trailing `.cmd`/`.exe`/`.bat`/`.ps1` suffix (case-insensitive).
 *  3. If the input was Windows-shaped (used `\` or carried one of those
 *     suffixes), lowercase it — Windows lookup is case-insensitive, so
 *     `NPX.CMD`, `npx.cmd` and `npx` must yield the same identity. POSIX-shaped
 *     input keeps its case because `./curl` and `./CURL` are different files.
 *  4. If the result is an ABSOLUTE path whose basename is a known runtime
 *     (see {@link KNOWN_RUNTIMES}), collapse it to the basename so
 *     `/usr/bin/node` and `node` share an identity.
 *
 * Relative paths that contain a separator (`./node`, `../bin/python3`,
 * `node_modules/.bin/tsx`) are deliberately NOT collapsed: they resolve
 * against the working directory, not PATH, so they are identical on every
 * machine (no noise to remove) and may point at a repo-supplied binary that
 * merely shares a runtime's name. Folding them would make swapping `node` for
 * `./node` invisible to identity comparison.
 *
 * @param cmd Raw executable string from an MCP server spec.
 * @returns The normalized identity string.
 */
function normalizeExecutable(cmd) {
    const trimmed = cmd.trim();
    const base = trimmed.replace(/\\/g, '/');
    const withoutSuffix = base.replace(/\.(cmd|exe|bat|ps1)$/i, '');
    // The suffix regex can only shorten the string, so a length change means
    // a Windows executable suffix was present.
    const hadWindowsSuffix = withoutSuffix.length !== base.length;
    // Windows-shaped if the original used `\` separators or had a Windows
    // executable suffix. In either case, case-fold for cross-machine identity.
    const isWindowsShaped = hadWindowsSuffix || trimmed.includes('\\');
    const cased = isWindowsShaped ? withoutSuffix.toLowerCase() : withoutSuffix;
    const lastSlash = cased.lastIndexOf('/');
    // Bare command name: nothing to fold.
    if (lastSlash === -1) {
        return cased;
    }
    // Only absolute locations (`/…`, `//server/…`, `C:/…`) vary across machines.
    const isAbsolute = /^(?:\/|[a-z]:\/)/i.test(cased);
    const basename = cased.slice(lastSlash + 1);
    if (isAbsolute && KNOWN_RUNTIMES.has(basename.toLowerCase())) {
        // `basename` is already lowercase when the input was Windows-shaped
        // because it is derived from `cased`.
        return basename;
    }
    return cased;
}
/**
 * Common runtime executables whose absolute-path location varies across
 * machines (PATH lookup resolves them) but whose identity for MCP-config
 * purposes is the runtime name itself. Entries are lowercase; lookups
 * lowercase the basename before testing membership.
 */
const KNOWN_RUNTIMES = new Set([
    'node', 'npx', 'npm', 'pnpm', 'yarn',
    'python', 'python3', 'pip', 'pip3', 'pipx', 'uvx', 'uv',
    'ruby', 'gem', 'bundle',
    'perl', 'cpan',
    'bash', 'sh', 'zsh', 'fish', 'powershell', 'pwsh',
    'deno', 'bun', 'tsx', 'ts-node',
]);
|
|
51
85
|
/**
 * Normalize a filesystem path for identity comparison: trim whitespace,
 * convert `\` separators to `/`, and drop trailing slashes.
 *
 * A path that consists only of slashes stays `/`, and a drive root such as
 * `C:\` stays `C:/`, so a root directory is never conflated with an empty
 * string or with the drive-relative form `C:`.
 *
 * @param p Raw path string.
 * @returns The normalized path.
 */
function normalizePath(p) {
    const slashed = p.trim().replace(/\\/g, '/');
    // Manual scan instead of `/\/+$/`: linear even on long runs of slashes.
    let end = slashed.length;
    while (end > 0 && slashed[end - 1] === '/') {
        end--;
    }
    const stripped = slashed.slice(0, end);
    if (end === slashed.length) {
        return stripped; // nothing was stripped
    }
    if (stripped === '') {
        return '/'; // input was a root: keep it distinguishable from ''
    }
    if (/^[A-Za-z]:$/.test(stripped)) {
        return `${stripped}/`; // drive root, not the drive-relative `C:`
    }
    return stripped;
}
|
|
@@ -59,6 +93,22 @@ function normalizePath(p) {
|
|
|
59
93
|
* (npx, uvx, pipx, node).
|
|
60
94
|
*/
|
|
61
95
|
const NEUTRAL_BOOLEAN_FLAGS = new Set(['-y', '--yes']);
|
|
96
|
+
/**
 * Flags treated as boolean (no value follows) when canonicalizing args.
 * Listed so `canonicalizeArgs` doesn't greedily pair them with the next
 * positional argument, which would conflate `--verbose pkg` with
 * `--verbose=pkg`. Unlike NEUTRAL_BOOLEAN_FLAGS these stay in the canonical
 * form — they're load-bearing (different identity vs. their absence) but
 * standalone.
 *
 * Conservative by design: a flag belongs here only if it essentially never
 * takes a value. The short flags `-v`, `-h` and `-d` are intentionally
 * excluded because they routinely DO take a value in commands that appear in
 * MCP configs (`docker run -v src:dst`, `docker run -h hostname`,
 * `psql -h host -d dbname`, `curl -d data`). Treating such a flag as boolean
 * detaches its value into the positional list.
 * NOTE(review): assuming `canonicalizeArgs` sorts flag pairs and keeps
 * positionals in order (as its comments describe), `-h a -d b` and
 * `-d a -h b` would then canonicalize identically — confirm against the
 * full function.
 */
const KNOWN_BOOLEAN_FLAGS = new Set([
    '-V', '-q',
    '--verbose', '--quiet', '--silent', '--debug', '--help', '--version',
    '--force', '--dry-run', '--no-cache', '--no-color', '--no-progress', '--json',
]);
|
|
62
112
|
/**
|
|
63
113
|
* Sort *neutral* flag/value pairs so reordering doesn't change identity, but
|
|
64
114
|
* preserve the order of positional arguments (which are usually load-bearing —
|
|
@@ -87,6 +137,15 @@ function canonicalizeArgs(args) {
|
|
|
87
137
|
flagPairs.push([a.slice(0, eq), a.slice(eq + 1)]);
|
|
88
138
|
continue;
|
|
89
139
|
}
|
|
140
|
+
// Known-boolean flags never consume the next argument, so `--verbose pkg`
|
|
141
|
+
// leaves `pkg` as a positional rather than collapsing into a fake pair.
|
|
142
|
+
// Without this guard, reordering ['--host', 'localhost', '--verbose', 'pkg']
|
|
143
|
+
// vs ['--verbose', '--host', 'localhost', 'pkg'] produced different
|
|
144
|
+
// canonical strings because `--verbose` greedily ate the next non-flag.
|
|
145
|
+
if (KNOWN_BOOLEAN_FLAGS.has(a)) {
|
|
146
|
+
flagPairs.push([a, null]);
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
90
149
|
const next = filtered[i + 1];
|
|
91
150
|
if (next !== undefined && !next.startsWith('-')) {
|
|
92
151
|
flagPairs.push([a, next]);
|
package/dist/merge.d.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { type Finding, type Severity, type ToolKind } from './finding.js';
|
|
2
|
+
export interface MergeOptions {
|
|
3
|
+
/**
|
|
4
|
+
* Lower bound for findings included in the merged output. Anything below
|
|
5
|
+
* this severity is dropped from `findings` (but still counted in
|
|
6
|
+
* `droppedBelowThreshold`). Defaults to `'low'` (include everything).
|
|
7
|
+
*/
|
|
8
|
+
threshold?: Severity;
|
|
9
|
+
/**
|
|
10
|
+
* When two reports contribute findings with the same fingerprint, the
|
|
11
|
+
* default keeps the one with the higher severity. Set this to `'first'`
|
|
12
|
+
* to keep the first report's finding instead. Default: `'highest_severity'`.
|
|
13
|
+
*/
|
|
14
|
+
duplicatePolicy?: 'highest_severity' | 'first';
|
|
15
|
+
}
|
|
16
|
+
export interface MergeSource {
|
|
17
|
+
tool: ToolKind;
|
|
18
|
+
toolVersion?: string;
|
|
19
|
+
/** Conversation ID declared by this source, if any. */
|
|
20
|
+
conversationId?: string;
|
|
21
|
+
/** Number of findings in this source report (BEFORE dedup or threshold filtering). */
|
|
22
|
+
findingCount: number;
|
|
23
|
+
/** Aggregate rating reported by the source. */
|
|
24
|
+
rating: 'none' | Severity;
|
|
25
|
+
}
|
|
26
|
+
export interface InvalidReport {
|
|
27
|
+
/** Index into the input `reports` array. */
|
|
28
|
+
index: number;
|
|
29
|
+
/** Tool name from the malformed report, if recoverable. */
|
|
30
|
+
tool?: ToolKind;
|
|
31
|
+
errors: string[];
|
|
32
|
+
}
|
|
33
|
+
export interface InvalidFinding {
|
|
34
|
+
/** Originating tool's report index. */
|
|
35
|
+
reportIndex: number;
|
|
36
|
+
/** Index of the finding within that report's `findings` array. */
|
|
37
|
+
findingIndex: number;
|
|
38
|
+
/** Tool name from the report. */
|
|
39
|
+
tool: ToolKind;
|
|
40
|
+
errors: string[];
|
|
41
|
+
}
|
|
42
|
+
export interface MergedReport {
|
|
43
|
+
schemaVersion: '1.0';
|
|
44
|
+
/** Per-tool provenance for the reports that fed into this merge. */
|
|
45
|
+
sources: MergeSource[];
|
|
46
|
+
/** Aggregate rating across all surviving findings. */
|
|
47
|
+
rating: 'none' | Severity;
|
|
48
|
+
/**
|
|
49
|
+
* Conversation ID shared by all valid source reports — set iff every source
|
|
50
|
+
* declared the same `conversationId`. When sources disagree (or some lack the
|
|
51
|
+
* field), this is omitted so a meta-reviewer can detect cross-conversation
|
|
52
|
+
* mixing.
|
|
53
|
+
*/
|
|
54
|
+
conversationId?: string;
|
|
55
|
+
/** Deduped findings, sorted by severity (highest first). */
|
|
56
|
+
findings: Finding[];
|
|
57
|
+
/** Count of findings dropped because their severity was below `threshold`. */
|
|
58
|
+
droppedBelowThreshold: number;
|
|
59
|
+
/** Count of duplicate findings collapsed into another finding with the same fingerprint (one per extra occurrence, not per pair). */
|
|
60
|
+
duplicateCollapsed: number;
|
|
61
|
+
/** Reports rejected by envelope validation. */
|
|
62
|
+
invalidReports: InvalidReport[];
|
|
63
|
+
/** Individual findings rejected by finding validation. */
|
|
64
|
+
invalidFindings: InvalidFinding[];
|
|
65
|
+
/** Severity counts across the surviving findings. */
|
|
66
|
+
severityCounts: Record<Severity, number>;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Merge N reports from different tools into one normalized report. Validates
|
|
70
|
+
* each input report and each finding, deduplicates by fingerprint, applies an
|
|
71
|
+
* optional severity threshold, and rolls up the aggregate rating.
|
|
72
|
+
*
|
|
73
|
+
* Invalid reports / findings are NOT silently dropped — they're collected in
|
|
74
|
+
* `invalidReports` and `invalidFindings` so a meta-reviewer can surface them
|
|
75
|
+
* to the user instead of letting bad data disappear.
|
|
76
|
+
*
|
|
77
|
+
* @example
|
|
78
|
+
* import { readFileSync } from 'node:fs';
|
|
79
|
+
* import { mergeFindings } from 'agent-gov-core';
|
|
80
|
+
*
|
|
81
|
+
* const reports = [
|
|
82
|
+
* JSON.parse(readFileSync('scopetrail-report.json', 'utf8')),
|
|
83
|
+
* JSON.parse(readFileSync('policymesh-report.json', 'utf8')),
|
|
84
|
+
* JSON.parse(readFileSync('capabilityecho-report.json', 'utf8')),
|
|
85
|
+
* ];
|
|
86
|
+
* const merged = mergeFindings(reports, { threshold: 'medium' });
|
|
87
|
+
* console.log(`Merged rating: ${merged.rating}`);
|
|
88
|
+
* console.log(`${merged.findings.length} unique findings across ${merged.sources.length} tools`);
|
|
89
|
+
*/
|
|
90
|
+
export declare function mergeFindings(reports: readonly unknown[], opts?: MergeOptions): MergedReport;
|
|
91
|
+
//# sourceMappingURL=merge.d.ts.map
|
package/dist/merge.js
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { SEVERITIES, TOOL_KINDS, isToolKind, validateFinding } from './finding.js';
|
|
2
|
+
import { REPORT_SCHEMA_VERSION, maxSeverity } from './report.js';
|
|
3
|
+
import { rankSeverity } from './action.js';
|
|
4
|
+
/**
 * Merge N reports from different tools into one normalized report.
 *
 * Processing order for each input report:
 *  1. Envelope check only (`validateReportEnvelope`). A structurally broken
 *     report is recorded in `invalidReports` and skipped wholesale.
 *  2. Each finding is validated individually (`validateFinding`). Bad findings
 *     go to `invalidFindings`; the rest of the report is still merged.
 *  3. Findings ranked below `opts.threshold` are counted in
 *     `droppedBelowThreshold` and excluded.
 *  4. Remaining findings are deduplicated by merge key (fingerprint, or a
 *     structural fallback). Each extra occurrence increments
 *     `duplicateCollapsed`; `opts.duplicatePolicy` decides which copy survives.
 *
 * Invalid reports / findings are NOT silently dropped — they are surfaced so
 * a meta-reviewer can show them to the user.
 *
 * @param reports Parsed report objects (untrusted; typically `JSON.parse` output).
 * @param opts Optional `threshold` (default `'low'`) and `duplicatePolicy`
 *   (default `'highest_severity'`; `'first'` keeps the first occurrence).
 * @returns The merged report, with findings sorted highest severity first.
 *
 * @example
 * import { readFileSync } from 'node:fs';
 * import { mergeFindings } from 'agent-gov-core';
 *
 * const reports = [
 *   JSON.parse(readFileSync('scopetrail-report.json', 'utf8')),
 *   JSON.parse(readFileSync('policymesh-report.json', 'utf8')),
 *   JSON.parse(readFileSync('capabilityecho-report.json', 'utf8')),
 * ];
 * const merged = mergeFindings(reports, { threshold: 'medium' });
 * console.log(`Merged rating: ${merged.rating}`);
 * console.log(`${merged.findings.length} unique findings across ${merged.sources.length} tools`);
 */
export function mergeFindings(reports, opts = {}) {
    const threshold = opts.threshold ?? 'low';
    const duplicatePolicy = opts.duplicatePolicy ?? 'highest_severity';
    const thresholdRank = rankSeverity(threshold);
    const sources = [];
    const invalidReports = [];
    const invalidFindings = [];
    // merge key → finding chosen so far
    const dedupe = new Map();
    let droppedBelowThreshold = 0;
    let duplicateCollapsed = 0;
    for (let i = 0; i < reports.length; i++) {
        const candidate = reports[i];
        // Structural envelope check — does NOT recurse into individual findings,
        // so one malformed finding cannot poison the rest of its report.
        const envelope = validateReportEnvelope(candidate);
        if (!envelope.ok) {
            invalidReports.push({ index: i, tool: candidateTool(candidate), errors: envelope.errors });
            continue;
        }
        const report = candidate;
        sources.push(describeMergeSource(report));
        for (let j = 0; j < report.findings.length; j++) {
            const finding = report.findings[j];
            const findingCheck = validateFinding(finding);
            if (!findingCheck.ok) {
                invalidFindings.push({
                    reportIndex: i,
                    findingIndex: j,
                    tool: report.tool,
                    errors: findingCheck.errors,
                });
                continue;
            }
            if (rankSeverity(finding.severity) < thresholdRank) {
                droppedBelowThreshold++;
                continue;
            }
            const key = findingMergeKey(finding);
            const existing = dedupe.get(key);
            if (existing === undefined) {
                dedupe.set(key, finding);
                continue;
            }
            duplicateCollapsed++;
            // 'first' policy keeps `existing`; 'highest_severity' replaces it only
            // on a strictly higher rank, so ties also keep the first occurrence.
            if (duplicatePolicy === 'highest_severity'
                && rankSeverity(finding.severity) > rankSeverity(existing.severity)) {
                dedupe.set(key, finding);
            }
        }
    }
    const findings = Array.from(dedupe.values()).sort((a, b) => rankSeverity(b.severity) - rankSeverity(a.severity));
    const severityCounts = { low: 0, medium: 0, high: 0, critical: 0 };
    for (const f of findings)
        severityCounts[f.severity]++;
    const merged = {
        schemaVersion: '1.0',
        sources,
        rating: maxSeverity(findings),
        findings,
        droppedBelowThreshold,
        duplicateCollapsed,
        invalidReports,
        invalidFindings,
        severityCounts,
    };
    const sharedId = sharedConversationId(sources);
    if (sharedId !== undefined)
        merged.conversationId = sharedId;
    return merged;
}
/** Build the provenance record for one envelope-valid report, copying optional fields only when present. */
function describeMergeSource(report) {
    const source = {
        tool: report.tool,
        findingCount: report.findings.length,
        rating: report.rating,
    };
    if (report.toolVersion !== undefined)
        source.toolVersion = report.toolVersion;
    if (report.conversationId !== undefined)
        source.conversationId = report.conversationId;
    return source;
}
/**
 * Dedupe key for a finding: its fingerprint when that is a non-empty string,
 * otherwise a structural identity built from kind, location and salientKey.
 * An empty fingerprint is treated as missing — `??` alone would accept `''`
 * and collapse every such finding into a single entry. Per the original
 * implementation's note, fingerprints should always be populated by
 * `createFinding` as of v0.5.0, so the fallback is a safety net.
 */
function findingMergeKey(finding) {
    const { fingerprint } = finding;
    if (typeof fingerprint === 'string' && fingerprint !== '') {
        return fingerprint;
    }
    return `${finding.kind}|${finding.location?.file ?? ''}|${finding.location?.line ?? ''}|${finding.salientKey ?? ''}`;
}
/**
 * Conversation ID shared by every source, or `undefined` when there are no
 * sources, any source lacks one, or sources disagree — silently unifying
 * cross-conversation reports would hide a meta-reviewer misuse.
 */
function sharedConversationId(sources) {
    if (sources.length === 0)
        return undefined;
    const first = sources[0].conversationId;
    if (first === undefined)
        return undefined;
    return sources.every((s) => s.conversationId === first) ? first : undefined;
}
|
|
120
|
+
/**
 * Best-effort recovery of the `tool` field from a report that failed envelope
 * validation, so the resulting `invalidReports` entry can still name its
 * origin.
 *
 * Uses the shared `isToolKind` guard — the same check the envelope validator
 * applies — rather than a separately maintained list of tool names, so the
 * two cannot drift apart when a tool kind is added.
 *
 * @param value The rejected report candidate (any JSON value).
 * @returns The tool kind when `value` is an object carrying a recognized
 *   `tool`, otherwise `undefined`.
 */
function candidateTool(value) {
    if (value === null || typeof value !== 'object')
        return undefined;
    const tool = value.tool;
    return isToolKind(tool) ? tool : undefined;
}
|
|
128
|
+
/**
 * Envelope-only structural check. Unlike `validateReport`, this does NOT
 * recurse into individual findings — mergeFindings validates those one by one
 * so a single bad finding doesn't poison the rest of the report.
 *
 * Checks: the value is a plain (non-array) object; `schemaVersion` equals
 * `REPORT_SCHEMA_VERSION`; `tool` passes `isToolKind`; `rating` is `'none'`
 * or one of `SEVERITIES`; `findings` is an array; and the optional
 * `toolVersion` / `conversationId` fields are strings when present.
 * mergeFindings copies those two fields verbatim into its output, so a
 * non-string value must be rejected here rather than leak into the merged
 * report.
 *
 * @param value Candidate report (untrusted).
 * @returns `{ ok, errors }` — `ok` is true iff `errors` is empty.
 */
function validateReportEnvelope(value) {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return { ok: false, errors: ['report must be a plain object'] };
    }
    const errors = [];
    const v = value;
    if (v.schemaVersion !== REPORT_SCHEMA_VERSION) {
        errors.push(`schemaVersion must be '${REPORT_SCHEMA_VERSION}'`);
    }
    if (!isToolKind(v.tool)) {
        errors.push(`tool must be one of: ${TOOL_KINDS.join(', ')}`);
    }
    const ratingValues = new Set(['none', ...SEVERITIES]);
    if (typeof v.rating !== 'string' || !ratingValues.has(v.rating)) {
        errors.push(`rating must be one of: none, ${SEVERITIES.join(', ')}`);
    }
    if (!Array.isArray(v.findings)) {
        errors.push('findings must be an array');
    }
    // Optional provenance fields: absent is fine, wrong type is not.
    for (const field of ['toolVersion', 'conversationId']) {
        if (v[field] !== undefined && typeof v[field] !== 'string') {
            errors.push(`${field} must be a string when present`);
        }
    }
    return { ok: errors.length === 0, errors };
}
|
|
154
|
+
//# sourceMappingURL=merge.js.map
|
|
package/dist/parse-error.d.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured config-file parse error. Carries the 1-based line and column of
|
|
3
|
+
* the failure so consumers can emit a `*.config_syntax_error` Finding pointing
|
|
4
|
+
* at the exact spot without recomputing line numbers from the raw offset.
|
|
5
|
+
*
|
|
6
|
+
* Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
|
|
7
|
+
* {@link readTomlObject} populate the `parseError` field of their result with
|
|
8
|
+
* this type whenever they can resolve a byte offset from the underlying parser.
|
|
9
|
+
* When the underlying error lacks position info, the original `Error` is
|
|
10
|
+
* preserved unchanged.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* import { readTomlObject, ConfigParseError } from 'agent-gov-core';
|
|
14
|
+
* const { parseError } = readTomlObject('.codex/config.toml');
|
|
15
|
+
* if (parseError instanceof ConfigParseError) {
|
|
16
|
+
* emitFinding({
|
|
17
|
+
* kind: 'policy_mesh.config_syntax_error',
|
|
18
|
+
* location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
|
|
19
|
+
* message: parseError.message,
|
|
20
|
+
* });
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
export declare class ConfigParseError extends Error {
|
|
24
|
+
readonly line: number;
|
|
25
|
+
readonly column: number;
|
|
26
|
+
readonly rawOffset: number;
|
|
27
|
+
constructor(message: string, opts: {
|
|
28
|
+
line: number;
|
|
29
|
+
column: number;
|
|
30
|
+
rawOffset: number;
|
|
31
|
+
cause?: Error;
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
/** Convert a 0-based string index (UTF-16 code units, not bytes) to a 1-based line and column. */
|
|
35
|
+
export declare function lineColumnOfOffset(text: string, offset: number): {
|
|
36
|
+
line: number;
|
|
37
|
+
column: number;
|
|
38
|
+
};
|
|
39
|
+
/**
|
|
40
|
+
* Extract a byte offset from a parser error message. Both this library's TOML
|
|
41
|
+
* parser ("at offset N") and Node's `JSON.parse` ("at position N", or a
|
|
42
|
+
* `position` property on newer runtimes) use compatible-enough formats that
|
|
43
|
+
* one helper handles both.
|
|
44
|
+
*
|
|
45
|
+
* Returns `null` when no offset can be recovered — most semantic errors
|
|
46
|
+
* (duplicate-key, table redefinition) don't include one.
|
|
47
|
+
*/
|
|
48
|
+
export declare function extractParseOffset(err: Error): number | null;
|
|
49
|
+
/**
|
|
50
|
+
* Wrap an arbitrary parser error into a {@link ConfigParseError} when offset
|
|
51
|
+
* recovery is possible; otherwise return the original error unchanged.
|
|
52
|
+
*/
|
|
53
|
+
export declare function toConfigParseError(text: string, err: Error): Error;
|
|
54
|
+
//# sourceMappingURL=parse-error.d.ts.map
|
|
package/dist/parse-error.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured config-file parse error. Carries the 1-based line and column of
|
|
3
|
+
* the failure so consumers can emit a `*.config_syntax_error` Finding pointing
|
|
4
|
+
* at the exact spot without recomputing line numbers from the raw offset.
|
|
5
|
+
*
|
|
6
|
+
* Thrown nowhere directly — instead, {@link readJsonObjectWithSource} and
|
|
7
|
+
* {@link readTomlObject} populate the `parseError` field of their result with
|
|
8
|
+
* this type whenever they can resolve a byte offset from the underlying parser.
|
|
9
|
+
* When the underlying error lacks position info, the original `Error` is
|
|
10
|
+
* preserved unchanged.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* import { readTomlObject, ConfigParseError } from 'agent-gov-core';
|
|
14
|
+
* const { parseError } = readTomlObject('.codex/config.toml');
|
|
15
|
+
* if (parseError instanceof ConfigParseError) {
|
|
16
|
+
* emitFinding({
|
|
17
|
+
* kind: 'policy_mesh.config_syntax_error',
|
|
18
|
+
* location: { file: '.codex/config.toml', line: parseError.line, column: parseError.column },
|
|
19
|
+
* message: parseError.message,
|
|
20
|
+
* });
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
export class ConfigParseError extends Error {
|
|
24
|
+
line;
|
|
25
|
+
column;
|
|
26
|
+
rawOffset;
|
|
27
|
+
constructor(message, opts) {
|
|
28
|
+
super(message);
|
|
29
|
+
this.name = 'ConfigParseError';
|
|
30
|
+
this.line = opts.line;
|
|
31
|
+
this.column = opts.column;
|
|
32
|
+
this.rawOffset = opts.rawOffset;
|
|
33
|
+
if (opts.cause) {
|
|
34
|
+
// Node 16.9+ supports the `cause` option on Error; some runtimes don't.
|
|
35
|
+
this.cause = opts.cause;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
/**
 * Translate a 0-based index into `text` into a 1-based `{ line, column }`.
 *
 * The offset is clamped into `[0, text.length]`. Only `\n` starts a new line;
 * every other character (including `\r`) advances the column by one.
 *
 * @param text Full source text the offset refers to.
 * @param offset 0-based index into `text`.
 * @returns 1-based line and column of that index.
 */
export function lineColumnOfOffset(text, offset) {
    const clamped = Math.max(0, Math.min(offset, text.length));
    // Number of characters in front of the position: the smallest integer
    // >= clamped, and 0 when the offset is not a number at all.
    const consumed = Number.isNaN(clamped) ? 0 : Math.ceil(clamped);
    let line = 1;
    let lineStart = 0;
    // Hop from newline to newline instead of visiting every character.
    for (let nl = text.indexOf('\n'); nl !== -1 && nl < consumed; nl = text.indexOf('\n', nl + 1)) {
        line += 1;
        lineStart = nl + 1;
    }
    return { line, column: consumed - lineStart + 1 };
}
|
|
55
|
+
/**
 * Recover a character offset from a parser error.
 *
 * Two sources are tried, in order:
 *  1. The message text, matching "at offset N" or "at position N"
 *     (case-insensitive).
 *  2. A numeric `position` property on the error object, accepted only when
 *     it is a non-negative integer.
 *
 * Because callers pass whatever a `catch` clause received, the argument is
 * treated as untrusted: `null`, `undefined` and non-object values yield
 * `null` instead of throwing, and a non-string `message` is ignored.
 *
 * @param err The caught value, normally an `Error`.
 * @returns The offset, or `null` when none can be recovered — most semantic
 *   errors (duplicate key, table redefinition) don't include one.
 */
export function extractParseOffset(err) {
    if (err === null || (typeof err !== 'object' && typeof err !== 'function')) {
        return null;
    }
    const message = typeof err.message === 'string' ? err.message : '';
    const m = /at (?:offset|position)\s+(\d+)/i.exec(message);
    if (m)
        return Number.parseInt(m[1], 10);
    // Fallback for runtimes that attach the position as a property.
    const maybePos = err.position;
    if (typeof maybePos === 'number' && Number.isInteger(maybePos) && maybePos >= 0)
        return maybePos;
    return null;
}
|
|
74
|
+
/**
 * Upgrade a parser error to a {@link ConfigParseError} when its offset can be
 * recovered via `extractParseOffset`.
 *
 * @param text The source text that was being parsed; used to turn the offset
 *   into a line and column.
 * @param err The error raised by the parser.
 * @returns A new `ConfigParseError` carrying the same message, the computed
 *   line/column, the raw offset and `err` as its cause — or `err` itself,
 *   untouched, when no offset is available.
 */
export function toConfigParseError(text, err) {
    const rawOffset = extractParseOffset(err);
    if (rawOffset !== null) {
        const position = lineColumnOfOffset(text, rawOffset);
        return new ConfigParseError(err.message, {
            line: position.line,
            column: position.column,
            rawOffset,
            cause: err,
        });
    }
    return err;
}
|
|
85
|
+
//# sourceMappingURL=parse-error.js.map
|