agent-gov-core 0.4.1 → 0.4.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
Binary file
package/README.md CHANGED
@@ -81,14 +81,14 @@ The JSON schema at [`schemas/finding.schema.json`](./schemas/finding.schema.json
81
81
  - `lineOfTomlKey(text, dottedKey, scope?)` — 1-based line of a TOML key, optionally scoped to a byte range. Use scope to disambiguate `[[array]]`-of-tables entries that share the same leaf key.
82
82
 
83
83
  ### MCP command normalization
84
- - `normalizeMcpCommand({ command, args, url, serverUrl, env, cwd })` — canonical identity string for an MCP server entry. Drops neutral flags (`-y`, `--yes`), resolves npx/uvx invocations, includes env+cwd in the identity. Used to dedupe `mcp_command_mismatch` false positives when servers are equivalent but syntactically different (`npx -y foo@1.2.3` vs `npx foo@1.2.3`).
84
+ - `normalizeMcpCommand({ command, args, url, env, cwd })` — canonical identity string for an MCP server entry. Drops neutral confirm flags (`-y`, `--yes`), strips Windows executable suffixes (`.cmd`, `.exe`, `.bat`, `.ps1`), sorts non-neutral flags alphabetically, preserves positional argument order, and includes env + cwd in the identity. Used to dedupe `mcp_command_mismatch` false positives when servers are equivalent but syntactically different (`npx -y foo@1.2.3` vs `npx foo@1.2.3`). Does not interpret what npx/uvx invocations resolve to at runtime — that's outside the substrate's scope.
85
85
 
86
86
  ### Shell tokenization
87
87
  - `tokenizeShell(command)` — quote-aware split on `;`, `|`, `&&`, `||` plus trivial obfuscation neutralization (`c""url` → `curl`, `c\\url` → `curl`)
88
88
  - `getCommandHead(subcommand)` — extract the leading verb after tokenization
89
89
 
90
90
  ### GitHub Action helpers
91
- - `rankSeverity(s)` — numeric rank `none=0 critical=4`
91
+ - `rankSeverity(s)` — numeric rank `low=1, medium=2, high=3, critical=4` (matches the schema's closed severity enum; there is no `none`)
92
92
  - `passesSeverityThreshold(s, threshold)`, `anyAtOrAbove(findings, threshold)` — fail-on plumbing
93
93
  - `emitFindingAnnotation(f)` — render a Finding as a `::warning file=…,line=…,title=…::…` GitHub workflow annotation
94
94
 
@@ -11,12 +11,24 @@ export interface ByteRange {
11
11
  /** Exclusive end offset. */
12
12
  end: number;
13
13
  }
14
- /** 1-based line number for the first occurrence of `"key"` followed by `:`. */
14
+ /**
15
+ * 1-based line number for the first occurrence of `"key"` followed by `:`.
16
+ *
17
+ * The key is JSON-encoded before matching so keys containing backslashes or
18
+ * quotes (rare but legal) are located in the source bytes. The scan ignores
19
+ * lines inside JSONC `//` and `/* *\/` comments so a commented-out `"key":`
20
+ * does not shadow the real one.
21
+ */
15
22
  export declare function lineOfJsonKey(text: string, key: string, scope?: ByteRange): number;
16
23
  /**
17
24
  * 1-based line number for the first JSON string value equal to `value`.
18
25
  * If `scope` is supplied (a byte range), only matches inside that range count —
19
26
  * this is the fix for the multi-server-ambiguity bug.
27
+ *
28
+ * The value is JSON-encoded before matching so values containing backslashes
29
+ * (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
30
+ * located correctly. The scan ignores JSONC comments so a commented-out
31
+ * matching value does not shadow the real one.
20
32
  */
21
33
  export declare function lineOfJsonStringValue(text: string, value: string, scope?: ByteRange): number;
22
34
  /**
package/dist/locators.js CHANGED
@@ -5,19 +5,41 @@
5
5
  * All returned line numbers are 1-based. `0` is reserved for "not found"; callers
6
6
  * generally treat that as "fall back to file-level annotation".
7
7
  */
8
- /** 1-based line number for the first occurrence of `"key"` followed by `:`. */
8
+ import { stripJsonComments } from './jsonc.js';
9
+ /**
10
+ * 1-based line number for the first occurrence of `"key"` followed by `:`.
11
+ *
12
+ * The key is JSON-encoded before matching so keys containing backslashes or
13
+ * quotes (rare but legal) are located in the source bytes. The scan ignores
14
+ * lines inside JSONC `//` and `/* *\/` comments so a commented-out `"key":`
15
+ * does not shadow the real one.
16
+ */
9
17
  export function lineOfJsonKey(text, key, scope) {
10
- const needle = `"${escapeForRegex(key)}"\\s*:`;
11
- return findLineByRegex(text, new RegExp(needle), scope);
18
+ const encoded = jsonEncodeForRegex(key);
19
+ return findLineByRegex(text, new RegExp(`"${encoded}"\\s*:`), scope);
12
20
  }
13
21
  /**
14
22
  * 1-based line number for the first JSON string value equal to `value`.
15
23
  * If `scope` is supplied (a byte range), only matches inside that range count —
16
24
  * this is the fix for the multi-server-ambiguity bug.
25
+ *
26
+ * The value is JSON-encoded before matching so values containing backslashes
27
+ * (e.g. Windows paths like `C:\Temp` written as `"C:\\Temp"` in JSON) are
28
+ * located correctly. The scan ignores JSONC comments so a commented-out
29
+ * matching value does not shadow the real one.
17
30
  */
18
31
  export function lineOfJsonStringValue(text, value, scope) {
19
- const needle = `"${escapeForRegex(value)}"`;
20
- return findLineByRegex(text, new RegExp(needle), scope);
32
+ const encoded = jsonEncodeForRegex(value);
33
+ return findLineByRegex(text, new RegExp(`"${encoded}"`), scope);
34
+ }
35
+ /**
36
+ * Convert a string to the form it would appear in JSON source bytes, then
37
+ * regex-escape. `JSON.stringify('C:\\Temp')` yields `'"C:\\\\Temp"'` — slice
38
+ * off the surrounding quotes to get the inner byte sequence.
39
+ */
40
+ function jsonEncodeForRegex(input) {
41
+ const jsonBody = JSON.stringify(input).slice(1, -1);
42
+ return escapeForRegex(jsonBody);
21
43
  }
22
44
  /**
23
45
  * 1-based line number for a TOML key. Supports dotted keys (`a.b.c`) — the
@@ -79,7 +101,12 @@ function scopeLineFilter(text, scope) {
79
101
  return (line) => line >= startLine && line <= endLine;
80
102
  }
81
103
  function findLineByRegex(text, regex, scope) {
82
- const haystack = scope ? text.slice(scope.start, scope.end) : text;
104
+ // stripJsonComments is position-preserving: it replaces comment bytes with
105
+ // spaces while leaving newlines intact. Offsets in the stripped text map
106
+ // 1:1 to offsets in the original text, so line numbers stay correct, but
107
+ // commented-out keys/values no longer match.
108
+ const searchable = stripJsonComments(text);
109
+ const haystack = scope ? searchable.slice(scope.start, scope.end) : searchable;
83
110
  const m = regex.exec(haystack);
84
111
  if (!m)
85
112
  return 0;
package/dist/shell.js CHANGED
@@ -127,15 +127,43 @@ export function getCommandHead(subcommand) {
127
127
  break;
128
128
  s = s.slice(m[0].length);
129
129
  }
130
- // Strip leading sudo / env wrappers
131
- const wrapperMatch = /^(sudo|nohup|env|exec|command|builtin)\s+(.*)$/.exec(s);
130
+ // Strip leading sudo / env wrappers, then also strip any wrapper flags
131
+ // (`sudo -E`, `env -i`) and embedded env vars (`env FOO=1 BAZ=qux curl`)
132
+ // before recursing. Without this, `sudo -E curl` would return `-E`.
133
+ const wrapperMatch = /^(sudo|nohup|env|exec|command|builtin|stdbuf|nice|ionice|setsid)\s+(.*)$/.exec(s);
132
134
  if (wrapperMatch) {
133
- return getCommandHead(wrapperMatch[2]);
135
+ return getCommandHead(stripWrapperPrefixes(wrapperMatch[2]));
134
136
  }
135
137
  // Now extract first token, honoring quoting and obfuscation neutralization.
136
138
  const head = readFirstToken(s);
137
139
  return deobfuscate(head);
138
140
  }
141
+ /**
142
+ * Consume any leading flags (`-x`, `--xxx`, `--xxx=value`) and env var
143
+ * assignments (`FOO=bar`) so the next recursion finds the real command. We
144
+ * intentionally do NOT consume a non-flag token after a short flag (so
145
+ * `sudo -u user curl` still misclassifies as `user` — a known edge case
146
+ * that we accept rather than maintain a per-wrapper flag database).
147
+ */
148
+ function stripWrapperPrefixes(input) {
149
+ let s = input.trimStart();
150
+ while (s.length > 0) {
151
+ if (s.startsWith('-')) {
152
+ const flagMatch = /^\S+\s*/.exec(s);
153
+ if (!flagMatch)
154
+ break;
155
+ s = s.slice(flagMatch[0].length);
156
+ continue;
157
+ }
158
+ const envMatch = /^([A-Za-z_][A-Za-z0-9_]*)=([^\s'"]*|"[^"]*"|'[^']*')\s+/.exec(s);
159
+ if (envMatch) {
160
+ s = s.slice(envMatch[0].length);
161
+ continue;
162
+ }
163
+ break;
164
+ }
165
+ return s;
166
+ }
139
167
  function readFirstToken(s) {
140
168
  let out = '';
141
169
  let i = 0;
package/dist/toml.js CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-gov-core",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "description": "Shared primitives for the AI-agent governance suite: Finding schema, JSONC/TOML readers, line locators, MCP command normalization, shell tokenization, and GitHub Action helpers.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",