@clear-capabilities/agentic-security-scanner 0.78.0 → 0.80.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/.agentic-security/findings.json +16 -16
- package/bin/.agentic-security/last-scan.json +16 -16
- package/bin/.agentic-security/last-scan.json.sig +1 -1
- package/bin/.agentic-security/scan-history.json +51 -0
- package/bin/.agentic-security/streak.json +5 -5
- package/bin/agentic-security.js +22 -7
- package/dist/178.index.js +1 -1
- package/dist/333.index.js +283 -0
- package/dist/384.index.js +1 -1
- package/dist/476.index.js +5 -5
- package/dist/637.index.js +1 -1
- package/dist/700.index.js +138 -0
- package/dist/718.index.js +53 -0
- package/dist/838.index.js +1 -1
- package/dist/985.index.js +95 -1
- package/dist/agentic-security.mjs +83 -83
- package/dist/agentic-security.mjs.sha256 +1 -1
- package/package.json +6 -4
- package/src/.agentic-security/findings.json +29799 -7803
- package/src/.agentic-security/last-scan.json +29799 -7803
- package/src/.agentic-security/last-scan.json.sig +1 -1
- package/src/.agentic-security/scan-history.json +5119 -2611
- package/src/.agentic-security/streak.json +6 -6
- package/src/dataflow/.agentic-security/findings.json +2879 -308
- package/src/dataflow/.agentic-security/last-scan.json +2879 -308
- package/src/dataflow/.agentic-security/last-scan.json.sig +1 -1
- package/src/dataflow/.agentic-security/scan-history.json +68 -520
- package/src/dataflow/.agentic-security/streak.json +6 -7
- package/src/dataflow/cross-service-taint.js +201 -0
- package/src/dataflow/engine.js +52 -8
- package/src/dataflow/formal-verify.js +204 -0
- package/src/dataflow/ifds-precise.js +222 -0
- package/src/dataflow/k2-summary-cache.js +153 -0
- package/src/dataflow/lib-taint-summaries.js +198 -0
- package/src/dataflow/privacy-taint.js +205 -0
- package/src/dataflow/smt-feasibility.js +189 -0
- package/src/engine.js +890 -132
- package/src/integrations/index.js +2 -1
- package/src/ir/.agentic-security/findings.json +240 -6
- package/src/ir/.agentic-security/last-scan.json +240 -6
- package/src/ir/.agentic-security/last-scan.json.sig +1 -1
- package/src/ir/.agentic-security/scan-history.json +16 -594
- package/src/ir/.agentic-security/streak.json +8 -9
- package/src/ir/callgraph.js +27 -7
- package/src/ir/cpp-preprocessor.js +142 -0
- package/src/ir/csharp-ir.js +604 -0
- package/src/ir/universal-ir.js +403 -0
- package/src/llm-validator/index.js +7 -5
- package/src/mcp/.agentic-security/findings.json +8632 -0
- package/src/mcp/.agentic-security/last-scan.json +8632 -0
- package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
- package/src/mcp/.agentic-security/scan-history.json +143 -0
- package/src/mcp/.agentic-security/streak.json +20 -0
- package/src/mcp/audit.js +5 -0
- package/src/mcp/tools.js +90 -1
- package/src/posture/.agentic-security/findings.json +16809 -4367
- package/src/posture/.agentic-security/last-scan.json +16809 -4367
- package/src/posture/.agentic-security/last-scan.json.sig +1 -1
- package/src/posture/.agentic-security/scan-history.json +6689 -177
- package/src/posture/.agentic-security/streak.json +8 -7
- package/src/posture/api-contract.js +193 -0
- package/src/posture/attack-taxonomy.js +227 -0
- package/src/posture/calibration-drift.js +2 -1
- package/src/posture/calibration.js +3 -2
- package/src/posture/compliance-policy.js +218 -0
- package/src/posture/composite-risk.js +122 -0
- package/src/posture/csharp-analysis.js +330 -0
- package/src/posture/exploit-bundle.js +210 -0
- package/src/posture/federated-learning.js +172 -0
- package/src/posture/fix-history.js +8 -2
- package/src/posture/license-attributions.js +94 -0
- package/src/posture/license-graph.js +238 -0
- package/src/posture/pqc-migration-plan.js +158 -0
- package/src/posture/profile.js +4 -5
- package/src/posture/reachability-filter.js +33 -2
- package/src/posture/realtime-cve-monitor.js +214 -0
- package/src/posture/rule-overrides.js +2 -3
- package/src/posture/rule-pack-signing.js +2 -3
- package/src/posture/rule-synthesis.js +5 -6
- package/src/posture/runtime-correlation.js +174 -0
- package/src/posture/sbom-diff.js +171 -0
- package/src/posture/sca-policy.js +235 -0
- package/src/posture/sca-upgrade.js +259 -0
- package/src/posture/security-trend.js +4 -7
- package/src/posture/state-dir.js +124 -0
- package/src/posture/streak.js +3 -0
- package/src/posture/suppressions.js +5 -8
- package/src/posture/threat-model-auto.js +268 -0
- package/src/posture/triage-learning.js +170 -0
- package/src/posture/triage.js +29 -6
- package/src/posture/validator-metrics.js +3 -6
- package/src/sast/.agentic-security/findings.json +996 -32
- package/src/sast/.agentic-security/last-scan.json +996 -32
- package/src/sast/.agentic-security/last-scan.json.sig +1 -1
- package/src/sast/.agentic-security/scan-history.json +565 -32
- package/src/sast/.agentic-security/streak.json +10 -8
- package/src/sast/_secret-entropy.js +145 -0
- package/src/sast/cloud-iam.js +312 -0
- package/src/sast/cpp.js +138 -4
- package/src/sast/crypto-protocol.js +388 -0
- package/src/sast/csharp-tokenizer.js +392 -0
- package/src/sast/csharp.js +924 -138
- package/src/sast/dapp-frontend.js +200 -0
- package/src/sast/db-taint.js +24 -0
- package/src/sast/k8s-admission.js +271 -0
- package/src/sast/llm-app.js +272 -0
- package/src/sast/ml-supply-chain.js +259 -0
- package/src/sast/mobile.js +224 -0
- package/src/sast/post-quantum-crypto.js +348 -0
- package/src/sast/rust.js +26 -0
- package/src/sast/web3-advanced.js +375 -0
- package/src/sca/.agentic-security/findings.json +6044 -171
- package/src/sca/.agentic-security/last-scan.json +6044 -171
- package/src/sca/.agentic-security/last-scan.json.sig +1 -1
- package/src/sca/.agentic-security/scan-history.json +83 -6
- package/src/sca/.agentic-security/streak.json +9 -9
- package/src/sca/CLAUDE.md +161 -0
- package/src/sca/binary-metadata.js +146 -0
- package/src/sca/py-package-functions.js +118 -0
- package/src/sca/sigstore-verify.js +215 -0
- package/src/sca/vendor-detect.js +53 -0
- package/src/report/.agentic-security/findings.json +0 -80
- package/src/report/.agentic-security/last-scan.json +0 -80
- package/src/report/.agentic-security/last-scan.json.sig +0 -1
- package/src/report/.agentic-security/scan-history.json +0 -35
- package/src/report/.agentic-security/streak.json +0 -22
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
// IFDS-precise extensions — Recommendation #2 of the world-class roadmap.
|
|
2
|
+
//
|
|
3
|
+
// The existing scanner/src/dataflow/ifds.js implements the core IFDS
|
|
4
|
+
// worklist algorithm with k=1 summarized return-taint. This module adds
|
|
5
|
+
// the three world-class pieces still missing:
|
|
6
|
+
//
|
|
7
|
+
// 1. Per-call-site summary REFINEMENT — instead of "this function
|
|
8
|
+
// returns tainted unconditionally," cache "returns tainted under
|
|
9
|
+
// entry state X" so the same callee at different sites uses
|
|
10
|
+
// different summaries.
|
|
11
|
+
// 2. On-demand BACKWARD SLICING for high-confidence findings —
|
|
12
|
+
// starting from a critical sink, walk backwards through the
|
|
13
|
+
// use-def chain and emit a minimal trace that explains exactly
|
|
14
|
+
// which lines contribute taint.
|
|
15
|
+
// 3. PERSISTENT cross-scan summary cache — write the summary table
|
|
16
|
+
// to .agentic-security/ifds-summaries.json after each scan and
|
|
17
|
+
// reload on the next scan. Skip re-analysis of unchanged
|
|
18
|
+
// functions (incremental analysis).
|
|
19
|
+
//
|
|
20
|
+
// Opt-in via AGENTIC_SECURITY_IFDS_PRECISE=1 alongside the existing
|
|
21
|
+
// AGENTIC_SECURITY_DEEP=1.
|
|
22
|
+
|
|
23
|
+
import * as fs from 'node:fs';
|
|
24
|
+
import * as path from 'node:path';
|
|
25
|
+
import * as crypto from 'node:crypto';
|
|
26
|
+
|
|
27
|
+
// ── Per-call-site refined summaries ────────────────────────────────────────
|
|
28
|
+
|
|
29
|
+
/**
 * RefinedSummaryCache — layers per-entry-state refinement on top of a base
 * summary cache. For every function id (qid) it keeps a small map of
 * (entry-state hash → summary), so the same callee can carry different
 * summaries at call sites whose arguments differ in taint.
 *
 * Each function keeps at most `maxPerFn` refinements; the least recently
 * used one is evicted when a new state would exceed the cap.
 */
const MAX_REFINEMENTS_PER_FN = 4;

export class RefinedSummaryCache {
  /**
   * @param {object|null} baseCache Optional base cache. Its `get(qid, entryState)`
   *   is consulted on a refinement miss and its `set(qid, Set, summary)` is
   *   called when an empty entry state is stored. Both methods are optional.
   * @param {{maxPerFn?: number}} [opts] `maxPerFn` overrides the per-function cap.
   *   Anything that is not a number greater than zero falls back to the default.
   */
  constructor(baseCache, opts = {}) {
    this._base = baseCache;
    this._refinements = new Map(); // qid → Map<stateHash, summary>
    this._lru = new Map();         // qid → array of state hashes, oldest first
    // A cap <= 0 (or NaN) would make the eviction loop in store() spin forever
    // on an empty recency list, so only accept positive values.
    const requestedCap = Number(opts.maxPerFn);
    this.maxPerFn = requestedCap > 0 ? requestedCap : MAX_REFINEMENTS_PER_FN;
    this.metrics = { refinementHits: 0, refinementMisses: 0, refinementEvictions: 0 };
  }

  /**
   * Build the cache key for an entry state. Falsy values, empty Sets, empty
   * arrays and key-less objects all map to '∅'. Sets and arrays are sorted and
   * joined with '|'; other objects become "key=1|0" pairs joined with ','.
   * @param {*} entryState
   * @returns {string}
   */
  _hash(entryState) {
    if (!entryState) return '∅';
    if (entryState instanceof Set) {
      if (entryState.size === 0) return '∅';
      return [...entryState].sort().join('|');
    }
    if (Array.isArray(entryState)) {
      if (entryState.length === 0) return '∅';
      return entryState.slice().sort().join('|');
    }
    if (typeof entryState === 'object') {
      // Object keyed by parameter index → tainted bool.
      const keys = Object.keys(entryState).sort();
      return keys.map(k => `${k}=${entryState[k] ? 1 : 0}`).join(',') || '∅';
    }
    return String(entryState);
  }

  /**
   * Look up the summary for (qid, entryState).
   * A refinement hit refreshes its recency and counts as a hit. Otherwise the
   * base cache (if it has a `get`) is asked; a truthy answer is returned
   * without touching the metrics. Anything else counts as a miss.
   * @returns {*} the cached summary, or undefined
   */
  get(qid, entryState) {
    const h = this._hash(entryState);
    const m = this._refinements.get(qid);
    if (m && m.has(h)) {
      this._touch(qid, h);
      this.metrics.refinementHits++;
      return m.get(h);
    }
    if (this._base && typeof this._base.get === 'function') {
      const v = this._base.get(qid, entryState);
      if (v) return v;
    }
    this.metrics.refinementMisses++;
    return undefined;
  }

  /**
   * Store a summary for (qid, entryState), evicting the least recently used
   * refinement(s) of that function when the cap is reached.
   */
  store(qid, entryState, summary) {
    const h = this._hash(entryState);
    let m = this._refinements.get(qid);
    let order = this._lru.get(qid);
    if (!m) { m = new Map(); this._refinements.set(qid, m); }
    if (!order) { order = []; this._lru.set(qid, order); }
    if (!m.has(h)) {
      // The length check also guards against looping on an empty list.
      while (order.length > 0 && order.length >= this.maxPerFn) {
        const evict = order.shift();
        m.delete(evict);
        this.metrics.refinementEvictions++;
      }
      order.push(h);
    }
    m.set(h, summary);
    // Seed the base cache for every state that hashes to the empty bucket
    // (null, empty Set, empty array, key-less object), not only an empty Set,
    // so the base view agrees with what this layer files under '∅'.
    if (h === '∅' && this._base && typeof this._base.set === 'function') {
      try { this._base.set(qid, new Set(), summary); } catch { /* seeding is best-effort */ }
    }
  }

  /** Move state hash `h` to the most-recently-used end of qid's recency list. */
  _touch(qid, h) {
    const order = this._lru.get(qid);
    if (!order) return;
    const idx = order.indexOf(h);
    if (idx >= 0) { order.splice(idx, 1); order.push(h); }
  }

  /** @returns {number} total number of refinements held across all functions */
  size() {
    let n = 0;
    for (const m of this._refinements.values()) n += m.size;
    return n;
  }
}
|
|
122
|
+
|
|
123
|
+
// ── On-demand backward slicing ─────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * backwardSlice(callGraph, finding, opts) — starting at a finding's sink, walk
 * predecessor links backwards and return the visited steps ordered
 * source-first. Every step has the shape { file, line, snippet, reason }.
 *
 * The walk is bounded and cycle-aware:
 *   - revisiting a file:line ends the walk with a 'cycle-detected' step;
 *   - when the depth limit is hit while predecessors remain, a
 *     { reason: 'slice-depth-cap' } marker is placed at the source end.
 *
 * @param {object|null} callGraph Optional; if it exposes `getPred(node)` that is
 *   used whenever a node has no `predecessor` property.
 * @param {object} finding Finding to explain. `finding.sink` is the starting
 *   node when present, otherwise the finding itself; `finding.file` is the
 *   fallback for nodes without a `file`.
 * @param {{maxDepth?: number}} [opts] `maxDepth` overrides MAX_SLICE_DEPTH when
 *   it is a number greater than zero.
 * @returns {Array<object>} trace entries, source-first (empty for a falsy finding)
 */
const MAX_SLICE_DEPTH = 16;

export function backwardSlice(callGraph, finding, opts = {}) {
  const out = [];
  if (!finding) return out;

  const requestedDepth = opts ? Number(opts.maxDepth) : NaN;
  const maxDepth = requestedDepth > 0 ? requestedDepth : MAX_SLICE_DEPTH;

  // Always emit the documented entry shape. Copying the raw node would carry
  // its `predecessor` link along, which on a cycle makes the result circular.
  const toEntry = (node, reason) => ({
    file: node.file || finding.file,
    line: node.line,
    snippet: node.snippet || node.expr || null,
    reason,
  });

  const seen = new Set();
  let cur = finding.sink || finding;
  let depth = 0;
  while (cur && depth < maxDepth) {
    const key = `${cur.file || finding.file}:${cur.line}`;
    if (seen.has(key)) {
      out.push(toEntry(cur, 'cycle-detected'));
      cur = null; // the walk is finished; this is not a truncation
      break;
    }
    seen.add(key);
    out.push(toEntry(cur, cur.reason || 'use-def-pred'));
    cur = cur.predecessor || (callGraph && callGraph.getPred && callGraph.getPred(cur)) || null;
    depth++;
  }
  // Only report truncation when there really was more to walk; a chain that
  // ends exactly at the depth limit is complete.
  if (cur) out.push({ reason: 'slice-depth-cap' });
  return out.reverse(); // source-first
}
|
|
159
|
+
|
|
160
|
+
// ── Persistent cross-scan summary cache ────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
/**
 * Location of the persisted IFDS summary file for a scan root:
 * <scanRoot>/.agentic-security/ifds-summaries.json
 */
function _cachePath(scanRoot) {
  const segments = [scanRoot, '.agentic-security', 'ifds-summaries.json'];
  return path.join(...segments);
}
|
|
165
|
+
|
|
166
|
+
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256
 * digest of `content`.
 */
function _fileHash(content) {
  const hasher = crypto.createHash('sha256');
  hasher.update(content);
  const hex = hasher.digest('hex');
  return hex.slice(0, 16);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Load a previously-persisted IFDS summary cache from
 * <scanRoot>/.agentic-security/ifds-summaries.json. Returns:
 *   { summaries: Map<qid, summary>, fileHashes: Map<filePath, sha>, scanTs }
 * or null if the file is missing, unreadable, not valid JSON, or its top
 * level is not a JSON object.
 *
 * `summaries` / `fileHashes` fields that are not JSON objects are treated as
 * empty rather than being expanded into index-keyed Maps.
 *
 * NOTE(review): the file is read from inside the scanned tree, so its content
 * is only as trustworthy as that tree — confirm callers are comfortable
 * reusing summaries from it.
 *
 * @param {string} scanRoot root directory of the scan
 * @returns {{summaries: Map, fileHashes: Map, scanTs: (string|null)}|null}
 */
export function loadPersistedCache(scanRoot) {
  const fp = _cachePath(scanRoot);
  let raw;
  try {
    // Reading directly covers the "file does not exist" case too; a separate
    // existence check would only add a window between check and read.
    raw = JSON.parse(fs.readFileSync(fp, 'utf8'));
  } catch {
    return null;
  }
  const isRecord = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  if (!isRecord(raw)) return null;
  const summaries = isRecord(raw.summaries) ? raw.summaries : {};
  const fileHashes = isRecord(raw.fileHashes) ? raw.fileHashes : {};
  return {
    summaries: new Map(Object.entries(summaries)),
    fileHashes: new Map(Object.entries(fileHashes)),
    scanTs: raw.scanTs || null,
  };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Persist the current scan's summaries to
 * <scanRoot>/.agentic-security/ifds-summaries.json. Best-effort: directory
 * creation and write failures are ignored, and a missing `cache` simply
 * produces an empty summary table.
 *
 * Written payload: { scanTs, summaries, fileHashes } where
 *   - fileHashes maps each file path in `perFileIR` whose IR has a string
 *     `_content` to that content's short hash;
 *   - summaries holds, per function id, only the summary stored under the
 *     empty entry state ('∅'); other refinements are not persisted.
 *
 * NOTE(review): summaries are serialized with JSON.stringify, which renders
 * Set/Map values as {} — confirm the summary shape is JSON-safe.
 *
 * @param {string} scanRoot root directory of the scan
 * @param {object|null} cache object exposing `_refinements`
 *   (Map<qid, Map<stateHash, summary>>)
 * @param {Iterable<[string, object]>|null} perFileIR entries of [filePath, ir]
 */
export function persistCache(scanRoot, cache, perFileIR) {
  const dir = path.join(scanRoot, '.agentic-security');
  try { fs.mkdirSync(dir, { recursive: true }); } catch { /* best-effort; the write below is guarded too */ }

  const fileHashes = {};
  for (const [filePath, ir] of (perFileIR || new Map())) {
    if (ir && typeof ir._content === 'string') fileHashes[filePath] = _fileHash(ir._content);
  }

  const summaries = {};
  // Guard the cache itself, not just its field: a null/undefined cache must
  // not turn a best-effort persist into a TypeError.
  const refinements = (cache && cache._refinements) || new Map();
  for (const [qid, byState] of refinements) {
    // Only the empty-entry-state summary is the stable cross-scan contract.
    if (byState && typeof byState.has === 'function' && byState.has('∅')) {
      summaries[qid] = byState.get('∅');
    }
  }

  const out = { scanTs: new Date().toISOString(), summaries, fileHashes };
  try { fs.writeFileSync(_cachePath(scanRoot), JSON.stringify(out, null, 2)); }
  catch { /* best-effort */ }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Decide whether a file can reuse its previously persisted summaries: true
 * only when `prevCache.fileHashes` holds a hash for `filePath` and it equals
 * the hash of `currentContent`.
 *
 * Returns false (re-analyse) whenever the comparison cannot be made: no
 * previous cache, `fileHashes` without a `get` method, no recorded hash, or
 * content that is neither a string nor a binary view.
 *
 * @param {{fileHashes: Map<string, string>}|null} prevCache
 * @param {string} filePath
 * @param {string|ArrayBufferView} currentContent
 * @returns {boolean}
 */
export function shouldSkipReanalysis(prevCache, filePath, currentContent) {
  if (!prevCache || !prevCache.fileHashes) return false;
  if (typeof prevCache.fileHashes.get !== 'function') return false;
  const prevHash = prevCache.fileHashes.get(filePath);
  if (!prevHash) return false;
  // Hashing anything but a string / buffer view throws; unknown content
  // means "cannot prove unchanged", so fall back to re-analysis.
  if (typeof currentContent !== 'string' && !ArrayBuffer.isView(currentContent)) return false;
  return prevHash === _fileHash(currentContent);
}
|
|
221
|
+
|
|
222
|
+
// Module-private helpers and limits exposed under one name (presumably for tests).
export const _internals = {
  _cachePath,
  _fileHash,
  MAX_REFINEMENTS_PER_FN,
  MAX_SLICE_DEPTH,
};
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// k=2 monovariant summary cache — Recommendation #9 of the SCA/SAST plan.
|
|
2
|
+
//
|
|
3
|
+
// The existing scanner/src/dataflow/summaries.js (referenced by engine.js)
|
|
4
|
+
// implements k=1: per-function ONE summary computed under empty entry state.
|
|
5
|
+
// That misses the common Juliet pattern of "function is pure when called
|
|
6
|
+
// with clean args but vulnerable when called with tainted args" because
|
|
7
|
+
// only the empty-state summary is cached.
|
|
8
|
+
//
|
|
9
|
+
// This module wraps SummaryCache with a per-(qid, entry-state-class) lookup,
|
|
10
|
+
// up to 2 distinct entry-state classes per function. The "class" is computed
|
|
11
|
+
// from a stable hash of which parameter positions are tainted — at k=2 we
|
|
12
|
+
// cache the all-clean state and one tainted state per function. Three or
|
|
13
|
+
// more distinct states evict to LRU.
|
|
14
|
+
//
|
|
15
|
+
// Usage:
|
|
16
|
+
// const k2 = new K2SummaryCache(opts.baseCache);
|
|
17
|
+
// k2.get(qid, entryState) → summary | undefined
|
|
18
|
+
// k2.compute(qid, entryState, fn) → summary
|
|
19
|
+
// k2.applyAtCallSite(qid, entryState, callerCtx) → mutations
|
|
20
|
+
//
|
|
21
|
+
// Falls back to k=1 behaviour transparently when summaries.js's
|
|
22
|
+
// SummaryCache.get returns a summary that doesn't carry entry-state info,
|
|
23
|
+
// so the rest of the engine continues to work unchanged.
|
|
24
|
+
|
|
25
|
+
const _MAX_STATES_PER_FN = 2;
|
|
26
|
+
|
|
27
|
+
/**
 * Turn an entry state into a stable string key.
 *   - falsy value, empty Set, empty array → '∅'
 *   - Set / array                          → members sorted, joined with ','
 *   - anything else                        → '*' (all opaque states share a bucket)
 * Only membership matters; the input is never mutated.
 */
function _hashEntryState(entryState) {
  if (!entryState) return '∅';

  let members = null;
  if (entryState instanceof Set) {
    members = Array.from(entryState);
  } else if (Array.isArray(entryState)) {
    members = entryState.slice();
  }

  if (members === null) return '*';
  return members.length === 0 ? '∅' : members.sort().join(',');
}
|
|
44
|
+
|
|
45
|
+
/**
 * Summary cache keyed by (function id, entry-state hash), holding at most
 * _MAX_STATES_PER_FN states per function with least-recently-used eviction,
 * and deferring to an optional base cache when it has no entry of its own.
 */
export class K2SummaryCache {
  constructor(baseCache) {
    this._base = baseCache;       // wrapped cache; may be null
    this._states = new Map();     // qid → Map<stateHash, summary>
    this._stateOrder = new Map(); // qid → state hashes, least recently used first
    this.metrics = { hits: 0, misses: 0, evictions: 0, computes: 0 };
  }

  /**
   * Fetch the summary for (qid, entryState). Own entries win; otherwise a
   * truthy answer from the base cache's `get` is returned. Both count as hits.
   * Returns undefined (and counts a miss) when neither has one.
   */
  get(qid, entryState) {
    const key = _hashEntryState(entryState);
    const perFn = this._states.get(qid);
    if (perFn && perFn.has(key)) {
      this.metrics.hits++;
      this._touch(qid, key);
      return perFn.get(key);
    }

    const canAskBase = this._base && typeof this._base.get === 'function';
    if (canAskBase) {
      const inherited = this._base.get(qid, entryState);
      if (inherited) {
        this.metrics.hits++;
        return inherited;
      }
    }

    this.metrics.misses++;
    return undefined;
  }

  /**
   * Return the summary for (qid, entryState), calling `computeFn` only when
   * get() yields nothing truthy. A freshly computed summary is stored here
   * and, for a falsy or empty-Set entry state, also pushed into the base
   * cache via `set(qid, new Set(), summary)`.
   */
  compute(qid, entryState, computeFn) {
    const cached = this.get(qid, entryState);
    if (cached) return cached;

    this.metrics.computes++;
    const fresh = computeFn();
    this._store(qid, entryState, fresh);

    const baseAcceptsWrites = this._base && typeof this._base.set === 'function';
    if (baseAcceptsWrites) {
      const isEmptyEntry = !entryState || (entryState instanceof Set && entryState.size === 0);
      if (isEmptyEntry) {
        try { this._base.set(qid, new Set(), fresh); } catch {}
      }
    }
    return fresh;
  }

  /**
   * Resolve the summary for a call site. Returns null when there is none.
   * If the base cache has `applyAtCallSite`, its result is returned (null if
   * it throws); otherwise the summary itself is returned.
   */
  applyAtCallSite(qid, entryState, callerCtx) {
    const summary = this.get(qid, entryState);
    if (!summary) return null;

    const base = this._base;
    if (!base || typeof base.applyAtCallSite !== 'function') return summary;

    try {
      return base.applyAtCallSite(qid, entryState, callerCtx, summary);
    } catch {
      return null;
    }
  }

  /** Record a summary, evicting the least recently used state(s) at the cap. */
  _store(qid, entryState, summary) {
    const key = _hashEntryState(entryState);

    let perFn = this._states.get(qid);
    if (!perFn) {
      perFn = new Map();
      this._states.set(qid, perFn);
    }
    let recency = this._stateOrder.get(qid);
    if (!recency) {
      recency = [];
      this._stateOrder.set(qid, recency);
    }

    const isNewState = !perFn.has(key);
    if (isNewState) {
      while (recency.length >= _MAX_STATES_PER_FN) {
        const oldest = recency.shift();
        perFn.delete(oldest);
        this.metrics.evictions++;
      }
      recency.push(key);
    }
    perFn.set(key, summary);
  }

  /** Mark `hash` as the most recently used state of `qid`. */
  _touch(qid, hash) {
    const recency = this._stateOrder.get(qid);
    if (!recency) return;
    const at = recency.indexOf(hash);
    if (at < 0) return;
    recency.splice(at, 1);
    recency.push(hash);
  }

  /** Number of (function, state) summaries currently held. */
  size() {
    let total = 0;
    this._states.forEach((perFn) => { total += perFn.size; });
    return total;
  }
}
|
|
142
|
+
|
|
143
|
+
/**
 * Return a K2SummaryCache for `baseCache`: an existing K2SummaryCache is
 * handed back untouched, a falsy value yields a cache with no base, and
 * anything else is wrapped.
 */
export function wrapAsK2(baseCache) {
  if (baseCache instanceof K2SummaryCache) return baseCache;
  const base = baseCache ? baseCache : null;
  return new K2SummaryCache(base);
}
|
|
152
|
+
|
|
153
|
+
// Module-private hashing helper and state cap exposed under one name (presumably for tests).
export const _internals = {
  _hashEntryState,
  _MAX_STATES_PER_FN,
};
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
// Library taint summaries — Recommendation #5 of the SCA/SAST plan.
|
|
2
|
+
//
|
|
3
|
+
// Hand-curated knowledge that "this library method returns tainted data" or
|
|
4
|
+
// "this method propagates taint from arg N to its return." Used by the
|
|
5
|
+
// existing dataflow engine + per-language detectors when classifying the
|
|
6
|
+
// taint state of a declaration's rhs.
|
|
7
|
+
//
|
|
8
|
+
// The summaries are intentionally per-language because the same concept
|
|
9
|
+
// (a user-input source) has different idioms in each ecosystem. Each entry:
|
|
10
|
+
//
|
|
11
|
+
// { pattern: RegExp, kind: 'source' | 'sanitizer' | 'passthrough',
|
|
12
|
+
// framework: 'spring' | 'aspnet' | 'glibc' | … }
|
|
13
|
+
//
|
|
14
|
+
// Kinds:
|
|
15
|
+
// source — return value is unconditionally tainted
|
|
16
|
+
// sanitizer — return value is unconditionally clean, even if any arg
|
|
17
|
+
// was tainted (e.g. HtmlEncode, parameterized prepare)
|
|
18
|
+
// passthrough — return value is tainted iff arg N is tainted (taint
|
|
19
|
+
// flows through). Not modelled in v1; reserved for future
|
|
20
|
+
// inter-procedural extensions (Recommendation #9).
|
|
21
|
+
//
|
|
22
|
+
// Usage: detectors call `isLibrarySource(text, lang)` and `isLibrarySanitizer
|
|
23
|
+
// (text, lang)` to refine their per-call decisions.
|
|
24
|
+
|
|
25
|
+
// Java library taint summaries: regexes that mark a snippet as containing a
// taint source (`sources`) or a sanitizer (`sanitizers`). None of the patterns
// carry the g/y flag, so `.test()` is stateless and the literals can be shared.
const JAVA_SUMMARIES = {
  sources: [
    // Servlet API — request-scoped getters (including getHeaders).
    /\bHttpServletRequest\b[\s\S]{0,2000}?\.\s*(?:getParameter(?:Values|Map)?|getQueryString|getHeader(?:s|Names)?|getInputStream|getReader|getCookies?|getRequestURI|getRequestURL|getPathInfo)\s*\(/,
    /\bjavax\.servlet\.http\.HttpServletRequest\b/,
    // Spring MVC — controller method annotations bind to request data.
    /@RequestParam\b/,
    /@RequestBody\b/,
    /@PathVariable\b/,
    /@RequestHeader\b/,
    /@CookieValue\b/,
    /@ModelAttribute\b/,
    // Spring Security — the principal is user-controlled in the trust sense
    // (it identifies WHO the request is from; not auto-sanitized).
    /\bSecurityContextHolder\s*\.\s*getContext\s*\(\s*\)\s*\.\s*getAuthentication\s*\(\s*\)/,
    // Java Files API — file content is untrusted when source is unknown.
    /\bFiles\s*\.\s*(?:readString|readAllBytes|readAllLines|lines|newBufferedReader|newInputStream)\b/,
    /\bPaths\s*\.\s*get\s*\([^)]*(?:System\.getProperty|args)\b/,
    // BufferedReader / Scanner reading user input.
    /\bBufferedReader\b[\s\S]{0,500}?\.\s*readLine\s*\(/,
    /\bScanner\b[\s\S]{0,500}?\.\s*(?:next(?:Line)?|nextInt|nextLong)\s*\(/,
    // System.getenv / System.getProperty — environment is configurable.
    /\bSystem\s*\.\s*(?:getenv|getProperty)\s*\(/,
    // Jackson — deserialization input is untrusted.
    /\bObjectMapper\b[\s\S]{0,500}?\.\s*readValue\s*\(/,
    /\bJsonParser\b[\s\S]{0,500}?\.\s*getValueAsString\s*\(/,
    // Apache Commons IO.
    /\bIOUtils\s*\.\s*toString\s*\(/,
    /\bFileUtils\s*\.\s*readFileToString\s*\(/,
    // Spring WebFlux ServerWebExchange.
    /\bServerWebExchange\b[\s\S]{0,500}?\.\s*getRequest\s*\(/,
  ],
  sanitizers: [
    /\bOWASP\.Encoder\b/,
    /\bESAPI\b[\s\S]{0,200}?\.\s*encoder\s*\(\s*\)/,
    /\bStringEscapeUtils\s*\.\s*escape(?:Html\d?|Xml|Sql|Java|JavaScript)\b/,
    /\bHtmlUtils\s*\.\s*htmlEscape\b/,
    /\bUriUtils\s*\.\s*encode\b/,
    // JDBC PreparedStatement parameter setters — taint is cleaned at bind.
    /\bPreparedStatement\b[\s\S]{0,500}?\.\s*set(?:String|Int|Long|Object|BigDecimal|Date|Timestamp)\s*\(/,
    /\bNamedParameterJdbcTemplate\b[\s\S]{0,500}?\.\s*(?:query|update|queryForObject)\s*\([^,]+,\s*new\s+MapSqlParameterSource\b/,
    // Java validators.
    /\bjakarta\.validation\b/,
    /\bjavax\.validation\b/,
    // No leading \b: '@' is not a word character, so a word boundary in front
    // of it only exists after a letter/digit and the annotation as normally
    // written ("(@Valid Foo f)", line start) would never match.
    /@Valid\b/,
  ],
};
|
|
72
|
+
|
|
73
|
+
// C# / .NET library taint summaries: regexes that mark a snippet as containing
// a taint source (`sources`) or a sanitizer (`sanitizers`). None of the
// patterns carry the g/y flag, so `.test()` is stateless.
const CSHARP_SUMMARIES = {
  sources: [
    // ASP.NET request surfaces.
    /\bHttpRequest\b[\s\S]{0,500}?\.\s*(?:Query|Form|Headers|Cookies|RouteValues|Body|InputStream|QueryString|Params|Path|Url)\b/,
    /\bHttpContext\s*\.\s*Request\b/,
    /\bIFormCollection\b/,
    /\bIFormFile\b/,
    /\bIFormFileCollection\b/,
    // ASP.NET Core model binding. Matched on the opening "[FromX" so both the
    // bare attribute and the form with arguments ([FromQuery(Name = "q")]) hit.
    /\[FromQuery\b/,
    /\[FromBody\b/,
    /\[FromForm\b/,
    /\[FromRoute\b/,
    /\[FromHeader\b/,
    // Configuration may carry secrets but the VALUES are environment-supplied.
    // Accept both GetValue("k") and the generic GetValue<T>("k").
    /\bIConfiguration\b[\s\S]{0,500}?\.\s*(?:GetSection|GetValue|GetConnectionString|GetChildren)\s*[(<]/,
    // JSON deserialization — generic (Deserialize<T>(...)) and non-generic
    // (Deserialize(json, type)) call forms.
    /\bJsonConvert\s*\.\s*Deserialize(?:Object|XmlNode)\s*[<(]/,
    /\bJsonSerializer\s*\.\s*Deserialize\s*[<(]/,
    // Files / streams.
    /\bFile\s*\.\s*(?:ReadAllText|ReadAllLines|ReadAllBytes|OpenRead|OpenText)\s*\(/,
    /\bStreamReader\b[\s\S]{0,500}?\.\s*(?:ReadLine|ReadToEnd|Read)\s*\(/,
    /\bBinaryReader\b[\s\S]{0,500}?\.\s*Read(?:String|Bytes|Char|Int32|Int64|UInt32|UInt64)\s*\(/,
    // Network reads.
    /\bWebClient\b[\s\S]{0,500}?\.\s*Download(?:String|Data|File)\s*\(/,
    /\bHttpClient\b[\s\S]{0,500}?\.\s*(?:GetAsync|GetStringAsync|PostAsync|SendAsync)\s*\(/,
    // Environment + console.
    /\bEnvironment\s*\.\s*GetEnvironmentVariable\s*\(/,
    /\bConsole\s*\.\s*ReadLine\s*\(/,
  ],
  sanitizers: [
    /\bHttpUtility\s*\.\s*HtmlEncode\b/,
    /\bHtmlEncoder\s*\.\s*Default\s*\.\s*Encode\b/,
    /\bAntiXssEncoder\b/,
    /\bSqlParameter\b/,
    /\bMySqlParameter\b/,
    /\bNpgsqlParameter\b/,
    // EF Core parameterized helpers.
    /\bFromSqlInterpolated\s*\(/,
    // Validation.
    /\bint\s*\.\s*TryParse\s*\(/,
    /\bGuid\s*\.\s*TryParse\s*\(/,
    /\bDateTime\s*\.\s*TryParse\s*\(/,
    /\bRegex\s*\.\s*Replace\s*\(/,
  ],
};
|
|
119
|
+
|
|
120
|
+
// C / C++ taint sources, grouped by where the data comes from.
const CPP_TAINT_SOURCES = [
  // POSIX — environment, argv and stdio/socket input.
  /\bgetenv\s*\(/,
  /\bsecure_getenv\s*\(/,
  /\bargv\s*\[/,
  /\bgets\s*\(/,
  /\bfgets\s*\(/,
  /\bscanf\s*\(/,
  /\bfscanf\s*\(/,
  /\bgetc\s*\(/,
  /\bfgetc\s*\(/,
  /\bread\s*\(\s*\d+/, // only read() calls whose first argument starts with a digit
  /\brecv\s*\(/,
  /\brecvfrom\s*\(/,
  // OpenSSL reads.
  /\bBIO_read\s*\(/,
  /\bSSL_read\s*\(/,
  // Win32 command line / environment.
  /\bGetCommandLine[AW]?\s*\(/,
  /\bGetEnvironmentVariable[AW]?\s*\(/,
  // std::cin.
  /\bstd\s*::\s*cin\s*>>/,
  /\bstd\s*::\s*getline\s*\(\s*std\s*::\s*cin\b/,
];

// C / C++ patterns treated as sanitizing (best-effort).
const CPP_TAINT_SANITIZERS = [
  // Copies bounded by sizeof(...).
  /\bstrncpy\s*\(\s*[^,]+,\s*[^,]+,\s*sizeof\s*\(/,
  /\bsnprintf\s*\(\s*[^,]+,\s*sizeof\s*\(/,
  // Character-class checks and numeric parsing.
  /\bisdigit\s*\(/,
  /\bisalpha\s*\(/,
  /\bisalnum\s*\(/,
  /\bstrtol\s*\(/,
  /\bstrtoul\s*\(/,
];

const CPP_SUMMARIES = {
  sources: CPP_TAINT_SOURCES,
  sanitizers: CPP_TAINT_SANITIZERS,
};
|
|
156
|
+
|
|
157
|
+
// Language key → summary table. 'c' and 'cpp' share the same table.
const SUMMARIES_BY_LANG = {};
SUMMARIES_BY_LANG.java = JAVA_SUMMARIES;
SUMMARIES_BY_LANG.csharp = CSHARP_SUMMARIES;
SUMMARIES_BY_LANG.cpp = CPP_SUMMARIES;
SUMMARIES_BY_LANG.c = CPP_SUMMARIES;
|
|
163
|
+
|
|
164
|
+
/**
 * Pick the language key for a snippet. A truthy `hint` is returned as-is;
 * otherwise the language is derived from the extension of `file`
 * (case-insensitive): .java → 'java', .cs → 'csharp',
 * .c/.cc/.cpp/.cxx/.h/.hh/.hpp → 'cpp'. Returns null when neither yields one.
 */
function _langOf(hint, file) {
  if (hint) return hint;
  if (!file) return null;

  const byExtension = [
    [/\.java$/i, 'java'],
    [/\.cs$/i, 'csharp'],
    [/\.(?:c|cc|cpp|cxx|h|hh|hpp)$/i, 'cpp'],
  ];
  const matched = byExtension.find(([extension]) => extension.test(file));
  return matched ? matched[1] : null;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Returns true if `text` contains a library-source pattern for the language.
 *
 * @param {string} text code snippet to test; falsy text yields false
 * @param {string} langOrFile a language key ('java', 'csharp', 'cpp', 'c'),
 *   or — when the string contains a '.' — a file name whose extension selects
 *   the language
 * @returns {boolean} false for unknown languages
 */
export function isLibrarySource(text, langOrFile) {
  if (!text) return false;
  const looksLikeFile = typeof langOrFile === 'string' && langOrFile.includes('.');
  const lang = _langOf(looksLikeFile ? null : langOrFile, langOrFile);
  // Own-property check: a hint such as 'constructor' or 'toString' would
  // otherwise resolve through Object.prototype to a non-table value and the
  // iteration below would throw.
  if (typeof lang !== 'string' || !Object.prototype.hasOwnProperty.call(SUMMARIES_BY_LANG, lang)) {
    return false;
  }
  const summaries = SUMMARIES_BY_LANG[lang];
  return summaries.sources.some((re) => re.test(text));
}
|
|
185
|
+
|
|
186
|
+
/**
 * Returns true if `text` contains a library-sanitizer pattern for the language.
 *
 * @param {string} text code snippet to test; falsy text yields false
 * @param {string} langOrFile a language key ('java', 'csharp', 'cpp', 'c'),
 *   or — when the string contains a '.' — a file name whose extension selects
 *   the language
 * @returns {boolean} false for unknown languages
 */
export function isLibrarySanitizer(text, langOrFile) {
  if (!text) return false;
  const looksLikeFile = typeof langOrFile === 'string' && langOrFile.includes('.');
  const lang = _langOf(looksLikeFile ? null : langOrFile, langOrFile);
  // Own-property check: a hint such as 'constructor' or 'toString' would
  // otherwise resolve through Object.prototype to a non-table value and the
  // iteration below would throw.
  if (typeof lang !== 'string' || !Object.prototype.hasOwnProperty.call(SUMMARIES_BY_LANG, lang)) {
    return false;
  }
  const summaries = SUMMARIES_BY_LANG[lang];
  return summaries.sanitizers.some((re) => re.test(text));
}
|
|
197
|
+
|
|
198
|
+
// Per-language summary tables exposed under one name (presumably for tests).
export const _internals = {
  JAVA_SUMMARIES,
  CSHARP_SUMMARIES,
  CPP_SUMMARIES,
  SUMMARIES_BY_LANG,
};
|