@clear-capabilities/agentic-security-scanner 0.78.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/bin/.agentic-security/findings.json +16 -16
  2. package/bin/.agentic-security/last-scan.json +16 -16
  3. package/bin/.agentic-security/last-scan.json.sig +1 -1
  4. package/bin/.agentic-security/scan-history.json +51 -0
  5. package/bin/.agentic-security/streak.json +5 -5
  6. package/bin/agentic-security.js +22 -7
  7. package/dist/178.index.js +1 -1
  8. package/dist/333.index.js +283 -0
  9. package/dist/384.index.js +1 -1
  10. package/dist/476.index.js +5 -5
  11. package/dist/637.index.js +1 -1
  12. package/dist/700.index.js +138 -0
  13. package/dist/718.index.js +53 -0
  14. package/dist/838.index.js +1 -1
  15. package/dist/985.index.js +95 -1
  16. package/dist/agentic-security.mjs +83 -83
  17. package/dist/agentic-security.mjs.sha256 +1 -1
  18. package/package.json +6 -4
  19. package/src/.agentic-security/findings.json +29799 -7803
  20. package/src/.agentic-security/last-scan.json +29799 -7803
  21. package/src/.agentic-security/last-scan.json.sig +1 -1
  22. package/src/.agentic-security/scan-history.json +5119 -2611
  23. package/src/.agentic-security/streak.json +6 -6
  24. package/src/dataflow/.agentic-security/findings.json +2879 -308
  25. package/src/dataflow/.agentic-security/last-scan.json +2879 -308
  26. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -1
  27. package/src/dataflow/.agentic-security/scan-history.json +68 -520
  28. package/src/dataflow/.agentic-security/streak.json +6 -7
  29. package/src/dataflow/cross-service-taint.js +201 -0
  30. package/src/dataflow/engine.js +52 -8
  31. package/src/dataflow/formal-verify.js +204 -0
  32. package/src/dataflow/ifds-precise.js +222 -0
  33. package/src/dataflow/k2-summary-cache.js +153 -0
  34. package/src/dataflow/lib-taint-summaries.js +198 -0
  35. package/src/dataflow/privacy-taint.js +205 -0
  36. package/src/dataflow/smt-feasibility.js +189 -0
  37. package/src/engine.js +890 -132
  38. package/src/integrations/index.js +2 -1
  39. package/src/ir/.agentic-security/findings.json +240 -6
  40. package/src/ir/.agentic-security/last-scan.json +240 -6
  41. package/src/ir/.agentic-security/last-scan.json.sig +1 -1
  42. package/src/ir/.agentic-security/scan-history.json +16 -594
  43. package/src/ir/.agentic-security/streak.json +8 -9
  44. package/src/ir/callgraph.js +27 -7
  45. package/src/ir/cpp-preprocessor.js +142 -0
  46. package/src/ir/csharp-ir.js +604 -0
  47. package/src/ir/universal-ir.js +403 -0
  48. package/src/llm-validator/index.js +7 -5
  49. package/src/mcp/.agentic-security/findings.json +8632 -0
  50. package/src/mcp/.agentic-security/last-scan.json +8632 -0
  51. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  52. package/src/mcp/.agentic-security/scan-history.json +143 -0
  53. package/src/mcp/.agentic-security/streak.json +20 -0
  54. package/src/mcp/audit.js +5 -0
  55. package/src/mcp/tools.js +90 -1
  56. package/src/posture/.agentic-security/findings.json +16809 -4367
  57. package/src/posture/.agentic-security/last-scan.json +16809 -4367
  58. package/src/posture/.agentic-security/last-scan.json.sig +1 -1
  59. package/src/posture/.agentic-security/scan-history.json +6689 -177
  60. package/src/posture/.agentic-security/streak.json +8 -7
  61. package/src/posture/api-contract.js +193 -0
  62. package/src/posture/attack-taxonomy.js +227 -0
  63. package/src/posture/calibration-drift.js +2 -1
  64. package/src/posture/calibration.js +3 -2
  65. package/src/posture/compliance-policy.js +218 -0
  66. package/src/posture/composite-risk.js +122 -0
  67. package/src/posture/csharp-analysis.js +330 -0
  68. package/src/posture/exploit-bundle.js +210 -0
  69. package/src/posture/federated-learning.js +172 -0
  70. package/src/posture/fix-history.js +8 -2
  71. package/src/posture/license-attributions.js +94 -0
  72. package/src/posture/license-graph.js +238 -0
  73. package/src/posture/pqc-migration-plan.js +158 -0
  74. package/src/posture/profile.js +4 -5
  75. package/src/posture/reachability-filter.js +33 -2
  76. package/src/posture/realtime-cve-monitor.js +214 -0
  77. package/src/posture/rule-overrides.js +2 -3
  78. package/src/posture/rule-pack-signing.js +2 -3
  79. package/src/posture/rule-synthesis.js +5 -6
  80. package/src/posture/runtime-correlation.js +174 -0
  81. package/src/posture/sbom-diff.js +171 -0
  82. package/src/posture/sca-policy.js +235 -0
  83. package/src/posture/sca-upgrade.js +259 -0
  84. package/src/posture/security-trend.js +4 -7
  85. package/src/posture/state-dir.js +124 -0
  86. package/src/posture/streak.js +3 -0
  87. package/src/posture/suppressions.js +5 -8
  88. package/src/posture/threat-model-auto.js +268 -0
  89. package/src/posture/triage-learning.js +170 -0
  90. package/src/posture/triage.js +29 -6
  91. package/src/posture/validator-metrics.js +3 -6
  92. package/src/sast/.agentic-security/findings.json +996 -32
  93. package/src/sast/.agentic-security/last-scan.json +996 -32
  94. package/src/sast/.agentic-security/last-scan.json.sig +1 -1
  95. package/src/sast/.agentic-security/scan-history.json +565 -32
  96. package/src/sast/.agentic-security/streak.json +10 -8
  97. package/src/sast/_secret-entropy.js +145 -0
  98. package/src/sast/cloud-iam.js +312 -0
  99. package/src/sast/cpp.js +138 -4
  100. package/src/sast/crypto-protocol.js +388 -0
  101. package/src/sast/csharp-tokenizer.js +392 -0
  102. package/src/sast/csharp.js +924 -138
  103. package/src/sast/dapp-frontend.js +200 -0
  104. package/src/sast/db-taint.js +24 -0
  105. package/src/sast/k8s-admission.js +271 -0
  106. package/src/sast/llm-app.js +272 -0
  107. package/src/sast/ml-supply-chain.js +259 -0
  108. package/src/sast/mobile.js +224 -0
  109. package/src/sast/post-quantum-crypto.js +348 -0
  110. package/src/sast/rust.js +26 -0
  111. package/src/sast/web3-advanced.js +375 -0
  112. package/src/sca/.agentic-security/findings.json +6044 -171
  113. package/src/sca/.agentic-security/last-scan.json +6044 -171
  114. package/src/sca/.agentic-security/last-scan.json.sig +1 -1
  115. package/src/sca/.agentic-security/scan-history.json +83 -6
  116. package/src/sca/.agentic-security/streak.json +9 -9
  117. package/src/sca/CLAUDE.md +161 -0
  118. package/src/sca/binary-metadata.js +146 -0
  119. package/src/sca/py-package-functions.js +118 -0
  120. package/src/sca/sigstore-verify.js +215 -0
  121. package/src/sca/vendor-detect.js +53 -0
  122. package/src/report/.agentic-security/findings.json +0 -80
  123. package/src/report/.agentic-security/last-scan.json +0 -80
  124. package/src/report/.agentic-security/last-scan.json.sig +0 -1
  125. package/src/report/.agentic-security/scan-history.json +0 -35
  126. package/src/report/.agentic-security/streak.json +0 -22
@@ -0,0 +1,222 @@
1
+ // IFDS-precise extensions — Recommendation #2 of the world-class roadmap.
2
+ //
3
+ // The existing scanner/src/dataflow/ifds.js implements the core IFDS
4
+ // worklist algorithm with k=1 summarized return-taint. This module adds
5
+ // the three world-class pieces still missing:
6
+ //
7
+ // 1. Per-call-site summary REFINEMENT — instead of "this function
8
+ // returns tainted unconditionally," cache "returns tainted under
9
+ // entry state X" so the same callee at different sites uses
10
+ // different summaries.
11
+ // 2. On-demand BACKWARD SLICING for high-confidence findings —
12
+ // starting from a critical sink, walk backwards through the
13
+ // use-def chain and emit a minimal trace that explains exactly
14
+ // which lines contribute taint.
15
+ // 3. PERSISTENT cross-scan summary cache — write the summary table
16
+ // to .agentic-security/ifds-summaries.json after each scan and
17
+ // reload on the next scan. Skip re-analysis of unchanged
18
+ // functions (incremental analysis).
19
+ //
20
+ // Opt-in via AGENTIC_SECURITY_IFDS_PRECISE=1 alongside the existing
21
+ // AGENTIC_SECURITY_DEEP=1.
22
+
23
+ import * as fs from 'node:fs';
24
+ import * as path from 'node:path';
25
+ import * as crypto from 'node:crypto';
26
+
27
+ // ── Per-call-site refined summaries ────────────────────────────────────────
28
+
29
/**
 * RefinedSummaryCache — layers per-entry-state refinement on top of a base
 * summary cache. Whereas the base cache stores ONE summary per function under
 * empty entry state, this layer keeps a MAP of (entryStateHash → summary)
 * per function.
 *
 * The intent: at call site A→B(x), the entry state captures which of B's
 * formal parameters are tainted by A's actual argument expressions. If x
 * is tainted at site 1 but not at site 2, two summaries are cached for B
 * and the caller's worklist consults the right one.
 *
 * Each function keeps at most `maxPerFn` refinements (default
 * MAX_REFINEMENTS_PER_FN); the least-recently-used one is evicted first.
 */
const MAX_REFINEMENTS_PER_FN = 4;

export class RefinedSummaryCache {
  /**
   * @param {object|null} baseCache  Optional underlying cache. Its `get` and
   *   `set` members are only used when they are functions.
   * @param {{maxPerFn?: number}} [opts]  `maxPerFn` overrides the per-function
   *   cap; anything that is not a positive number falls back to the default.
   */
  constructor(baseCache, opts = {}) {
    this._base = baseCache;
    this._refinements = new Map();     // qid → Map<stateHash, summary>
    this._lru = new Map();             // qid → stateHash[] (least-recent first)
    // A non-positive cap would make the eviction loop in store() spin forever
    // (shift() on an empty array never lowers the length below the cap), so
    // only a positive number is honored.
    const requestedCap = Number(opts?.maxPerFn);
    this.maxPerFn = requestedCap > 0 ? requestedCap : MAX_REFINEMENTS_PER_FN;
    this.metrics = { refinementHits: 0, refinementMisses: 0, refinementEvictions: 0 };
  }

  /**
   * True when `entryState` carries no taint information at all: a falsy
   * value, an empty Set, an empty array, or an object without own keys.
   * These are exactly the inputs `_hash` maps to the '∅' bucket.
   */
  _isEmpty(entryState) {
    if (!entryState) return true;
    if (entryState instanceof Set) return entryState.size === 0;
    if (Array.isArray(entryState)) return entryState.length === 0;
    if (typeof entryState === 'object') return Object.keys(entryState).length === 0;
    return false;
  }

  /**
   * Stable string key for an entry state. Sets and arrays are sorted and
   * joined with '|'; objects become sorted `key=0|1` pairs joined with ',';
   * any other value is stringified.
   */
  _hash(entryState) {
    if (this._isEmpty(entryState)) return '∅';
    if (entryState instanceof Set) return [...entryState].sort().join('|');
    if (Array.isArray(entryState)) return entryState.slice().sort().join('|');
    if (typeof entryState === 'object') {
      // Object keyed by parameter index → tainted bool.
      return Object.keys(entryState)
        .sort()
        .map((k) => `${k}=${entryState[k] ? 1 : 0}`)
        .join(',');
    }
    return String(entryState);
  }

  /**
   * Look up the summary for (qid, entryState).
   * A refined entry counts as a hit. Otherwise the base cache is consulted
   * with the same arguments and a truthy result is returned as-is (counted
   * neither as hit nor miss). Returns undefined, and counts a miss, when
   * neither layer has a summary.
   */
  get(qid, entryState) {
    const h = this._hash(entryState);
    const m = this._refinements.get(qid);
    if (m && m.has(h)) {
      this._touch(qid, h);
      this.metrics.refinementHits++;
      return m.get(h);
    }
    if (this._base && typeof this._base.get === 'function') {
      const v = this._base.get(qid, entryState);
      if (v) return v;
    }
    this.metrics.refinementMisses++;
    return undefined;
  }

  /**
   * Cache `summary` for (qid, entryState), evicting the least-recently-used
   * refinements of that function when the cap is reached. Overwriting an
   * existing state marks it as most recently used.
   */
  store(qid, entryState, summary) {
    const h = this._hash(entryState);
    let m = this._refinements.get(qid);
    let order = this._lru.get(qid);
    if (!m) { m = new Map(); this._refinements.set(qid, m); }
    if (!order) { order = []; this._lru.set(qid, order); }
    if (m.has(h)) {
      this._touch(qid, h);
    } else {
      while (order.length > 0 && order.length >= this.maxPerFn) {
        const evict = order.shift();
        m.delete(evict);
        this.metrics.refinementEvictions++;
      }
      order.push(h);
    }
    m.set(h, summary);
    // Also seed base for the empty-entry path. Every representation of "no
    // taint" (not only an empty Set) shares the '∅' bucket, so all of them
    // seed the base cache.
    if (this._isEmpty(entryState) && this._base && typeof this._base.set === 'function') {
      try { this._base.set(qid, new Set(), summary); } catch { /* best-effort: the refined entry is already stored */ }
    }
  }

  /** Move state hash `h` of function `qid` to the most-recent end of the LRU list. */
  _touch(qid, h) {
    const order = this._lru.get(qid);
    if (!order) return;
    const idx = order.indexOf(h);
    if (idx >= 0) { order.splice(idx, 1); order.push(h); }
  }

  /** Total number of refined summaries cached across all functions. */
  size() {
    let n = 0;
    for (const m of this._refinements.values()) n += m.size;
    return n;
  }
}
122
+
123
+ // ── On-demand backward slicing ─────────────────────────────────────────────
124
+
125
/**
 * backwardSlice(callGraph, finding, opts) — given a finding at a sink, walk
 * backwards through use-def edges to produce a minimal trace explaining
 * each step from source to sink.
 *
 * Each step's predecessor is `node.predecessor` when set, otherwise
 * `callGraph.getPred(node)` when the call graph provides that function.
 *
 * The traversal is bounded and cycle-aware:
 *   - revisiting a `file:line` stops the walk with a 'cycle-detected' entry;
 *   - when the depth limit is reached while predecessors remain, a bare
 *     `{ reason: 'slice-depth-cap' }` marker is emitted at the source end.
 *     A chain that ends naturally at exactly the limit gets no marker.
 *
 * @param {object|null} callGraph  Optional; only `getPred(node)` is used.
 * @param {object} finding  Finding whose `sink` (or the finding itself) is
 *   the starting node; `finding.file` is the fallback file for nodes
 *   without one.
 * @param {{maxDepth?: number}} [opts]  `maxDepth` overrides MAX_SLICE_DEPTH
 *   when it is a positive integer.
 * @returns {Array<{file?: string, line?: number, snippet?: string|null, reason: string}>}
 *   Trace entries ordered source-first; empty when `finding` is falsy.
 */
const MAX_SLICE_DEPTH = 16;

export function backwardSlice(callGraph, finding, opts = {}) {
  const out = [];
  if (!finding) return out;

  const requestedDepth = opts?.maxDepth;
  const maxDepth = Number.isInteger(requestedDepth) && requestedDepth > 0
    ? requestedDepth
    : MAX_SLICE_DEPTH;

  const seen = new Set();
  let cur = finding.sink || finding;
  let depth = 0;
  while (cur && depth < maxDepth) {
    const file = cur.file || finding.file;
    const snippet = cur.snippet || cur.expr || null;
    const key = `${file}:${cur.line}`;
    if (seen.has(key)) {
      // Emit the same normalized shape as every other entry instead of
      // spreading the raw node (which would drag `predecessor` along).
      out.push({ file, line: cur.line, snippet, reason: 'cycle-detected' });
      cur = null;
      break;
    }
    seen.add(key);
    out.push({ file, line: cur.line, snippet, reason: cur.reason || 'use-def-pred' });
    cur = cur.predecessor
      || (callGraph && typeof callGraph.getPred === 'function' && callGraph.getPred(cur))
      || null;
    depth++;
  }
  // `cur` is still set only when the loop stopped on the depth limit with
  // more predecessors left to visit.
  if (cur) out.push({ reason: 'slice-depth-cap' });
  return out.reverse(); // source-first
}
159
+
160
+ // ── Persistent cross-scan summary cache ────────────────────────────────────
161
+
162
/** Absolute location of the persisted summary file under `scanRoot`. */
function _cachePath(scanRoot) {
  return path.join(scanRoot, '.agentic-security', 'ifds-summaries.json');
}

/** First 16 hex characters of the SHA-256 digest of `content`. */
function _fileHash(content) {
  return crypto.createHash('sha256').update(content).digest('hex').slice(0, 16);
}

/** True for non-null, non-array objects (the only JSON shape used as a table here). */
function _isPlainRecord(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Load a previously-persisted IFDS summary cache. Returns:
 *   { summaries: Map<qid, summary>, fileHashes: Map<filePath, sha>, scanTs }
 * or null if no persisted cache exists, it cannot be read or parsed, or its
 * top level is not a JSON object. Malformed `summaries` / `fileHashes`
 * tables are treated as empty, and non-string hash values are dropped.
 */
export function loadPersistedCache(scanRoot) {
  let raw;
  try {
    raw = JSON.parse(fs.readFileSync(_cachePath(scanRoot), 'utf8'));
  } catch {
    return null; // missing, unreadable, or invalid JSON
  }
  if (!_isPlainRecord(raw)) return null;

  const summaryTable = _isPlainRecord(raw.summaries) ? raw.summaries : {};
  const hashTable = _isPlainRecord(raw.fileHashes) ? raw.fileHashes : {};
  return {
    summaries: new Map(Object.entries(summaryTable)),
    fileHashes: new Map(Object.entries(hashTable).filter(([, sha]) => typeof sha === 'string')),
    scanTs: raw.scanTs || null,
  };
}

/**
 * Persist the current scan's summaries to disk. Subsequent scans can
 * skip re-analysis of functions whose file hash hasn't changed.
 *
 * Best-effort: never throws. A missing cache or IR table is written as an
 * empty table, and I/O or serialization failures are ignored.
 *
 * @param {string} scanRoot   Directory that receives `.agentic-security/`.
 * @param {object|null} cache Object exposing `_refinements`
 *   (Map<qid, Map<stateHash, summary>>).
 * @param {Iterable<[string, object]>|object|null} perFileIR  filePath → IR
 *   pairs (a Map or any iterable of pairs) or a plain object keyed by file
 *   path. Only IRs whose `_content` is a string are hashed.
 */
export function persistCache(scanRoot, cache, perFileIR) {
  const irEntries = perFileIR && typeof perFileIR[Symbol.iterator] === 'function'
    ? perFileIR
    : Object.entries(perFileIR || {});
  const fileHashes = {};
  for (const [filePath, ir] of irEntries) {
    if (ir && typeof ir._content === 'string') fileHashes[filePath] = _fileHash(ir._content);
  }

  const refinements = cache && cache._refinements instanceof Map ? cache._refinements : new Map();
  const summaries = {};
  for (const [qid, byState] of refinements) {
    // Serialize only the empty-entry-state summary — the refinements are
    // ephemeral per scan; the empty-entry summary is the stable contract.
    if (byState instanceof Map && byState.has('∅')) summaries[qid] = byState.get('∅');
  }

  const out = { scanTs: new Date().toISOString(), summaries, fileHashes };
  try {
    const target = _cachePath(scanRoot);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.writeFileSync(target, JSON.stringify(out, null, 2));
  } catch { /* best-effort */ }
}

/**
 * Skip analysis of an unchanged function — when the file containing the
 * function hasn't changed since the last persisted cache, reuse the prior
 * summary.
 *
 * Returns false (re-analyze) whenever the answer cannot be established:
 * no previous cache, no recorded hash for `filePath`, or `currentContent`
 * that is neither a string nor a binary view.
 *
 * @param {{fileHashes?: Map<string,string>|object}|null} prevCache
 * @param {string} filePath
 * @param {string|ArrayBufferView} currentContent
 * @returns {boolean}
 */
export function shouldSkipReanalysis(prevCache, filePath, currentContent) {
  const hashes = prevCache ? prevCache.fileHashes : null;
  if (!hashes) return false;
  // Hashing anything else would throw inside crypto and abort the scan.
  if (typeof currentContent !== 'string' && !ArrayBuffer.isView(currentContent)) return false;

  let prevHash;
  if (hashes instanceof Map) {
    prevHash = hashes.get(filePath);
  } else if (Object.prototype.hasOwnProperty.call(hashes, filePath)) {
    prevHash = hashes[filePath];
  }
  if (!prevHash) return false;
  return prevHash === _fileHash(currentContent);
}
221
+
222
+ export const _internals = { _cachePath, _fileHash, MAX_REFINEMENTS_PER_FN, MAX_SLICE_DEPTH };
@@ -0,0 +1,153 @@
1
+ // k=2 monovariant summary cache — Recommendation #9 of the SCA/SAST plan.
2
+ //
3
+ // The existing scanner/src/dataflow/summaries.js (referenced by engine.js)
4
+ // implements k=1: per-function ONE summary computed under empty entry state.
5
+ // That misses the common Juliet pattern of "function is pure when called
6
+ // with clean args but vulnerable when called with tainted args" because
7
+ // only the empty-state summary is cached.
8
+ //
9
+ // This module wraps SummaryCache with a per-(qid, entry-state-class) lookup,
10
+ // up to 2 distinct entry-state classes per function. The "class" is computed
11
+ // from a stable hash of which parameter positions are tainted — at k=2 we
12
+ // cache the all-clean state and one tainted state per function. Three or
13
+ // more distinct states evict to LRU.
14
+ //
15
+ // Usage:
16
+ // const k2 = new K2SummaryCache(opts.baseCache);
17
+ // k2.get(qid, entryState) → summary | undefined
18
+ // k2.compute(qid, entryState, fn) → summary
19
+ // k2.applyAtCallSite(qid, entryState, callerCtx) → mutations
20
+ //
21
+ // Falls back to k=1 behaviour transparently when summaries.js's
22
+ // SummaryCache.get returns a summary that doesn't carry entry-state info,
23
+ // so the rest of the engine continues to work unchanged.
24
+
25
const _MAX_STATES_PER_FN = 2;

/**
 * True when `entryState` represents "no tainted parameters": a falsy value,
 * an empty Set, or an empty array. These all share the '∅' hash bucket.
 */
function _isEmptyEntryState(entryState) {
  if (!entryState) return true;
  if (entryState instanceof Set) return entryState.size === 0;
  if (Array.isArray(entryState)) return entryState.length === 0;
  return false;
}

/**
 * Stable string from a Set/array of "tainted parameter positions" / variable
 * names. For k=2 we only care about WHICH positions are tainted — the actual
 * values are not modelled. Members are compared by their string form and
 * de-duplicated, so `[0, 0]`, `[0]` and `new Set([0])` all hash identically.
 * Any other non-empty entry state falls into the single opaque bucket '*'.
 */
function _hashEntryState(entryState) {
  if (_isEmptyEntryState(entryState)) return '∅';
  if (entryState instanceof Set || Array.isArray(entryState)) {
    return [...new Set(Array.from(entryState, String))].sort().join(',');
  }
  // Fallback for opaque entry states — single bucket.
  return '*';
}

export class K2SummaryCache {
  /**
   * @param {object|null} baseCache  Existing k=1 cache. Its `get`, `set` and
   *   `applyAtCallSite` members are only used when they are functions.
   */
  constructor(baseCache) {
    this._base = baseCache;
    this._states = new Map();       // qid → Map<stateHash, summary>
    this._stateOrder = new Map();   // qid → stateHash[] (least-recent first)
    this.metrics = { hits: 0, misses: 0, evictions: 0, computes: 0 };
  }

  /**
   * Read a summary for (qid, entry). Returns undefined if uncached.
   * Falls back to the base cache when our k=2 table has no entry; a truthy
   * base result counts as a hit.
   */
  get(qid, entryState) {
    const hash = _hashEntryState(entryState);
    const states = this._states.get(qid);
    if (states && states.has(hash)) {
      this.metrics.hits++;
      this._touch(qid, hash);
      return states.get(hash);
    }
    // k=1 fallback — accept whatever the base cache stored.
    if (this._base && typeof this._base.get === 'function') {
      const v = this._base.get(qid, entryState);
      if (v) { this.metrics.hits++; return v; }
    }
    this.metrics.misses++;
    return undefined;
  }

  /**
   * Compute (or retrieve) a summary for (qid, entry). Uses the supplied
   * `computeFn` only on miss. Caches per-state at k=2.
   *
   * Any cached value other than undefined is returned as-is, so a summary
   * that happens to be falsy (0, '', false, null) is not recomputed on
   * every call.
   */
  compute(qid, entryState, computeFn) {
    const existing = this.get(qid, entryState);
    if (existing !== undefined) return existing;
    this.metrics.computes++;
    const summary = computeFn();
    this._store(qid, entryState, summary);
    // Also seed the base cache under empty-entry-state so the k=1 engine
    // paths that don't know about k=2 still see the cleanest summary.
    // Every input that hashes to '∅' (falsy, empty Set, empty array) seeds.
    if (_isEmptyEntryState(entryState) && this._base && typeof this._base.set === 'function') {
      try { this._base.set(qid, new Set(), summary); } catch { /* best-effort: the k=2 entry is already stored */ }
    }
    return summary;
  }

  /**
   * Apply the cached summary at a call site. Returns null when no summary is
   * cached. When the base cache provides `applyAtCallSite`, the call is
   * delegated to it with the summary as fourth argument (null if it
   * throws); otherwise the summary itself is returned.
   */
  applyAtCallSite(qid, entryState, callerCtx) {
    const summary = this.get(qid, entryState);
    if (!summary) return null;
    // Defer to the base implementation when present — we don't reimplement
    // the mutation algebra here.
    if (this._base && typeof this._base.applyAtCallSite === 'function') {
      try { return this._base.applyAtCallSite(qid, entryState, callerCtx, summary); }
      catch { return null; }
    }
    return summary;
  }

  /** Insert or overwrite the summary for (qid, entry), evicting LRU states beyond k=2. */
  _store(qid, entryState, summary) {
    const hash = _hashEntryState(entryState);
    let states = this._states.get(qid);
    let order = this._stateOrder.get(qid);
    if (!states) { states = new Map(); this._states.set(qid, states); }
    if (!order) { order = []; this._stateOrder.set(qid, order); }
    if (!states.has(hash)) {
      // LRU eviction at k=2.
      while (order.length >= _MAX_STATES_PER_FN) {
        const evict = order.shift();
        states.delete(evict);
        this.metrics.evictions++;
      }
      order.push(hash);
    }
    states.set(hash, summary);
  }

  /** Mark `hash` as the most recently used state of `qid`. */
  _touch(qid, hash) {
    const order = this._stateOrder.get(qid);
    if (!order) return;
    const idx = order.indexOf(hash);
    if (idx >= 0) { order.splice(idx, 1); order.push(hash); }
  }

  /**
   * Size of the cache — for diagnostics / metrics dashboards.
   */
  size() {
    let n = 0;
    for (const states of this._states.values()) n += states.size;
    return n;
  }
}
142
+
143
/**
 * Return a K2SummaryCache view over `baseCache`. An existing
 * K2SummaryCache is handed back untouched; anything else (including a
 * missing cache, which is normalized to null) is wrapped in a fresh
 * instance. The engine can opt into this via AGENTIC_SECURITY_K2_TAINT=1.
 */
export function wrapAsK2(baseCache) {
  const alreadyWrapped = baseCache instanceof K2SummaryCache;
  return alreadyWrapped ? baseCache : new K2SummaryCache(baseCache || null);
}

export const _internals = { _hashEntryState, _MAX_STATES_PER_FN };
@@ -0,0 +1,198 @@
1
+ // Library taint summaries — Recommendation #5 of the SCA/SAST plan.
2
+ //
3
+ // Hand-curated knowledge that "this library method returns tainted data" or
4
+ // "this method propagates taint from arg N to its return." Used by the
5
+ // existing dataflow engine + per-language detectors when classifying the
6
+ // taint state of a declaration's rhs.
7
+ //
8
+ // The summaries are intentionally per-language because the same concept
9
+ // (a user-input source) has different idioms in each ecosystem. Each entry:
10
+ //
11
+ // { pattern: RegExp, kind: 'source' | 'sanitizer' | 'passthrough',
12
+ // framework: 'spring' | 'aspnet' | 'glibc' | … }
13
+ //
14
+ // Kinds:
15
+ // source — return value is unconditionally tainted
16
+ // sanitizer — return value is unconditionally clean, even if any arg
17
+ // was tainted (e.g. HtmlEncode, parameterized prepare)
18
+ // passthrough — return value is tainted iff arg N is tainted (taint
19
+ // flows through). Not modelled in v1; reserved for future
20
+ // inter-procedural extensions (Recommendation #9).
21
+ //
22
+ // Usage: detectors call `isLibrarySource(text, lang)` and `isLibrarySanitizer
23
+ // (text, lang)` to refine their per-call decisions.
24
+
25
// Java: request-scoped input surfaces (sources) and encoders / binders /
// validators (sanitizers). Patterns are matched against raw source text.
const JAVA_SUMMARIES = {
  sources: [
    // Servlet API — every request-scoped getter is a user-input source.
    /\bHttpServletRequest\b[\s\S]{0,2000}?\.\s*(?:getParameter(?:Values|Map)?|getQueryString|getHeader(?:Names)?|getInputStream|getReader|getCookies?|getRequestURI|getRequestURL|getPathInfo)\s*\(/,
    /\bjavax\.servlet\.http\.HttpServletRequest\b/,
    // Spring MVC — controller method annotations bind to request data.
    /@RequestParam\b/,
    /@RequestBody\b/,
    /@PathVariable\b/,
    /@RequestHeader\b/,
    /@CookieValue\b/,
    /@ModelAttribute\b/,
    // Spring Security — the principal is user-controlled in the trust sense
    // (it identifies WHO the request is from; not auto-sanitized).
    /\bSecurityContextHolder\s*\.\s*getContext\s*\(\s*\)\s*\.\s*getAuthentication\s*\(\s*\)/,
    // Java Files API — file content is untrusted when source is unknown.
    /\bFiles\s*\.\s*(?:readString|readAllBytes|readAllLines|lines|newBufferedReader|newInputStream)\b/,
    /\bPaths\s*\.\s*get\s*\([^)]*(?:System\.getProperty|args)\b/,
    // BufferedReader / Scanner reading user input.
    /\bBufferedReader\b[\s\S]{0,500}?\.\s*readLine\s*\(/,
    /\bScanner\b[\s\S]{0,500}?\.\s*(?:next(?:Line)?|nextInt|nextLong)\s*\(/,
    // System.getenv / System.getProperty — environment is configurable.
    /\bSystem\s*\.\s*(?:getenv|getProperty)\s*\(/,
    // Jackson — deserialization input is untrusted.
    /\bObjectMapper\b[\s\S]{0,500}?\.\s*readValue\s*\(/,
    /\bJsonParser\b[\s\S]{0,500}?\.\s*getValueAsString\s*\(/,
    // Apache Commons IO.
    /\bIOUtils\s*\.\s*toString\s*\(/,
    /\bFileUtils\s*\.\s*readFileToString\s*\(/,
    // Spring WebFlux ServerWebExchange.
    /\bServerWebExchange\b[\s\S]{0,500}?\.\s*getRequest\s*\(/,
  ],
  sanitizers: [
    /\bOWASP\.Encoder\b/,
    /\bESAPI\b[\s\S]{0,200}?\.\s*encoder\s*\(\s*\)/,
    /\bStringEscapeUtils\s*\.\s*escape(?:Html\d?|Xml|Sql|Java|JavaScript)\b/,
    /\bHtmlUtils\s*\.\s*htmlEscape\b/,
    /\bUriUtils\s*\.\s*encode\b/,
    // JDBC PreparedStatement parameter setters — taint is cleaned at bind.
    /\bPreparedStatement\b[\s\S]{0,500}?\.\s*set(?:String|Int|Long|Object|BigDecimal|Date|Timestamp)\s*\(/,
    /\bNamedParameterJdbcTemplate\b[\s\S]{0,500}?\.\s*(?:query|update|queryForObject)\s*\([^,]+,\s*new\s+MapSqlParameterSource\b/,
    // Java validators.
    /\bjakarta\.validation\b/,
    /\bjavax\.validation\b/,
    // No leading \b: '@' is not a word character, so `\b@` only matches when
    // a word character sits directly before the '@' — which never happens
    // for an annotation written after whitespace, '(' or at line start.
    /@Valid\b/,
  ],
};

// C# / ASP.NET: request surfaces, model binding, deserialization and I/O
// reads (sources) and encoders / parameter objects / parsers (sanitizers).
const CSHARP_SUMMARIES = {
  sources: [
    // ASP.NET request surfaces.
    /\bHttpRequest\b[\s\S]{0,500}?\.\s*(?:Query|Form|Headers|Cookies|RouteValues|Body|InputStream|QueryString|Params|Path|Url)\b/,
    /\bHttpContext\s*\.\s*Request\b/,
    /\bIFormCollection\b/,
    /\bIFormFile\b/,
    /\bIFormFileCollection\b/,
    // ASP.NET Core model binding. Matched up to the attribute name's word
    // boundary so forms with arguments or attribute lists also count, e.g.
    // [FromQuery(Name = "q")] or [FromBody, Required].
    /\[FromQuery\b/,
    /\[FromBody\b/,
    /\[FromForm\b/,
    /\[FromRoute\b/,
    /\[FromHeader\b/,
    // Configuration may carry secrets but the VALUES are environment-supplied.
    /\bIConfiguration\b[\s\S]{0,500}?\.\s*(?:GetSection|GetValue|GetConnectionString|GetChildren)\s*\(/,
    // Newtonsoft.Json deserialization.
    /\bJsonConvert\s*\.\s*Deserialize(?:Object|XmlNode)\s*</,
    /\bJsonSerializer\s*\.\s*Deserialize\s*</,
    // Files / streams.
    /\bFile\s*\.\s*(?:ReadAllText|ReadAllLines|ReadAllBytes|OpenRead|OpenText)\s*\(/,
    /\bStreamReader\b[\s\S]{0,500}?\.\s*(?:ReadLine|ReadToEnd|Read)\s*\(/,
    /\bBinaryReader\b[\s\S]{0,500}?\.\s*Read(?:String|Bytes|Char|Int32|Int64|UInt32|UInt64)\s*\(/,
    // Network reads.
    /\bWebClient\b[\s\S]{0,500}?\.\s*Download(?:String|Data|File)\s*\(/,
    /\bHttpClient\b[\s\S]{0,500}?\.\s*(?:GetAsync|GetStringAsync|PostAsync|SendAsync)\s*\(/,
    // Environment + console.
    /\bEnvironment\s*\.\s*GetEnvironmentVariable\s*\(/,
    /\bConsole\s*\.\s*ReadLine\s*\(/,
  ],
  sanitizers: [
    /\bHttpUtility\s*\.\s*HtmlEncode\b/,
    /\bHtmlEncoder\s*\.\s*Default\s*\.\s*Encode\b/,
    /\bAntiXssEncoder\b/,
    /\bSqlParameter\b/,
    /\bMySqlParameter\b/,
    /\bNpgsqlParameter\b/,
    // EF Core parameterized helpers.
    /\bFromSqlInterpolated\s*\(/,
    // Validation.
    /\bint\s*\.\s*TryParse\s*\(/,
    /\bGuid\s*\.\s*TryParse\s*\(/,
    /\bDateTime\s*\.\s*TryParse\s*\(/,
    /\bRegex\s*\.\s*Replace\s*\(/,
  ],
};

// C / C++: environment, stdin, file and socket reads (sources) and
// length-checked copies / character-class checks / numeric parsers
// (sanitizers).
const CPP_SUMMARIES = {
  sources: [
    // POSIX — environment + user input.
    /\bgetenv\s*\(/,
    /\bsecure_getenv\s*\(/,
    /\bargv\s*\[/,
    /\bgets\s*\(/,
    /\bfgets\s*\(/,
    /\bscanf\s*\(/,
    /\bfscanf\s*\(/,
    /\bgetc\s*\(/,
    /\bfgetc\s*\(/,
    /\bread\s*\(\s*\d+/,                  // unistd read(fd, ...) with a literal fd
    /\brecv\s*\(/,
    /\brecvfrom\s*\(/,
    // OpenSSL / network.
    /\bBIO_read\s*\(/,
    /\bSSL_read\s*\(/,
    // Win32 input.
    /\bGetCommandLine[AW]?\s*\(/,
    /\bGetEnvironmentVariable[AW]?\s*\(/,
    // Standard streams.
    /\bstd\s*::\s*cin\s*>>/,
    /\bstd\s*::\s*getline\s*\(\s*std\s*::\s*cin\b/,
  ],
  sanitizers: [
    // Length-checked copies (best-effort).
    /\bstrncpy\s*\(\s*[^,]+,\s*[^,]+,\s*sizeof\s*\(/,
    /\bsnprintf\s*\(\s*[^,]+,\s*sizeof\s*\(/,
    /\bisdigit\s*\(/,
    /\bisalpha\s*\(/,
    /\bisalnum\s*\(/,
    /\bstrtol\s*\(/,
    /\bstrtoul\s*\(/,
  ],
};

// Language key → summary table. C shares the C++ table.
const SUMMARIES_BY_LANG = {
  java: JAVA_SUMMARIES,
  csharp: CSHARP_SUMMARIES,
  cpp: CPP_SUMMARIES,
  c: CPP_SUMMARIES,
};
163
+
164
// Resolve language from a file path or explicit hint. A string hint is
// trimmed and lower-cased so 'Java' / 'CSharp' resolve like 'java' / 'csharp';
// without a hint the language is derived from the file extension.
function _langOf(hint, file) {
  if (hint) return typeof hint === 'string' ? hint.trim().toLowerCase() : hint;
  if (!file) return null;
  if (/\.java$/i.test(file)) return 'java';
  if (/\.cs$/i.test(file)) return 'csharp';
  if (/\.(?:c|cc|cpp|cxx|h|hh|hpp)$/i.test(file)) return 'cpp';
  return null;
}

// Look up the summary table for a language hint or file path; null when the
// language is unknown. A string containing '.' is treated as a file path,
// anything else as a language hint. Only own keys of SUMMARIES_BY_LANG are
// honored — a hint such as 'constructor' or 'toString' would otherwise
// resolve through Object.prototype and make the callers throw while
// iterating a missing pattern list.
function _summariesFor(langOrFile) {
  const looksLikePath = typeof langOrFile === 'string' && langOrFile.includes('.');
  const lang = _langOf(looksLikePath ? null : langOrFile, langOrFile);
  if (typeof lang !== 'string') return null;
  return Object.prototype.hasOwnProperty.call(SUMMARIES_BY_LANG, lang)
    ? SUMMARIES_BY_LANG[lang]
    : null;
}

/**
 * Returns true if `text` contains a library-source pattern for the language.
 *
 * @param {string} text        Source text to test.
 * @param {string} langOrFile  Language hint ('java', 'csharp', 'cpp', 'c';
 *   case-insensitive) or a file path whose extension selects the language.
 * @returns {boolean} false for empty text or an unknown language.
 */
export function isLibrarySource(text, langOrFile) {
  if (!text) return false;
  const table = _summariesFor(langOrFile);
  if (!table) return false;
  return table.sources.some((re) => re.test(text));
}

/**
 * Returns true if `text` contains a library-sanitizer pattern for the language.
 *
 * @param {string} text        Source text to test.
 * @param {string} langOrFile  Language hint ('java', 'csharp', 'cpp', 'c';
 *   case-insensitive) or a file path whose extension selects the language.
 * @returns {boolean} false for empty text or an unknown language.
 */
export function isLibrarySanitizer(text, langOrFile) {
  if (!text) return false;
  const table = _summariesFor(langOrFile);
  if (!table) return false;
  return table.sanitizers.some((re) => re.test(text));
}

export const _internals = { JAVA_SUMMARIES, CSHARP_SUMMARIES, CPP_SUMMARIES, SUMMARIES_BY_LANG };