@clear-capabilities/agentic-security-scanner 0.77.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/.agentic-security/findings.json +1907 -0
  2. package/bin/.agentic-security/last-scan.json +1907 -0
  3. package/bin/.agentic-security/last-scan.json.sig +1 -0
  4. package/bin/.agentic-security/scan-history.json +166 -0
  5. package/bin/.agentic-security/streak.json +20 -0
  6. package/bin/agentic-security.js +55 -9
  7. package/dist/178.index.js +1 -1
  8. package/dist/384.index.js +1 -1
  9. package/dist/476.index.js +5 -5
  10. package/dist/637.index.js +1 -1
  11. package/dist/700.index.js +138 -0
  12. package/dist/718.index.js +159 -0
  13. package/dist/824.index.js +126 -0
  14. package/dist/838.index.js +1 -1
  15. package/dist/985.index.js +5 -0
  16. package/dist/agentic-security.mjs +32 -32
  17. package/dist/agentic-security.mjs.sha256 +1 -1
  18. package/package.json +4 -4
  19. package/src/dataflow/async-sequencing.js +16 -7
  20. package/src/dataflow/builtin-summaries.js +131 -0
  21. package/src/dataflow/catalog.js +107 -0
  22. package/src/dataflow/cross-repo.js +75 -1
  23. package/src/dataflow/engine.js +181 -8
  24. package/src/dataflow/implicit-flow.js +24 -6
  25. package/src/dataflow/stub-aware-filter.js +69 -11
  26. package/src/dataflow/summaries.js +28 -3
  27. package/src/engine-parallel.js +70 -0
  28. package/src/engine.js +270 -19
  29. package/src/integrations/index.js +2 -1
  30. package/src/ir/callgraph.js +27 -7
  31. package/src/ir/index.js +22 -1
  32. package/src/ir/parser-go.js +403 -0
  33. package/src/ir/parser-js.js +2 -0
  34. package/src/ir/parser-php.js +330 -0
  35. package/src/ir/parser-py.helper.py +137 -11
  36. package/src/ir/parser-rb.js +309 -0
  37. package/src/llm-validator/index.js +7 -5
  38. package/src/mcp/audit.js +5 -0
  39. package/src/posture/calibration-drift.js +2 -1
  40. package/src/posture/calibration.js +16 -1
  41. package/src/posture/fix-history.js +8 -2
  42. package/src/posture/profile.js +4 -5
  43. package/src/posture/rule-overrides.js +2 -3
  44. package/src/posture/rule-pack-signing.js +2 -3
  45. package/src/posture/rule-synthesis.js +5 -6
  46. package/src/posture/security-trend.js +4 -7
  47. package/src/posture/state-dir.js +124 -0
  48. package/src/posture/streak.js +3 -0
  49. package/src/posture/suppressions.js +5 -8
  50. package/src/posture/triage.js +16 -5
  51. package/src/posture/validator-metrics.js +3 -6
  52. package/src/report/index.js +23 -2
  53. package/src/sast/cache-poisoning.js +77 -0
  54. package/src/sast/comparison-safety.js +73 -0
  55. package/src/sast/db-taint.js +78 -0
  56. package/src/sast/graphql.js +127 -0
  57. package/src/sast/llm-stored-prompt.js +57 -0
  58. package/src/sast/mutation-xss.js +43 -0
  59. package/src/sast/nosql-injection.js +5 -0
  60. package/src/sast/null-byte-injection.js +76 -0
  61. package/src/sast/redos-nfa.js +338 -0
  62. package/src/sast/rust.js +26 -0
  63. package/src/sast/sensitive-data-logging.js +73 -0
  64. package/src/sast/weak-password-hash.js +77 -0
  65. package/src/sast/weak-randomness.js +100 -0
  66. package/src/sca/binary-metadata.js +124 -0
  67. package/src/sca/llm-function-extract.js +107 -0
  68. package/src/sca/py-package-functions.js +118 -0
  69. package/src/sca/vendor-detect.js +144 -0
@@ -38,6 +38,7 @@ import { accessPathOf, isCoveredBy, addPath, removePathAndDescendants, joinSets
38
38
  import { aliasesForVar } from './points-to.js';
39
39
  import { higherOrderTaintFlow } from './higher-order.js';
40
40
  import { SummaryCache, entryStateFromCall } from './summaries.js';
41
+ import { lookupBuiltinSummary } from './builtin-summaries.js';
41
42
 
42
43
  // v0.70 #2 — addPath that also taints every alias of the variable.
43
44
  // When `target` is a dotted path like "a.x" and the root `a` has aliases
@@ -61,13 +62,13 @@ function _addPathAliasAware(state, path, callContext) {
61
62
  return s;
62
63
  }
63
64
 
65
+ let _activeConstantVars = null;
66
+
64
67
  function exprTaint(expr, state) {
65
- // Returns true iff this expression evaluates to a tainted value under the
66
- // given taint state. ALSO treats catalog-registered source patterns as
67
- // tainted at-read — `req.body.host` used inline (no intermediate local)
68
- // is tainted because the source resolves at the read site.
69
68
  if (expr && expr.kind === 'member' && exprIsSource(expr)) return true;
70
69
  if (!expr) return false;
70
+ // Constant propagation: variables assigned from literals are never tainted
71
+ if (expr.kind === 'ident' && _activeConstantVars && _activeConstantVars.has(expr.name)) return false;
71
72
  // P1.1 — field-sensitive access path: if the expression is a pure
72
73
  // ident/member chain ("x.y.z"), ask the access-path lattice whether any
73
74
  // shorter prefix in the state covers it. This is what makes
@@ -156,13 +157,35 @@ function exprIsSource(expr) {
156
157
  const hit = matchSource(expr);
157
158
  if (hit) return hit;
158
159
  }
159
- // Recurse — `req.body.name` should still find `req.body` as source.
160
160
  if (expr.kind === 'member' && expr.object) {
161
161
  return exprIsSource(expr.object);
162
162
  }
163
163
  return null;
164
164
  }
165
165
 
166
// Literal-skeleton heuristics: each pattern answers "does the constant text
// around a tainted value look like it belongs to this injection family?".
// None of them carry the `g`/`y` flag, so `.test()` is stateless.

// Case-insensitive, whole-word SQL keywords.
const _SQL_KEYWORDS = /\b(SELECT|INSERT|UPDATE|DELETE|DROP|ALTER|CREATE|UNION|WHERE|FROM|JOIN|INTO|VALUES|SET|EXEC|EXECUTE)\b/i;

// HTML metacharacters, or the name of a DOM write API appearing in the text.
const _HTML_META = /[<>'"&]|innerHTML|outerHTML|document\.write/;

// Shell metacharacters and the `&&` / `||` command separators.
const _SHELL_META = /[;|`$(){}]|&&|\|\|/;
169
+
170
/**
 * Collect the constant string fragments of an expression, left to right.
 *
 * - `literal`  → a single fragment (falsy values render as the empty string)
 * - `tpl`      → one fragment per literal part; interpolated parts are skipped
 * - `binary`   → fragments of the left operand followed by those of the right
 * - otherwise  → no fragments
 *
 * @param {object|null|undefined} expr IR expression node.
 * @returns {string[]} literal fragments, possibly empty.
 */
function _literalPartsOfExpr(expr) {
  if (!expr) return [];
  switch (expr.kind) {
    case 'literal':
      return [String(expr.value || '')];
    case 'tpl': {
      const fragments = [];
      for (const part of expr.parts || []) {
        if (part.kind === 'literal') fragments.push(String(part.value || ''));
      }
      return fragments;
    }
    case 'binary': {
      const fromLeft = _literalPartsOfExpr(expr.left);
      const fromRight = _literalPartsOfExpr(expr.right);
      return fromLeft.concat(fromRight);
    }
    default:
      return [];
  }
}
177
+
178
/**
 * Decide whether the constant text of `expr` is consistent with the injection
 * family identified by `cwe`.
 *
 * Returns true (i.e. "cannot rule the family out") when the expression has no
 * literal text, when that text is only whitespace, or when `cwe` is not one
 * of the families handled here. Otherwise the space-joined literal fragments
 * are tested against the family's pattern.
 *
 * @param {object} expr IR expression node passed to a sink.
 * @param {string} cwe  CWE identifier such as 'CWE-89'.
 * @returns {boolean}
 */
function literalSkeletonMatchesFamily(expr, cwe) {
  const skeleton = _literalPartsOfExpr(expr).join(' ');
  if (skeleton.trim() === '') return true;
  switch (cwe) {
    case 'CWE-89':
    case 'CWE-943':
      return _SQL_KEYWORDS.test(skeleton);
    case 'CWE-79':
      return _HTML_META.test(skeleton);
    case 'CWE-78':
      return _SHELL_META.test(skeleton);
    default:
      return true;
  }
}
188
+
166
189
  // Apply a CFG node to a taint-state. Returns the new state + any finding emitted.
167
190
  function step(node, stateIn, callContext) {
168
191
  const state = new Set(stateIn);
@@ -176,9 +199,13 @@ function step(node, stateIn, callContext) {
176
199
  return { state, findings };
177
200
 
178
201
  case 'assign': {
179
- // Source detection on RHS.
180
202
  const src = exprIsSource(node.source);
181
203
  const target = typeof node.target === 'string' ? node.target : null;
204
+ // Constant propagation: track variables assigned from literals
205
+ if (target && _activeConstantVars) {
206
+ if (node.source && node.source.kind === 'literal') _activeConstantVars.set(target, node.source.value);
207
+ else _activeConstantVars.delete(target);
208
+ }
182
209
  let newState = state;
183
210
  // Premortem #7: interprocedural return-taint via SummaryCache. If the
184
211
  // RHS is a call to a known callee whose empty-entry-state summary says
@@ -242,6 +269,25 @@ function step(node, stateIn, callContext) {
242
269
  for (const v of mutated.mutated) newState = addPath(newState, v);
243
270
  }
244
271
  if (sum && sum.returnTainted) return { state: newState, findings: [] };
272
+ } else if (target && calleeName) {
273
+ // Fallback: check builtin summaries for unresolved external calls
274
+ const builtin = lookupBuiltinSummary(calleeName);
275
+ if (builtin) {
276
+ if (builtin.returnTainted && (node.source.args || []).some(a => exprTaint(a, newState))) {
277
+ newState = _addPathAliasAware(newState, target, callContext);
278
+ } else if (!builtin.returnTainted) {
279
+ newState = removePathAndDescendants(newState, target);
280
+ return { state: newState, findings: [] };
281
+ }
282
+ if (builtin.mutatedParams && builtin.mutatedParams.size) {
283
+ for (const idx of builtin.mutatedParams) {
284
+ const argExpr = (node.source.args || [])[parseInt(idx)];
285
+ if (argExpr && argExpr.kind === 'ident' && (node.source.args || []).some(a => exprTaint(a, newState))) {
286
+ newState = _addPathAliasAware(newState, argExpr.name, callContext);
287
+ }
288
+ }
289
+ }
290
+ }
245
291
  }
246
292
  }
247
293
  if (src && target) {
@@ -293,6 +339,24 @@ function step(node, stateIn, callContext) {
293
339
  }
294
340
  }
295
341
  }
342
+ // Built-in mutation functions: Object.assign(target, ...sources),
343
+ // _.merge(target, ...sources), etc. When any source arg is tainted,
344
+ // taint the target in the caller's scope.
345
+ const calleeName = typeof node.callee === 'string' ? node.callee : null;
346
+ if (calleeName && /^(?:Object\.assign|_\.merge|_\.extend|_\.defaultsDeep|_\.defaults|Object\.defineProperties?)$/.test(calleeName)) {
347
+ const targetArg = (node.args || [])[0];
348
+ const sourceArgsTainted = argTaints.slice(1).some(Boolean);
349
+ if (targetArg && targetArg.kind === 'ident' && sourceArgsTainted) {
350
+ state = _addPathAliasAware(state, targetArg.name, callContext);
351
+ callContext._taintSources.push({
352
+ varName: targetArg.name,
353
+ sourceId: `builtin-mutation:${calleeName}`,
354
+ sourceLabel: `${calleeName} mutation`,
355
+ provenance: 'mutation',
356
+ line: node.line,
357
+ });
358
+ }
359
+ }
296
360
  if (cat) {
297
361
  for (const e of cat) {
298
362
  if (e.kind === 'sink' && (
@@ -302,6 +366,8 @@ function step(node, stateIn, callContext) {
302
366
  const taintedArgIdx = e.argIndex === 'all'
303
367
  ? argTaints.findIndex(Boolean) : e.argIndex;
304
368
  const taintedArgExpr = (node.args || [])[taintedArgIdx];
369
+ // String content analysis: skip if literal skeleton doesn't match injection family
370
+ if (e.vuln && taintedArgExpr && !literalSkeletonMatchesFamily(taintedArgExpr, e.vuln.cwe)) continue;
305
371
  // Premortem #10: attribute the source for THIS sink to the
306
372
  // source(s) that taint the actual argument expression — not the
307
373
  // first source the worklist happened to record. We walk the
@@ -400,12 +466,13 @@ function step(node, stateIn, callContext) {
400
466
  // every 100 iterations. A pathological CFG (large generated file with dense
401
467
  // control flow) can otherwise hold past the global timeout.
402
468
  function analyzeFunction(fn, entryState, callContext) {
403
- const nodes = fn.cfg.nodes; // plain object
469
+ const nodes = fn.cfg.nodes;
404
470
  const work = [];
405
- const inStates = new Map(); // nodeId → Set<varName>
471
+ const inStates = new Map();
406
472
  const outStates = new Map();
407
473
  inStates.set(fn.cfg.entry, new Set(entryState));
408
474
  work.push(fn.cfg.entry);
475
+ _activeConstantVars = new Map();
409
476
  // v0.70 #2 — points-to context for the step() transfer. Setting it here
410
477
  // (instead of plumbing through step's signature) keeps the worklist loop
411
478
  // unchanged and lets `step` consult `aliasesForVar` when callContext._pointsTo
@@ -535,6 +602,64 @@ export function runTaintEngine(perFileIR, callGraph, opts = {}) {
535
602
  if (summaryCache.size() === prevCacheSize) break;
536
603
  prevCacheSize = summaryCache.size();
537
604
  }
605
+ // Class-field cross-taint pass: when a method writes tainted data to _this_.field,
606
+ // re-analyze other methods of the same class with those fields in the entry state.
607
+ const classTaintedFields = new Map();
608
+ for (const fn of fnList) {
609
+ if (Date.now() > deadlineMs) break;
610
+ const sum = summaryCache.get(fn.qid, new Set());
611
+ if (!sum || !sum.mutatedParams) continue;
612
+ for (const p of sum.mutatedParams) {
613
+ if (typeof p === 'string' && p.startsWith('_this_.')) {
614
+ const classPrefix = fn.qid.split('::')[0] + '::';
615
+ if (!classTaintedFields.has(classPrefix)) classTaintedFields.set(classPrefix, new Set());
616
+ classTaintedFields.get(classPrefix).add(p);
617
+ }
618
+ }
619
+ }
620
+ for (const [classPrefix, fields] of classTaintedFields) {
621
+ if (Date.now() > deadlineMs) break;
622
+ for (const fn of fnList) {
623
+ if (!fn.qid.startsWith(classPrefix)) continue;
624
+ if (summaryCache.has(fn.qid, fields)) continue;
625
+ const ctx = {
626
+ _findings: [], _taintSources: [], _returnTainted: false,
627
+ _stack: new Set(), deadlineMs,
628
+ _summaryCache: summaryCache, _callGraph: callGraph,
629
+ _mutatedParamsOut: new Set(),
630
+ };
631
+ try { analyzeFunction(fn, fields, ctx); } catch {}
632
+ summaryCache.set(fn.qid, fields, {
633
+ returnTainted: !!ctx._returnTainted,
634
+ mutatedParams: ctx._mutatedParamsOut || new Set(),
635
+ taintedGlobals: new Set(),
636
+ findings: [],
637
+ });
638
+ }
639
+ }
640
+
641
+ // k=2 pass: compute tainted-entry-state summaries for functions with params
642
+ // AND at least one caller in the call graph. This catches "safe when called
643
+ // clean, dangerous when called with tainted input" wrapper patterns.
644
+ for (const fn of fnList) {
645
+ if (Date.now() > deadlineMs) break;
646
+ if (!fn.params || !fn.params.length) continue;
647
+ const taintedEntry = new Set(fn.params);
648
+ if (summaryCache.has(fn.qid, taintedEntry)) continue;
649
+ const ctx = {
650
+ _findings: [], _taintSources: [], _returnTainted: false,
651
+ _stack: new Set(), deadlineMs,
652
+ _summaryCache: summaryCache, _callGraph: callGraph,
653
+ _mutatedParamsOut: new Set(),
654
+ };
655
+ try { analyzeFunction(fn, taintedEntry, ctx); } catch {}
656
+ summaryCache.set(fn.qid, taintedEntry, {
657
+ returnTainted: !!ctx._returnTainted,
658
+ mutatedParams: ctx._mutatedParamsOut || new Set(),
659
+ taintedGlobals: new Set(),
660
+ findings: [],
661
+ });
662
+ }
538
663
  for (const fn of fnList) {
539
664
  if (++n > fnLimit) break;
540
665
  if (Date.now() > deadlineMs) break; // global timeout
@@ -552,6 +677,39 @@ export function runTaintEngine(perFileIR, callGraph, opts = {}) {
552
677
  try {
553
678
  analyzeFunction(fn, new Set(), callContext);
554
679
  } catch { continue; }
680
+ // Process higher-order invocations: resolve callbacks and analyze with
681
+ // tainted first-param. Feed findings back into the caller's finding set.
682
+ const hoInvocations = callContext._higherOrderInvocations || [];
683
+ const HO_CAP = 50;
684
+ for (let hi = 0; hi < Math.min(hoInvocations.length, HO_CAP); hi++) {
685
+ if (Date.now() > deadlineMs) break;
686
+ const inv = hoInvocations[hi];
687
+ if (!inv.callee || !inv.taintedParam) continue;
688
+ const resolved = callGraph.resolve ? callGraph.resolve(inv.callee) : null;
689
+ const cbFn = resolved && resolved.qid ? resolved : null;
690
+ if (!cbFn || !cbFn.params || !cbFn.params.length) continue;
691
+ const cbEntry = new Set([cbFn.params[inv.paramIndex || 0]]);
692
+ let cbSummary = summaryCache.get(cbFn.qid, cbEntry);
693
+ if (!cbSummary) {
694
+ cbSummary = summaryCache.compute(cbFn.qid, cbEntry, () => {
695
+ const inner = {
696
+ _findings: [], _taintSources: [], _returnTainted: false,
697
+ _stack: new Set(), deadlineMs,
698
+ _summaryCache: summaryCache, _callGraph: callGraph,
699
+ _mutatedParamsOut: new Set(),
700
+ };
701
+ try { analyzeFunction(cbFn, cbEntry, inner); } catch {}
702
+ // Merge any findings from the callback analysis into the caller.
703
+ callContext._findings.push(...inner._findings.map(f => ({ ...f, _funcQid: fn.qid, _via: 'higher-order' })));
704
+ return {
705
+ returnTainted: !!inner._returnTainted,
706
+ mutatedParams: inner._mutatedParamsOut || new Set(),
707
+ taintedGlobals: new Set(),
708
+ findings: [],
709
+ };
710
+ });
711
+ }
712
+ }
555
713
  for (const f of callContext._findings) {
556
714
  const key = `${f.sinkId}:${fn.file}:${f.line}`;
557
715
  if (seen.has(key)) continue;
@@ -583,6 +741,21 @@ export function runTaintEngine(perFileIR, callGraph, opts = {}) {
583
741
  }
584
742
  }
585
743
  // v0.69 — expose cache to caller (runDeepAnalysis) for incremental persistence.
744
+ // Dead code suppression: demote findings in functions with zero callers
745
+ // (except route handlers which are entry points)
746
+ const calledQids = new Set();
747
+ if (callGraph.edges) for (const e of callGraph.edges) calledQids.add(typeof e.to === 'string' ? e.to : e.to?.qid);
748
+ if (callGraph.callersOf) for (const [qid, callers] of callGraph.callersOf) { if (callers && callers.size) calledQids.add(qid); }
749
+ for (const f of all) {
750
+ if (!f._funcQid) continue;
751
+ const fn = callGraph.functions?.get(f._funcQid);
752
+ if (!fn) continue;
753
+ if (calledQids.has(f._funcQid)) continue;
754
+ if (/handler|route|controller|middleware|endpoint/i.test(fn.name || '')) continue;
755
+ f._inDeadCode = true;
756
+ const dg = { critical: 'high', high: 'medium', medium: 'low', low: 'info' };
757
+ if (dg[f.severity]) f.severity = dg[f.severity];
758
+ }
586
759
  Object.defineProperty(all, '_summaryCache', { value: summaryCache, enumerable: false });
587
760
  return all;
588
761
  }
@@ -34,7 +34,8 @@
34
34
  import { addPath } from './access-paths.js';
35
35
 
36
36
  export function isImplicitFlowEnabled() {
37
- return process.env.AGENTIC_SECURITY_IMPLICIT_FLOW === '1';
37
+ if (process.env.AGENTIC_SECURITY_IMPLICIT_FLOW === '0') return false;
38
+ return true;
38
39
  }
39
40
 
40
41
  /**
@@ -62,11 +63,25 @@ export function buildImplicitContext(cfg, exprTaint) {
62
63
  const n = cfg.nodes[nid];
63
64
  if (!n) continue;
64
65
  if (n.kind === 'if' && n.cond && exprTaint(n.cond)) {
65
- // Push the consequent at depth+1. We don't have a separate alternate
66
- // edge in this v1 IR `succ` carries both. v2 should add `then`/`else`
67
- // distinguishing edges.
66
+ // Config-constant filter: if condition is `ident === literal` where
67
+ // ident is NOT tainted, skip (it's a config check, not a taint branch).
68
+ if (n.cond.kind === 'binary' && (n.cond.op === '===' || n.cond.op === '==' || n.cond.op === 'Eq') &&
69
+ n.cond.right && n.cond.right.kind === 'literal' &&
70
+ n.cond.left && n.cond.left.kind === 'ident' &&
71
+ !exprTaint(n.cond.left)) {
72
+ for (const s of (n.succ || [])) {
73
+ stack.push({ nid: s, depth, label });
74
+ }
75
+ } else {
76
+ for (const s of (n.succ || [])) {
77
+ stack.push({ nid: s, depth: depth + 1, label: _formatCondLabel(n.cond) });
78
+ }
79
+ }
80
+ } else if (n.kind === 'loop-header' && depth > 0) {
81
+ // Loop-body exclusion: don't escalate implicit depth inside loops —
82
+ // loop iteration count is not a taint channel for most vuln classes.
68
83
  for (const s of (n.succ || [])) {
69
- stack.push({ nid: s, depth: depth + 1, label: _formatCondLabel(n.cond) });
84
+ stack.push({ nid: s, depth: Math.max(depth - 1, 0), label });
70
85
  }
71
86
  } else {
72
87
  for (const s of (n.succ || [])) {
@@ -94,6 +109,9 @@ export function implicitAssignTarget(node, ctx) {
94
109
  if (!node || node.kind !== 'assign') return null;
95
110
  if (!ctx || !ctx.tainted) return null;
96
111
  if (typeof node.target !== 'string') return null;
112
+ // Literal-assignment filter: assigning a constant in a tainted branch
113
+ // is not an implicit information leak.
114
+ if (node.source && node.source.kind === 'literal') return null;
97
115
  return node.target;
98
116
  }
99
117
 
@@ -119,7 +137,7 @@ export function createImplicitFinding(node, conditionLabel) {
119
137
  return {
120
138
  kind: 'taint',
121
139
  implicit: true,
122
- confidence: 0.5,
140
+ confidence: 0.40,
123
141
  vuln: `Implicit flow — variable mutated inside tainted-conditional branch (condition: ${conditionLabel || '?'})`,
124
142
  severity: 'medium',
125
143
  cwe: 'CWE-200',
@@ -72,23 +72,81 @@ function _normalizeType(t) {
72
72
  * Returns the (mutated) findings array with `_stubFilterStats` non-
73
73
  * enumerable sidecar.
74
74
  */
75
- export function applyStubAwareFilter(findings, stubs) {
75
// Textual patterns recognised as type guards, paired with the primitive type
// each one is taken to imply. Matched in order against a rendered condition
// string; the first hit wins. Capture group 1 is the guarded identifier.
const TYPE_GUARD_PATTERNS = [
  // typeof x === 'number'  /  typeof x == "number"
  {
    re: /typeof\s+(\w+)\s*===?\s*['"]number['"]/,
    type: 'number',
  },
  // typeof x === 'boolean'  /  typeof x == "boolean"
  {
    re: /typeof\s+(\w+)\s*===?\s*['"]boolean['"]/,
    type: 'boolean',
  },
  // Number.isInteger(x)
  {
    re: /Number\.isInteger\s*\(\s*(\w+)\s*\)/,
    type: 'number',
  },
  // Number.isFinite(x)
  {
    re: /Number\.isFinite\s*\(\s*(\w+)\s*\)/,
    type: 'number',
  },
  // !isNaN(x)
  {
    re: /!isNaN\s*\(\s*(\w+)\s*\)/,
    type: 'number',
  },
];
82
+
83
/**
 * Render a condition expression to text and report the type implied by the
 * first TYPE_GUARD_PATTERNS entry that matches it.
 *
 * @param {object|null|undefined} condExpr IR condition expression.
 * @returns {string|null} the implied type, or null when the condition is
 *   missing, cannot be rendered, or matches no pattern.
 */
function _extractTypeGuardType(condExpr) {
  const rendered = condExpr ? _exprToString(condExpr) : null;
  if (!rendered) return null;
  const match = TYPE_GUARD_PATTERNS.find(({ re }) => re.test(rendered));
  return match ? match.type : null;
}
92
+
93
/**
 * Render an IR expression as source-like text for pattern matching.
 *
 * String literals are rendered inside single quotes so that patterns which
 * look for a quoted value (for example `=== 'number'`) can match them, and so
 * that a string literal is not confused with an identifier of the same name.
 * Non-string literals are rendered with `String()`, so `0` and `false` are
 * preserved; `null`/`undefined` values render as the empty string.
 *
 * @param {object|null|undefined} expr IR expression node.
 * @returns {string|null} rendered text, or null for a missing node or a kind
 *   this renderer does not handle.
 */
function _exprToString(expr) {
  if (!expr) return null;
  switch (expr.kind) {
    case 'literal': {
      const { value } = expr;
      if (typeof value === 'string') return `'${value}'`;
      return value == null ? '' : String(value);
    }
    case 'ident':
      return expr.name;
    case 'binary':
      // Operands that cannot be rendered appear as the text "null".
      return `${_exprToString(expr.left)} ${expr.op} ${_exprToString(expr.right)}`;
    case 'call': {
      const callee = typeof expr.callee === 'string' ? expr.callee : _exprToString(expr.callee);
      const args = (expr.args || []).map((arg) => _exprToString(arg)).join(',');
      return `${callee}(${args})`;
    }
    case 'member':
      return `${_exprToString(expr.object)}.${expr.prop}`;
    case 'unknown':
      // NOTE(review): 'unknown' nodes are rendered as the bare word "typeof";
      // presumably the parser emits this kind for `typeof x` — confirm, since
      // the operand name is lost and the typeof patterns require it.
      return 'typeof';
    default:
      return null;
  }
}
103
+
104
/**
 * Look for a type-guard condition in the function that appears to contain a
 * finding's sink, and return the type that guard implies.
 *
 * The containing function is chosen heuristically: the first function whose
 * start line is at or before the sink line and whose estimated end
 * (start line + 3 lines per CFG node) is at or after it. Functions without a
 * CFG are skipped rather than dereferenced.
 *
 * Only `if` nodes are inspected. A guard whose own line number is known and
 * lies after the sink line is ignored, since it cannot have narrowed the
 * value before the sink; nodes without a numeric `line` are still considered.
 * NOTE(review): the guard is not checked against the tainted variable, so any
 * matching guard in the function qualifies — confirm this is acceptable.
 *
 * @param {object} finding   finding with `file` and (optionally) `line`.
 * @param {object} perFileIR map of file path to IR with a `functions` list.
 * @returns {string|null} implied type of the first qualifying guard, or null.
 */
function _hasTypeGuardOnPath(finding, perFileIR) {
  if (!perFileIR || !finding.file) return null;
  const ir = perFileIR[finding.file];
  if (!ir || !ir.functions) return null;

  const sinkLine = finding.line || 0;
  const fn = ir.functions.find((f) => {
    const nodes = f?.cfg?.nodes;
    if (!nodes) return false;
    return sinkLine >= f.line && sinkLine <= f.line + Object.keys(nodes).length * 3;
  });
  if (!fn) return null;

  for (const node of Object.values(fn.cfg.nodes)) {
    if (!node || node.kind !== 'if' || !node.cond) continue;
    if (typeof node.line === 'number' && node.line > sinkLine) continue;
    const guardType = _extractTypeGuardType(node.cond);
    if (guardType) return guardType;
  }
  return null;
}
121
+
122
+ export function applyStubAwareFilter(findings, stubs, perFileIR) {
76
123
  if (!Array.isArray(findings) || findings.length === 0) return findings;
77
- if (!stubs || !stubs.signatures) return findings;
78
124
  let demoted = 0;
79
125
  for (const f of findings) {
80
126
  if (!f || f.parser !== 'IR-TAINT') continue;
81
127
  const safeSet = FAMILY_SAFE_TYPES[f.cwe];
82
128
  if (!safeSet) continue;
83
- const sourceType = _sourceTypeFromStubs(f, stubs);
84
- if (!sourceType) continue;
85
- if (!safeSet.has(sourceType)) continue;
86
- f._stubTypeDemoted = true;
87
- f._stubTypeReason = `source type ${sourceType} cannot carry ${f.cwe} metacharacters`;
88
- f._stubTypeOriginalSeverity = f.severity;
89
- const downgrade = { critical: 'high', high: 'medium', medium: 'low', low: 'info' };
90
- if (downgrade[f.severity]) f.severity = downgrade[f.severity];
91
- demoted++;
129
+ // Check 1: stub-based type demotion
130
+ const sourceType = stubs ? _sourceTypeFromStubs(f, stubs) : null;
131
+ if (sourceType && safeSet.has(sourceType)) {
132
+ f._stubTypeDemoted = true;
133
+ f._stubTypeReason = `source type ${sourceType} cannot carry ${f.cwe} metacharacters`;
134
+ f._stubTypeOriginalSeverity = f.severity;
135
+ const downgrade = { critical: 'high', high: 'medium', medium: 'low', low: 'info' };
136
+ if (downgrade[f.severity]) f.severity = downgrade[f.severity];
137
+ demoted++;
138
+ continue;
139
+ }
140
+ // Check 2: type-guard narrowing on CFG path
141
+ const guardType = _hasTypeGuardOnPath(f, perFileIR);
142
+ if (guardType && safeSet.has(guardType)) {
143
+ f._stubTypeDemoted = true;
144
+ f._stubTypeReason = `type guard narrows to ${guardType}, safe for ${f.cwe}`;
145
+ f._stubTypeOriginalSeverity = f.severity;
146
+ const downgrade = { critical: 'high', high: 'medium', medium: 'low', low: 'info' };
147
+ if (downgrade[f.severity]) f.severity = downgrade[f.severity];
148
+ demoted++;
149
+ }
92
150
  }
93
151
  Object.defineProperty(findings, '_stubFilterStats', {
94
152
  value: { demoted, totalConsidered: findings.length },
@@ -68,20 +68,38 @@ export class SummaryCache {
68
68
  // Compute the summary for a function (or return cached). The `analyze`
69
69
  // callback is the per-function walker that returns
70
70
  // { returnTainted, mutatedParams: Set, taintedGlobals: Set, findings: [] }
71
+ //
72
+ // Fixed-point iteration: when a recursive call returns a bottom stub,
73
+ // re-analyze up to FP_MAX times until the summary stabilizes.
71
74
  compute(qid, taintedParams, analyze) {
72
75
  const k = this._key(qid, taintedParams);
73
- if (this._cache.has(k)) return this._cache.get(k);
76
+ if (this._cache.has(k)) {
77
+ const cached = this._cache.get(k);
78
+ if (!cached._recursive) return cached;
79
+ }
74
80
  if (this._stack.has(qid)) {
75
- // Recursion — return bottom summary; fixed-point iter will refine.
81
+ this._hitRecursion = true;
76
82
  return { returnTainted: false, mutatedParams: new Set(), taintedGlobals: new Set(), findings: [], _recursive: true };
77
83
  }
78
84
  if (++this._iter > this._maxIter) {
79
85
  return { returnTainted: false, mutatedParams: new Set(), taintedGlobals: new Set(), findings: [], _budgetExceeded: true };
80
86
  }
81
87
  this._stack.add(qid);
88
+ this._hitRecursion = false;
82
89
  try {
83
- const summary = analyze(qid, taintedParams);
90
+ let summary = analyze(qid, taintedParams);
84
91
  this._cache.set(k, summary);
92
+ if (this._hitRecursion) {
93
+ const FP_MAX = 3;
94
+ for (let fp = 0; fp < FP_MAX; fp++) {
95
+ if (++this._iter > this._maxIter) break;
96
+ const prev = summary;
97
+ summary = analyze(qid, taintedParams);
98
+ if (_summaryEq(prev, summary)) break;
99
+ this._cache.set(k, summary);
100
+ }
101
+ }
102
+ if (summary._recursive) delete summary._recursive;
85
103
  return summary;
86
104
  } finally {
87
105
  this._stack.delete(qid);
@@ -110,6 +128,13 @@ export class SummaryCache {
110
128
  clear() { this._cache.clear(); this._iter = 0; }
111
129
  }
112
130
 
131
/**
 * Structural equality of two function summaries, used to detect that
 * fixed-point re-analysis has stabilised.
 *
 * Two summaries are equal when `returnTainted` agrees (as booleans) and the
 * `mutatedParams` and `taintedGlobals` sets have the same members. Comparing
 * membership rather than size matters: a re-analysis that swaps one mutated
 * parameter for another has changed the summary even though the count is the
 * same. A missing set is treated as empty. `findings` is not compared.
 *
 * @param {object|null|undefined} a
 * @param {object|null|undefined} b
 * @returns {boolean}
 */
function _summaryEq(a, b) {
  if (!a || !b) return a === b;
  if (!!a.returnTainted !== !!b.returnTainted) return false;

  const asSet = (value) => (value instanceof Set ? value : new Set(value ?? []));
  const sameMembers = (left, right) => {
    const l = asSet(left);
    const r = asSet(right);
    if (l.size !== r.size) return false;
    for (const member of l) {
      if (!r.has(member)) return false;
    }
    return true;
  };

  return sameMembers(a.mutatedParams, b.mutatedParams)
    && sameMembers(a.taintedGlobals, b.taintedGlobals);
}
137
+
113
138
  // Build the entry-taint-state for a callee from a call site:
114
139
  // given the callee's param names + the caller's tainted-var set + the
115
140
  // call args, return a Set of param names that are tainted at entry.
@@ -0,0 +1,70 @@
1
+ // Worker-thread parallelism infrastructure for per-file SAST analysis.
2
+ //
3
+ // Gated behind AGENTIC_SECURITY_PARALLEL=1 (default OFF).
4
+ // When enabled, distributes per-file detector execution across a bounded
5
+ // worker pool (default: half the available parallelism, clamped to 1-4 workers).
6
+ //
7
+ // Architecture:
8
+ // - Main thread: orchestrates file distribution, collects findings
9
+ // - Workers: receive (filepath, content), run detectors, return findings[]
10
+ // - Bounded queue prevents memory exhaustion on large monorepos
11
+ //
12
+ // v1: stub infrastructure. The actual worker dispatch is deferred until
13
+ // the per-file detectors are refactored into a single function that can
14
+ // be serialized to a worker. Today the detectors import 60+ modules with
15
+ // shared state (e.g., _GLOBAL_JAVA_TAINTED_METHODS), making them
16
+ // non-trivially parallelizable.
17
+
18
+ import { availableParallelism } from 'node:os';
19
+
20
/**
 * Report whether parallel scanning has been switched on.
 *
 * @returns {boolean} true only when AGENTIC_SECURITY_PARALLEL is exactly '1'.
 */
export function isParallelEnabled() {
  const { AGENTIC_SECURITY_PARALLEL: flag } = process.env;
  return flag === '1';
}
23
+
24
/**
 * Suggest a worker-pool size: half of the parallelism reported by the OS
 * module, rounded down, and kept within the range 1 to 4.
 *
 * @returns {number}
 */
export function recommendedWorkerCount() {
  const half = Math.floor(availableParallelism() / 2);
  if (half < 1) return 1;
  return half > 4 ? 4 : half;
}
28
+
29
/**
 * Build a fresh bookkeeping object for one parallel scan run.
 *
 * `opts.workers` is honoured only when it is a finite number of at least 1;
 * it is rounded down to an integer and capped at 4, the same upper bound
 * `recommendedWorkerCount()` applies. Anything else (missing, zero, negative,
 * non-numeric) falls back to `recommendedWorkerCount()`.
 *
 * @param {{workers?: number}} [opts]
 * @returns {{enabled: boolean, workerCount: number, filesProcessed: number,
 *   totalMs: number, _stats: {dispatched: number, completed: number,
 *   errors: number, avgMs: number}}} context with all counters at zero.
 */
export function createParallelContext(opts = {}) {
  const MAX_WORKERS = 4;
  const requested = Math.floor(Number(opts?.workers));
  const workerCount = Number.isFinite(requested) && requested >= 1
    ? Math.min(requested, MAX_WORKERS)
    : recommendedWorkerCount();
  return {
    enabled: isParallelEnabled(),
    workerCount,
    filesProcessed: 0,
    totalMs: 0,
    _stats: {
      dispatched: 0,
      completed: 0,
      errors: 0,
      avgMs: 0,
    },
  };
}
44
+
45
/**
 * Run `detectorFn` over each file and collect its findings.
 *
 * v1 stub: files are processed one after another on the calling thread; the
 * parallel context is used only for bookkeeping.
 *
 * `detectorFn` may be synchronous or return a Promise; its result is awaited
 * either way. A detector that throws or rejects is counted in `stats.errors`
 * and the scan moves on to the next file (best-effort by design). Files with
 * no entry in `fileContents`, or with empty content, are skipped and not
 * counted.
 *
 * @param {Iterable<string>} files          file paths to scan.
 * @param {Object<string, string>} fileContents content keyed by file path.
 * @param {(filepath: string, content: string) => (Iterable|Promise<Iterable>|null|undefined)} detectorFn
 * @param {object} [opts] forwarded to `createParallelContext`.
 * @returns {Promise<{findings: Array, stats: object}|null>} null when parallel
 *   scanning is disabled.
 */
export async function runParallelFileScans(files, fileContents, detectorFn, opts = {}) {
  if (!isParallelEnabled()) return null;

  const ctx = createParallelContext(opts);
  const results = [];

  // v1 stub: run sequentially but through the parallel context for testing.
  // v2 will use worker_threads with a bounded queue.
  for (const fp of files) {
    const content = fileContents[fp];
    if (!content) continue;
    const t0 = Date.now();
    try {
      const findings = await detectorFn(fp, content);
      // Appended one at a time: spreading a very large array into push()
      // can exceed the engine's argument limit.
      for (const finding of findings || []) results.push(finding);
      ctx._stats.completed++;
    } catch {
      ctx._stats.errors++;
    }
    ctx._stats.dispatched++;
    ctx.totalMs += Date.now() - t0;
    ctx.filesProcessed++;
  }
  ctx._stats.avgMs = ctx.filesProcessed ? Math.round(ctx.totalMs / ctx.filesProcessed) : 0;
  return { findings: results, stats: ctx._stats };
}