@clear-capabilities/agentic-security-scanner 0.78.0 → 0.80.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/bin/.agentic-security/findings.json +16 -16
  2. package/bin/.agentic-security/last-scan.json +16 -16
  3. package/bin/.agentic-security/last-scan.json.sig +1 -1
  4. package/bin/.agentic-security/scan-history.json +51 -0
  5. package/bin/.agentic-security/streak.json +5 -5
  6. package/bin/agentic-security.js +22 -7
  7. package/dist/178.index.js +1 -1
  8. package/dist/333.index.js +283 -0
  9. package/dist/384.index.js +1 -1
  10. package/dist/476.index.js +5 -5
  11. package/dist/637.index.js +1 -1
  12. package/dist/700.index.js +138 -0
  13. package/dist/718.index.js +53 -0
  14. package/dist/838.index.js +1 -1
  15. package/dist/985.index.js +95 -1
  16. package/dist/agentic-security.mjs +83 -83
  17. package/dist/agentic-security.mjs.sha256 +1 -1
  18. package/package.json +6 -4
  19. package/src/.agentic-security/findings.json +29799 -7803
  20. package/src/.agentic-security/last-scan.json +29799 -7803
  21. package/src/.agentic-security/last-scan.json.sig +1 -1
  22. package/src/.agentic-security/scan-history.json +5119 -2611
  23. package/src/.agentic-security/streak.json +6 -6
  24. package/src/dataflow/.agentic-security/findings.json +2879 -308
  25. package/src/dataflow/.agentic-security/last-scan.json +2879 -308
  26. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -1
  27. package/src/dataflow/.agentic-security/scan-history.json +68 -520
  28. package/src/dataflow/.agentic-security/streak.json +6 -7
  29. package/src/dataflow/cross-service-taint.js +201 -0
  30. package/src/dataflow/engine.js +52 -8
  31. package/src/dataflow/formal-verify.js +204 -0
  32. package/src/dataflow/ifds-precise.js +222 -0
  33. package/src/dataflow/k2-summary-cache.js +153 -0
  34. package/src/dataflow/lib-taint-summaries.js +198 -0
  35. package/src/dataflow/privacy-taint.js +205 -0
  36. package/src/dataflow/smt-feasibility.js +189 -0
  37. package/src/engine.js +890 -132
  38. package/src/integrations/index.js +2 -1
  39. package/src/ir/.agentic-security/findings.json +240 -6
  40. package/src/ir/.agentic-security/last-scan.json +240 -6
  41. package/src/ir/.agentic-security/last-scan.json.sig +1 -1
  42. package/src/ir/.agentic-security/scan-history.json +16 -594
  43. package/src/ir/.agentic-security/streak.json +8 -9
  44. package/src/ir/callgraph.js +27 -7
  45. package/src/ir/cpp-preprocessor.js +142 -0
  46. package/src/ir/csharp-ir.js +604 -0
  47. package/src/ir/universal-ir.js +403 -0
  48. package/src/llm-validator/index.js +7 -5
  49. package/src/mcp/.agentic-security/findings.json +8632 -0
  50. package/src/mcp/.agentic-security/last-scan.json +8632 -0
  51. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  52. package/src/mcp/.agentic-security/scan-history.json +143 -0
  53. package/src/mcp/.agentic-security/streak.json +20 -0
  54. package/src/mcp/audit.js +5 -0
  55. package/src/mcp/tools.js +90 -1
  56. package/src/posture/.agentic-security/findings.json +16809 -4367
  57. package/src/posture/.agentic-security/last-scan.json +16809 -4367
  58. package/src/posture/.agentic-security/last-scan.json.sig +1 -1
  59. package/src/posture/.agentic-security/scan-history.json +6689 -177
  60. package/src/posture/.agentic-security/streak.json +8 -7
  61. package/src/posture/api-contract.js +193 -0
  62. package/src/posture/attack-taxonomy.js +227 -0
  63. package/src/posture/calibration-drift.js +2 -1
  64. package/src/posture/calibration.js +3 -2
  65. package/src/posture/compliance-policy.js +218 -0
  66. package/src/posture/composite-risk.js +122 -0
  67. package/src/posture/csharp-analysis.js +330 -0
  68. package/src/posture/exploit-bundle.js +210 -0
  69. package/src/posture/federated-learning.js +172 -0
  70. package/src/posture/fix-history.js +8 -2
  71. package/src/posture/license-attributions.js +94 -0
  72. package/src/posture/license-graph.js +238 -0
  73. package/src/posture/pqc-migration-plan.js +158 -0
  74. package/src/posture/profile.js +4 -5
  75. package/src/posture/reachability-filter.js +33 -2
  76. package/src/posture/realtime-cve-monitor.js +214 -0
  77. package/src/posture/rule-overrides.js +2 -3
  78. package/src/posture/rule-pack-signing.js +2 -3
  79. package/src/posture/rule-synthesis.js +5 -6
  80. package/src/posture/runtime-correlation.js +174 -0
  81. package/src/posture/sbom-diff.js +171 -0
  82. package/src/posture/sca-policy.js +235 -0
  83. package/src/posture/sca-upgrade.js +259 -0
  84. package/src/posture/security-trend.js +4 -7
  85. package/src/posture/state-dir.js +124 -0
  86. package/src/posture/streak.js +3 -0
  87. package/src/posture/suppressions.js +5 -8
  88. package/src/posture/threat-model-auto.js +268 -0
  89. package/src/posture/triage-learning.js +170 -0
  90. package/src/posture/triage.js +29 -6
  91. package/src/posture/validator-metrics.js +3 -6
  92. package/src/sast/.agentic-security/findings.json +996 -32
  93. package/src/sast/.agentic-security/last-scan.json +996 -32
  94. package/src/sast/.agentic-security/last-scan.json.sig +1 -1
  95. package/src/sast/.agentic-security/scan-history.json +565 -32
  96. package/src/sast/.agentic-security/streak.json +10 -8
  97. package/src/sast/_secret-entropy.js +145 -0
  98. package/src/sast/cloud-iam.js +312 -0
  99. package/src/sast/cpp.js +138 -4
  100. package/src/sast/crypto-protocol.js +388 -0
  101. package/src/sast/csharp-tokenizer.js +392 -0
  102. package/src/sast/csharp.js +924 -138
  103. package/src/sast/dapp-frontend.js +200 -0
  104. package/src/sast/db-taint.js +24 -0
  105. package/src/sast/k8s-admission.js +271 -0
  106. package/src/sast/llm-app.js +272 -0
  107. package/src/sast/ml-supply-chain.js +259 -0
  108. package/src/sast/mobile.js +224 -0
  109. package/src/sast/post-quantum-crypto.js +348 -0
  110. package/src/sast/rust.js +26 -0
  111. package/src/sast/web3-advanced.js +375 -0
  112. package/src/sca/.agentic-security/findings.json +6044 -171
  113. package/src/sca/.agentic-security/last-scan.json +6044 -171
  114. package/src/sca/.agentic-security/last-scan.json.sig +1 -1
  115. package/src/sca/.agentic-security/scan-history.json +83 -6
  116. package/src/sca/.agentic-security/streak.json +9 -9
  117. package/src/sca/CLAUDE.md +161 -0
  118. package/src/sca/binary-metadata.js +146 -0
  119. package/src/sca/py-package-functions.js +118 -0
  120. package/src/sca/sigstore-verify.js +215 -0
  121. package/src/sca/vendor-detect.js +53 -0
  122. package/src/report/.agentic-security/findings.json +0 -80
  123. package/src/report/.agentic-security/last-scan.json +0 -80
  124. package/src/report/.agentic-security/last-scan.json.sig +0 -1
  125. package/src/report/.agentic-security/scan-history.json +0 -35
  126. package/src/report/.agentic-security/streak.json +0 -22
@@ -0,0 +1,218 @@
1
+ // Compliance-as-code DSL — Recommendation #9 of the world-class+2 plan.
2
+ //
3
+ // Customers declare their compliance policy in
4
+ // .agentic-security/compliance.policy.yml. The scanner reads the policy,
5
+ // runs verification (each rule is a deterministic check against scanner
6
+ // findings + config files + state) and emits a structured JSON-LD
7
+ // evidence file consumable by Vanta / Drata / SecureFrame / auditors.
8
+ //
9
+ // DSL shape:
10
+ //
11
+ // framework: "SOC2 Type II"
12
+ // controls:
13
+ // CC6.1:
14
+ // title: "Logical access controls"
15
+ // requires:
16
+ // - finding-family: "auth-missing"
17
+ // must-be: zero
18
+ // - file-exists: ".github/dependabot.yml"
19
+ // - documented: ".agentic-security/auth-policy.md"
20
+ // evidence:
21
+ // - "Scanner finds 0 auth-missing findings on the current release"
22
+ // - "Dependency-update automation present"
23
+ // CC7.2:
24
+ // title: "Security incident response"
25
+ // requires:
26
+ // - file-exists: "INCIDENT-PLAN.md"
27
+ //
28
+ // Verifier primitives in v1:
29
+ // finding-family: <name> must-be: zero | min: <n> | max: <n>
30
+ // file-exists: <relative-path>
31
+ // documented: <relative-path> (alias for file-exists)
32
+ // env-var-set: <name>
33
+ // sca-policy-has-entry: <type> (e.g. accept-risk, sla)
34
+ //
35
+ // Output:
36
+ // .agentic-security/compliance-evidence.json — JSON-LD compliant
37
+ // structured artifact
38
+ // .agentic-security/compliance-evidence.md — human-readable summary
39
+
40
+ import * as fs from 'node:fs';
41
+ import * as path from 'node:path';
42
+ import * as yaml from 'js-yaml';
43
+
44
// Name of the policy file looked up inside `<scanRoot>/.agentic-security/`.
const POLICY_FILE = 'compliance.policy.yml';

/**
 * Load and normalize the compliance policy for a scan root.
 *
 * @param {string} scanRoot - Directory that contains `.agentic-security/`.
 * @returns {object|null} `null` when no policy file exists; the normalized
 *   policy when it loads; `{ _error }` when reading, parsing or normalizing
 *   throws.
 */
export function loadPolicy(scanRoot) {
  const policyPath = path.join(scanRoot, '.agentic-security', POLICY_FILE);
  const present = fs.existsSync(policyPath);
  if (present) {
    try {
      const text = fs.readFileSync(policyPath, 'utf8');
      const parsed = yaml.load(text);
      return _normalize(parsed);
    } catch (err) {
      // Any failure on this path is reported to the caller as data, not thrown.
      return { _error: `Failed to parse ${policyPath}: ${err.message}` };
    }
  }
  return null;
}
57
+
58
/**
 * Convert a parsed policy document into the internal policy shape.
 *
 * @param {object|null|undefined} doc - Result of parsing the policy YAML.
 * @returns {{framework: string, version: *, controls: Array<object>}|null}
 *   `null` for an empty document; otherwise one entry per key of
 *   `doc.controls`, in document order.
 */
function _normalize(doc) {
  if (!doc) return null;
  return {
    framework: doc.framework || 'Custom',
    version: doc.version || '1.0',
    controls: Object.entries(doc.controls || {}).map(([id, c]) => {
      // A control written with no body (`CC6.1:`) parses to null, and a
      // scalar body parses to a string/number. Treat both like an empty
      // mapping instead of throwing and discarding the whole policy.
      const spec = c && typeof c === 'object' ? c : {};
      return {
        id,
        title: spec.title || id,
        requires: Array.isArray(spec.requires) ? spec.requires : [],
        evidence: Array.isArray(spec.evidence) ? spec.evidence : [],
        not_applicable: !!spec['not-applicable'],
      };
    }),
  };
}
72
+
73
/**
 * Run a single primitive check against the scanner state.
 *
 * Supported primitives (first match wins, in this order):
 *   finding-family  with `must-be: zero`, and/or numeric `min` / `max`
 *   file-exists / documented  (path relative to ctx.scanRoot)
 *   env-var-set
 *   sca-policy-has-entry      (`accept-risk` or `sla`)
 *
 * @param {object} check - One item of a control's `requires` list.
 * @param {{findings?: Array<object>, scanRoot: string}} ctx - Scanner state.
 * @returns {{passed: boolean, reason: string}}
 */
function _runCheck(check, ctx) {
  // An empty YAML list item parses to null and a bare scalar to a string;
  // neither names a primitive, so fail the check instead of throwing.
  if (!check || typeof check !== 'object') {
    return { passed: false, reason: 'unknown check primitive' };
  }

  if (check['finding-family']) {
    const family = check['finding-family'];
    // Null entries in the findings list are skipped rather than dereferenced.
    const count = (ctx.findings || []).filter(f => f && f.family === family).length;
    if (check['must-be'] === 'zero') {
      if (count === 0) return { passed: true, reason: '0 findings' };
      return { passed: false, reason: `${count} findings in family '${family}'` };
    }
    const hasMin = typeof check.min === 'number';
    const hasMax = typeof check.max === 'number';
    if (!hasMin && !hasMax) {
      return { passed: false, reason: 'finding-family check has no must-be/min/max' };
    }
    // Both bounds are enforced when both are given; previously a passing
    // `min` returned early and `max` was never evaluated.
    if (hasMin && count < check.min) return { passed: false, reason: `${count} < ${check.min}` };
    if (hasMax && count > check.max) return { passed: false, reason: `${count} > ${check.max}` };
    const satisfied = [];
    if (hasMin) satisfied.push(`${count} ≥ ${check.min}`);
    if (hasMax) satisfied.push(`${count} ≤ ${check.max}`);
    return { passed: true, reason: satisfied.join(', ') };
  }

  if (check['file-exists'] || check['documented']) {
    const rel = check['file-exists'] || check['documented'];
    const fp = path.join(ctx.scanRoot, rel);
    if (fs.existsSync(fp)) return { passed: true, reason: `${rel} exists` };
    return { passed: false, reason: `${rel} not found` };
  }

  if (check['env-var-set']) {
    const name = check['env-var-set'];
    // An empty-string value counts as "not set".
    if (process.env[name]) return { passed: true, reason: `$${name} set` };
    return { passed: false, reason: `$${name} not set` };
  }

  if (check['sca-policy-has-entry']) {
    const type = check['sca-policy-has-entry'];
    const policyPath = path.join(ctx.scanRoot, '.agentic-security', 'sca-policy.yml');
    if (!fs.existsSync(policyPath)) return { passed: false, reason: 'sca-policy.yml not found' };
    try {
      // NOTE(review): yaml.load is only safe-by-default on js-yaml >= 4 —
      // confirm the pinned version.
      const policy = yaml.load(fs.readFileSync(policyPath, 'utf8'));
      // An empty or scalar-only file has no entries; report that rather
      // than a misleading "parse error" from dereferencing null.
      if (!policy || typeof policy !== 'object') {
        return { passed: false, reason: `no ${type} entries in sca-policy.yml` };
      }
      if (type === 'accept-risk' && Array.isArray(policy['accept-risk']) && policy['accept-risk'].length) {
        return { passed: true, reason: `${policy['accept-risk'].length} accept-risk entries` };
      }
      if (type === 'sla' && policy.sla && Object.keys(policy.sla).length) {
        return { passed: true, reason: `${Object.keys(policy.sla).length} SLA buckets defined` };
      }
      return { passed: false, reason: `no ${type} entries in sca-policy.yml` };
    } catch (e) {
      return { passed: false, reason: 'sca-policy.yml parse error: ' + e.message };
    }
  }

  return { passed: false, reason: 'unknown check primitive' };
}
125
+
126
/**
 * Run all controls in the policy and emit a verification report.
 *
 * @param {object|null} policy - Normalized policy, `null`, or the
 *   `{ _error }` object produced when loading failed.
 * @param {object} ctx - Scanner state forwarded to each primitive check.
 * @returns {object} Either `{ framework, version, controls, summary }`, or a
 *   `{ controls: [], status: 'no-policy', summary }` report (plus `error`
 *   when the policy carried a load failure).
 */
export function verifyPolicy(policy, ctx) {
  if (!policy || !policy.controls) {
    // Keep the report shape uniform: consumers read `summary`
    // unconditionally, so the no-policy report carries a zeroed one.
    const report = {
      controls: [],
      status: 'no-policy',
      summary: { total: 0, compliant: 0, nonCompliant: 0, notApplicable: 0 },
    };
    // Surface a load/parse failure instead of making it look like "no policy".
    if (policy && policy._error) report.error = policy._error;
    return report;
  }

  const results = [];
  for (const control of policy.controls) {
    if (control.not_applicable) {
      results.push({ ...control, status: 'not-applicable', checks: [] });
      continue;
    }
    // Hand-built policies may omit `requires`; treat that as no checks.
    const requires = Array.isArray(control.requires) ? control.requires : [];
    const checkResults = requires.map(c => ({ check: c, result: _runCheck(c, ctx) }));
    // A control with zero checks counts as compliant (vacuous `every`).
    const allPassed = checkResults.every(r => r.result.passed);
    results.push({
      ...control,
      status: allPassed ? 'compliant' : 'non-compliant',
      checks: checkResults,
    });
  }

  const summary = { total: results.length, compliant: 0, nonCompliant: 0, notApplicable: 0 };
  for (const r of results) {
    if (r.status === 'compliant') summary.compliant += 1;
    else if (r.status === 'non-compliant') summary.nonCompliant += 1;
    else if (r.status === 'not-applicable') summary.notApplicable += 1;
  }
  return { framework: policy.framework, version: policy.version, controls: results, summary };
}
153
+
154
/**
 * Emit JSON-LD compliance evidence (the Vanta/Drata-shape artifact).
 *
 * Builds the JSON-LD document from a verification report, writes it
 * best-effort to `<scanRoot>/.agentic-security/compliance-evidence.json`,
 * and returns the in-memory document.
 *
 * @param {object|null} report - Report with `framework`, `version`,
 *   `summary` and `controls` (each carrying `checks`).
 * @param {string} scanRoot - Root directory for the evidence file.
 * @returns {object|null} The JSON-LD document, or `null` without a report.
 */
export function emitEvidenceJsonLd(report, scanRoot) {
  if (!report) return null;

  const toCheckNode = (entry) => ({
    '@type': 'Check',
    rule: entry.check,
    passed: entry.result.passed,
    reason: entry.result.reason,
  });

  const toControlNode = (control) => ({
    '@type': 'Control',
    id: control.id,
    title: control.title,
    status: control.status,
    checks: control.checks.map(toCheckNode),
    narrative_evidence: control.evidence || [],
  });

  const jsonld = {
    '@context': {
      '@vocab': 'https://agentic-security.io/compliance/v1/',
      'schema': 'https://schema.org/',
    },
    '@type': 'ComplianceEvidence',
    framework: report.framework,
    version: report.version,
    generatedAt: new Date().toISOString(),
    summary: report.summary,
    controls: report.controls.map(toControlNode),
  };

  // Persisting is best-effort: a failed write still returns the document.
  try {
    const stateDir = path.join(scanRoot, '.agentic-security');
    fs.mkdirSync(stateDir, { recursive: true });
    const serialized = JSON.stringify(jsonld, null, 2);
    fs.writeFileSync(path.join(stateDir, 'compliance-evidence.json'), serialized);
  } catch {}

  return jsonld;
}
187
+
188
/**
 * Emit a human-readable markdown summary.
 *
 * Renders the report as markdown, writes it best-effort to
 * `<scanRoot>/.agentic-security/compliance-evidence.md`, and returns the text.
 *
 * @param {object|null} report - Verification report. A report without
 *   `summary` (for example the no-policy report) has its counts derived
 *   from `controls`.
 * @param {string} scanRoot - Root directory for the evidence file.
 * @returns {string|null} The markdown text, or `null` without a report
 *   (mirrors emitEvidenceJsonLd).
 */
export function emitEvidenceMarkdown(report, scanRoot) {
  if (!report) return null;

  const controls = Array.isArray(report.controls) ? report.controls : [];
  const countStatus = (status) => controls.filter(c => c.status === status).length;
  const summary = report.summary || {
    total: controls.length,
    compliant: countStatus('compliant'),
    nonCompliant: countStatus('non-compliant'),
    notApplicable: countStatus('not-applicable'),
  };

  const lines = [];
  lines.push(`# Compliance evidence — ${report.framework || 'Custom'}`);
  lines.push('');
  lines.push(`Generated by agentic-security on ${new Date().toISOString().slice(0,10)}.`);
  lines.push('');
  lines.push(`Compliant: **${summary.compliant}** / Non-compliant: **${summary.nonCompliant}** / Not applicable: **${summary.notApplicable}** of ${summary.total} controls.`);
  lines.push('');
  for (const c of controls) {
    lines.push(`## ${c.id} — ${c.title} (${c.status})`);
    for (const ck of c.checks || []) {
      const mark = ck.result.passed ? '✓' : '✗';
      lines.push(`- ${mark} \`${JSON.stringify(ck.check)}\` — ${ck.result.reason}`);
    }
    if (c.evidence && c.evidence.length) {
      lines.push('');
      lines.push('**Narrative evidence:**');
      for (const e of c.evidence) lines.push(`- ${e}`);
    }
    lines.push('');
  }

  const markdown = lines.join('\n');
  // Persisting is best-effort. Create the state directory first so the
  // write does not fail silently when this emitter runs before the JSON one.
  try {
    const stateDir = path.join(scanRoot, '.agentic-security');
    fs.mkdirSync(stateDir, { recursive: true });
    fs.writeFileSync(path.join(stateDir, 'compliance-evidence.md'), markdown);
  } catch {}
  return markdown;
}
217
+
218
// Handle on the module-private helpers, exported under their own names.
export const _internals = {
  _normalize: _normalize,
  _runCheck: _runCheck,
};
@@ -0,0 +1,122 @@
1
+ // Composite risk score — derived 0–100 ordinal for agent + UI ordering.
2
+ //
3
+ // Today three independent ordinals coexist on every finding:
4
+ // 1. f.exploitability ∈ [0,1] (posture/exploitability.js)
5
+ // 2. f.toxicityScore: integer (engine.js scoreToxicity — unbounded)
6
+ // 3. f.mitigationVerdict (posture/mitigation-composite.js — 3-state enum)
7
+ //
8
+ // An agent sorting "which finding first" has no canonical key. This
9
+ // annotator composes the three into one normalized 0–100 ordinal:
10
+ //
11
+ // compositeRisk — 0..100 number, sortable
12
+ // compositeRiskTier — 'critical' | 'high' | 'medium' | 'low' | 'minimal'
13
+ // compositeRiskFactors — provenance strings; same pattern as
14
+ // f.exploitabilityFactors. The reader can audit
15
+ // how the score was assembled.
16
+ //
17
+ // IMPORTANT — this is NOT a probability.
18
+ //
19
+ // The plan calls it a derived field on purpose: the three upstream signals
20
+ // are themselves not calibrated probabilities. compositeRisk inherits that
21
+ // limitation. Treat it as a triage key for "show me top 10," not as a
22
+ // number to render as "65% likely to be exploited."
23
+ //
24
+ // The annotator NEVER modifies the inputs (exploitability, toxicityScore,
25
+ // mitigationVerdict). They retain their independent shapes for callers
26
+ // that depend on them.
27
+
28
// Tier cut-offs, highest first. Each entry is the inclusive lower bound of
// its tier on the 0–100 composite scale. The values are hand-picked.
const TIER_THRESHOLDS = [
  { min: 85, name: 'critical' },
  { min: 65, name: 'high' },
  { min: 35, name: 'medium' },
  { min: 15, name: 'low' },
  { min: 0, name: 'minimal' },
];

/**
 * Map a composite score onto its tier name.
 *
 * @param {number} score - Composite risk score.
 * @returns {string} Name of the first tier whose `min` the score reaches;
 *   'minimal' when none matches (negative or NaN input).
 */
function tierFor(score) {
  const tier = TIER_THRESHOLDS.find(({ min }) => score >= min);
  return tier ? tier.name : 'minimal';
}
46
+
47
/**
 * Compute the composite 0–100 score for one finding, along with the
 * provenance strings that explain how it was assembled.
 *
 * Steps, in order: base from exploitability (or a severity-only fallback),
 * mitigation-verdict multiplier, capped toxicity nudge, KEV / exploited-now
 * floors, then clamp to [0, 100] and round.
 *
 * @param {object} f - Finding; it is read, never modified.
 * @returns {{score: number, factors: string[]}}
 */
function scoreOne(f) {
  const factors = [];
  let running = 0;

  // 1. Base. Exploitability (0–1) is scaled to 0–100; without it, a coarse
  //    severity table keeps the score from collapsing to zero.
  const hasExploitability = typeof f.exploitability === 'number' && Number.isFinite(f.exploitability);
  if (hasExploitability) {
    running = f.exploitability * 100;
    factors.push(`exploit:${f.exploitability}`);
  } else if (f.severity) {
    const severityTable = { critical: 70, high: 55, medium: 35, low: 20, info: 10 };
    const fallback = severityTable[f.severity];
    // The typeof guard also rejects inherited Object.prototype members.
    if (typeof fallback === 'number') {
      running = fallback;
      factors.push(`sev-only:${f.severity}`);
    }
  }

  // 2. Mitigation verdict. Two verdicts demote; 'exposed-in-prod' is
  //    recorded but leaves the value untouched (multiplier 1).
  const verdictMultipliers = new Map([
    ['mitigated-in-prod', 0.4],
    ['unreachable-in-prod', 0.2],
    ['exposed-in-prod', 1],
  ]);
  if (verdictMultipliers.has(f.mitigationVerdict)) {
    running *= verdictMultipliers.get(f.mitigationVerdict);
    factors.push(f.mitigationVerdict);
  }

  // 3. Toxicity nudge: a tenth of the toxicity score, capped at +15, so it
  //    can break ties without dominating.
  const toxicity = f.toxicityScore;
  if (typeof toxicity === 'number' && Number.isFinite(toxicity) && toxicity > 0) {
    const nudge = Math.min(15, toxicity / 10);
    running += nudge;
    factors.push(`toxicity+${nudge.toFixed(1)}`);
  }

  // 4. Floors, applied after mitigation so they survive a demotion.
  const kevFlagged = ['kev', 'kevListed', 'weaponized'].some((key) => f[key] === true);
  if (kevFlagged) {
    running = Math.max(running, 80);
    factors.push('kev-floor:80');
  }
  if (f.exploitedNow === true) {
    running = Math.max(running, 75);
    factors.push('exploited-now-floor:75');
  }

  // 5. Clamp into range, then round to an integer.
  const clamped = Math.max(0, Math.min(100, running));
  return { score: Math.round(clamped), factors };
}
104
+
105
/**
 * Annotate each finding in place with `compositeRisk`, `compositeRiskTier`
 * and `compositeRiskFactors`.
 *
 * Never throws: a finding that cannot be scored gets null / null / [];
 * a finding that cannot be written to at all (frozen or non-extensible)
 * is left as it is.
 *
 * @param {Array<object>|*} findings - Findings list; non-arrays are returned
 *   untouched, and non-object entries are skipped.
 * @returns {*} The same `findings` reference that was passed in.
 */
export function annotateCompositeRisk(findings) {
  if (!Array.isArray(findings)) return findings;
  for (const f of findings) {
    if (!f || typeof f !== 'object') continue;
    try {
      const { score, factors } = scoreOne(f);
      f.compositeRisk = score;
      f.compositeRiskTier = tierFor(score);
      f.compositeRiskFactors = factors;
    } catch (_) {
      // No-throw contract for posture annotators (see posture/CLAUDE.md).
      // The fallback writes can fail too: this module is strict-mode ESM,
      // so assigning to a frozen object throws. Swallow that as well so the
      // contract actually holds.
      try {
        f.compositeRisk = null;
        f.compositeRiskTier = null;
        f.compositeRiskFactors = [];
      } catch (_ignored) {
        // Finding is not writable; leave it unannotated.
      }
    }
  }
  return findings;
}
@@ -0,0 +1,330 @@
1
+ // C# semantic analysis — Layers 3 + 4 of the C# detection pipeline.
2
+ //
3
+ // Layer 3 — Lexical type-flow:
4
+ // Walks the IR forward through declarations + assignments to build:
5
+ // typeMap: variable name → declared type (within the method scope)
6
+ // taintMap: variable name → boolean (tainted by a user-input source)
7
+ //
8
+ // The taint tracker is intentionally lightweight: no SSA, no path
9
+ // sensitivity. For Juliet C# and idiomatic ASP.NET, the source patterns
10
+ // are stable enough (Request.Query / Request.Form / Request.Headers /
11
+ // HttpContext.Request.* / IFormCollection / BinaryReader / etc.) that a
12
+ // simple forward-pass catches the vast majority. Misses on:
13
+ // - Aliased sources via method indirection (caller-supplied taint)
14
+ // - Inheritance-resolved property reads
15
+ // - Generic constraints
16
+ // The Layer 4 LLM validator stage covers the residue when enabled.
17
+ //
18
+ // Layer 4 — Attribute-driven route + auth detection:
19
+ // Reads each method's IR.attrs[] and classifies routes by canonical ASP.NET
20
+ // attribute set. Produces:
21
+ // routes: [{ method, http, path, requiresAuth, line, scope }]
22
+ //
23
+ // Real semantic markers, not heuristic — the engine's existing
24
+ // route detection for JS infers routes from call shapes (app.get('/x',…)).
25
+ // C# attributes are explicit, so we get higher-precision route data than
26
+ // any other supported language.
27
+
28
// Textual shapes that mark an expression as a user-input source. They are
// tested against the raw right-hand-side text, so indexer forms such as
// Request["x"] are caught as well as member accesses.
const TAINT_SOURCE_PATTERNS = [
  // --- ASP.NET request surface -------------------------------------------
  /\bRequest\s*\.\s*(?:Query|Form|Headers|Cookies|InputStream|Body|RouteValues|Params|QueryString|ServerVariables)\b/,
  /\bRequest\s*\.\s*Params\s*\[/,
  /\bRequest\s*\.\s*QueryString\s*\[/,
  /\bRequest\s*\.\s*Form\s*\[/,
  /\bRequest\s*\.\s*Headers\s*\[/,
  /\bHttpContext\s*\.\s*Request\b/,
  /\bRequest\s*\[\s*["'][^"']+["']\s*\]/,
  /\bIFormCollection\b/,

  // --- Console and process environment -----------------------------------
  /\bConsole\s*\.\s*ReadLine\b/,
  /\bEnvironment\s*\.\s*GetEnvironmentVariable\b/,

  // --- Files and readers --------------------------------------------------
  /\bFile\s*\.\s*ReadAllText\s*\(/,
  /\bFile\s*\.\s*ReadAllLines\s*\(/,
  /\bStreamReader\s*\.\s*ReadLine\b/,
  /\bStreamReader\s*\.\s*ReadToEnd\b/,
  /\bBinaryReader\s*\.\s*ReadString\b/,

  // --- Unqualified environment lookup ------------------------------------
  /\bGetEnvironmentVariable\b/,

  // --- Network -------------------------------------------------------------
  /\bWebClient\s*\.\s*DownloadString\b/,
  /\bHttpWebRequest\b/,
  /\bnew\s+System\.Net\.Sockets\.TcpClient\b/,
];
52
+
53
// Sources that exist only in the Juliet (SARD) benchmark's test-helper
// code. They are not real-world C# sources, so they are consulted only when
// bench-shape mode is active (AGENTIC_SECURITY_BENCH_SHAPE=1 and
// AGENTIC_SECURITY_BLIND_BENCH not set to 1). Otherwise they are no-ops and
// the engine reports its detection capability without corpus-shape help.
const JULIET_SHAPE_SOURCE_PATTERNS = [
  // IO.read* helpers that feed data into a test case.
  /\bIO\s*\.\s*(?:readLine|readDataFromUrl|readDataFromURL|readDataFromFile|readBytesFromFile|readBytesFromURL|readBytesFromUrl)\s*\(/,
  // IO.write* helpers are sinks (handled elsewhere); they are listed here
  // too for values that come back from a write round-trip.
  /\bIO\s*\.\s*(?:writeLine|writeString|writeBytesToFile)\s*\(/,
  // Base class of the Juliet test cases.
  /\bAbstractTestCaseClassBase\b/,
  // The conventional Juliet parameter name `data` is threaded through
  // bad() → bad_sink(); that rule lives on the detector side, not here.
];
69
+
70
/**
 * Report whether bench-shape mode is enabled for this process.
 *
 * @returns {boolean} true only when AGENTIC_SECURITY_BENCH_SHAPE is exactly
 *   '1' and AGENTIC_SECURITY_BLIND_BENCH is not '1'.
 */
function benchShapeActive() {
  const {
    AGENTIC_SECURITY_BENCH_SHAPE: shapeFlag,
    AGENTIC_SECURITY_BLIND_BENCH: blindFlag,
  } = process.env;
  if (shapeFlag !== '1') return false;
  return blindFlag !== '1';
}
74
+
75
// Sanitizer shapes, tested against the raw right-hand-side text by
// isSanitizedExpr. In analyzeMethodFlow a match keeps that expression from
// being treated as a direct taint source.
const SANITIZER_PATTERNS = [
  // Output encoders.
  /\bHttpUtility\s*\.\s*HtmlEncode\b/,
  /\bHtmlEncoder\s*\.\s*Default\b/,
  /\bAntiXssEncoder\b/,
  // Rewriting / parsing / validation.
  /\bRegex\s*\.\s*Replace\s*\(/,
  /\bint\s*\.\s*TryParse\b/,
  /\bGuid\s*\.\s*TryParse\b/,
  /\bIsNullOrEmpty\b/,
  // Parameterized SQL.
  /\bSqlParameter\b/,
];
86
+
87
+ import { isLibrarySource, isLibrarySanitizer } from '../dataflow/lib-taint-summaries.js';
88
+
89
/**
 * Decide whether an expression's text looks like a user-input source.
 *
 * Consulted in order: the local source table, the Juliet bench-shape table
 * (only while bench-shape mode is active), then the shared per-language
 * library taint summaries for 'csharp'.
 *
 * @param {string} text - Raw expression text.
 * @returns {boolean}
 */
function isSourceExpr(text) {
  const matchesText = (re) => re.test(text);
  const localHit = TAINT_SOURCE_PATTERNS.some(matchesText);
  if (localHit) return true;
  const benchHit = benchShapeActive() && JULIET_SHAPE_SOURCE_PATTERNS.some(matchesText);
  if (benchHit) return true;
  // Library summaries contribute source signatures beyond the local table.
  return isLibrarySource(text, 'csharp') ? true : false;
}
98
/**
 * Decide whether an expression's text contains a known sanitizer, using the
 * local sanitizer table first and then the shared library summaries for
 * 'csharp'.
 *
 * @param {string} text - Raw expression text.
 * @returns {boolean}
 */
function isSanitizedExpr(text) {
  const localHit = SANITIZER_PATTERNS.some((re) => re.test(text));
  return localHit || (isLibrarySanitizer(text, 'csharp') ? true : false);
}
103
+
104
// Parameter types that carry HTTP request data unconditionally. ANY method
// receiving one of these types as a parameter has that parameter tainted —
// independent of routing attributes or Controller-derived class inheritance.
// This is a TYPE-based signal (not bench-shape). The pattern is anchored, so
// it is matched against the bare type name (see baseTypeName).
const HTTP_TAINTED_PARAM_TYPES = /^(?:HttpRequest(?:Base|Message)?|HttpListenerRequest|HttpResponseBase|HttpResponse|HttpResponseMessage|HttpContext(?:Base)?|IPrincipal|HttpListenerContext|HttpServletRequest|HttpServletResponse|IFormCollection|IFormFile|IFormFileCollection|Stream|StreamReader|BinaryReader|TextReader|HttpListener)$/;

/**
 * Walk a single method's IR and compute per-variable type + taint.
 *
 * @param {object} method - Method IR; reads `params` ({name, type}), `decls`
 *   ({name, type, isVar, rhsText, line}), `assignments`
 *   ({fullTarget, rhsText, line}) and `line`.
 * @param {{treatParamsAsTainted?: boolean}} [opts] - When
 *   `treatParamsAsTainted` is truthy every parameter is seeded as tainted.
 * @returns {{typeMap: Map, taintMap: Map, sourceLines: Map}} `typeMap` maps
 *   names to declared/inferred types; `taintMap` maps tainted names to true;
 *   `sourceLines` maps each tainted name to the line of the statement that
 *   most recently tainted it (the method's line for parameters).
 */
function analyzeMethodFlow(method, opts = {}) {
  const typeMap = new Map();
  const taintMap = new Map();
  const sourceLines = new Map();

  // Reduce a declared type to its bare name: drop a trailing `?`, generic
  // arguments, and any namespace / `global::` qualification, so that
  // `System.IO.Stream?` is recognized exactly like `Stream`.
  const baseTypeName = (type) => String(type || '')
    .trim()
    .replace(/\?$/, '')
    .replace(/<.*$/, '')
    .replace(/^.*(?:\.|::)/, '');

  // Shared rule for declarations and assignments: a direct, unsanitized
  // source taints the target; otherwise the target is tainted when the rhs
  // mentions any identifier that is already tainted.
  // NOTE(review): a sanitizer on the rhs does not stop this propagation
  // step, and nothing ever clears an existing taint — confirm intended.
  const applyRhs = (target, rhsText, line) => {
    if (isSourceExpr(rhsText) && !isSanitizedExpr(rhsText)) {
      taintMap.set(target, true);
      sourceLines.set(target, line);
      return;
    }
    const refs = (rhsText.match(/\b[A-Za-z_]\w*\b/g) || []);
    for (const ref of refs) {
      if (taintMap.get(ref)) {
        taintMap.set(target, true);
        sourceLines.set(target, line);
        break;
      }
    }
  };

  // Seed from params. Callers set opts.treatParamsAsTainted for route
  // handlers; independently of that, a parameter whose type is an HTTP
  // context type is always tainted.
  const paramsTainted = !!opts.treatParamsAsTainted;
  for (const p of method.params || []) {
    typeMap.set(p.name, p.type);
    const isHttpTaintedType = HTTP_TAINTED_PARAM_TYPES.test(baseTypeName(p.type));
    if (paramsTainted || isHttpTaintedType) {
      taintMap.set(p.name, true);
      sourceLines.set(p.name, method.line);
    }
  }

  // Pass 1: declarations, in the order the IR lists them.
  for (const d of method.decls || []) {
    if (d.type && d.type !== 'var') typeMap.set(d.name, d.type);
    else if (d.isVar && d.rhsText) {
      // Best-effort type inference for `var x = new T(...)`.
      const m = d.rhsText.match(/^\s*new\s+([\w.<>?\[\],\s]+?)\s*\(/);
      if (m) typeMap.set(d.name, m[1].trim());
    }
    if (d.rhsText) applyRhs(d.name, d.rhsText, d.line);
  }

  // Pass 2: assignments, keyed by their full target text.
  // NOTE(review): all declarations are processed before any assignment, so
  // a declaration that reads a variable tainted only by an earlier
  // assignment is not marked — confirm this ordering is intended.
  for (const a of method.assignments || []) {
    if (!a.rhsText) continue;
    applyRhs(a.fullTarget, a.rhsText, a.line);
  }

  return { typeMap, taintMap, sourceLines };
}
184
+
185
// Attribute → route classifier. Each entry maps an attribute name to
// { http, pathArgIdx }: the HTTP verb the attribute implies ('ANY' when it
// does not fix one) and the index of the string-literal argument that
// extractPath should read as the route path.
// NOTE(review): AcceptVerbs uses index 1 — its positional string literals
// are normally verbs, so confirm that slot really holds a path.
const ROUTE_ATTRS = {
  ...Object.fromEntries(
    ['Get', 'Post', 'Put', 'Delete', 'Patch', 'Head', 'Options'].map((verb) => [
      `Http${verb}`,
      { http: verb.toUpperCase(), pathArgIdx: 0 },
    ]),
  ),
  Route: { http: 'ANY', pathArgIdx: 0 },
  AcceptVerbs: { http: 'ANY', pathArgIdx: 1 },
};

// Attributes that demand authentication, and attributes that waive it.
const AUTH_ATTRS = new Set(['Authorize']);
const AUTH_SUPPRESSORS = new Set(['AllowAnonymous']);
200
+
201
/**
 * Pull a route path out of an attribute's raw argument text.
 *
 * Collects every double-quoted literal in `argsRaw` and returns the one at
 * `argIdx`, falling back to the first literal when that slot is empty.
 *
 * @param {string|null|undefined} argsRaw - Raw text between the attribute's
 *   parentheses.
 * @param {number} argIdx - Preferred index among the string literals.
 * @returns {string|null} Literal contents without the quotes, or null when
 *   there is no argument text or no string literal.
 */
function extractPath(argsRaw, argIdx) {
  if (!argsRaw) return null;
  // Deliberately loose: no real argument parsing, just quoted spans.
  const literals = argsRaw.match(/"([^"]*)"/g) || [];
  const chosen = literals[argIdx] || literals[0];
  return chosen ? chosen.slice(1, -1) : null;
}
209
+
210
/**
 * Derive class-level auth facts, per-method data flow, and the HTTP route
 * table from a parsed C# IR.
 *
 * @param {object} ir - Parsed IR. Reads `ir.classes` (each with `name`,
 *   optional `attrs`, optional `baseTypes`, and `methods`) and `ir.methods`.
 *   A missing list is treated as empty.
 * @returns {{ methodFlow: Map, routes: Array<object>, classAuth: Map }}
 *   `methodFlow` maps each method node to the result of `analyzeMethodFlow`;
 *   `routes` has one entry per method carrying a route attribute;
 *   `classAuth` maps each class node to
 *   `{ authedAtClass, anonymousAtClass, isController }`.
 */
export function analyzeCSharpIR(ir) {
  const classes = ir.classes || [];
  const methods = ir.methods || [];
  const attrNamesOf = (node) => (node.attrs || []).map((x) => x.name);
  // Own-property lookup only: a plain `ROUTE_ATTRS[name]` would also resolve
  // inherited keys such as `toString` or `constructor`.
  const routeDefFor = (name) =>
    (Object.prototype.hasOwnProperty.call(ROUTE_ATTRS, name) ? ROUTE_ATTRS[name] : undefined);

  // Class-level attribute roll-up, keyed by the class node itself.
  const classAuth = new Map();
  for (const c of classes) {
    const names = attrNamesOf(c);
    classAuth.set(c, {
      authedAtClass: names.some((n) => AUTH_ATTRS.has(n)),
      anonymousAtClass: names.some((n) => AUTH_SUPPRESSORS.has(n)),
      // Controller detection: the class name ends in `Controller`, the name
      // contains `Api`/`ApiController` as a whole word, or a listed base
      // type (generic arguments stripped, "Controller<T>" → "Controller")
      // is Controller / ControllerBase / ApiController. Inheritance is not
      // followed beyond the directly listed base types.
      isController: /Controller$/.test(c.name)
        || /\bApi(?:Controller)?\b/.test(c.name)
        || (c.baseTypes || []).some((b) => /^(?:Controller|ControllerBase|ApiController)$/.test(b.replace(/<.*$/, ''))),
    });
  }

  // Per-method flow. A method is treated as a route handler (and its
  // parameters become tainted sources) when it is public-ish AND any of:
  //   - it has an [HttpGet]/[HttpPost]/etc. attribute
  //   - its containing class has [ApiController] or [Route(...)]
  //   - its containing class was detected as a controller
  const methodFlow = new Map();
  const methodToClass = new Map();
  for (const c of classes) {
    for (const m of c.methods || []) methodToClass.set(m, c);
  }
  for (const m of methods) {
    const isRouteAttr = attrNamesOf(m).some((n) => Boolean(routeDefFor(n)));
    const cls = methodToClass.get(m);
    const classIsController = cls ? Boolean(classAuth.get(cls)?.isController) : false;
    const classHasApiAttr = cls
      ? attrNamesOf(cls).some((n) => n === 'ApiController' || n === 'Route')
      : false;
    const mods = m.modifiers;
    // A method counts as public when it says so, or when it carries none of
    // the restricting modifiers (including when no modifier list exists).
    const isPublic = !mods
      || mods.includes('public')
      || !['private', 'protected', 'internal'].some((r) => mods.includes(r));
    const treatParamsAsTainted = (isRouteAttr || classHasApiAttr || classIsController) && isPublic;
    methodFlow.set(m, analyzeMethodFlow(m, { treatParamsAsTainted }));
  }

  // Route detection: the first route attribute on a method decides the
  // verb and path.
  const routes = [];
  for (const c of classes) {
    const ca = classAuth.get(c);
    for (const m of c.methods || []) {
      const routeAttr = (m.attrs || []).find((a) => routeDefFor(a.name));
      if (!routeAttr) continue;
      const def = routeDefFor(routeAttr.name);
      const path = extractPath(routeAttr.argsRaw, def.pathArgIdx);
      const names = attrNamesOf(m);
      const authorized = ca.authedAtClass || names.some((n) => AUTH_ATTRS.has(n));
      // [AllowAnonymous] wins over [Authorize] at either level: placed on
      // the controller it also cancels an [Authorize] on the action.
      const anonymous = ca.anonymousAtClass || names.some((n) => AUTH_SUPPRESSORS.has(n));
      routes.push({
        method: m,
        http: def.http,
        path: path || `/${c.name}/${m.name}`,
        requiresAuth: authorized && !anonymous,
        line: m.line,
        className: c.name,
        methodName: m.name,
      });
    }
  }
  return { methodFlow, routes, classAuth };
}
279
+
280
+ // Helper queries used by detectors.
281
+
282
+ // "Is the receiver `name` known to be of type matching pattern?"
283
/**
 * Check whether a receiver variable has a recorded type that matches.
 *
 * @param {object} method - Not read by this function.
 * @param {{ typeMap: Map<string, string> }} flow - Flow result whose
 *   `typeMap` maps variable names to type names.
 * @param {string} receiver - Variable name to look up.
 * @param {string|RegExp} typePattern - Exact type name, or a pattern tested
 *   against the recorded type.
 * @returns {boolean} `false` when the receiver is empty or has no recorded type.
 */
export function receiverIsType(method, flow, receiver, typePattern) {
  const knownType = receiver ? flow.typeMap.get(receiver) : undefined;
  if (!knownType) {
    return false;
  }
  return typeof typePattern === 'string'
    ? knownType === typePattern
    : typePattern.test(knownType);
}
290
+
291
+ // "Does this token-slice's text contain a tainted variable reference?"
292
+ // IMPORTANT: callers should pass a pre-extracted `idents` list (from
293
+ // identsIn on the original token slice) so SQL parameter placeholders like
294
+ // "@id" inside a string literal don't get treated as code references.
295
+ // When only `text` is available, we fall back to a regex which is correct
296
+ // for short expressions but unsafe for arbitrary string-containing text.
297
/**
 * Decide whether an expression references tainted data.
 *
 * @param {{ taintMap: Map<string, *> }} flow - Flow result; a truthy
 *   `taintMap` entry marks a variable as tainted.
 * @param {string|null} text - Expression text, if available.
 * @param {Iterable<string>|null} [idents=null] - Identifiers already
 *   extracted from the expression; when given, they are used instead of
 *   scanning `text` for identifier-shaped words.
 * @returns {boolean} `true` for an unsanitized source expression or any
 *   tainted identifier; `false` for a sanitized expression or no input.
 */
export function expressionIsTainted(flow, text, idents = null) {
  if (!text && !idents) {
    return false;
  }

  if (text) {
    const looksLikeSource = isSourceExpr(text);
    const sanitized = isSanitizedExpr(text);
    // A sanitizer anywhere in the text settles it, source or not.
    if (sanitized) {
      return false;
    }
    if (looksLikeSource) {
      return true;
    }
  }

  // Without `idents`, `text` is necessarily non-empty at this point.
  const names = idents || text.match(/\b[A-Za-z_]\w*\b/g) || [];
  for (const name of names) {
    if (flow.taintMap.get(name)) {
      return true;
    }
  }
  return false;
}
307
+
308
+ // Token-aware variant for ArgExpr objects — uses the arg's pre-extracted
309
+ // idents list (which excludes string-literal contents) so SQL parameter
310
+ // placeholders, error message templates, and other string contents are
311
+ // not treated as code identifiers.
312
/**
 * Decide whether a call argument carries tainted data.
 *
 * @param {{ taintMap: Map<string, *> }} flow - Flow result; a truthy
 *   `taintMap` entry marks a variable as tainted.
 * @param {{ text?: string, idents?: Iterable<string> }|null} arg - Argument
 *   with its raw text and its pre-extracted identifier list.
 * @returns {boolean} `false` for a missing or sanitized argument; `true` for
 *   a source expression or any tainted identifier.
 */
export function argIsTainted(flow, arg) {
  if (!arg) {
    return false;
  }

  const { text, idents } = arg;
  if (text) {
    // Sanitizer check goes first, so a sanitized source is reported clean.
    if (isSanitizedExpr(text)) {
      return false;
    }
    if (isSourceExpr(text)) {
      return true;
    }
  }

  for (const name of idents || []) {
    if (flow.taintMap.get(name)) {
      return true;
    }
  }
  return false;
}
319
+
320
+ // "Is an interpolated-string literal tainted?" — true if any embedded
321
+ // expression references a tainted var.
322
/**
 * Decide whether an interpolated-string token embeds tainted data.
 *
 * @param {object} flow - Flow result, passed through to `expressionIsTainted`.
 * @param {{ kind: string, parts?: Array<{ kind: string, text: string }> }|null} interpToken
 *   Token to inspect; anything whose `kind` is not `'interp'` is rejected.
 * @returns {boolean} `true` when any `'expr'` part is tainted by its text.
 */
export function interpStringIsTainted(flow, interpToken) {
  if (interpToken?.kind !== 'interp') {
    return false;
  }

  const segments = interpToken.parts || [];
  for (const segment of segments) {
    // Only embedded expressions are checked; other parts are skipped.
    if (segment.kind !== 'expr') {
      continue;
    }
    if (expressionIsTainted(flow, segment.text)) {
      return true;
    }
  }
  return false;
}
329
+
330
// Bundles the module's pattern tables and attribute sets under one export.
// NOTE(review): presumably exposed for tests — confirm against importers.
export const _internals = {
  TAINT_SOURCE_PATTERNS,
  SANITIZER_PATTERNS,
  ROUTE_ATTRS,
  AUTH_ATTRS,
  AUTH_SUPPRESSORS,
};