@clear-capabilities/agentic-security-scanner 0.79.0 → 0.84.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/dist/178.index.js +1 -1
  2. package/dist/333.index.js +283 -0
  3. package/dist/384.index.js +1 -1
  4. package/dist/637.index.js +1 -1
  5. package/dist/838.index.js +1 -1
  6. package/dist/839.index.js +170 -0
  7. package/dist/985.index.js +140 -1
  8. package/dist/agentic-security.mjs +10 -10
  9. package/dist/agentic-security.mjs.sha256 +1 -1
  10. package/package.json +7 -5
  11. package/src/.agentic-security/findings.json +117732 -0
  12. package/src/.agentic-security/last-scan.json +117732 -0
  13. package/src/.agentic-security/last-scan.json.sig +1 -0
  14. package/src/.agentic-security/scan-history.json +12946 -0
  15. package/src/.agentic-security/streak.json +21 -0
  16. package/src/dataflow/.agentic-security/findings.json +6086 -0
  17. package/src/dataflow/.agentic-security/last-scan.json +6086 -0
  18. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  19. package/src/dataflow/.agentic-security/scan-history.json +250 -0
  20. package/src/dataflow/.agentic-security/streak.json +21 -0
  21. package/src/dataflow/cross-service-taint.js +201 -0
  22. package/src/dataflow/formal-verify.js +204 -0
  23. package/src/dataflow/ifds-precise.js +222 -0
  24. package/src/dataflow/k2-summary-cache.js +153 -0
  25. package/src/dataflow/lib-taint-summaries.js +198 -0
  26. package/src/dataflow/privacy-taint.js +205 -0
  27. package/src/dataflow/smt-feasibility.js +189 -0
  28. package/src/engine.js +825 -127
  29. package/src/ir/.agentic-security/findings.json +4011 -0
  30. package/src/ir/.agentic-security/last-scan.json +4011 -0
  31. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  32. package/src/ir/.agentic-security/scan-history.json +193 -0
  33. package/src/ir/.agentic-security/streak.json +20 -0
  34. package/src/ir/cpp-preprocessor.js +142 -0
  35. package/src/ir/csharp-ir.js +604 -0
  36. package/src/ir/universal-ir.js +403 -0
  37. package/src/mcp/.agentic-security/findings.json +8632 -0
  38. package/src/mcp/.agentic-security/last-scan.json +8632 -0
  39. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  40. package/src/mcp/.agentic-security/scan-history.json +331 -0
  41. package/src/mcp/.agentic-security/streak.json +20 -0
  42. package/src/mcp/tools.js +140 -1
  43. package/src/posture/.agentic-security/findings.json +77181 -0
  44. package/src/posture/.agentic-security/last-scan.json +77181 -0
  45. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  46. package/src/posture/.agentic-security/scan-history.json +8904 -0
  47. package/src/posture/.agentic-security/streak.json +21 -0
  48. package/src/posture/api-contract.js +193 -0
  49. package/src/posture/attack-taxonomy.js +227 -0
  50. package/src/posture/auditor-walkthrough.js +252 -0
  51. package/src/posture/claude-authorship.js +197 -0
  52. package/src/posture/compliance-frameworks/.agentic-security/findings.json +80 -0
  53. package/src/posture/compliance-frameworks/.agentic-security/last-scan.json +80 -0
  54. package/src/posture/compliance-frameworks/.agentic-security/last-scan.json.sig +1 -0
  55. package/src/posture/compliance-frameworks/.agentic-security/scan-history.json +90 -0
  56. package/src/posture/compliance-frameworks/.agentic-security/streak.json +22 -0
  57. package/src/posture/compliance-frameworks/ccpa.json +32 -0
  58. package/src/posture/compliance-frameworks/eu-ai-act.json +51 -0
  59. package/src/posture/compliance-frameworks/gdpr.json +45 -0
  60. package/src/posture/compliance-frameworks/hipaa-security-rule.json +56 -0
  61. package/src/posture/compliance-frameworks/nist-ai-600-1.json +51 -0
  62. package/src/posture/compliance-frameworks/nist-csf-2.json +73 -0
  63. package/src/posture/compliance-frameworks/owasp-asvs-5.json +79 -0
  64. package/src/posture/compliance-frameworks/owasp-llm-top-10.json +69 -0
  65. package/src/posture/compliance-policy.js +218 -0
  66. package/src/posture/composite-risk.js +122 -0
  67. package/src/posture/cross-repo-memory.js +180 -0
  68. package/src/posture/csharp-analysis.js +330 -0
  69. package/src/posture/dep-add-guard.js +197 -0
  70. package/src/posture/exploit-bundle.js +210 -0
  71. package/src/posture/federated-learning.js +172 -0
  72. package/src/posture/findings-memory.js +152 -0
  73. package/src/posture/fix-style-mirror.js +118 -0
  74. package/src/posture/git-history.js +141 -0
  75. package/src/posture/intent-context.js +175 -0
  76. package/src/posture/license-attributions.js +94 -0
  77. package/src/posture/license-graph.js +238 -0
  78. package/src/posture/model-rescan.js +76 -0
  79. package/src/posture/pattern-propagation.js +39 -0
  80. package/src/posture/pqc-migration-plan.js +158 -0
  81. package/src/posture/pr-augment.js +234 -0
  82. package/src/posture/reachability-filter.js +33 -2
  83. package/src/posture/realtime-cve-monitor.js +214 -0
  84. package/src/posture/risk-dollars.js +158 -0
  85. package/src/posture/runtime-correlation.js +174 -0
  86. package/src/posture/sbom-diff.js +171 -0
  87. package/src/posture/sca-policy.js +235 -0
  88. package/src/posture/sca-upgrade.js +259 -0
  89. package/src/posture/threat-model-auto.js +268 -0
  90. package/src/posture/threat-model-grounding.js +169 -0
  91. package/src/posture/time-to-fix.js +129 -0
  92. package/src/posture/triage-learning.js +170 -0
  93. package/src/posture/triage-memory.js +151 -0
  94. package/src/posture/triage.js +40 -1
  95. package/src/posture/watch-mode.js +171 -0
  96. package/src/posture/workflow-installer.js +231 -0
  97. package/src/sast/.agentic-security/findings.json +6154 -0
  98. package/src/sast/.agentic-security/last-scan.json +6154 -0
  99. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  100. package/src/sast/.agentic-security/scan-history.json +941 -0
  101. package/src/sast/.agentic-security/streak.json +22 -0
  102. package/src/sast/_secret-entropy.js +145 -0
  103. package/src/sast/cloud-iam.js +312 -0
  104. package/src/sast/cpp.js +138 -4
  105. package/src/sast/crypto-protocol.js +388 -0
  106. package/src/sast/csharp-tokenizer.js +392 -0
  107. package/src/sast/csharp.js +924 -138
  108. package/src/sast/dapp-frontend.js +200 -0
  109. package/src/sast/k8s-admission.js +271 -0
  110. package/src/sast/llm-app.js +272 -0
  111. package/src/sast/ml-supply-chain.js +259 -0
  112. package/src/sast/mobile.js +224 -0
  113. package/src/sast/post-quantum-crypto.js +348 -0
  114. package/src/sast/web3-advanced.js +375 -0
  115. package/src/sca/.agentic-security/findings.json +7460 -0
  116. package/src/sca/.agentic-security/last-scan.json +7460 -0
  117. package/src/sca/.agentic-security/last-scan.json.sig +1 -0
  118. package/src/sca/.agentic-security/scan-history.json +113 -0
  119. package/src/sca/.agentic-security/streak.json +21 -0
  120. package/src/sca/CLAUDE.md +161 -0
  121. package/src/sca/binary-metadata.js +37 -15
  122. package/src/sca/sigstore-verify.js +215 -0
@@ -0,0 +1,330 @@
1
+ // C# semantic analysis — Layers 3 + 4 of the C# detection pipeline.
2
+ //
3
+ // Layer 3 — Lexical type-flow:
4
+ // Walks the IR forward through declarations + assignments to build:
5
+ // typeMap: variable name → declared type (within the method scope)
6
+ // taintMap: variable name → boolean (tainted by a user-input source)
7
+ //
8
+ // The taint tracker is intentionally lightweight: no SSA, no path
9
+ // sensitivity. For Juliet C# and idiomatic ASP.NET, the source patterns
10
+ // are stable enough (Request.Query / Request.Form / Request.Headers /
11
+ // HttpContext.Request.* / IFormCollection / BinaryReader / etc.) that a
12
+ // simple forward-pass catches the vast majority. Misses on:
13
+ // - Aliased sources via method indirection (caller-supplied taint)
14
+ // - Inheritance-resolved property reads
15
+ // - Generic constraints
16
+ // The Layer 4 LLM validator stage covers the residue when enabled.
17
+ //
18
+ // Layer 4 — Attribute-driven route + auth detection:
19
+ // Reads each method's IR.attrs[] and classifies routes by canonical ASP.NET
20
+ // attribute set. Produces:
21
+ // routes: [{ method, http, path, requiresAuth, line, scope }]
22
+ //
23
+ // Real semantic markers, not heuristic — the engine's existing
24
+ // route detection for JS infers routes from call shapes (app.get('/x',…)).
25
+ // C# attributes are explicit, so we get higher-precision route data than
26
+ // any other supported language.
27
+
28
// User-input source patterns. A declaration or assignment is treated as a
// taint source when its right-hand-side text matches any of these regexes
// (see isSourceExpr). Matching is purely textual on the raw rhs, so indexer
// shapes such as Request["x"] are caught as well.
// NOTE(review): some entries overlap — the Request.Params[ / QueryString[ /
// Form[ / Headers[ patterns are already matched by the first alternation, and
// the bare GetEnvironmentVariable pattern also matches
// Environment.GetEnvironmentVariable. This is harmless for `.some()` matching;
// confirm nothing depends on the array's length before pruning, since the
// array is exported through _internals.
const TAINT_SOURCE_PATTERNS = [
  /\bRequest\s*\.\s*(?:Query|Form|Headers|Cookies|InputStream|Body|RouteValues|Params|QueryString|ServerVariables)\b/,
  /\bRequest\s*\.\s*Params\s*\[/,
  /\bRequest\s*\.\s*QueryString\s*\[/,
  /\bRequest\s*\.\s*Form\s*\[/,
  /\bRequest\s*\.\s*Headers\s*\[/,
  /\bHttpContext\s*\.\s*Request\b/,
  /\bRequest\s*\[\s*["'][^"']+["']\s*\]/,
  /\bIFormCollection\b/,
  /\bConsole\s*\.\s*ReadLine\b/,
  /\bEnvironment\s*\.\s*GetEnvironmentVariable\b/,
  /\bFile\s*\.\s*ReadAllText\s*\(/,
  /\bFile\s*\.\s*ReadAllLines\s*\(/,
  /\bStreamReader\s*\.\s*ReadLine\b/,
  /\bStreamReader\s*\.\s*ReadToEnd\b/,
  /\bBinaryReader\s*\.\s*ReadString\b/,
  /\bGetEnvironmentVariable\b/,
  /\bWebClient\s*\.\s*DownloadString\b/,
  /\bHttpWebRequest\b/,
  /\bnew\s+System\.Net\.Sockets\.TcpClient\b/,
];
52
+
53
// Bench-shape-only sources. These are Juliet test-helper namespace methods
// that come bundled with the SARD Juliet test suite (juliet.testcasesupport.IO
// in Java, similar conventions in C#). They are NOT real-world C# sources,
// so they are only consulted when benchShapeActive() returns true, i.e. when
// AGENTIC_SECURITY_BENCH_SHAPE=1 is set and AGENTIC_SECURITY_BLIND_BENCH is
// not '1'. In every other configuration these patterns are no-ops and the
// engine reports its detection capability without corpus-shape help.
const JULIET_SHAPE_SOURCE_PATTERNS = [
  /\bIO\s*\.\s*(?:readLine|readDataFromUrl|readDataFromURL|readDataFromFile|readBytesFromFile|readBytesFromURL|readBytesFromUrl)\s*\(/,
  // NOTE(review): these are Juliet *sink* helpers. They are listed here so a
  // value read back from a write round-trip counts as sourced; confirm that
  // is still intended, because any rhs containing IO.writeLine( becomes a
  // source in bench-shape mode.
  /\bIO\s*\.\s*(?:writeLine|writeString|writeBytesToFile)\s*\(/,
  /\bAbstractTestCaseClassBase\b/,
  // The conventional Juliet param name `data` shows up as the value
  // threaded through bad() → bad_sink(). No pattern in this table matches
  // `data`; per the original note that case is handled on the detector side.
];
69
+
70
// True only when Juliet bench-shape heuristics were explicitly requested
// (AGENTIC_SECURITY_BENCH_SHAPE is exactly '1') and blind-bench mode
// (AGENTIC_SECURITY_BLIND_BENCH exactly '1') has not been switched on.
function benchShapeActive() {
  const {
    AGENTIC_SECURITY_BENCH_SHAPE: shapeFlag,
    AGENTIC_SECURITY_BLIND_BENCH: blindFlag,
  } = process.env;
  if (shapeFlag !== '1') return false;
  return blindFlag !== '1';
}
74
+
75
// Sanitizers — an expression whose text matches any of these regexes is
// treated as sanitized by isSanitizedExpr: it is not accepted as a taint
// source and expressionIsTainted / argIsTainted report it as clean.
// NOTE(review): IsNullOrEmpty is a presence check and Regex.Replace( depends
// entirely on the pattern used — neither neutralises content on its own, so
// an rhs such as `string.IsNullOrEmpty(q) ? "" : q` is reported clean.
// Confirm these two entries are intentional precision trade-offs.
const SANITIZER_PATTERNS = [
  /\bHttpUtility\s*\.\s*HtmlEncode\b/,
  /\bHtmlEncoder\s*\.\s*Default\b/,
  /\bAntiXssEncoder\b/,
  /\bRegex\s*\.\s*Replace\s*\(/,
  /\bint\s*\.\s*TryParse\b/,
  /\bGuid\s*\.\s*TryParse\b/,
  /\bIsNullOrEmpty\b/,
  /\bSqlParameter\b/,
];
86
+
87
+ import { isLibrarySource, isLibrarySanitizer } from '../dataflow/lib-taint-summaries.js';
88
+
89
// Decide whether `text` reads from a user-controlled source. Checked in order:
// the local TAINT_SOURCE_PATTERNS table, the Juliet bench-shape table (only
// while benchShapeActive()), then the per-language library taint summaries,
// which add source signatures that are not in the local table.
function isSourceExpr(text) {
  const hits = (re) => re.test(text);
  return Boolean(
    TAINT_SOURCE_PATTERNS.some(hits)
      || (benchShapeActive() && JULIET_SHAPE_SOURCE_PATTERNS.some(hits))
      || isLibrarySource(text, 'csharp'),
  );
}
98
// Decide whether `text` contains a recognised sanitizer: first the local
// SANITIZER_PATTERNS table, then the C# library sanitizer summaries.
function isSanitizedExpr(text) {
  for (const pattern of SANITIZER_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return Boolean(isLibrarySanitizer(text, 'csharp'));
}
103
+
104
// (Describes analyzeMethodFlow, defined below this constant.)
// Walk a single method's body and compute per-variable type + taint.
// Returns { typeMap, taintMap, sourceLines } where sourceLines records the
// line recorded for each tainted variable.
//
// Parameter types that carry HTTP request data unconditionally. ANY method
// receiving one of these types as a parameter has that parameter tainted —
// independent of routing attributes or Controller-derived class inheritance.
// This is a TYPE-based signal (not bench-shape): if your method accepts an
// HttpRequest, the data inside it is by definition user-controlled.
// The regex is anchored (^…$), so it must be tested against a bare type name.
// NOTE(review): the list also contains response types, IPrincipal and the
// Java servlet types HttpServletRequest / HttpServletResponse — confirm these
// are meant to seed taint in C# analysis.
const HTTP_TAINTED_PARAM_TYPES = /^(?:HttpRequest(?:Base|Message)?|HttpListenerRequest|HttpResponseBase|HttpResponse|HttpResponseMessage|HttpContext(?:Base)?|IPrincipal|HttpListenerContext|HttpServletRequest|HttpServletResponse|IFormCollection|IFormFile|IFormFileCollection|Stream|StreamReader|BinaryReader|TextReader|HttpListener)$/;
113
+
114
// Walk one method's declarations and assignments and compute per-variable
// type and taint facts.
//
// method: IR method node; reads method.params [{ name, type }], method.line,
//         method.decls [{ name, type, isVar, rhsText, line }] and
//         method.assignments [{ fullTarget, rhsText, line }].
// opts.treatParamsAsTainted: when truthy every parameter is seeded as tainted
//         (route handlers / controller actions — ASP.NET model binding).
//
// Returns { typeMap, taintMap, sourceLines }:
//   typeMap:     variable name → declared (or `new T(...)`-inferred) type
//   taintMap:    variable name / assignment target → true when tainted
//   sourceLines: same keys → line of the most recent statement that tainted it
//
// The analysis is a single forward pass with no SSA and no path sensitivity.
// Taint is only ever added, never removed, so a later clean or sanitized
// reassignment does not hide an earlier tainted value.
function analyzeMethodFlow(method, opts = {}) {
  const typeMap = new Map();
  const taintMap = new Map();
  const sourceLines = new Map();

  const markTainted = (key, line) => {
    taintMap.set(key, true);
    sourceLines.set(key, line);
  };

  // True when the rhs mentions any identifier that is already tainted.
  const referencesTaintedVar = (rhs) =>
    (rhs.match(/\b[A-Za-z_]\w*\b/g) || []).some((ref) => taintMap.get(ref));

  // Reduce a declared type to its bare name so the anchored
  // HTTP_TAINTED_PARAM_TYPES regex can match it: drop the nullable marker,
  // array suffix, generic arguments and any namespace / `global::` qualifier
  // ("Microsoft.AspNetCore.Http.HttpRequest?" → "HttpRequest").
  const baseTypeName = (type) => String(type || '')
    .trim()
    .replace(/\?$/, '')
    .replace(/(?:\[[\s,]*\])+$/, '')
    .replace(/<.*$/, '')
    .replace(/^.*[.:]/, '');

  // Seed from params. Parameters are tainted when the caller says this is a
  // handler method, or when the parameter's TYPE is an HTTP context type —
  // the data inside those types is user-controlled regardless of routing.
  // For other methods parameters stay untainted; caller-flow is handled by
  // the cross-file taint engine.
  const paramsTainted = Boolean(opts.treatParamsAsTainted);
  for (const p of method.params || []) {
    typeMap.set(p.name, p.type);
    if (paramsTainted || HTTP_TAINTED_PARAM_TYPES.test(baseTypeName(p.type))) {
      markTainted(p.name, method.line);
    }
  }

  // Merge declarations and assignments into one statement stream. Processing
  // all declarations before all assignments loses flows such as
  //   string a; a = Request.Query["x"]; var b = a;
  // because `b` is declared after `a` is assigned. When every node has a
  // numeric line we order by line (the sort is stable, so a declaration stays
  // ahead of an assignment on the same line); otherwise the original
  // decls-then-assignments order is kept.
  const statements = [
    ...(method.decls || []).map((node) => ({ node, isDecl: true })),
    ...(method.assignments || []).map((node) => ({ node, isDecl: false })),
  ];
  if (statements.every(({ node }) => Number.isFinite(node.line))) {
    statements.sort((x, y) => x.node.line - y.node.line);
  }

  for (const { node, isDecl } of statements) {
    if (isDecl) {
      if (node.type && node.type !== 'var') {
        typeMap.set(node.name, node.type);
      } else if (node.isVar && node.rhsText) {
        // Best-effort type inference for `var x = new T(...)`.
        const ctor = node.rhsText.match(/^\s*new\s+([\w.<>?\[\],\s]+?)\s*\(/);
        if (ctor) typeMap.set(node.name, ctor[1].trim());
      }
    }

    const rhs = node.rhsText;
    if (!rhs) continue;

    // A sanitized rhs neither introduces nor propagates taint. This matches
    // expressionIsTainted / argIsTainted, which also treat a sanitized
    // expression as clean even when it mentions a tainted variable.
    if (isSanitizedExpr(rhs)) continue;

    if (isSourceExpr(rhs) || referencesTaintedVar(rhs)) {
      markTainted(isDecl ? node.name : node.fullTarget, node.line);
    }
  }

  return { typeMap, taintMap, sourceLines };
}
184
+
185
// Attribute → route classifier. Each entry maps an ASP.NET attribute name to
// { http, pathArgIdx }:
//   http:       HTTP verb reported for the route ('ANY' when the attribute
//               itself does not pin a single verb)
//   pathArgIdx: index of the string literal in the attribute's argument list
//               that extractPath should prefer as the route template
// Authorization is not part of these entries; it is tracked separately
// through AUTH_ATTRS and AUTH_SUPPRESSORS below.
const ROUTE_ATTRS = {
  HttpGet: { http: 'GET', pathArgIdx: 0 },
  HttpPost: { http: 'POST', pathArgIdx: 0 },
  HttpPut: { http: 'PUT', pathArgIdx: 0 },
  HttpDelete: { http: 'DELETE', pathArgIdx: 0 },
  HttpPatch: { http: 'PATCH', pathArgIdx: 0 },
  HttpHead: { http: 'HEAD', pathArgIdx: 0 },
  HttpOptions: { http: 'OPTIONS',pathArgIdx: 0 },
  Route: { http: 'ANY', pathArgIdx: 0 },
  AcceptVerbs: { http: 'ANY', pathArgIdx: 1 },
};
// Attribute names that require an authenticated caller.
const AUTH_ATTRS = new Set(['Authorize']);
// Attribute names that switch authorization off again.
const AUTH_SUPPRESSORS = new Set(['AllowAnonymous']);
200
+
201
// String literals that are HTTP verbs. [AcceptVerbs("GET", "POST")] passes
// verbs positionally, so they must never be reported as a route template.
const HTTP_VERB_LITERAL = /^(?:GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS|TRACE|CONNECT)$/i;

// Extract the route template from an attribute's raw argument text.
//
// argsRaw: text between the attribute's parentheses (may be empty / null).
// argIdx:  index of the positional string literal to prefer.
// Returns the template string, or null when none is present.
//
// Resolution order:
//   1. an explicit named `Route = "..."` argument (how AcceptVerbs carries
//      its template);
//   2. the argIdx-th positional string literal;
//   3. the first positional string literal.
// Named arguments such as `Name = "GetItem"` or `Order = ...` are not
// positional and are skipped, so a route *name* is never mistaken for a path.
// For argIdx > 0 (AcceptVerbs) literals that are plain HTTP verbs are dropped.
function extractPath(argsRaw, argIdx) {
  if (!argsRaw) return null;

  const namedRoute = argsRaw.match(/\bRoute\s*=\s*@?"([^"]*)"/);
  if (namedRoute) return namedRoute[1];

  // Very loose arg splitter: collect string literals, remembering whether
  // each one was introduced by `Identifier =` (a named argument).
  let positional = [];
  for (const m of argsRaw.matchAll(/(\b[A-Za-z_]\w*\s*=\s*)?@?"([^"]*)"/g)) {
    if (!m[1]) positional.push(m[2]);
  }
  if (argIdx > 0) {
    positional = positional.filter((lit) => !HTTP_VERB_LITERAL.test(lit));
  }

  if (positional[argIdx] !== undefined) return positional[argIdx];
  if (positional.length > 0) return positional[0];
  return null;
}
209
+
210
// Run the Layer 3 + Layer 4 analysis over a parsed C# IR.
//
// ir: reads ir.classes [{ name, attrs, baseTypes, methods }] and ir.methods
//     (the method list; each method has attrs, modifiers, params, line, name).
//
// Returns:
//   methodFlow: Map(method → { typeMap, taintMap, sourceLines })
//   routes:     [{ method, http, path, requiresAuth, line, className, methodName }]
//   classAuth:  Map(class → { authedAtClass, anonymousAtClass, isController })
export function analyzeCSharpIR(ir) {
  const classes = ir.classes || [];
  const methods = ir.methods || [];
  // Own-property lookup so attribute names can never resolve to members
  // inherited from Object.prototype.
  const routeDefFor = (attrName) =>
    (Object.prototype.hasOwnProperty.call(ROUTE_ATTRS, attrName) ? ROUTE_ATTRS[attrName] : undefined);

  // Class-level attribute roll-up.
  const classAuth = new Map();
  for (const c of classes) {
    const classAttrNames = (c.attrs || []).map((x) => x.name);
    classAuth.set(c, {
      authedAtClass: classAttrNames.some((n) => AUTH_ATTRS.has(n)),
      anonymousAtClass: classAttrNames.some((n) => AUTH_SUPPRESSORS.has(n)),
      // ASP.NET MVC controller detection. Inheritance is not resolved; the
      // proxies used are: class name ends in `Controller`, class name is
      // `Api` / `ApiController`, or a direct base type (generics stripped,
      // "Controller<T>" → "Controller") is Controller / ControllerBase /
      // ApiController.
      isController: /Controller$/.test(c.name)
        || /\bApi(?:Controller)?\b/.test(c.name)
        || (c.baseTypes || []).some((b) => /^(?:Controller|ControllerBase|ApiController)$/.test(b.replace(/<.*$/, ''))),
    });
  }

  // Per-method flow. A method is treated as a route handler (and its
  // parameters become tainted sources) when it passes the visibility check
  // below and ANY of these are true:
  //   - it has an [HttpGet]/[HttpPost]/etc. attribute
  //   - its containing class has [ApiController] or [Route(...)]
  //   - its containing class is detected as a controller
  const methodFlow = new Map();
  const methodToClass = new Map();
  for (const c of classes) {
    for (const m of c.methods || []) methodToClass.set(m, c);
  }
  for (const m of methods) {
    const attrNames = (m.attrs || []).map((x) => x.name);
    const isRouteAttr = attrNames.some((n) => Boolean(routeDefFor(n)));
    const cls = methodToClass.get(m);
    const classIsController = cls ? Boolean(classAuth.get(cls)?.isController) : false;
    const classHasApiAttr = Boolean(cls)
      && (cls.attrs || []).some((a) => a.name === 'ApiController' || a.name === 'Route');
    // NOTE(review): a method with no access modifier is treated as public
    // here, whereas C# members default to private. This looks like deliberate
    // over-approximation — confirm.
    const isPublic = !m.modifiers
      || m.modifiers.includes('public')
      || (!m.modifiers.includes('private') && !m.modifiers.includes('protected') && !m.modifiers.includes('internal'));
    const treatParamsAsTainted = (isRouteAttr || classHasApiAttr || classIsController) && isPublic;
    methodFlow.set(m, analyzeMethodFlow(m, { treatParamsAsTainted }));
  }

  // Route detection.
  const routes = [];
  for (const c of classes) {
    const ca = classAuth.get(c);
    for (const m of c.methods || []) {
      const attrNames = (m.attrs || []).map((x) => x.name);
      let http = null;
      let path = null;
      for (const a of m.attrs || []) {
        const def = routeDefFor(a.name);
        if (!def) continue;
        // A concrete verb attribute wins over the 'ANY' reported by
        // [Route]/[AcceptVerbs], whatever order the attributes are written in:
        // [Route("x")] [HttpPost] only accepts POST.
        if (http === null || (http === 'ANY' && def.http !== 'ANY')) http = def.http;
        // The first attribute that actually carries a template supplies the path.
        if (path === null) path = extractPath(a.argsRaw, def.pathArgIdx);
      }
      if (!http) continue;

      // [AllowAnonymous] bypasses [Authorize] at either level: applied to the
      // controller it overrides [Authorize] on the actions too.
      const hasAuthorize = ca.authedAtClass || attrNames.some((n) => AUTH_ATTRS.has(n));
      const allowsAnonymous = ca.anonymousAtClass || attrNames.some((n) => AUTH_SUPPRESSORS.has(n));
      routes.push({
        method: m,
        http,
        path: path || `/${c.name}/${m.name}`,
        requiresAuth: hasAuthorize && !allowsAnonymous,
        line: m.line,
        className: c.name,
        methodName: m.name,
      });
    }
  }
  return { methodFlow, routes, classAuth };
}
279
+
280
+ // Helper queries used by detectors.
281
+
282
// Answers "is `receiver` a variable whose recorded type matches typePattern?".
// A string pattern must equal the recorded type exactly; any other pattern is
// used through its .test() method. Unknown receivers, and receivers whose
// recorded type is empty, never match. `method` is accepted but not consulted.
export function receiverIsType(method, flow, receiver, typePattern) {
  const recordedType = receiver ? flow.typeMap.get(receiver) : undefined;
  if (!recordedType) return false;
  return typeof typePattern === 'string'
    ? recordedType === typePattern
    : typePattern.test(recordedType);
}
290
+
291
// Answers "does this expression carry tainted data?".
//
// Callers should pass a pre-extracted `idents` list (taken from the original
// token slice) so that names appearing inside string literals — e.g. the SQL
// placeholder "@id" — are not mistaken for code references. With only `text`
// the identifiers are recovered by regex, which is fine for short expressions
// but unreliable for text that contains string literals.
//
// When `text` is given: a sanitized expression is clean, and an unsanitized
// source expression is tainted. Otherwise the answer is whether any
// identifier is marked in flow.taintMap.
export function expressionIsTainted(flow, text, idents = null) {
  if (!text && !idents) return false;

  if (text) {
    if (isSanitizedExpr(text)) return false;
    if (isSourceExpr(text)) return true;
  }

  let names;
  if (idents) {
    names = idents;
  } else if (text) {
    names = text.match(/\b[A-Za-z_]\w*\b/g) || [];
  } else {
    names = [];
  }
  for (const name of names) {
    if (flow.taintMap.get(name)) return true;
  }
  return false;
}
307
+
308
// Token-aware taint check for ArgExpr objects. It relies on arg.idents — the
// argument's pre-extracted identifier list, which leaves out string-literal
// contents — so SQL placeholders, message templates and similar text are
// never read as variable references. A sanitized arg.text is clean; a source
// arg.text is tainted; otherwise any tainted identifier taints the argument.
export function argIsTainted(flow, arg) {
  if (!arg) return false;

  const { text, idents } = arg;
  if (text) {
    if (isSanitizedExpr(text)) return false;
    if (isSourceExpr(text)) return true;
  }
  return Array.from(idents || []).some((id) => Boolean(flow.taintMap.get(id)));
}
319
+
320
// Answers "is this interpolated-string token tainted?". Only tokens of kind
// 'interp' are considered; the string is tainted as soon as one of its
// embedded `expr` parts is tainted according to expressionIsTainted.
export function interpStringIsTainted(flow, interpToken) {
  if (interpToken?.kind !== 'interp') return false;
  return Array.from(interpToken.parts || []).some(
    (part) => part.kind === 'expr' && expressionIsTainted(flow, part.text),
  );
}
329
+
330
// Module-private tables re-exported under one object. The same array / object /
// Set instances are exposed, so mutating them changes this module's behaviour.
// NOTE(review): presumably exported for unit tests — confirm.
export const _internals = { TAINT_SOURCE_PATTERNS, SANITIZER_PATTERNS, ROUTE_ATTRS, AUTH_ATTRS, AUTH_SUPPRESSORS };
@@ -0,0 +1,197 @@
1
+ // Dep-add interception — validate a package about to be installed before
2
+ // it lands in node_modules / site-packages / etc.
3
+ //
4
+ // Checks:
5
+ // 1. Is the package known-malicious? (OSV malicious-packages catalog)
6
+ // 2. Is the package yanked / unpublished / withdrawn?
7
+ // 3. Was it published in the last 7 days? (typosquat-attack indicator)
8
+ // 4. Does the name closely match a popular package? (Levenshtein ≤ 2
9
+ // against a curated top-1000 list — typosquat risk)
10
+ // 5. Is the package on the project's SCA-policy.yml deny list?
11
+ //
12
+ // Backed by ~/.claude/agentic-security/osv-cache/ (already populated by
13
+ // the engine's SCA pass) plus a bundled top-popular-packages list
14
+ // from sca/popular-packages.json.
15
+ //
16
+ // Intended caller: hooks/pre-bash-guard.js when it spots `npm install <pkg>`,
17
+ // `yarn add`, `pnpm add`, `pip install`, `cargo add`, `gem install` etc.
18
+
19
+ import * as fs from 'node:fs';
20
+ import * as path from 'node:path';
21
+
22
// Directory of the on-disk OSV cache: <HOME>/.claude/agentic-security/osv-cache.
// NOTE(review): when HOME is unset (typical on Windows) this falls back to
// /tmp, so lookups there will only hit if the cache writer uses the same
// fallback — confirm against the code that populates the cache.
const CACHE = path.join(process.env.HOME || '/tmp', '.claude', 'agentic-security', 'osv-cache');
// Maximum edit distance to a popular package name that counts as a typosquat candidate.
const TYPOSQUAT_LEVENSHTEIN = 2;
// Packages published fewer than this many days ago are flagged as fresh.
const NEW_PACKAGE_WINDOW_DAYS = 7;
25
+
26
/**
 * Read the cached OSV record for a package.
 *
 * The record is expected at <CACHE>/<ecosystem>/<name>.json. `name` comes from
 * parsed shell command lines, so it is untrusted: a value containing `..`
 * segments could otherwise make this read an arbitrary `.json` file outside
 * the cache. Such lookups are rejected.
 *
 * @param {string} ecosystem - cache sub-directory, e.g. 'npm' or 'pypi'
 * @param {string} name - package name (may contain '/', e.g. '@scope/pkg')
 * @returns {object|null} parsed record, or null when the entry is missing,
 *   unreadable, not valid JSON, or resolves outside the cache directory
 */
function _osvLookup(ecosystem, name) {
  const fp = path.join(CACHE, String(ecosystem), `${name}.json`);
  // path.join normalises `..`; make sure the result is still inside CACHE.
  const rel = path.relative(CACHE, fp);
  if (rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) return null;
  // A missing file is the common case and simply means "no data".
  try { return JSON.parse(fs.readFileSync(fp, 'utf8')); } catch { return null; }
}
31
+
32
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions turning `a` into `b`.
 * Computed row by row, keeping only the previous row of the DP table.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
function _levenshtein(a, b) {
  if (a === b) return 0;
  if (a.length === 0 || b.length === 0) return Math.max(a.length, b.length);

  // prev[col] = distance between the first `row - 1` chars of a and the first `col` chars of b.
  let prev = Array.from({ length: b.length + 1 }, (_, col) => col);
  for (let row = 1; row <= a.length; row++) {
    const curr = [row];
    for (let col = 1; col <= b.length; col++) {
      const substitution = prev[col - 1] + (a[row - 1] === b[col - 1] ? 0 : 1);
      const insertion = curr[col - 1] + 1;
      const deletion = prev[col] + 1;
      curr.push(Math.min(insertion, deletion, substitution));
    }
    prev = curr;
  }
  return prev[b.length];
}
53
+
54
/**
 * Load the bundled list of popular package names for one ecosystem from
 * ../sca/popular-packages.json (resolved relative to this module).
 *
 * The file is addressed as a `file:` URL object rather than through
 * `new URL(import.meta.url).pathname`: the pathname is percent-encoded (a
 * space becomes `%20`) and carries a leading slash before the drive letter on
 * Windows, so on such installs the read failed and the typosquat check was
 * silently skipped.
 *
 * @param {string} ecosystem - key in the JSON file, e.g. 'npm'
 * @returns {string[]} the list, or [] when the file or key is missing or malformed
 */
function _loadPopular(ecosystem) {
  try {
    const listUrl = new URL('../sca/popular-packages.json', import.meta.url);
    const all = JSON.parse(fs.readFileSync(listUrl, 'utf8'));
    const list = all[ecosystem];
    return Array.isArray(list) ? list : [];
  } catch {
    // Best effort: without the list the typosquat check is simply not run.
    return [];
  }
}
62
+
63
/**
 * Read the project's deny list from <scanRoot>/.agentic-security/sca-policy.yml.
 *
 * This is a minimal line-based reader, not a YAML parser. It understands a
 * top-level `deny:` key followed by a block list whose items are either
 * `- foo` or `- name: foo`:
 *   - items may be indented or flush with `deny:` (both are valid YAML);
 *   - blank lines and `#` comment lines never end the block;
 *   - a trailing ` # comment` is not part of the name;
 *   - only items at the list's own indent are names — deeper `- ...` lines
 *     (e.g. a per-entry `versions:` list) are ignored;
 *   - the block ends at the next non-indented, non-list line.
 *
 * @param {string} scanRoot - project root directory
 * @returns {{ deny: string[] }} denied names; empty when the file is missing
 *   or unreadable
 */
function _loadPolicy(scanRoot) {
  const fp = path.join(scanRoot, '.agentic-security', 'sca-policy.yml');
  if (!fs.existsSync(fp)) return { deny: [] };
  try {
    const body = fs.readFileSync(fp, 'utf8');
    const names = [];
    let inBlock = false;
    let blockIndent = -1; // indent of the first list item; -1 until seen
    for (const ln of body.split(/\r?\n/)) {
      if (/^deny\s*:/.test(ln)) { inBlock = true; blockIndent = -1; continue; }
      if (!inBlock) continue;

      const trimmed = ln.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;

      const item = ln.match(/^(\s*)-\s+(.*)$/);
      if (!item) {
        // Indented non-list lines are extra keys of the current entry; a line
        // starting at column 0 is the next top-level key and ends the block.
        if (!/^\s/.test(ln)) inBlock = false;
        continue;
      }

      const indent = item[1].length;
      if (blockIndent < 0) blockIndent = indent;
      if (indent < blockIndent) { inBlock = false; continue; }
      if (indent > blockIndent) continue; // nested list inside an entry

      const val = item[2].replace(/\s+#.*$/, '').trim();
      // Two shapes: - name: foo OR - foo
      const nameMatch = val.match(/^name\s*:\s*['"]?([^'"#\s]+)/);
      if (nameMatch) names.push(nameMatch[1]);
      else if (val && !/:/.test(val)) names.push(val.replace(/^['"]|['"]$/g, ''));
    }
    return { deny: names };
  } catch {
    // Best effort: an unreadable policy is treated as an empty deny list.
    return { deny: [] };
  }
}
93
+
94
/**
 * Inspect a single package before install.
 *
 * Checks, in order:
 *   1. the project's sca-policy.yml deny list (only when `scanRoot` is given) → deny
 *   2. the cached OSV record: malicious advisories → deny; withdrawn / yanked → review
 *   3. publication age below NEW_PACKAGE_WINDOW_DAYS → review
 *   4. name within TYPOSQUAT_LEVENSHTEIN edits of a popular package → review
 * A 'deny' is never downgraded by a later check.
 *
 * @param {object} req
 * @param {string} req.ecosystem - e.g. 'npm', 'pypi'
 * @param {string} req.name - package name
 * @param {string} [req.scanRoot] - project root holding .agentic-security/
 * @returns {{ decision: 'allow' | 'review' | 'deny', reasons: string[] }}
 */
export function inspectPackage({ ecosystem, name, scanRoot }) {
  const reasons = [];
  let decision = 'allow';
  const escalateToReview = () => {
    if (decision === 'allow') decision = 'review';
  };

  // 1. Project deny list.
  if (scanRoot) {
    const policy = _loadPolicy(scanRoot);
    if (policy.deny.includes(name)) {
      reasons.push(`Project sca-policy.yml lists ${name} in deny`);
      decision = 'deny';
    }
  }

  // 2. OSV malicious / yanked status from the disk cache.
  const osv = _osvLookup(ecosystem, name);
  if (osv) {
    if (Array.isArray(osv.vulns)) {
      // An advisory is malicious when its id or any alias is a MAL-* id or
      // mentions "malicious". Aliases are checked for MAL-* too, because the
      // primary id of a record may come from a different database.
      const isMalicious = (v) => {
        if (!v) return false;
        const ids = [v.id || '', ...[].concat(v.aliases || [])].map(String);
        return ids.some((id) => /MAL-/.test(id) || /malicious/i.test(id));
      };
      const mal = osv.vulns.filter(isMalicious);
      if (mal.length) {
        reasons.push(`OSV catalog marks ${name} as malicious (${mal.map((v) => v.id).join(', ')})`);
        decision = 'deny';
      }
    }
    if (osv.withdrawn || osv.yanked) {
      reasons.push(`${name} is withdrawn / yanked from registry`);
      escalateToReview();
    }
  }

  // 3. New package (potential typosquat).
  if (osv && osv.published) {
    const ageDays = (Date.now() - new Date(osv.published).getTime()) / 86400000;
    // An unparseable date gives NaN and is skipped. Whole days are floored so
    // the message never reports the window size for a package inside it.
    if (Number.isFinite(ageDays) && ageDays < NEW_PACKAGE_WINDOW_DAYS) {
      reasons.push(`${name} published ${Math.max(0, Math.floor(ageDays))} day(s) ago — fresh-package risk`);
      escalateToReview();
    }
  }

  // 4. Typosquat distance.
  const popular = _loadPopular(ecosystem);
  if (popular.length && typeof name === 'string') {
    const needle = name.toLowerCase();
    let closest = null;
    let isPopularItself = false;
    for (const p of popular) {
      const d = _levenshtein(needle, String(p).toLowerCase());
      if (d === 0) {
        // The requested package IS a popular package. Being close to another
        // popular name ("react" vs "preact") is then not a typosquat signal.
        isPopularItself = true;
        break;
      }
      if (d <= TYPOSQUAT_LEVENSHTEIN && (!closest || d < closest.d)) closest = { p, d };
    }
    if (!isPopularItself && closest) {
      reasons.push(`Name is ${closest.d} edit(s) from popular package "${closest.p}" — typosquat risk`);
      escalateToReview();
    }
  }

  return { decision, reasons };
}
153
+
154
// One entry per supported installer, in the order results are reported.
//   pattern:    matches the install verb; capture group 1 is the argument text
//   valueFlags: flags whose NEXT token is the flag's value, not a package
//   toName:     turns one argument token into a package name ('' = skip)
const _INSTALL_COMMANDS = [
  {
    ecosystem: 'npm',
    // `i` and `add` are documented aliases of `npm install`.
    pattern: /\b(?:npm\s+(?:install|i|add)|yarn\s+add|pnpm\s+add)\s+([\s\S]+)/,
    valueFlags: new Set(['--registry', '--prefix', '--tag', '-w', '--workspace']),
    toName(tok) {
      if (tok.startsWith('@types/')) return ''; // type defs are low risk
      // Drop the version / range / dist-tag. Search from index 1 so the
      // leading '@' of a scoped name is kept.
      const at = tok.indexOf('@', 1);
      return at > 0 ? tok.slice(0, at) : tok;
    },
  },
  {
    ecosystem: 'pypi',
    pattern: /\bpip[\d.]*\s+install\s+([\s\S]+)/,
    valueFlags: new Set([
      '-r', '--requirement', '-c', '--constraint', '-e', '--editable',
      '-t', '--target', '-i', '--index-url', '--extra-index-url',
      '-f', '--find-links', '--prefix', '--root', '--src', '--platform',
      '--python-version', '--implementation', '--abi', '--no-binary',
      '--only-binary', '--progress-bar', '--upgrade-strategy', '--cache-dir',
      '--trusted-host', '--proxy', '--cert', '--log',
    ]),
    toName(tok) {
      // Skip the current directory and direct URL / VCS references. Only real
      // URLs are skipped: a bare `http` prefix test would drop httpx, httpie…
      if (tok === '.' || tok.startsWith('git+') || /^https?:\/\//i.test(tok)) return '';
      // Drop version specifiers and extras: requests[security]>=2 → requests.
      return tok.replace(/[<>=!~\[].*$/, '');
    },
  },
  {
    ecosystem: 'rubygems',
    pattern: /\bgem\s+install\s+([\s\S]+)/,
    valueFlags: new Set([
      '-v', '--version', '-i', '--install-dir', '-n', '--bindir',
      '-s', '--source', '-P', '--trust-policy', '--platform',
    ]),
    toName: (tok) => tok.split(':')[0], // gem install name:version
  },
  {
    ecosystem: 'cargo',
    pattern: /\bcargo\s+add\s+([\s\S]+)/,
    valueFlags: new Set([
      '-F', '--features', '--rename', '-p', '--package', '--manifest-path',
      '--git', '--branch', '--tag', '--rev', '--path', '--registry', '--target',
    ]),
    toName: (tok) => tok.split('@')[0],
  },
  {
    ecosystem: 'golang',
    pattern: /\bgo\s+get\s+([\s\S]+)/,
    valueFlags: new Set(),
    toName: (tok) => tok.split('@')[0],
  },
];

// Split installer argument text into package tokens: surrounding quotes are
// removed, flags are dropped, and so is the value following a value-taking flag.
function _packageTokens(argText, valueFlags) {
  const tokens = [];
  let skipValue = false;
  for (const raw of argText.split(/\s+/)) {
    const tok = raw.replace(/^['"]+|['"]+$/g, '');
    if (!tok) continue;
    if (skipValue) { skipValue = false; continue; }
    if (tok.startsWith('-')) { skipValue = valueFlags.has(tok); continue; }
    tokens.push(tok);
  }
  return tokens;
}

/**
 * Parse a shell command line to extract install requests.
 *
 * The line is split on the shell separators `|`, `;` and `&`, and EVERY
 * segment is inspected, so `npm install a && npm install b` reports both
 * packages. Results are grouped by ecosystem in the order npm, pypi,
 * rubygems, cargo, golang. Quoting is handled only by stripping quotes around
 * a token; this is a guard heuristic, not a shell parser.
 *
 * @param {string} cmdline - raw command line
 * @returns {Array<{ ecosystem: string, name: string }>} one entry for every
 *   package that would be installed; [] for empty or non-string input
 */
export function parseInstallCommand(cmdline) {
  if (typeof cmdline !== 'string' || !cmdline) return [];
  const segments = cmdline.split(/[|;&]+/);
  const reqs = [];
  for (const { ecosystem, pattern, valueFlags, toName } of _INSTALL_COMMANDS) {
    for (const segment of segments) {
      const found = segment.match(pattern);
      if (!found) continue;
      for (const tok of _packageTokens(found[1], valueFlags)) {
        const name = toName(tok);
        if (name) reqs.push({ ecosystem, name });
      }
    }
  }
  return reqs;
}
196
+
197
// Module-private helpers re-exported under one object.
// NOTE(review): presumably exported so tests can call the helpers directly — confirm.
export const _internals = { _levenshtein, _osvLookup, _loadPopular, _loadPolicy };