@clear-capabilities/agentic-security-scanner 0.79.0 → 0.84.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/dist/178.index.js +1 -1
  2. package/dist/333.index.js +283 -0
  3. package/dist/384.index.js +1 -1
  4. package/dist/637.index.js +1 -1
  5. package/dist/838.index.js +1 -1
  6. package/dist/839.index.js +170 -0
  7. package/dist/985.index.js +140 -1
  8. package/dist/agentic-security.mjs +10 -10
  9. package/dist/agentic-security.mjs.sha256 +1 -1
  10. package/package.json +7 -5
  11. package/src/.agentic-security/findings.json +117732 -0
  12. package/src/.agentic-security/last-scan.json +117732 -0
  13. package/src/.agentic-security/last-scan.json.sig +1 -0
  14. package/src/.agentic-security/scan-history.json +12946 -0
  15. package/src/.agentic-security/streak.json +21 -0
  16. package/src/dataflow/.agentic-security/findings.json +6086 -0
  17. package/src/dataflow/.agentic-security/last-scan.json +6086 -0
  18. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  19. package/src/dataflow/.agentic-security/scan-history.json +250 -0
  20. package/src/dataflow/.agentic-security/streak.json +21 -0
  21. package/src/dataflow/cross-service-taint.js +201 -0
  22. package/src/dataflow/formal-verify.js +204 -0
  23. package/src/dataflow/ifds-precise.js +222 -0
  24. package/src/dataflow/k2-summary-cache.js +153 -0
  25. package/src/dataflow/lib-taint-summaries.js +198 -0
  26. package/src/dataflow/privacy-taint.js +205 -0
  27. package/src/dataflow/smt-feasibility.js +189 -0
  28. package/src/engine.js +825 -127
  29. package/src/ir/.agentic-security/findings.json +4011 -0
  30. package/src/ir/.agentic-security/last-scan.json +4011 -0
  31. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  32. package/src/ir/.agentic-security/scan-history.json +193 -0
  33. package/src/ir/.agentic-security/streak.json +20 -0
  34. package/src/ir/cpp-preprocessor.js +142 -0
  35. package/src/ir/csharp-ir.js +604 -0
  36. package/src/ir/universal-ir.js +403 -0
  37. package/src/mcp/.agentic-security/findings.json +8632 -0
  38. package/src/mcp/.agentic-security/last-scan.json +8632 -0
  39. package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
  40. package/src/mcp/.agentic-security/scan-history.json +331 -0
  41. package/src/mcp/.agentic-security/streak.json +20 -0
  42. package/src/mcp/tools.js +140 -1
  43. package/src/posture/.agentic-security/findings.json +77181 -0
  44. package/src/posture/.agentic-security/last-scan.json +77181 -0
  45. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  46. package/src/posture/.agentic-security/scan-history.json +8904 -0
  47. package/src/posture/.agentic-security/streak.json +21 -0
  48. package/src/posture/api-contract.js +193 -0
  49. package/src/posture/attack-taxonomy.js +227 -0
  50. package/src/posture/auditor-walkthrough.js +252 -0
  51. package/src/posture/claude-authorship.js +197 -0
  52. package/src/posture/compliance-frameworks/.agentic-security/findings.json +80 -0
  53. package/src/posture/compliance-frameworks/.agentic-security/last-scan.json +80 -0
  54. package/src/posture/compliance-frameworks/.agentic-security/last-scan.json.sig +1 -0
  55. package/src/posture/compliance-frameworks/.agentic-security/scan-history.json +90 -0
  56. package/src/posture/compliance-frameworks/.agentic-security/streak.json +22 -0
  57. package/src/posture/compliance-frameworks/ccpa.json +32 -0
  58. package/src/posture/compliance-frameworks/eu-ai-act.json +51 -0
  59. package/src/posture/compliance-frameworks/gdpr.json +45 -0
  60. package/src/posture/compliance-frameworks/hipaa-security-rule.json +56 -0
  61. package/src/posture/compliance-frameworks/nist-ai-600-1.json +51 -0
  62. package/src/posture/compliance-frameworks/nist-csf-2.json +73 -0
  63. package/src/posture/compliance-frameworks/owasp-asvs-5.json +79 -0
  64. package/src/posture/compliance-frameworks/owasp-llm-top-10.json +69 -0
  65. package/src/posture/compliance-policy.js +218 -0
  66. package/src/posture/composite-risk.js +122 -0
  67. package/src/posture/cross-repo-memory.js +180 -0
  68. package/src/posture/csharp-analysis.js +330 -0
  69. package/src/posture/dep-add-guard.js +197 -0
  70. package/src/posture/exploit-bundle.js +210 -0
  71. package/src/posture/federated-learning.js +172 -0
  72. package/src/posture/findings-memory.js +152 -0
  73. package/src/posture/fix-style-mirror.js +118 -0
  74. package/src/posture/git-history.js +141 -0
  75. package/src/posture/intent-context.js +175 -0
  76. package/src/posture/license-attributions.js +94 -0
  77. package/src/posture/license-graph.js +238 -0
  78. package/src/posture/model-rescan.js +76 -0
  79. package/src/posture/pattern-propagation.js +39 -0
  80. package/src/posture/pqc-migration-plan.js +158 -0
  81. package/src/posture/pr-augment.js +234 -0
  82. package/src/posture/reachability-filter.js +33 -2
  83. package/src/posture/realtime-cve-monitor.js +214 -0
  84. package/src/posture/risk-dollars.js +158 -0
  85. package/src/posture/runtime-correlation.js +174 -0
  86. package/src/posture/sbom-diff.js +171 -0
  87. package/src/posture/sca-policy.js +235 -0
  88. package/src/posture/sca-upgrade.js +259 -0
  89. package/src/posture/threat-model-auto.js +268 -0
  90. package/src/posture/threat-model-grounding.js +169 -0
  91. package/src/posture/time-to-fix.js +129 -0
  92. package/src/posture/triage-learning.js +170 -0
  93. package/src/posture/triage-memory.js +151 -0
  94. package/src/posture/triage.js +40 -1
  95. package/src/posture/watch-mode.js +171 -0
  96. package/src/posture/workflow-installer.js +231 -0
  97. package/src/sast/.agentic-security/findings.json +6154 -0
  98. package/src/sast/.agentic-security/last-scan.json +6154 -0
  99. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  100. package/src/sast/.agentic-security/scan-history.json +941 -0
  101. package/src/sast/.agentic-security/streak.json +22 -0
  102. package/src/sast/_secret-entropy.js +145 -0
  103. package/src/sast/cloud-iam.js +312 -0
  104. package/src/sast/cpp.js +138 -4
  105. package/src/sast/crypto-protocol.js +388 -0
  106. package/src/sast/csharp-tokenizer.js +392 -0
  107. package/src/sast/csharp.js +924 -138
  108. package/src/sast/dapp-frontend.js +200 -0
  109. package/src/sast/k8s-admission.js +271 -0
  110. package/src/sast/llm-app.js +272 -0
  111. package/src/sast/ml-supply-chain.js +259 -0
  112. package/src/sast/mobile.js +224 -0
  113. package/src/sast/post-quantum-crypto.js +348 -0
  114. package/src/sast/web3-advanced.js +375 -0
  115. package/src/sca/.agentic-security/findings.json +7460 -0
  116. package/src/sca/.agentic-security/last-scan.json +7460 -0
  117. package/src/sca/.agentic-security/last-scan.json.sig +1 -0
  118. package/src/sca/.agentic-security/scan-history.json +113 -0
  119. package/src/sca/.agentic-security/streak.json +21 -0
  120. package/src/sca/CLAUDE.md +161 -0
  121. package/src/sca/binary-metadata.js +37 -15
  122. package/src/sca/sigstore-verify.js +215 -0
@@ -0,0 +1,198 @@
1
+ // Library taint summaries — Recommendation #5 of the SCA/SAST plan.
2
+ //
3
+ // Hand-curated knowledge that "this library method returns tainted data" or
4
+ // "this method propagates taint from arg N to its return." Used by the
5
+ // existing dataflow engine + per-language detectors when classifying the
6
+ // taint state of a declaration's rhs.
7
+ //
8
+ // The summaries are intentionally per-language because the same concept
9
+ // (a user-input source) has different idioms in each ecosystem. Each entry:
10
+ //
11
+ // { pattern: RegExp, kind: 'source' | 'sanitizer' | 'passthrough',
12
+ // framework: 'spring' | 'aspnet' | 'glibc' | … }
13
+ //
14
+ // Kinds:
15
+ // source — return value is unconditionally tainted
16
+ // sanitizer — return value is unconditionally clean, even if any arg
17
+ // was tainted (e.g. HtmlEncode, parameterized prepare)
18
+ // passthrough — return value is tainted iff arg N is tainted (taint
19
+ // flows through). Not modelled in v1; reserved for future
20
+ // inter-procedural extensions (Recommendation #9).
21
+ //
22
+ // Usage: detectors call `isLibrarySource(text, langOrFile)` and
23
+ // `isLibrarySanitizer(text, langOrFile)` to refine their per-call decisions.
24
+
25
// Java taint summaries.
//   sources    — patterns whose presence marks the text as carrying untrusted data
//   sanitizers — patterns whose presence marks the text as cleaned
// Every pattern is flag-less (no `g`/`y`), so `.test()` keeps no state between calls.
const JAVA_SUMMARIES = {
  sources: [
    // Servlet API — every request-scoped getter is a user-input source.
    /\bHttpServletRequest\b[\s\S]{0,2000}?\.\s*(?:getParameter(?:Values|Map)?|getQueryString|getHeader(?:Names)?|getInputStream|getReader|getCookies?|getRequestURI|getRequestURL|getPathInfo)\s*\(/,
    /\bjavax\.servlet\.http\.HttpServletRequest\b/,
    // Spring MVC — controller method annotations bind to request data.
    /@RequestParam\b/,
    /@RequestBody\b/,
    /@PathVariable\b/,
    /@RequestHeader\b/,
    /@CookieValue\b/,
    /@ModelAttribute\b/,
    // Spring Security — the principal is user-controlled in the trust sense
    // (it identifies WHO the request is from; not auto-sanitized).
    /\bSecurityContextHolder\s*\.\s*getContext\s*\(\s*\)\s*\.\s*getAuthentication\s*\(\s*\)/,
    // Java Files API — file content is untrusted when source is unknown.
    /\bFiles\s*\.\s*(?:readString|readAllBytes|readAllLines|lines|newBufferedReader|newInputStream)\b/,
    /\bPaths\s*\.\s*get\s*\([^)]*(?:System\.getProperty|args)\b/,
    // BufferedReader / Scanner reading user input.
    /\bBufferedReader\b[\s\S]{0,500}?\.\s*readLine\s*\(/,
    /\bScanner\b[\s\S]{0,500}?\.\s*(?:next(?:Line)?|nextInt|nextLong)\s*\(/,
    // System.getenv / System.getProperty — environment is configurable.
    /\bSystem\s*\.\s*(?:getenv|getProperty)\s*\(/,
    // Jackson — deserialization input is untrusted.
    /\bObjectMapper\b[\s\S]{0,500}?\.\s*readValue\s*\(/,
    /\bJsonParser\b[\s\S]{0,500}?\.\s*getValueAsString\s*\(/,
    // Apache Commons IO.
    /\bIOUtils\s*\.\s*toString\s*\(/,
    /\bFileUtils\s*\.\s*readFileToString\s*\(/,
    // Spring WebFlux ServerWebExchange.
    /\bServerWebExchange\b[\s\S]{0,500}?\.\s*getRequest\s*\(/,
  ],
  sanitizers: [
    /\bOWASP\.Encoder\b/,
    /\bESAPI\b[\s\S]{0,200}?\.\s*encoder\s*\(\s*\)/,
    /\bStringEscapeUtils\s*\.\s*escape(?:Html\d?|Xml|Sql|Java|JavaScript)\b/,
    /\bHtmlUtils\s*\.\s*htmlEscape\b/,
    /\bUriUtils\s*\.\s*encode\b/,
    // JDBC PreparedStatement parameter setters — taint is cleaned at bind.
    /\bPreparedStatement\b[\s\S]{0,500}?\.\s*set(?:String|Int|Long|Object|BigDecimal|Date|Timestamp)\s*\(/,
    /\bNamedParameterJdbcTemplate\b[\s\S]{0,500}?\.\s*(?:query|update|queryForObject)\s*\([^,]+,\s*new\s+MapSqlParameterSource\b/,
    // Java validators.
    /\bjakarta\.validation\b/,
    /\bjavax\.validation\b/,
    // No leading `\b`: `@` is not a word character, so `\b@` only matches when
    // a word character precedes the `@` — never for `(@Valid Foo foo)` or an
    // annotation at the start of a line.
    /@Valid\b/,
  ],
};
72
+
73
// C# / .NET taint summaries.
//   sources    — patterns whose presence marks the text as carrying untrusted data
//   sanitizers — patterns whose presence marks the text as cleaned
// Every pattern is flag-less (no `g`/`y`), so `.test()` keeps no state between calls.
const CSHARP_SUMMARIES = {
  sources: [
    // ASP.NET request surfaces.
    /\bHttpRequest\b[\s\S]{0,500}?\.\s*(?:Query|Form|Headers|Cookies|RouteValues|Body|InputStream|QueryString|Params|Path|Url)\b/,
    /\bHttpContext\s*\.\s*Request\b/,
    /\bIFormCollection\b/,
    /\bIFormFile\b/,
    /\bIFormFileCollection\b/,
    // ASP.NET Core model binding. The attribute name is closed with `\b`
    // rather than a literal `]` so the argument form — e.g.
    // `[FromQuery(Name = "q")]` — is recognised as well as the bare form.
    /\[\s*FromQuery\b/,
    /\[\s*FromBody\b/,
    /\[\s*FromForm\b/,
    /\[\s*FromRoute\b/,
    /\[\s*FromHeader\b/,
    // Configuration may carry secrets but the VALUES are environment-supplied.
    /\bIConfiguration\b[\s\S]{0,500}?\.\s*(?:GetSection|GetValue|GetConnectionString|GetChildren)\s*\(/,
    // Newtonsoft.Json / System.Text.Json deserialization. Accept both the
    // generic call (`Deserialize<T>(`) and the non-generic call (`Deserialize(`).
    /\bJsonConvert\s*\.\s*Deserialize(?:Object|XmlNode)\s*[<(]/,
    /\bJsonSerializer\s*\.\s*Deserialize\s*[<(]/,
    // Files / streams.
    /\bFile\s*\.\s*(?:ReadAllText|ReadAllLines|ReadAllBytes|OpenRead|OpenText)\s*\(/,
    /\bStreamReader\b[\s\S]{0,500}?\.\s*(?:ReadLine|ReadToEnd|Read)\s*\(/,
    /\bBinaryReader\b[\s\S]{0,500}?\.\s*Read(?:String|Bytes|Char|Int32|Int64|UInt32|UInt64)\s*\(/,
    // Network reads.
    /\bWebClient\b[\s\S]{0,500}?\.\s*Download(?:String|Data|File)\s*\(/,
    /\bHttpClient\b[\s\S]{0,500}?\.\s*(?:GetAsync|GetStringAsync|PostAsync|SendAsync)\s*\(/,
    // Environment + console.
    /\bEnvironment\s*\.\s*GetEnvironmentVariable\s*\(/,
    /\bConsole\s*\.\s*ReadLine\s*\(/,
  ],
  sanitizers: [
    /\bHttpUtility\s*\.\s*HtmlEncode\b/,
    /\bHtmlEncoder\s*\.\s*Default\s*\.\s*Encode\b/,
    /\bAntiXssEncoder\b/,
    /\bSqlParameter\b/,
    /\bMySqlParameter\b/,
    /\bNpgsqlParameter\b/,
    // EF Core parameterized helpers.
    /\bFromSqlInterpolated\s*\(/,
    // Validation.
    /\bint\s*\.\s*TryParse\s*\(/,
    /\bGuid\s*\.\s*TryParse\s*\(/,
    /\bDateTime\s*\.\s*TryParse\s*\(/,
    /\bRegex\s*\.\s*Replace\s*\(/,
  ],
};
119
+
120
// C / C++ taint summaries (this table is also registered under the `c` key).
// Most entries have the shape "word boundary, function name, optional
// whitespace, open paren"; those are generated by `call()` from the name alone.
// The few irregular patterns stay as regex literals.
const CPP_SUMMARIES = (() => {
  // `name` is spliced in as a regex fragment, so it may itself contain regex
  // syntax (e.g. an optional `[AW]?` suffix).
  const call = (name) => new RegExp(`\\b${name}\\s*\\(`);

  const sources = [
    // POSIX — environment + user input.
    call('getenv'),
    call('secure_getenv'),
    /\bargv\s*\[/,
    call('gets'),
    call('fgets'),
    call('scanf'),
    call('fscanf'),
    call('getc'),
    call('fgetc'),
    /\bread\s*\(\s*\d+/, // unistd read(fd, ...)
    call('recv'),
    call('recvfrom'),
    // OpenSSL / network.
    call('BIO_read'),
    call('SSL_read'),
    // Win32 input.
    call('GetCommandLine[AW]?'),
    call('GetEnvironmentVariable[AW]?'),
    // Standard streams.
    /\bstd\s*::\s*cin\s*>>/,
    /\bstd\s*::\s*getline\s*\(\s*std\s*::\s*cin\b/,
  ];

  const sanitizers = [
    // Length-checked copies (best-effort).
    /\bstrncpy\s*\(\s*[^,]+,\s*[^,]+,\s*sizeof\s*\(/,
    /\bsnprintf\s*\(\s*[^,]+,\s*sizeof\s*\(/,
    call('isdigit'),
    call('isalpha'),
    call('isalnum'),
    call('strtol'),
    call('strtoul'),
  ];

  return { sources, sanitizers };
})();
156
+
157
// Language key → summary table. Key insertion order is java, csharp, cpp, c;
// the `cpp` and `c` keys point at the very same table object.
const SUMMARIES_BY_LANG = { java: JAVA_SUMMARIES, csharp: CSHARP_SUMMARIES };
for (const key of ['cpp', 'c']) SUMMARIES_BY_LANG[key] = CPP_SUMMARIES;
163
+
164
// Resolve the language key for a lookup. A truthy `hint` is returned as-is;
// otherwise the key is inferred from the extension of `file`
// (case-insensitive). Yields null when there is no hint and either no file or
// an unrecognised extension.
function _langOf(hint, file) {
  if (hint) return hint;
  if (!file) return null;
  // Checked top to bottom; the first matching extension wins.
  const byExtension = [
    [/\.java$/i, 'java'],
    [/\.cs$/i, 'csharp'],
    [/\.(?:c|cc|cpp|cxx|h|hh|hpp)$/i, 'cpp'],
  ];
  const found = byExtension.find(([extension]) => extension.test(file));
  return found ? found[1] : null;
}
173
+
174
/**
 * Returns true if `text` contains a library-source pattern for the language.
 *
 * @param {string} text - Code text to test against the source patterns.
 * @param {string} [langOrFile] - Either a language key (`java`, `csharp`,
 *   `cpp`, `c`) or a file path. A string containing `.` is treated as a path
 *   and the language is inferred from its extension.
 * @returns {boolean} False for empty text or an unknown language.
 */
export function isLibrarySource(text, langOrFile) {
  if (!text) return false;
  const looksLikePath = typeof langOrFile === 'string' && langOrFile.includes('.');
  const lang = _langOf(looksLikePath ? null : langOrFile, langOrFile);
  // Own-property check: a plain `SUMMARIES_BY_LANG[lang]` lookup would resolve
  // inherited keys such as `constructor` or `toString` to a truthy value and
  // then crash on its missing `sources` list.
  if (!Object.prototype.hasOwnProperty.call(SUMMARIES_BY_LANG, lang)) return false;
  return SUMMARIES_BY_LANG[lang].sources.some((re) => re.test(text));
}
185
+
186
/**
 * Returns true if `text` contains a library-sanitizer pattern for the language.
 *
 * @param {string} text - Code text to test against the sanitizer patterns.
 * @param {string} [langOrFile] - Either a language key (`java`, `csharp`,
 *   `cpp`, `c`) or a file path. A string containing `.` is treated as a path
 *   and the language is inferred from its extension.
 * @returns {boolean} False for empty text or an unknown language.
 */
export function isLibrarySanitizer(text, langOrFile) {
  if (!text) return false;
  const looksLikePath = typeof langOrFile === 'string' && langOrFile.includes('.');
  const lang = _langOf(looksLikePath ? null : langOrFile, langOrFile);
  // Own-property check: a plain `SUMMARIES_BY_LANG[lang]` lookup would resolve
  // inherited keys such as `constructor` or `toString` to a truthy value and
  // then crash on its missing `sanitizers` list.
  if (!Object.prototype.hasOwnProperty.call(SUMMARIES_BY_LANG, lang)) return false;
  return SUMMARIES_BY_LANG[lang].sanitizers.some((re) => re.test(text));
}
197
+
198
// Exposes the otherwise module-private summary tables
// (presumably for tests — confirm against callers).
export const _internals = {
  JAVA_SUMMARIES,
  CSHARP_SUMMARIES,
  CPP_SUMMARIES,
  SUMMARIES_BY_LANG,
};
@@ -0,0 +1,205 @@
1
+ // Privacy / PII data-flow tracking — Recommendation #9 of the
2
+ // world-class roadmap.
3
+ //
4
+ // Runs the existing taint engine with a different lattice (PII / PHI /
5
+ // PCI / FIN classes, instead of security taint) to track where each
6
+ // regulated-data class flows through a codebase. Outputs:
7
+ //
8
+ // 1. Per-field PII classification — `user.email: PII (CWE-359 Information
9
+ // Disclosure if reflected)`
10
+ // 2. Data flow diagrams — exit points (sinks) per PII class — where
11
+ // regulated data leaves the application (response body, log file,
12
+ // third-party API call, S3 upload, etc.)
13
+ // 3. Auto-generated DPIA stub for GDPR Art. 35 / CCPA §1798.130 /
14
+ // HIPAA §164.530 — a compliance artifact the customer's privacy
15
+ // counsel can use
16
+ // 4. Findings: each "PII leaves system via untrusted sink" emits a
17
+ // privacy finding with family `pii-exposure`
18
+ //
19
+ // The PII detection is deterministic and field-name based. We DO NOT
20
+ // attempt content classification (Luhn-checking actual values would
21
+ // only catch leaks that have already happened); we classify by NAME
22
+ // + TYPE in declarations.
23
+
24
+ // PII / PHI / PCI / FIN classifiers — each is a regex against
25
+ // field/variable/column names. Same idea as the existing classifyField
26
+ // helpers in engine.js but enumerated for compliance reporting.
27
+
28
// Regulated-data buckets → name patterns. Sources are listed as raw strings
// and compiled case-insensitively below; bucket order (PII, PHI, PCI, FIN) and
// pattern order within each bucket are significant to callers that iterate.
//
// NOTE(review): `\b` treats `_` and letters alike as word characters, so a
// pattern such as `\bemail\b` matches `email` and `user.email` but not
// `user_email` or `userEmail`. Confirm that is the intended recall.
const PII_PATTERNS = Object.fromEntries(
  Object.entries({
    PII: [
      String.raw`\bfirst[_-]?name\b`,
      String.raw`\blast[_-]?name\b`,
      String.raw`\bfull[_-]?name\b`,
      String.raw`\bemail([_-]?address)?\b`,
      String.raw`\bphone([_-]?number)?\b`,
      String.raw`\bmobile\b`,
      String.raw`\baddress(?:_?(?:line|street|city|zip|postal))?\b`,
      String.raw`\bdob\b`,
      String.raw`\bdate[_-]?of[_-]?birth\b`,
      String.raw`\bbirthday\b`,
      String.raw`\bbirthdate\b`,
      String.raw`\bage\b`,
      String.raw`\bgender\b`,
      String.raw`\bethnicity\b`,
      String.raw`\brace\b`,
      String.raw`\bnationality\b`,
      String.raw`\bssn\b`,
      String.raw`\bsocial[_-]?security`,
      String.raw`\bnational[_-]?id`,
      String.raw`\bpassport\b`,
      String.raw`\bdriver[_-]?license\b`,
      String.raw`\btax[_-]?id\b`,
      String.raw`\bgovernment[_-]?id\b`,
      String.raw`\bip[_-]?address\b`,
      String.raw`\bgeo[_-]?location\b`,
      String.raw`\blatitude\b`,
      String.raw`\blongitude\b`,
    ],
    PHI: [
      String.raw`\b(?:medical|patient|health)[_-]?record\b`,
      String.raw`\bdiagnosis\b`,
      String.raw`\bcondition\b`,
      String.raw`\bsymptom\b`,
      String.raw`\btreatment\b`,
      String.raw`\bmedication\b`,
      String.raw`\bprescription\b`,
      String.raw`\bdosage\b`,
      String.raw`\bicd[_-]?(?:9|10|11)\b`,
      String.raw`\bcpt[_-]?code\b`,
      String.raw`\bmrn\b`,
      String.raw`\bmedical[_-]?record[_-]?number\b`,
      String.raw`\bdoctor[_-]?name\b`,
      String.raw`\bphysician\b`,
      String.raw`\binsurance[_-]?id\b`,
      String.raw`\bhealth[_-]?plan\b`,
    ],
    PCI: [
      String.raw`\bcredit[_-]?card[_-]?(?:number|num|no)?\b`,
      String.raw`\bcard[_-]?(?:number|num|no)\b`,
      String.raw`\b(?:cvc|cvv)2?\b`,
      String.raw`\bcvc[_-]?code\b`,
      String.raw`\bexp(?:iry|iration)?(?:_?date)?\b`,
      String.raw`\bcardholder[_-]?name\b`,
      String.raw`\bpan\b`,
      String.raw`\biban\b`,
      String.raw`\brouting[_-]?number\b`,
      String.raw`\baccount[_-]?number\b`,
    ],
    FIN: [
      String.raw`\bsalary\b`,
      String.raw`\bincome\b`,
      String.raw`\bbalance\b`,
      String.raw`\btransaction[_-]?amount\b`,
      String.raw`\bbank[_-]?account\b`,
      String.raw`\bcredit[_-]?score\b`,
      String.raw`\bnet[_-]?worth\b`,
    ],
  }).map(([bucket, sources]) => [bucket, sources.map((source) => new RegExp(source, 'i'))]),
);
62
+
63
// Sink label → pattern for an outbound-data call expression. All patterns are
// case-insensitive and flag-stateless. Key order matters to any consumer that
// returns the first matching label.
const SINK_PATTERNS = {
  log: /\b(?:log|logger|console|System\.out|System\.err|stdout|stderr|fmt\.Print|print)\b/i,
  response: /\b(?:res|response|ctx\.response|HttpContext\.Response)\s*\.\s*(?:write|send|json|render|body)\b/i,
  outboundHttp: /\bfetch\b(?:$|[(\s.])|\b(?:axios|got|httpClient|HttpClient|WebClient|requests|node_fetch)\s*(?:\.\s*(?:get|post|put|delete|send|invoke|patch|head)|\()/i,
  // The method alternatives are grouped so that all of them require the
  // `<vendor>.` prefix. Ungrouped, `…\.track|identify|capture\b` parses as
  // three top-level alternatives and a bare `identify` anywhere in the
  // expression (e.g. `identifyDuplicates`) would be labelled a third-party sink.
  thirdPartySdk: /\b(?:stripe|sentry|datadog|segment|amplitude|mixpanel|posthog|braze|intercom)\s*\.\s*(?:track|identify|capture)\b/i,
  // `(?:Sync)?` is needed because the trailing `\b` otherwise rejects
  // `fs.writeFileSync` (there is no word boundary between `writeFile` and `Sync`).
  fileWrite: /\b(?:fs\.writeFile(?:Sync)?|File\.WriteAllText|File\.AppendAllText|open\([^)]*,\s*['"]w)\b/i,
  s3Upload: /\b(?:s3|S3Client|aws\.S3)\s*\.\s*putObject\b/i,
  emailSend: /\b(?:nodemailer|sendMail|SendGrid|sendgrid|smtp)\b/i,
};
72
+
73
/**
 * Map a field/variable name to the regulated-data buckets it belongs to.
 *
 * @param {string} name - Identifier to classify.
 * @returns {string[]} Bucket labels in PII_PATTERNS key order; each bucket
 *   appears at most once. Empty when `name` is falsy or nothing matches.
 */
export function classifyField(name) {
  if (!name) return [];
  return Object.entries(PII_PATTERNS)
    .filter(([, patterns]) => patterns.some((pattern) => pattern.test(name)))
    .map(([bucket]) => bucket);
}
87
+
88
/**
 * Label an outbound-data sink expression.
 *
 * @param {string} expr - Callee / call-path text to classify.
 * @returns {string|null} The key of the first SINK_PATTERNS entry (in
 *   declaration order) whose pattern matches, or null when `expr` is falsy or
 *   nothing matches.
 */
export function classifySink(expr) {
  if (!expr) return null;
  const firstHit = Object.entries(SINK_PATTERNS).find(([, pattern]) => pattern.test(expr));
  return firstHit ? firstHit[0] : null;
}
97
+
98
/**
 * Run a privacy-taint pass over the per-file IR. Every declaration whose name
 * classifies as PII/PHI/PCI/FIN is recorded; a finding is emitted for each
 * call that classifies as a sink and whose argument text mentions one of
 * those names as a whole word.
 *
 * @param {Map<string, object>|Object<string, object>} perFileIR - File path →
 *   IR. Each IR is read for `_content` (file text), `decls` (`name`, `line`,
 *   `type`) and `calls` (`fullPath` / `callee`, `args[].text`, `line`). Files
 *   without `_content` are skipped.
 * @returns {{findings: object[], piiFields: object[]}}
 */
export function annotatePrivacyTaint(perFileIR) {
  if (!perFileIR) return { findings: [], piiFields: [] };
  const findings = [];
  const piiFields = [];
  const entries = perFileIR instanceof Map ? perFileIR : Object.entries(perFileIR);
  for (const [filePath, ir] of entries) {
    if (!ir || !ir._content) continue;
    const lines = ir._content.split('\n');

    // Step 1: collect PII-classified decls (name → array of bucket labels).
    const taintedVars = new Map();
    for (const d of ir.decls || []) {
      const classes = classifyField(d.name);
      if (classes.length === 0) continue;
      taintedVars.set(d.name, classes);
      piiFields.push({ file: filePath, line: d.line, name: d.name, classes, declaredType: d.type || null });
    }
    if (taintedVars.size === 0) continue;

    // Build one whole-word matcher per tainted name, once per file rather than
    // once per call. Every regex metacharacter is escaped: leaving out `*`,
    // `?`, `[` or `]` lets a declaration name either throw in `new RegExp`
    // (unterminated class) or match text it should not.
    const matchers = [...taintedVars].map(([name, classes]) => {
      const escaped = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return { classes, pattern: new RegExp(`\\b${escaped}\\b`) };
    });

    // Step 2: walk calls looking for a PII variable reaching a sink.
    for (const call of ir.calls || []) {
      const sinkLabel = classifySink(call.fullPath || call.callee || '');
      if (!sinkLabel) continue;
      const argText = (call.args || []).map((a) => (a && a.text) || '').join(',');
      for (const { classes, pattern } of matchers) {
        if (!pattern.test(argText)) continue;
        findings.push({
          family: 'pii-exposure',
          subfamily: classes.join('+'),
          file: filePath, line: call.line,
          severity: classes.includes('PCI') || classes.includes('PHI') ? 'high' : 'medium',
          cwe: 'CWE-359', // Exposure of Private Personal Information
          vuln: `Privacy — ${classes.join('+')} data flows to ${sinkLabel} sink`,
          snippet: (lines[call.line - 1] || '').trim().slice(0, 200),
          remediation: `${classes.join(' + ')} data must not flow to ${sinkLabel} unencrypted. Mask, redact, or hash the value before logging / responding / sending to third parties.`,
          piiClass: classes,
          sinkKind: sinkLabel,
        });
      }
    }
  }
  return { findings, piiFields };
}
145
+
146
/**
 * Build a DPIA (Data Protection Impact Assessment) Markdown scaffold
 * summarizing the privacy posture for compliance reporting. The text is
 * returned; this function does not write any file itself.
 *
 * @param {object[]} piiFields - Classified fields (`name`, `file`, `line`,
 *   `classes`, `declaredType`). A non-array value is treated as empty.
 * @param {object[]} findings - Privacy findings (`severity`, `file`, `line`,
 *   `piiClass`, `sinkKind`, `vuln`). A non-array value is treated as empty.
 * @param {object} [opts] - Currently unused; kept for interface stability.
 * @returns {string} Markdown document. Lists at most 20 fields per class and
 *   50 findings, with an "… and N more" line for the remainder.
 */
export function emitDpiaArtifact(piiFields, findings, opts = {}) {
  const fieldList = Array.isArray(piiFields) ? piiFields : [];
  const findingList = Array.isArray(findings) ? findings : [];
  // A raw `|` or newline inside a cell would split the Markdown table row.
  const cell = (value) => String(value).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

  // Class label → fields, in first-seen order.
  const grouped = new Map();
  for (const field of fieldList) {
    for (const cls of (field && field.classes) || []) {
      let g = grouped.get(cls);
      if (!g) { g = []; grouped.set(cls, g); }
      g.push(field);
    }
  }

  const lines = [];
  lines.push(`# Data Protection Impact Assessment (DPIA)`);
  lines.push('');
  lines.push(`Generated by agentic-security scanner on ${new Date().toISOString().slice(0, 10)}.`);
  lines.push('');
  lines.push(`This is an automated DPIA scaffold derived from static analysis.`);
  lines.push(`It must be reviewed and completed by a privacy officer before use.`);
  lines.push('');
  lines.push(`## Data classes identified`);
  lines.push('');
  for (const [cls, fields] of grouped) {
    lines.push(`### ${cls} (${fields.length} fields)`);
    lines.push('');
    for (const f of fields.slice(0, 20)) {
      lines.push(`- \`${f.name}\` in \`${f.file}:${f.line}\` (type: ${f.declaredType || 'unknown'})`);
    }
    if (fields.length > 20) lines.push(`- … and ${fields.length - 20} more`);
    lines.push('');
  }
  lines.push(`## Privacy-related findings`);
  lines.push('');
  lines.push(`| Severity | File:Line | Class → Sink | Description |`);
  lines.push(`|---|---|---|---|`);
  for (const f of findingList.slice(0, 50)) {
    // Tolerate findings from other producers that lack a `piiClass` array.
    let classLabel = 'unknown';
    if (Array.isArray(f.piiClass)) classLabel = f.piiClass.join('+');
    else if (f.piiClass !== undefined && f.piiClass !== null) classLabel = String(f.piiClass);
    lines.push(`| ${cell(f.severity)} | ${cell(f.file)}:${cell(f.line)} | ${cell(classLabel)} → ${cell(f.sinkKind)} | ${cell(f.vuln)} |`);
  }
  if (findingList.length > 50) lines.push(`| … | … | … | … and ${findingList.length - 50} more |`);
  lines.push('');
  lines.push(`## Regulatory framework mapping`);
  lines.push('');
  lines.push(`- **GDPR Art. 35** — DPIA required when processing is likely to result in high risk to data subjects.`);
  lines.push(`- **CCPA §1798.130** — Notice + access rights for collected personal information.`);
  if (grouped.has('PHI')) lines.push(`- **HIPAA §164.308** — Administrative safeguards for ePHI access.`);
  if (grouped.has('PCI')) lines.push(`- **PCI DSS Req. 3** — Protect stored cardholder data.`);
  lines.push('');
  lines.push(`## Reviewer checklist`);
  lines.push('');
  lines.push(`- [ ] Confirm each PII field's collection has a documented lawful basis`);
  lines.push(`- [ ] Confirm retention period for each class is documented`);
  lines.push(`- [ ] Confirm DSAR (data subject access request) workflow exists`);
  lines.push(`- [ ] Confirm encryption at rest + in transit for each class`);
  lines.push(`- [ ] Confirm logging of PII access for audit (where applicable)`);
  return lines.join('\n');
}
204
+
205
// Exposes the otherwise module-private pattern tables
// (presumably for tests — confirm against callers).
export const _internals = {
  PII_PATTERNS,
  SINK_PATTERNS,
};
@@ -0,0 +1,189 @@
1
+ // SMT path feasibility — Recommendation #3 of the world-class roadmap.
2
+ //
3
+ // For top-N findings per scan, generate SMT constraints from the IR
4
+ // representing the conditions that must hold along the call-graph path
5
+ // from source to sink. Discharge via a Z3 solver. If UNSAT, the
6
+ // finding is provably infeasible and gets demoted to 'info' severity
7
+ // with `pathFeasibility: 'unsat'`. If SAT, we emit a sample witness
8
+ // (a concrete tainted input that triggers the sink) which is gold-standard
9
+ // evidence for the developer.
10
+ //
11
+ // Solver backend: prefers `z3-solver` (Z3 WASM published on npm) when
12
+ // installed; falls back to a constraint-emission-only mode that still
13
+ // records the SMT-LIB script so a CI step can discharge it offline.
14
+ //
15
+ // Gating: opt-in via AGENTIC_SECURITY_SMT_FEASIBILITY=1. Always bounded
16
+ // at top-MAX_PROOF_OBLIGATIONS findings per scan to keep wall-clock
17
+ // under PROOF_BUDGET_MS.
18
+ //
19
+ // IMPORTANT — this module is NOT a generic symbolic executor. It targets
20
+ // a narrow shape: "does there exist an input that flows from source S
21
+ // through path P to sink K?" That's enough to prove or refute the
22
+ // reachability claim on a finding the engine already produced. We do
23
+ // NOT attempt to prove arbitrary safety properties.
24
+
25
// Defaults applied when the caller's options omit the corresponding value.
const PROOF_BUDGET_MS_DEFAULT = 30 * 1000; // wall-clock budget for one annotation pass
const MAX_PROOF_OBLIGATIONS_DEFAULT = 50; // cap on findings discharged per pass
const PER_QUERY_TIMEOUT_MS_DEFAULT = 5 * 1000; // timeout for a single solver query
28
+
29
// Lazy-load Z3. The module is permitted to be absent — when it is, we
// fall back to constraint-emission-only mode (the SMT-LIB script is
// attached to the finding for offline discharge).
//
// Resolves to the object returned by the package's `init()` (the high-level
// API that carries `Context`), to the raw module when no `init` export is
// found, or to null when the import or initialisation fails. The load is
// attempted once; the in-flight promise is shared, so a caller arriving while
// the import is still pending waits for it instead of seeing a premature null.
let _z3Mod = null;
let _z3LoadAttempted = false;
let _z3Loading = null;
async function _loadZ3() {
  if (_z3LoadAttempted) return _z3Mod;
  if (!_z3Loading) {
    _z3Loading = (async () => {
      try {
        const mod = await import('z3-solver');
        // The package is CommonJS; depending on interop, `init` is exposed as
        // a named export or only on the default export.
        const init = typeof mod.init === 'function' ? mod.init : (mod.default && mod.default.init);
        // Keep what init() resolves to: that is where `Context` lives.
        // Discarding it leaves callers with a module that has no `Context`.
        _z3Mod = typeof init === 'function' ? ((await init()) || mod) : mod;
      } catch {
        // Deliberate best-effort: an absent or broken solver is not an error.
        _z3Mod = null;
      } finally {
        _z3LoadAttempted = true;
      }
      return _z3Mod;
    })();
  }
  return _z3Loading;
}
43
+
44
+ // ── Constraint emission ───────────────────────────────────────────────────
45
+
46
// Render a JS value as an SMT-LIB 2.6 string literal. SMT-LIB escapes a double
// quote by doubling it and has no backslash escapes of its own; the strings
// theory reads `\u{X}` as a code point. JSON-style output (`\"`, `\n`) is
// therefore not valid here. null/undefined become the empty string.
function _smtStringLiteral(value) {
  const text = value === undefined || value === null ? '' : String(value);
  let out = '"';
  for (const ch of text) {
    const cp = ch.codePointAt(0);
    if (ch === '"') {
      out += '""';
    } else if (ch === '\\' || cp < 0x20 || cp > 0x7e) {
      // Backslash is escaped too so a literal `\u{…}` in the input is not
      // re-interpreted. The theory's alphabet ends at 0x2FFFF; anything
      // above is replaced by U+FFFD.
      out += `\\u{${(cp > 0x2ffff ? 0xfffd : cp).toString(16)}}`;
    } else {
      out += ch;
    }
  }
  return `${out}"`;
}

/**
 * Encode a single IR predicate (one node along the path) into an SMT-LIB
 * line. Predicates supported in v1, selected by `p.kind`:
 *   - `source`   — declares `p.var` as a free symbolic string
 *   - `const`    — asserts `p.var` equals the string literal `p.value`
 *   - `concat`   — asserts `p.var` equals `p.a` concatenated with `p.b`
 *   - `sanitize` — asserts `p.var` equals a fixed marker string (forces concrete)
 *   - `reach`    — emits only a comment recording `p.file:p.line`
 *   - `guard`    — asserts `p.smtCond` verbatim, or `p.var == p.value` when
 *                  no `smtCond` is given
 * Any other kind yields a comment line. `p.var`, `p.a`, `p.b` and `p.smtCond`
 * are inserted verbatim and must already be valid SMT-LIB terms.
 *
 * @param {object} p - Predicate.
 * @param {number} idx - Position of the predicate in its path; used to make
 *   the sanitize marker unique.
 * @returns {string} One SMT-LIB command or comment.
 */
function encodePredicate(p, idx) {
  switch (p.kind) {
    case 'source':
      return `(declare-const ${p.var} String)`;
    case 'const':
      return `(assert (= ${p.var} ${_smtStringLiteral(p.value)}))`;
    case 'concat':
      return `(assert (= ${p.var} (str.++ ${p.a} ${p.b})))`;
    case 'sanitize':
      return `(assert (= ${p.var} "safe-${p.kind}-${idx}"))`;
    case 'reach':
      // Symbolic "this line is reached" — we don't really model reachability,
      // we just record the obligation. The presence of the path is what
      // matters; SAT just means "some input satisfies the path conditions."
      return `; reach(${p.file}:${p.line})`;
    case 'guard':
      return `(assert ${p.smtCond || `(= ${p.var} ${_smtStringLiteral(p.value)})`})`;
    default:
      return `; unsupported predicate kind: ${p.kind}`;
  }
}
79
+
80
/**
 * Emit a complete SMT-LIB script for one finding: header, logic and timeout
 * options, one `declare-const` per distinct predicate variable, every
 * predicate's encoding, then `(check-sat)` and `(get-model)`.
 *
 * All variables are declared up front, whatever the predicate kind. Only
 * `source` predicates declare their own variable, so without this a
 * `const` / `concat` / `sanitize` step asserts on an undeclared symbol and
 * the whole script is rejected by the parser.
 *
 * @param {object[]} predicates - Path predicates, in path order.
 * @param {{timeoutMs?: number}} [opts] - `timeoutMs` overrides the default
 *   per-query solver timeout.
 * @returns {string} SMT-LIB text, newline-joined.
 */
export function emitSmtScript(predicates, opts = {}) {
  const lines = [];
  lines.push('; SMT-LIB script — emitted by scanner/src/dataflow/smt-feasibility.js');
  lines.push(`(set-logic QF_S)`);
  lines.push(`(set-option :timeout ${opts.timeoutMs || PER_QUERY_TIMEOUT_MS_DEFAULT})`);

  // Declare each distinct variable exactly once, in first-seen order.
  const declared = new Set();
  for (const p of predicates) {
    if (!p || typeof p.var !== 'string' || p.var === '' || declared.has(p.var)) continue;
    declared.add(p.var);
    lines.push(`(declare-const ${p.var} String)`);
  }

  predicates.forEach((p, i) => {
    // A `source` predicate encodes to nothing but its declaration, which the
    // prelude already emitted; repeating it would be a redeclaration error.
    if (p && p.kind === 'source' && declared.has(p.var)) return;
    lines.push(encodePredicate(p, i));
  });
  lines.push('(check-sat)');
  lines.push('(get-model)');
  return lines.join('\n');
}
95
+
96
+ // ── Z3 discharge ──────────────────────────────────────────────────────────
97
+
98
/**
 * dischargeFinding(predicates, opts) — encode + solve. Resolves to one of:
 *   { verdict: 'sat', witness: { var: value }, elapsedMs }
 *   { verdict: 'unsat', elapsedMs }
 *   { verdict: 'unknown', reason: '<why>', ... }   // timeout, parse error, solver error, no predicates
 *   { verdict: 'pending', script: '<smt-lib text>' }  // when Z3 unavailable
 * Never rejects: solver failures are reported as `unknown`.
 *
 * @param {object[]} predicates - Path predicates for one finding.
 * @param {{timeoutMs?: number}} [opts] - Per-query timeout override.
 * @returns {Promise<object>}
 */
export async function dischargeFinding(predicates, opts = {}) {
  if (!predicates || !predicates.length) return { verdict: 'unknown', reason: 'no-predicates' };
  const script = emitSmtScript(predicates, opts);
  const z3 = await _loadZ3();
  if (!z3) return { verdict: 'pending', script };
  try {
    const { Context } = z3;
    const ctx = new Context('main');
    const solver = new ctx.Solver();
    // Feed the script via parse — z3-solver supports SMT-LIB ingestion.
    try { solver.fromString(script); }
    catch (e) {
      return { verdict: 'unknown', reason: 'parse-error: ' + String(e && e.message), script };
    }
    const start = Date.now();
    const timeoutMs = opts.timeoutMs || PER_QUERY_TIMEOUT_MS_DEFAULT;
    let timer;
    let result;
    try {
      result = await Promise.race([
        solver.check(),
        new Promise((resolve) => { timer = setTimeout(() => resolve('timeout'), timeoutMs); }),
      ]);
    } finally {
      // Without this the timer stays armed after a fast answer and keeps the
      // event loop alive for the full timeout on every query.
      clearTimeout(timer);
    }
    const elapsed = Date.now() - start;
    if (result === 'unsat') return { verdict: 'unsat', elapsedMs: elapsed };
    if (result === 'timeout' || result === 'unknown') return { verdict: 'unknown', reason: result, elapsedMs: elapsed, script };
    if (result === 'sat') {
      // Best-effort witness extraction.
      let witness = {};
      try {
        const model = solver.model();
        for (const decl of model.decls()) witness[decl.name()] = String(model.get(decl));
      } catch { /* no model */ }
      return { verdict: 'sat', witness, elapsedMs: elapsed };
    }
    return { verdict: 'unknown', reason: String(result), elapsedMs: elapsed };
  } catch (e) {
    return { verdict: 'unknown', reason: String(e && e.message || e), script };
  }
}
141
+
142
+ // ── Finding-level integration ─────────────────────────────────────────────
143
+
144
/**
 * Annotate the top-N findings with their feasibility verdict. Modifies
 * findings in place — each processed finding gets a `pathFeasibility` field,
 * a `feasibilityWitness` object when SAT, and a truncated `_smtScript` when
 * the solver result carries one. Findings whose verdict is UNSAT are demoted
 * to 'info' severity; the previous severity is kept in
 * `_pathFeasibilityDemoted`.
 *
 * Only critical/high findings that carry a `chain` or `taintPath` array are
 * considered. Critical findings are taken before high ones (input order is
 * kept within each severity), so the `maxObligations` cap never spends its
 * slots on `high` while a `critical` finding is left out.
 *
 * @param {object[]} findings - Findings to annotate; the array itself is not
 *   reordered.
 * @param {{budgetMs?: number, maxObligations?: number}} [opts]
 * @returns {Promise<{annotated: number, demoted: number, elapsedMs?: number}>}
 */
export async function annotatePathFeasibility(findings, opts = {}) {
  if (!Array.isArray(findings)) return { annotated: 0, demoted: 0 };
  const budget = opts.budgetMs || PROOF_BUDGET_MS_DEFAULT;
  const max = opts.maxObligations || MAX_PROOF_OBLIGATIONS_DEFAULT;
  // Prioritize: critical/high findings with concrete chains first.
  const severityRank = { critical: 0, high: 1 };
  const candidates = findings
    .filter((f) => f && (f.severity === 'critical' || f.severity === 'high'))
    .filter((f) => Array.isArray(f.chain) || Array.isArray(f.taintPath))
    .sort((x, y) => severityRank[x.severity] - severityRank[y.severity]) // sorts the filtered copy; stable
    .slice(0, max);
  const start = Date.now();
  let annotated = 0, demoted = 0;
  for (const f of candidates) {
    const elapsed = Date.now() - start;
    if (elapsed > budget) {
      f.pathFeasibility = 'unknown';
      f.feasibilityReason = 'budget-exceeded';
      continue;
    }
    const path = Array.isArray(f.chain) ? f.chain : f.taintPath;
    const predicates = path.map((step, i) => {
      const s = step || {}; // tolerate null / missing steps
      return {
        kind: i === 0 ? 'source' : (s.kind || 'concat'),
        var: `v${i}`,
        a: `v${Math.max(0, i - 1)}`, b: '""',
        value: s.value || '',
        file: s.file, line: s.line,
      };
    });
    // Never give a single query more time than the pass has left.
    const timeoutMs = Math.max(1, Math.min(5_000, budget - elapsed));
    const r = await dischargeFinding(predicates, { timeoutMs });
    f.pathFeasibility = r.verdict;
    if (r.witness) f.feasibilityWitness = r.witness;
    if (r.script) f._smtScript = r.script.slice(0, 4000);
    annotated++;
    if (r.verdict === 'unsat') {
      const before = f.severity;
      f.severity = 'info';
      f._pathFeasibilityDemoted = before;
      demoted++;
    }
  }
  return { annotated, demoted, elapsedMs: Date.now() - start };
}
188
+
189
// Exposes the module-private encoders and default limits
// (presumably for tests — confirm against callers).
export const _internals = {
  encodePredicate,
  emitSmtScript,
  PROOF_BUDGET_MS_DEFAULT,
  MAX_PROOF_OBLIGATIONS_DEFAULT,
};