npm - @clear-capabilities/agentic-security-scanner - Versions diffs - 0.79.0 → 0.84.1 - Mend

@clear-capabilities/agentic-security-scanner 0.79.0 → 0.84.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (122) hide show

package/dist/178.index.js +1 -1
package/dist/333.index.js +283 -0
package/dist/384.index.js +1 -1
package/dist/637.index.js +1 -1
package/dist/838.index.js +1 -1
package/dist/839.index.js +170 -0
package/dist/985.index.js +140 -1
package/dist/agentic-security.mjs +10 -10
package/dist/agentic-security.mjs.sha256 +1 -1
package/package.json +7 -5
package/src/.agentic-security/findings.json +117732 -0
package/src/.agentic-security/last-scan.json +117732 -0
package/src/.agentic-security/last-scan.json.sig +1 -0
package/src/.agentic-security/scan-history.json +12946 -0
package/src/.agentic-security/streak.json +21 -0
package/src/dataflow/.agentic-security/findings.json +6086 -0
package/src/dataflow/.agentic-security/last-scan.json +6086 -0
package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
package/src/dataflow/.agentic-security/scan-history.json +250 -0
package/src/dataflow/.agentic-security/streak.json +21 -0
package/src/dataflow/cross-service-taint.js +201 -0
package/src/dataflow/formal-verify.js +204 -0
package/src/dataflow/ifds-precise.js +222 -0
package/src/dataflow/k2-summary-cache.js +153 -0
package/src/dataflow/lib-taint-summaries.js +198 -0
package/src/dataflow/privacy-taint.js +205 -0
package/src/dataflow/smt-feasibility.js +189 -0
package/src/engine.js +825 -127
package/src/ir/.agentic-security/findings.json +4011 -0
package/src/ir/.agentic-security/last-scan.json +4011 -0
package/src/ir/.agentic-security/last-scan.json.sig +1 -0
package/src/ir/.agentic-security/scan-history.json +193 -0
package/src/ir/.agentic-security/streak.json +20 -0
package/src/ir/cpp-preprocessor.js +142 -0
package/src/ir/csharp-ir.js +604 -0
package/src/ir/universal-ir.js +403 -0
package/src/mcp/.agentic-security/findings.json +8632 -0
package/src/mcp/.agentic-security/last-scan.json +8632 -0
package/src/mcp/.agentic-security/last-scan.json.sig +1 -0
package/src/mcp/.agentic-security/scan-history.json +331 -0
package/src/mcp/.agentic-security/streak.json +20 -0
package/src/mcp/tools.js +140 -1
package/src/posture/.agentic-security/findings.json +77181 -0
package/src/posture/.agentic-security/last-scan.json +77181 -0
package/src/posture/.agentic-security/last-scan.json.sig +1 -0
package/src/posture/.agentic-security/scan-history.json +8904 -0
package/src/posture/.agentic-security/streak.json +21 -0
package/src/posture/api-contract.js +193 -0
package/src/posture/attack-taxonomy.js +227 -0
package/src/posture/auditor-walkthrough.js +252 -0
package/src/posture/claude-authorship.js +197 -0
package/src/posture/compliance-frameworks/.agentic-security/findings.json +80 -0
package/src/posture/compliance-frameworks/.agentic-security/last-scan.json +80 -0
package/src/posture/compliance-frameworks/.agentic-security/last-scan.json.sig +1 -0
package/src/posture/compliance-frameworks/.agentic-security/scan-history.json +90 -0
package/src/posture/compliance-frameworks/.agentic-security/streak.json +22 -0
package/src/posture/compliance-frameworks/ccpa.json +32 -0
package/src/posture/compliance-frameworks/eu-ai-act.json +51 -0
package/src/posture/compliance-frameworks/gdpr.json +45 -0
package/src/posture/compliance-frameworks/hipaa-security-rule.json +56 -0
package/src/posture/compliance-frameworks/nist-ai-600-1.json +51 -0
package/src/posture/compliance-frameworks/nist-csf-2.json +73 -0
package/src/posture/compliance-frameworks/owasp-asvs-5.json +79 -0
package/src/posture/compliance-frameworks/owasp-llm-top-10.json +69 -0
package/src/posture/compliance-policy.js +218 -0
package/src/posture/composite-risk.js +122 -0
package/src/posture/cross-repo-memory.js +180 -0
package/src/posture/csharp-analysis.js +330 -0
package/src/posture/dep-add-guard.js +197 -0
package/src/posture/exploit-bundle.js +210 -0
package/src/posture/federated-learning.js +172 -0
package/src/posture/findings-memory.js +152 -0
package/src/posture/fix-style-mirror.js +118 -0
package/src/posture/git-history.js +141 -0
package/src/posture/intent-context.js +175 -0
package/src/posture/license-attributions.js +94 -0
package/src/posture/license-graph.js +238 -0
package/src/posture/model-rescan.js +76 -0
package/src/posture/pattern-propagation.js +39 -0
package/src/posture/pqc-migration-plan.js +158 -0
package/src/posture/pr-augment.js +234 -0
package/src/posture/reachability-filter.js +33 -2
package/src/posture/realtime-cve-monitor.js +214 -0
package/src/posture/risk-dollars.js +158 -0
package/src/posture/runtime-correlation.js +174 -0
package/src/posture/sbom-diff.js +171 -0
package/src/posture/sca-policy.js +235 -0
package/src/posture/sca-upgrade.js +259 -0
package/src/posture/threat-model-auto.js +268 -0
package/src/posture/threat-model-grounding.js +169 -0
package/src/posture/time-to-fix.js +129 -0
package/src/posture/triage-learning.js +170 -0
package/src/posture/triage-memory.js +151 -0
package/src/posture/triage.js +40 -1
package/src/posture/watch-mode.js +171 -0
package/src/posture/workflow-installer.js +231 -0
package/src/sast/.agentic-security/findings.json +6154 -0
package/src/sast/.agentic-security/last-scan.json +6154 -0
package/src/sast/.agentic-security/last-scan.json.sig +1 -0
package/src/sast/.agentic-security/scan-history.json +941 -0
package/src/sast/.agentic-security/streak.json +22 -0
package/src/sast/_secret-entropy.js +145 -0
package/src/sast/cloud-iam.js +312 -0
package/src/sast/cpp.js +138 -4
package/src/sast/crypto-protocol.js +388 -0
package/src/sast/csharp-tokenizer.js +392 -0
package/src/sast/csharp.js +924 -138
package/src/sast/dapp-frontend.js +200 -0
package/src/sast/k8s-admission.js +271 -0
package/src/sast/llm-app.js +272 -0
package/src/sast/ml-supply-chain.js +259 -0
package/src/sast/mobile.js +224 -0
package/src/sast/post-quantum-crypto.js +348 -0
package/src/sast/web3-advanced.js +375 -0
package/src/sca/.agentic-security/findings.json +7460 -0
package/src/sca/.agentic-security/last-scan.json +7460 -0
package/src/sca/.agentic-security/last-scan.json.sig +1 -0
package/src/sca/.agentic-security/scan-history.json +113 -0
package/src/sca/.agentic-security/streak.json +21 -0
package/src/sca/CLAUDE.md +161 -0
package/src/sca/binary-metadata.js +37 -15
package/src/sca/sigstore-verify.js +215 -0

package/src/dataflow/lib-taint-summaries.js ADDED Viewed

@@ -0,0 +1,198 @@
+// Library taint summaries — Recommendation #5 of the SCA/SAST plan.
+//
+// Hand-curated knowledge that "this library method returns tainted data" or
+// "this method propagates taint from arg N to its return." Used by the
+// existing dataflow engine + per-language detectors when classifying the
+// taint state of a declaration's rhs.
+//
+// The summaries are intentionally per-language because the same concept
+// (a user-input source) has different idioms in each ecosystem. Each entry:
+//
+//   { pattern: RegExp, kind: 'source' | 'sanitizer' | 'passthrough',
+//     framework: 'spring' | 'aspnet' | 'glibc' | … }
+//
+// Kinds:
+//   source       — return value is unconditionally tainted
+//   sanitizer    — return value is unconditionally clean, even if any arg
+//                  was tainted (e.g. HtmlEncode, parameterized prepare)
+//   passthrough  — return value is tainted iff arg N is tainted (taint
+//                  flows through). Not modelled in v1; reserved for future
+//                  inter-procedural extensions (Recommendation #9).
+//
+// Usage: detectors call `isLibrarySource(text, lang)` and `isLibrarySanitizer
+// (text, lang)` to refine their per-call decisions.
+const JAVA_SUMMARIES = {
+  sources: [
+    // Servlet API — every request-scoped getter is a user-input source.
+    /\bHttpServletRequest\b[\s\S]{0,2000}?\.\s*(?:getParameter(?:Values|Map)?|getQueryString|getHeader(?:Names)?|getInputStream|getReader|getCookies?|getRequestURI|getRequestURL|getQueryString|getPathInfo)\s*\(/,
+    /\bjavax\.servlet\.http\.HttpServletRequest\b/,
+    // Spring MVC — controller method annotations bind to request data.
+    /@RequestParam\b/,
+    /@RequestBody\b/,
+    /@PathVariable\b/,
+    /@RequestHeader\b/,
+    /@CookieValue\b/,
+    /@ModelAttribute\b/,
+    // Spring Security — the principal is user-controlled in the trust sense
+    // (it identifies WHO the request is from; not auto-sanitized).
+    /\bSecurityContextHolder\s*\.\s*getContext\s*\(\s*\)\s*\.\s*getAuthentication\s*\(\s*\)/,
+    // Java Files API — file content is untrusted when source is unknown.
+    /\bFiles\s*\.\s*(?:readString|readAllBytes|readAllLines|lines|newBufferedReader|newInputStream)\b/,
+    /\bPaths\s*\.\s*get\s*\([^)]*(?:System\.getProperty|args)\b/,
+    // BufferedReader / Scanner reading user input.
+    /\bBufferedReader\b[\s\S]{0,500}?\.\s*readLine\s*\(/,
+    /\bScanner\b[\s\S]{0,500}?\.\s*(?:next(?:Line)?|nextInt|nextLong)\s*\(/,
+    // System.getenv / System.getProperty — environment is configurable.
+    /\bSystem\s*\.\s*(?:getenv|getProperty)\s*\(/,
+    // Jackson — deserialization input is untrusted.
+    /\bObjectMapper\b[\s\S]{0,500}?\.\s*readValue\s*\(/,
+    /\bJsonParser\b[\s\S]{0,500}?\.\s*getValueAsString\s*\(/,
+    // Apache Commons IO.
+    /\bIOUtils\s*\.\s*toString\s*\(/,
+    /\bFileUtils\s*\.\s*readFileToString\s*\(/,
+    // Spring WebFlux ServerWebExchange.
+    /\bServerWebExchange\b[\s\S]{0,500}?\.\s*getRequest\s*\(/,
+  ],
+  sanitizers: [
+    /\bOWASP\.Encoder\b/,
+    /\bESAPI\b[\s\S]{0,200}?\.\s*encoder\s*\(\s*\)/,
+    /\bStringEscapeUtils\s*\.\s*escape(?:Html\d?|Xml|Sql|Java|JavaScript)\b/,
+    /\bHtmlUtils\s*\.\s*htmlEscape\b/,
+    /\bUriUtils\s*\.\s*encode\b/,
+    // JDBC PreparedStatement parameter setters — taint is cleaned at bind.
+    /\bPreparedStatement\b[\s\S]{0,500}?\.\s*set(?:String|Int|Long|Object|BigDecimal|Date|Timestamp)\s*\(/,
+    /\bNamedParameterJdbcTemplate\b[\s\S]{0,500}?\.\s*(?:query|update|queryForObject)\s*\([^,]+,\s*new\s+MapSqlParameterSource\b/,
+    // Java validators.
+    /\bjakarta\.validation\b/,
+    /\bjavax\.validation\b/,
+    /\b@Valid\b/,
+  ],
+};
+const CSHARP_SUMMARIES = {
+  sources: [
+    // ASP.NET request surfaces.
+    /\bHttpRequest\b[\s\S]{0,500}?\.\s*(?:Query|Form|Headers|Cookies|RouteValues|Body|InputStream|QueryString|Params|Path|Url)\b/,
+    /\bHttpContext\s*\.\s*Request\b/,
+    /\bIFormCollection\b/,
+    /\bIFormFile\b/,
+    /\bIFormFileCollection\b/,
+    // ASP.NET Core model binding.
+    /\[FromQuery\]/,
+    /\[FromBody\]/,
+    /\[FromForm\]/,
+    /\[FromRoute\]/,
+    /\[FromHeader\]/,
+    // Configuration may carry secrets but the VALUES are environment-supplied.
+    /\bIConfiguration\b[\s\S]{0,500}?\.\s*(?:GetSection|GetValue|GetConnectionString|GetChildren)\s*\(/,
+    // Newtonsoft.Json deserialization.
+    /\bJsonConvert\s*\.\s*Deserialize(?:Object|XmlNode)\s*</,
+    /\bJsonSerializer\s*\.\s*Deserialize\s*</,
+    // Files / streams.
+    /\bFile\s*\.\s*(?:ReadAllText|ReadAllLines|ReadAllBytes|OpenRead|OpenText)\s*\(/,
+    /\bStreamReader\b[\s\S]{0,500}?\.\s*(?:ReadLine|ReadToEnd|Read)\s*\(/,
+    /\bBinaryReader\b[\s\S]{0,500}?\.\s*Read(?:String|Bytes|Char|Int32|Int64|UInt32|UInt64)\s*\(/,
+    // Network reads.
+    /\bWebClient\b[\s\S]{0,500}?\.\s*Download(?:String|Data|File)\s*\(/,
+    /\bHttpClient\b[\s\S]{0,500}?\.\s*(?:GetAsync|GetStringAsync|PostAsync|SendAsync)\s*\(/,
+    // Environment + console.
+    /\bEnvironment\s*\.\s*GetEnvironmentVariable\s*\(/,
+    /\bConsole\s*\.\s*ReadLine\s*\(/,
+  ],
+  sanitizers: [
+    /\bHttpUtility\s*\.\s*HtmlEncode\b/,
+    /\bHtmlEncoder\s*\.\s*Default\s*\.\s*Encode\b/,
+    /\bAntiXssEncoder\b/,
+    /\bSqlParameter\b/,
+    /\bMySqlParameter\b/,
+    /\bNpgsqlParameter\b/,
+    // EF Core parameterized helpers.
+    /\bFromSqlInterpolated\s*\(/,
+    // Validation.
+    /\bint\s*\.\s*TryParse\s*\(/,
+    /\bGuid\s*\.\s*TryParse\s*\(/,
+    /\bDateTime\s*\.\s*TryParse\s*\(/,
+    /\bRegex\s*\.\s*Replace\s*\(/,
+  ],
+};
+const CPP_SUMMARIES = {
+  sources: [
+    // POSIX — environment + user input.
+    /\bgetenv\s*\(/,
+    /\bsecure_getenv\s*\(/,
+    /\bargv\s*\[/,
+    /\bgets\s*\(/,
+    /\bfgets\s*\(/,
+    /\bscanf\s*\(/,
+    /\bfscanf\s*\(/,
+    /\bgetc\s*\(/,
+    /\bfgetc\s*\(/,
+    /\bread\s*\(\s*\d+/,    // unistd read(fd, ...)
+    /\brecv\s*\(/,
+    /\brecvfrom\s*\(/,
+    // OpenSSL / network.
+    /\bBIO_read\s*\(/,
+    /\bSSL_read\s*\(/,
+    // Win32 input.
+    /\bGetCommandLine[AW]?\s*\(/,
+    /\bGetEnvironmentVariable[AW]?\s*\(/,
+    // Standard streams.
+    /\bstd\s*::\s*cin\s*>>/,
+    /\bstd\s*::\s*getline\s*\(\s*std\s*::\s*cin\b/,
+  ],
+  sanitizers: [
+    // Length-checked copies (best-effort).
+    /\bstrncpy\s*\(\s*[^,]+,\s*[^,]+,\s*sizeof\s*\(/,
+    /\bsnprintf\s*\(\s*[^,]+,\s*sizeof\s*\(/,
+    /\bisdigit\s*\(/,
+    /\bisalpha\s*\(/,
+    /\bisalnum\s*\(/,
+    /\bstrtol\s*\(/,
+    /\bstrtoul\s*\(/,
+  ],
+};
+const SUMMARIES_BY_LANG = {
+  java:   JAVA_SUMMARIES,
+  csharp: CSHARP_SUMMARIES,
+  cpp:    CPP_SUMMARIES,
+  c:      CPP_SUMMARIES,
+};
+// Resolve language from a file path or explicit hint.
+function _langOf(hint, file) {
+  if (hint) return hint;
+  if (!file) return null;
+  if (/\.java$/i.test(file)) return 'java';
+  if (/\.cs$/i.test(file)) return 'csharp';
+  if (/\.(?:c|cc|cpp|cxx|h|hh|hpp)$/i.test(file)) return 'cpp';
+  return null;
+}
+/**
+ * Returns true if `text` contains a library-source pattern for the language.
+ */
+export function isLibrarySource(text, langOrFile) {
+  if (!text) return false;
+  const lang = _langOf(typeof langOrFile === 'string' && langOrFile.includes('.') ? null : langOrFile, langOrFile);
+  const s = SUMMARIES_BY_LANG[lang];
+  if (!s) return false;
+  for (const re of s.sources) if (re.test(text)) return true;
+  return false;
+}
+/**
+ * Returns true if `text` contains a library-sanitizer pattern for the language.
+ */
+export function isLibrarySanitizer(text, langOrFile) {
+  if (!text) return false;
+  const lang = _langOf(typeof langOrFile === 'string' && langOrFile.includes('.') ? null : langOrFile, langOrFile);
+  const s = SUMMARIES_BY_LANG[lang];
+  if (!s) return false;
+  for (const re of s.sanitizers) if (re.test(text)) return true;
+  return false;
+}
+export const _internals = { JAVA_SUMMARIES, CSHARP_SUMMARIES, CPP_SUMMARIES, SUMMARIES_BY_LANG };

package/src/dataflow/privacy-taint.js ADDED Viewed

@@ -0,0 +1,205 @@
+// Privacy / PII data-flow tracking — Recommendation #9 of the
+// world-class roadmap.
+//
+// Runs the existing taint engine with a different lattice (PII / PHI /
+// PCI / FIN classes, instead of security taint) to track where each
+// regulated-data class flows through a codebase. Outputs:
+//
+//   1. Per-field PII classification — `user.email: PII (CWE-359 Information
+//      Disclosure if reflected)`
+//   2. Data flow diagrams — exit points (sinks) per PII class — where
+//      regulated data leaves the application (response body, log file,
+//      third-party API call, S3 upload, etc.)
+//   3. Auto-generated DPIA stub for GDPR Art. 35 / CCPA §1798.130 /
+//      HIPAA §164.530 — a compliance artifact the customer's privacy
+//      counsel can use
+//   4. Findings: each "PII leaves system via untrusted sink" emits a
+//      privacy finding with family `pii-exposure`
+//
+// The PII detection is deterministic and field-name based. We DO NOT
+// attempt content classification (Luhn-checking actual values would
+// only catch leaks that have already happened); we classify by NAME
+// + TYPE in declarations.
+// PII / PHI / PCI / FIN classifiers — each is a regex against
+// field/variable/column names. Same idea as the existing classifyField
+// helpers in engine.js but enumerated for compliance reporting.
+const PII_PATTERNS = {
+  PII: [
+    /\bfirst[_-]?name\b/i, /\blast[_-]?name\b/i, /\bfull[_-]?name\b/i,
+    /\bemail([_-]?address)?\b/i, /\bphone([_-]?number)?\b/i, /\bmobile\b/i,
+    /\baddress(?:_?(?:line|street|city|zip|postal))?\b/i,
+    /\bdob\b/i, /\bdate[_-]?of[_-]?birth\b/i, /\bbirthday\b/i, /\bbirthdate\b/i,
+    /\bage\b/i, /\bgender\b/i, /\bethnicity\b/i, /\brace\b/i, /\bnationality\b/i,
+    /\bssn\b/i, /\bsocial[_-]?security/i, /\bnational[_-]?id/i, /\bpassport\b/i,
+    /\bdriver[_-]?license\b/i, /\btax[_-]?id\b/i, /\bgovernment[_-]?id\b/i,
+    /\bip[_-]?address\b/i, /\bgeo[_-]?location\b/i, /\blatitude\b/i, /\blongitude\b/i,
+  ],
+  PHI: [
+    /\b(?:medical|patient|health)[_-]?record\b/i,
+    /\bdiagnosis\b/i, /\bcondition\b/i, /\bsymptom\b/i, /\btreatment\b/i,
+    /\bmedication\b/i, /\bprescription\b/i, /\bdosage\b/i,
+    /\bicd[_-]?(?:9|10|11)\b/i, /\bcpt[_-]?code\b/i, /\bmrn\b/i,
+    /\bmedical[_-]?record[_-]?number\b/i, /\bdoctor[_-]?name\b/i,
+    /\bphysician\b/i, /\binsurance[_-]?id\b/i, /\bhealth[_-]?plan\b/i,
+  ],
+  PCI: [
+    /\bcredit[_-]?card[_-]?(?:number|num|no)?\b/i,
+    /\bcard[_-]?(?:number|num|no)\b/i,
+    /\b(?:cvc|cvv)2?\b/i, /\bcvc[_-]?code\b/i,
+    /\bexp(?:iry|iration)?(?:_?date)?\b/i,
+    /\bcardholder[_-]?name\b/i, /\bpan\b/i,
+    /\biban\b/i, /\brouting[_-]?number\b/i,
+    /\baccount[_-]?number\b/i,
+  ],
+  FIN: [
+    /\bsalary\b/i, /\bincome\b/i, /\bbalance\b/i, /\btransaction[_-]?amount\b/i,
+    /\bbank[_-]?account\b/i,
+    /\bcredit[_-]?score\b/i, /\bnet[_-]?worth\b/i,
+  ],
+};
+const SINK_PATTERNS = {
+  log: /\b(?:log|logger|console|System\.out|System\.err|stdout|stderr|fmt\.Print|print)\b/i,
+  response: /\b(?:res|response|ctx\.response|HttpContext\.Response)\s*\.\s*(?:write|send|json|render|body)\b/i,
+  outboundHttp: /\bfetch\b(?:$|[(\s.])|\b(?:axios|got|httpClient|HttpClient|WebClient|requests|node_fetch)\s*(?:\.\s*(?:get|post|put|delete|send|invoke|patch|head)|\()/i,
+  thirdPartySdk: /\b(?:stripe|sentry|datadog|segment|amplitude|mixpanel|posthog|braze|intercom)\s*\.\s*track|identify|capture\b/i,
+  fileWrite: /\b(?:fs\.writeFile|File\.WriteAllText|File\.AppendAllText|open\([^)]*,\s*['"]w)\b/i,
+  s3Upload: /\b(?:s3|S3Client|aws\.S3)\s*\.\s*putObject\b/i,
+  emailSend: /\b(?:nodemailer|sendMail|SendGrid|sendgrid|smtp)\b/i,
+};
+/**
+ * Classify a field/variable name into PII / PHI / PCI / FIN buckets.
+ * Returns an array of bucket labels (possibly empty, possibly multiple).
+ */
+export function classifyField(name) {
+  if (!name) return [];
+  const out = [];
+  for (const [bucket, patterns] of Object.entries(PII_PATTERNS)) {
+    for (const p of patterns) {
+      if (p.test(name)) { out.push(bucket); break; }
+    }
+  }
+  return out;
+}
+/**
+ * Classify an outbound-data sink expression. Returns the matching sink
+ * label (log / response / outboundHttp / etc.) or null.
+ */
+export function classifySink(expr) {
+  if (!expr) return null;
+  for (const [label, p] of Object.entries(SINK_PATTERNS)) if (p.test(expr)) return label;
+  return null;
+}
+/**
+ * Run a privacy-taint pass over the per-file IR. For each field declared
+ * as PII/PHI/PCI/FIN, track flow into a classifySink-matched sink. Emit
+ * a privacy-leak finding when a regulated class reaches a non-secure
+ * sink (log, response, outbound HTTP, etc.).
+ */
+export function annotatePrivacyTaint(perFileIR) {
+  if (!perFileIR) return { findings: [], piiFields: [] };
+  const findings = [];
+  const piiFields = [];
+  for (const [filePath, ir] of (perFileIR instanceof Map ? perFileIR : Object.entries(perFileIR))) {
+    if (!ir || !ir._content) continue;
+    const lines = ir._content.split('\n');
+    // Step 1: collect PII-classified decls.
+    const taintedVars = new Map(); // name → array of bucket labels
+    for (const d of ir.decls || []) {
+      const classes = classifyField(d.name);
+      if (classes.length) {
+        taintedVars.set(d.name, classes);
+        piiFields.push({ file: filePath, line: d.line, name: d.name, classes, declaredType: d.type || null });
+      }
+    }
+    // Step 2: walk calls and assignments looking for a PII variable
+    // reaching a sink.
+    for (const call of ir.calls || []) {
+      const argText = (call.args || []).map(a => a.text || '').join(',');
+      const sinkLabel = classifySink(call.fullPath || call.callee || '');
+      if (!sinkLabel) continue;
+      for (const [name, classes] of taintedVars) {
+        if (!new RegExp(`\\b${name.replace(/[.+^${}()|\\]/g, '\\$&')}\\b`).test(argText)) continue;
+        findings.push({
+          family: 'pii-exposure',
+          subfamily: classes.join('+'),
+          file: filePath, line: call.line,
+          severity: classes.includes('PCI') || classes.includes('PHI') ? 'high' : 'medium',
+          cwe: 'CWE-359', // Exposure of Private Personal Information
+          vuln: `Privacy — ${classes.join('+')} data flows to ${sinkLabel} sink`,
+          snippet: (lines[call.line - 1] || '').trim().slice(0, 200),
+          remediation: `${classes.join(' + ')} data must not flow to ${sinkLabel} unencrypted. Mask, redact, or hash the value before logging / responding / sending to third parties.`,
+          piiClass: classes,
+          sinkKind: sinkLabel,
+        });
+      }
+    }
+  }
+  return { findings, piiFields };
+}
+/**
+ * Emit a DPIA (Data Protection Impact Assessment) Markdown artifact
+ * summarizing the privacy posture for compliance reporting. Output goes
+ * to .agentic-security/dpia.md.
+ */
+export function emitDpiaArtifact(piiFields, findings, opts = {}) {
+  const grouped = new Map();
+  for (const field of piiFields) {
+    for (const cls of field.classes) {
+      let g = grouped.get(cls);
+      if (!g) { g = []; grouped.set(cls, g); }
+      g.push(field);
+    }
+  }
+  const lines = [];
+  lines.push(`# Data Protection Impact Assessment (DPIA)`);
+  lines.push('');
+  lines.push(`Generated by agentic-security scanner on ${new Date().toISOString().slice(0, 10)}.`);
+  lines.push('');
+  lines.push(`This is an automated DPIA scaffold derived from static analysis.`);
+  lines.push(`It must be reviewed and completed by a privacy officer before use.`);
+  lines.push('');
+  lines.push(`## Data classes identified`);
+  lines.push('');
+  for (const [cls, fields] of grouped) {
+    lines.push(`### ${cls} (${fields.length} fields)`);
+    lines.push('');
+    for (const f of fields.slice(0, 20)) {
+      lines.push(`- \`${f.name}\` in \`${f.file}:${f.line}\` (type: ${f.declaredType || 'unknown'})`);
+    }
+    if (fields.length > 20) lines.push(`- … and ${fields.length - 20} more`);
+    lines.push('');
+  }
+  lines.push(`## Privacy-related findings`);
+  lines.push('');
+  lines.push(`| Severity | File:Line | Class → Sink | Description |`);
+  lines.push(`|---|---|---|---|`);
+  for (const f of findings.slice(0, 50)) {
+    lines.push(`| ${f.severity} | ${f.file}:${f.line} | ${f.piiClass.join('+')} → ${f.sinkKind} | ${f.vuln} |`);
+  }
+  if (findings.length > 50) lines.push(`| … | … | … | … and ${findings.length - 50} more |`);
+  lines.push('');
+  lines.push(`## Regulatory framework mapping`);
+  lines.push('');
+  lines.push(`- **GDPR Art. 35** — DPIA required when processing is likely to result in high risk to data subjects.`);
+  lines.push(`- **CCPA §1798.130** — Notice + access rights for collected personal information.`);
+  if (grouped.has('PHI')) lines.push(`- **HIPAA §164.308** — Administrative safeguards for ePHI access.`);
+  if (grouped.has('PCI')) lines.push(`- **PCI DSS Req. 3** — Protect stored cardholder data.`);
+  lines.push('');
+  lines.push(`## Reviewer checklist`);
+  lines.push('');
+  lines.push(`- [ ] Confirm each PII field's collection has a documented lawful basis`);
+  lines.push(`- [ ] Confirm retention period for each class is documented`);
+  lines.push(`- [ ] Confirm DSAR (data subject access request) workflow exists`);
+  lines.push(`- [ ] Confirm encryption at rest + in transit for each class`);
+  lines.push(`- [ ] Confirm logging of PII access for audit (where applicable)`);
+  return lines.join('\n');
+}
+export const _internals = { PII_PATTERNS, SINK_PATTERNS };

package/src/dataflow/smt-feasibility.js ADDED Viewed

@@ -0,0 +1,189 @@
+// SMT path feasibility — Recommendation #3 of the world-class roadmap.
+//
+// For top-N findings per scan, generate SMT constraints from the IR
+// representing the conditions that must hold along the call-graph path
+// from source to sink. Discharge via a Z3 solver. If UNSAT, the
+// finding is provably infeasible and gets demoted to 'info' severity
+// with `pathFeasibility: 'unsat'`. If SAT, we emit a sample witness
+// (a concrete tainted input that triggers the sink) which is gold-standard
+// evidence for the developer.
+//
+// Solver backend: prefers `z3-solver` (Z3 WASM published on npm) when
+// installed; falls back to a constraint-emission-only mode that still
+// records the SMT-LIB script so a CI step can discharge it offline.
+//
+// Gating: opt-in via AGENTIC_SECURITY_SMT_FEASIBILITY=1. Always bounded
+// at top-MAX_PROOF_OBLIGATIONS findings per scan to keep wall-clock
+// under PROOF_BUDGET_MS.
+//
+// IMPORTANT — this module is NOT a generic symbolic executor. It targets
+// a narrow shape: "does there exist an input that flows from source S
+// through path P to sink K?" That's enough to prove or refute the
+// reachability claim on a finding the engine already produced. We do
+// NOT attempt to prove arbitrary safety properties.
+const PROOF_BUDGET_MS_DEFAULT = 30_000;
+const MAX_PROOF_OBLIGATIONS_DEFAULT = 50;
+const PER_QUERY_TIMEOUT_MS_DEFAULT = 5_000;
+// Lazy-load Z3. The module is permitted to be absent — when it is, we
+// fall back to constraint-emission-only mode (the SMT-LIB script is
+// attached to the finding for offline discharge).
+let _z3Mod = null;
+let _z3LoadAttempted = false;
+async function _loadZ3() {
+  if (_z3LoadAttempted) return _z3Mod;
+  _z3LoadAttempted = true;
+  try {
+    _z3Mod = await import('z3-solver');
+    if (typeof _z3Mod.init === 'function') await _z3Mod.init();
+  } catch { _z3Mod = null; }
+  return _z3Mod;
+}
+// ── Constraint emission ───────────────────────────────────────────────────
+/**
+ * Encode a single IR predicate (one node along the path) into an SMT-LIB
+ * assertion. Predicates supported in v1:
+ *   - `var = source(name)`       — declares var as a free symbolic string
+ *   - `var = const(literal)`     — equality with a constant
+ *   - `var = concat(a, b)`       — string concatenation
+ *   - `var = sanitize(x, kind)`  — applies a sanitizer; encoded as
+ *                                  `var = "safe"` (forces concrete)
+ *   - `assert reach(line N)`     — terminal predicate: this line must be
+ *                                  reachable
+ *   - `guard(cond)`              — a path condition (free-form text)
+ */
+function encodePredicate(p, idx) {
+  switch (p.kind) {
+    case 'source':
+      return `(declare-const ${p.var} String)`;
+    case 'const':
+      return `(assert (= ${p.var} ${JSON.stringify(p.value)}))`;
+    case 'concat':
+      return `(assert (= ${p.var} (str.++ ${p.a} ${p.b})))`;
+    case 'sanitize':
+      return `(assert (= ${p.var} "safe-${p.kind}-${idx}"))`;
+    case 'reach':
+      // Symbolic "this line is reached" — we don't really model reachability,
+      // we just record the obligation. The presence of the path is what
+      // matters; SAT just means "some input satisfies the path conditions."
+      return `; reach(${p.file}:${p.line})`;
+    case 'guard':
+      return `(assert ${p.smtCond || `(= ${p.var} ${JSON.stringify(p.value)})`})`;
+    default:
+      return `; unsupported predicate kind: ${p.kind}`;
+  }
+}
+/**
+ * Emit a complete SMT-LIB script for one finding. The script declares
+ * source variables, asserts every predicate, asks (check-sat). On SAT
+ * we (get-model) for the witness; on UNSAT the finding is infeasible.
+ */
+export function emitSmtScript(predicates, opts = {}) {
+  const lines = [];
+  lines.push('; SMT-LIB script — emitted by scanner/src/dataflow/smt-feasibility.js');
+  lines.push(`(set-logic QF_S)`);
+  lines.push(`(set-option :timeout ${opts.timeoutMs || PER_QUERY_TIMEOUT_MS_DEFAULT})`);
+  predicates.forEach((p, i) => lines.push(encodePredicate(p, i)));
+  lines.push('(check-sat)');
+  lines.push('(get-model)');
+  return lines.join('\n');
+}
+// ── Z3 discharge ──────────────────────────────────────────────────────────
+/**
+ * dischargeFinding(predicates, opts) — encode + solve. Returns one of:
+ *   { verdict: 'sat',     witness: { var: value } }
+ *   { verdict: 'unsat' }
+ *   { verdict: 'unknown', reason: '<why>' }
+ *   { verdict: 'pending', script: '<smt-lib text>' }  // when Z3 unavailable
+ */
+export async function dischargeFinding(predicates, opts = {}) {
+  if (!predicates || !predicates.length) return { verdict: 'unknown', reason: 'no-predicates' };
+  const script = emitSmtScript(predicates, opts);
+  const z3 = await _loadZ3();
+  if (!z3) return { verdict: 'pending', script };
+  try {
+    const { Context } = z3;
+    const ctx = new Context('main');
+    const solver = new ctx.Solver();
+    // Feed the script via parse — z3-solver supports SMT-LIB ingestion.
+    try { solver.fromString(script); }
+    catch (e) {
+      return { verdict: 'unknown', reason: 'parse-error: ' + String(e && e.message), script };
+    }
+    const start = Date.now();
+    const result = await Promise.race([
+      solver.check(),
+      new Promise(resolve => setTimeout(() => resolve('timeout'), opts.timeoutMs || PER_QUERY_TIMEOUT_MS_DEFAULT)),
+    ]);
+    const elapsed = Date.now() - start;
+    if (result === 'unsat') return { verdict: 'unsat', elapsedMs: elapsed };
+    if (result === 'timeout' || result === 'unknown') return { verdict: 'unknown', reason: result, elapsedMs: elapsed, script };
+    if (result === 'sat') {
+      // Best-effort witness extraction.
+      let witness = {};
+      try {
+        const model = solver.model();
+        for (const decl of model.decls()) witness[decl.name()] = String(model.get(decl));
+      } catch { /* no model */ }
+      return { verdict: 'sat', witness, elapsedMs: elapsed };
+    }
+    return { verdict: 'unknown', reason: String(result), elapsedMs: elapsed };
+  } catch (e) {
+    return { verdict: 'unknown', reason: String(e && e.message || e), script };
+  }
+}
+// ── Finding-level integration ─────────────────────────────────────────────
+/**
+ * Annotate the top-N findings with their feasibility verdict. Modifies
+ * findings in place — each gets a `pathFeasibility` field and (when
+ * SAT) a `feasibilityWitness` object. Findings whose verdict is UNSAT
+ * are demoted to 'info' severity.
+ */
+export async function annotatePathFeasibility(findings, opts = {}) {
+  if (!Array.isArray(findings)) return { annotated: 0, demoted: 0 };
+  const budget = opts.budgetMs || PROOF_BUDGET_MS_DEFAULT;
+  const max = opts.maxObligations || MAX_PROOF_OBLIGATIONS_DEFAULT;
+  // Prioritize: critical/high findings with concrete chains first.
+  const sorted = [...findings]
+    .filter(f => f.severity === 'critical' || f.severity === 'high')
+    .filter(f => Array.isArray(f.chain) || Array.isArray(f.taintPath))
+    .slice(0, max);
+  const start = Date.now();
+  let annotated = 0, demoted = 0;
+  for (const f of sorted) {
+    if (Date.now() - start > budget) {
+      f.pathFeasibility = 'unknown';
+      f.feasibilityReason = 'budget-exceeded';
+      continue;
+    }
+    const predicates = (f.chain || f.taintPath || []).map((step, i) => ({
+      kind: i === 0 ? 'source' : (step.kind || 'concat'),
+      var: `v${i}`,
+      a: `v${Math.max(0, i - 1)}`, b: '""',
+      value: step.value || '',
+      file: step.file, line: step.line,
+    }));
+    const r = await dischargeFinding(predicates, { timeoutMs: Math.min(5_000, budget) });
+    f.pathFeasibility = r.verdict;
+    if (r.witness) f.feasibilityWitness = r.witness;
+    if (r.script) f._smtScript = r.script.slice(0, 4000);
+    annotated++;
+    if (r.verdict === 'unsat') {
+      const before = f.severity;
+      f.severity = 'info';
+      f._pathFeasibilityDemoted = before;
+      demoted++;
+    }
+  }
+  return { annotated, demoted, elapsedMs: Date.now() - start };
+}
+export const _internals = { encodePredicate, emitSmtScript, PROOF_BUDGET_MS_DEFAULT, MAX_PROOF_OBLIGATIONS_DEFAULT };