@clear-capabilities/agentic-security-scanner 0.77.0 → 0.79.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/bin/.agentic-security/findings.json +1907 -0
  2. package/bin/.agentic-security/last-scan.json +1907 -0
  3. package/bin/.agentic-security/last-scan.json.sig +1 -0
  4. package/bin/.agentic-security/scan-history.json +166 -0
  5. package/bin/.agentic-security/streak.json +20 -0
  6. package/bin/agentic-security.js +55 -9
  7. package/dist/178.index.js +1 -1
  8. package/dist/384.index.js +1 -1
  9. package/dist/476.index.js +5 -5
  10. package/dist/637.index.js +1 -1
  11. package/dist/700.index.js +138 -0
  12. package/dist/718.index.js +159 -0
  13. package/dist/824.index.js +126 -0
  14. package/dist/838.index.js +1 -1
  15. package/dist/985.index.js +5 -0
  16. package/dist/agentic-security.mjs +32 -32
  17. package/dist/agentic-security.mjs.sha256 +1 -1
  18. package/package.json +4 -4
  19. package/src/dataflow/async-sequencing.js +16 -7
  20. package/src/dataflow/builtin-summaries.js +131 -0
  21. package/src/dataflow/catalog.js +107 -0
  22. package/src/dataflow/cross-repo.js +75 -1
  23. package/src/dataflow/engine.js +181 -8
  24. package/src/dataflow/implicit-flow.js +24 -6
  25. package/src/dataflow/stub-aware-filter.js +69 -11
  26. package/src/dataflow/summaries.js +28 -3
  27. package/src/engine-parallel.js +70 -0
  28. package/src/engine.js +270 -19
  29. package/src/integrations/index.js +2 -1
  30. package/src/ir/callgraph.js +27 -7
  31. package/src/ir/index.js +22 -1
  32. package/src/ir/parser-go.js +403 -0
  33. package/src/ir/parser-js.js +2 -0
  34. package/src/ir/parser-php.js +330 -0
  35. package/src/ir/parser-py.helper.py +137 -11
  36. package/src/ir/parser-rb.js +309 -0
  37. package/src/llm-validator/index.js +7 -5
  38. package/src/mcp/audit.js +5 -0
  39. package/src/posture/calibration-drift.js +2 -1
  40. package/src/posture/calibration.js +16 -1
  41. package/src/posture/fix-history.js +8 -2
  42. package/src/posture/profile.js +4 -5
  43. package/src/posture/rule-overrides.js +2 -3
  44. package/src/posture/rule-pack-signing.js +2 -3
  45. package/src/posture/rule-synthesis.js +5 -6
  46. package/src/posture/security-trend.js +4 -7
  47. package/src/posture/state-dir.js +124 -0
  48. package/src/posture/streak.js +3 -0
  49. package/src/posture/suppressions.js +5 -8
  50. package/src/posture/triage.js +16 -5
  51. package/src/posture/validator-metrics.js +3 -6
  52. package/src/report/index.js +23 -2
  53. package/src/sast/cache-poisoning.js +77 -0
  54. package/src/sast/comparison-safety.js +73 -0
  55. package/src/sast/db-taint.js +78 -0
  56. package/src/sast/graphql.js +127 -0
  57. package/src/sast/llm-stored-prompt.js +57 -0
  58. package/src/sast/mutation-xss.js +43 -0
  59. package/src/sast/nosql-injection.js +5 -0
  60. package/src/sast/null-byte-injection.js +76 -0
  61. package/src/sast/redos-nfa.js +338 -0
  62. package/src/sast/rust.js +26 -0
  63. package/src/sast/sensitive-data-logging.js +73 -0
  64. package/src/sast/weak-password-hash.js +77 -0
  65. package/src/sast/weak-randomness.js +100 -0
  66. package/src/sca/binary-metadata.js +124 -0
  67. package/src/sca/llm-function-extract.js +107 -0
  68. package/src/sca/py-package-functions.js +118 -0
  69. package/src/sca/vendor-detect.js +144 -0
@@ -0,0 +1,77 @@
1
+ // Weak password hashing detector.
2
+ //
3
+ // Context-gated: only fires when the hash input variable is named
4
+ // password/secret/credential or the enclosing function is password-related.
5
+ // Detects MD5/SHA1 without salt for password storage.
6
+
7
// Whole-word credential names (case-insensitive). Because of the \b anchors an
// identifier such as `userPassword` or `user_password` is NOT matched here;
// only the bare words listed below are.
const PASSWORD_CONTEXT = /\b(password|passwd|pwd|secret|credential|passphrase|pass_hash|user_pass)\b/i;

// Function names that indicate password handling. `createHash` is intentionally
// absent: the JS `createHash(...)` rule's own match text always contains that
// word, so listing it here would make the context gate unconditionally true
// for that rule.
const PASSWORD_FUNC = /\b(hashPassword|encryptPassword|checkPassword|verifyPassword|setPassword|validatePassword|hash_password|check_password)\b/i;

// Any of these words shortly before the hash call suppresses the finding
// (a salt is present, or a real password KDF is in use nearby).
const SALT_OR_KDF_HINT = /\b(salt|randomBytes|urandom|os\.urandom|crypto\.randomBytes|bcrypt|argon2|scrypt|pbkdf2)\b/i;

// 1-based line number of character offset `idx` inside `raw`.
function _line(raw, idx) {
  return raw.slice(0, idx).split('\n').length;
}

// Per-language rules. Capture group 1 of every rule is the first identifier
// passed to the hash call (the "input variable").
const PATTERNS = {
  js: {
    ext: /\.(?:js|jsx|ts|tsx|mjs|cjs)$/i,
    rules: [
      { re: /\bcreateHash\s*\(\s*['"](?:md5|sha1|sha-1|md4)['"]\s*\)[\s\S]{0,60}\.update\s*\(\s*(\w+)/g, label: 'createHash with weak algorithm' },
      { re: /\bmd5\s*\(\s*(\w+)/g, label: 'md5() function call' },
      { re: /\bsha1\s*\(\s*(\w+)/g, label: 'sha1() function call' },
    ],
  },
  py: {
    ext: /\.py$/i,
    rules: [
      { re: /\bhashlib\.(?:md5|sha1)\s*\(\s*(\w+)/g, label: 'hashlib.md5/sha1' },
      { re: /\bhashlib\.new\s*\(\s*['"](?:md5|sha1)['"]\s*\)[\s\S]{0,40}\.update\s*\(\s*(\w+)/g, label: 'hashlib.new with weak algorithm' },
    ],
  },
  go: {
    ext: /\.go$/i,
    rules: [
      { re: /\bmd5\.(?:Sum|New)\s*\(\s*(?:\[\]byte\s*\(\s*)?(\w+)/g, label: 'md5.Sum/New' },
      { re: /\bsha1\.(?:Sum|New)\s*\(\s*(?:\[\]byte\s*\(\s*)?(\w+)/g, label: 'sha1.Sum/New' },
    ],
  },
};

// Scan one source file for MD5/SHA-1 (and MD4 in JS) applied to password data.
//
// fp  - file path; only its extension is used, to select the rule set.
// raw - file contents as a string.
//
// Returns an array of finding objects (possibly empty). Returns [] for a
// missing path, non-string or empty content, content longer than 500,000
// characters, or an extension with no rule set.
//
// A match is reported only when
//   1. the hashed identifier, or the text from roughly 500 characters before
//      the match to 100 characters after it, contains a credential word or a
//      password-handling function name, and
//   2. none of the salt / KDF hint words occur in the 400 characters
//      immediately before the match.
export function scanWeakPasswordHash(fp, raw) {
  if (!fp || !raw || typeof raw !== 'string') return [];
  if (raw.length > 500_000) return [];

  const lang = Object.values(PATTERNS).find((entry) => entry.ext.test(fp));
  if (!lang) return [];

  const findings = [];
  for (const { re, label } of lang.rules) {
    // matchAll iterates over a private clone of `re`, so the shared /g regex
    // never has its lastIndex disturbed.
    for (const m of raw.matchAll(re)) {
      const inputVar = m[1] || '';

      // Start the context window at the beginning of the line that contains
      // the character 500 positions back. lastIndexOf returns -1 when there is
      // no earlier newline (any match near the top of the file); clamp that to
      // 0 — passing -1 to slice() would count from the END of the string and
      // produce an empty context.
      const newlineAt = raw.lastIndexOf('\n', Math.max(0, m.index - 500));
      const contextStart = newlineAt === -1 ? 0 : newlineAt + 1;
      const context = raw.slice(contextStart, m.index + m[0].length + 100);

      const inPasswordContext =
        PASSWORD_CONTEXT.test(inputVar) ||
        PASSWORD_CONTEXT.test(context) ||
        PASSWORD_FUNC.test(context);
      if (!inPasswordContext) continue;

      // Salt / KDF hint in the 400 characters before the call => not reported.
      const before = raw.slice(Math.max(0, m.index - 400), m.index);
      if (SALT_OR_KDF_HINT.test(before)) continue;

      const line = _line(raw, m.index);
      findings.push({
        id: `weak-pw-hash:${fp}:${line}`,
        file: fp, line,
        vuln: `Weak Password Hashing — ${label} for password without salt`,
        severity: 'critical',
        family: 'weak-password-hash',
        cwe: 'CWE-916',
        parser: 'WEAK-PW-HASH',
        confidence: 0.80,
        description: `${label} used on a password-context variable without salt. MD5/SHA1 are fast hashes trivially reversed via rainbow tables. Unsalted hashes are cracked in seconds.`,
        remediation: 'Use bcrypt (cost ≥ 12), argon2id, or scrypt. Never use MD5/SHA1/SHA256 for password storage.',
      });
    }
  }
  return findings;
}
@@ -0,0 +1,100 @@
1
+ // Cross-language insecure randomness detector.
2
+ //
3
+ // Flags usage of non-cryptographic PRNGs when the result is assigned to a
4
+ // security-sensitive variable (token, session, nonce, key, secret, etc.).
5
+ //
6
+ // Coverage:
7
+ // JS/TS: Math.random()
8
+ // Python: random.random(), random.randint(), random.choice(), random.uniform()
9
+ // Go: rand.Intn(), rand.Int(), rand.Float64(), rand.Int31(), rand.Int63()
10
+ // Ruby: rand(), Random.rand, Random.new.rand
11
+ // PHP: rand(), mt_rand(), array_rand(), shuffle()
12
+
13
// Whole-word, case-insensitive markers of a security-sensitive value.
const SECURITY_CONTEXT = /\b(token|session|nonce|key|secret|password|otp|csrf|salt|code|pin|auth|reset|verify|captcha|challenge|ticket)\b/i;

// 1-based line number of character offset `idx` inside `raw`.
function _line(raw, idx) {
  return raw.slice(0, idx).split('\n').length;
}

// Text of the line immediately above the line starting at `lineStart`
// (without its trailing newline), or '' when there is no such line.
function _previousLine(raw, lineStart) {
  if (lineStart === 0) return '';
  const prevEnd = lineStart - 1; // index of the '\n' that ends the previous line
  const prevStart = prevEnd === 0 ? 0 : raw.lastIndexOf('\n', prevEnd - 1) + 1;
  return raw.slice(prevStart, prevEnd);
}

// Non-cryptographic PRNG calls per language, selected by file extension.
const LANG_PATTERNS = {
  js: {
    ext: /\.(?:js|jsx|ts|tsx|mjs|cjs)$/i,
    patterns: [
      { re: /\bMath\.random\s*\(\s*\)/g, label: 'Math.random()' },
    ],
  },
  py: {
    ext: /\.py$/i,
    patterns: [
      { re: /\brandom\.(?:random|randint|choice|uniform|randrange|sample|getrandbits)\s*\(/g, label: 'random module (non-crypto)' },
    ],
  },
  go: {
    ext: /\.go$/i,
    patterns: [
      // The lookahead skips crypto/rand's `rand.Int(rand.Reader, max)`, which is
      // the secure call this scanner itself recommends as remediation.
      { re: /\brand\.(?:Intn|Int|Float64|Float32|Int31|Int63|Int31n|Int63n|Uint32|Uint64)\s*\((?!\s*rand\.Reader\b)/g, label: 'math/rand (non-crypto)' },
    ],
  },
  rb: {
    ext: /\.rb$/i,
    patterns: [
      // The lookbehind skips `SecureRandom.rand(`, which would otherwise match
      // the bare `rand(` alternative.
      { re: /(?<!SecureRandom\.)\b(?:rand\s*\(|Random\.(?:rand|new\.rand)\s*\()/g, label: 'Kernel.rand / Random.rand' },
    ],
  },
  php: {
    ext: /\.(?:php|phtml)$/i,
    patterns: [
      // `shuffle` is included to match the coverage listed in the file header.
      { re: /\b(?:rand|mt_rand|array_rand|shuffle)\s*\(/g, label: 'rand() / mt_rand()' },
    ],
  },
};

// Scan one source file for non-cryptographic random number generation used
// next to a security-sensitive name.
//
// fp  - file path; only its extension is used, to select the language.
// raw - file contents as a string.
//
// Returns an array of finding objects (possibly empty). Returns [] for a
// missing path, non-string or empty content, content longer than 500,000
// characters, or an unsupported extension.
//
// A PRNG call is reported when a SECURITY_CONTEXT word appears on the same
// line, or, failing that, on the line directly above it.
export function scanWeakRandomness(fp, raw) {
  if (!fp || !raw || typeof raw !== 'string') return [];
  if (raw.length > 500_000) return [];

  const lang = Object.values(LANG_PATTERNS).find((entry) => entry.ext.test(fp));
  if (!lang) return [];

  const findings = [];
  for (const { re, label } of lang.patterns) {
    // matchAll works on a clone of `re`, so the shared /g regex keeps no state.
    for (const m of raw.matchAll(re)) {
      const lineStart = raw.lastIndexOf('\n', m.index) + 1;
      const newlineAt = raw.indexOf('\n', m.index);
      const lineText = raw.slice(lineStart, newlineAt === -1 ? raw.length : newlineAt);

      // A match on the first line has no previous line; _previousLine returns
      // '' there instead of slicing with a negative end (which would pull in
      // almost the whole file and match security words anywhere in it).
      const sensitive =
        SECURITY_CONTEXT.test(lineText) ||
        SECURITY_CONTEXT.test(_previousLine(raw, lineStart));
      if (!sensitive) continue;

      const line = _line(raw, m.index);
      findings.push({
        id: `weak-rng:${fp}:${line}`,
        file: fp,
        line,
        vuln: `Insecure Randomness — ${label} used for security-sensitive value`,
        severity: 'high',
        family: 'weak-rng',
        cwe: 'CWE-330',
        parser: 'WEAK-RNG',
        confidence: 0.80,
        description: `${label} is not cryptographically secure. An attacker can predict the output and forge tokens, bypass OTP, or guess session identifiers.`,
        remediation: _remediation(fp),
        snippet: lineText.trim().slice(0, 80),
      });
    }
  }
  return findings;
}

// Language-specific fix advice, chosen by file extension; JS/TS is the default.
function _remediation(fp) {
  if (/\.py$/i.test(fp)) return 'Use secrets.token_hex(32), secrets.token_urlsafe(32), or secrets.randbelow(n).';
  if (/\.go$/i.test(fp)) return 'Use crypto/rand: n, _ := rand.Int(rand.Reader, big.NewInt(999999)).';
  if (/\.rb$/i.test(fp)) return 'Use SecureRandom.hex(32) or SecureRandom.uuid.';
  if (/\.(?:php|phtml)$/i.test(fp)) return 'Use random_bytes(32) or random_int(0, $max).';
  return 'Use crypto.randomBytes(32).toString("hex") or crypto.getRandomValues().';
}
@@ -0,0 +1,124 @@
1
+ // Binary artifact SCA metadata extraction.
2
+ //
3
+ // Reads dependency information from compiled artifacts:
4
+ // - Java JAR files: META-INF/MANIFEST.MF for version + classpath
5
+ // - Go binaries: embedded go.buildinfo for dependency tree
6
+ //
7
+ // Gated behind AGENTIC_SECURITY_BINARY_SCA=1 (opt-in).
8
+ // Does NOT execute binaries — only reads metadata sections.
9
+
10
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { execFileSync } from 'node:child_process';
13
+
14
// Opt-in switch for binary SCA: true only when the environment variable
// AGENTIC_SECURITY_BINARY_SCA is exactly the string '1'.
export function isBinaryScaEnabled() {
  const { AGENTIC_SECURITY_BINARY_SCA: flag } = process.env;
  return flag === '1';
}
17
+
18
// Read Maven-style coordinates from a JAR using the `jar` command-line tool.
//
// jarPath - path to a file whose name ends in '.jar'.
//
// Returns a component object ({ name, version, group, ecosystem, filePath,
// scope, purl, isUnpinned, _source }) or null when the argument is not a
// '.jar' path string, the archive has no root META-INF/MANIFEST.MF, or the
// `jar` tool is missing / fails / times out (5 s per invocation).
//
// Values come from the manifest (Implementation-Vendor-Id,
// Implementation-Title, Implementation-Version / Bundle-Version) and are
// overridden by the first readable pom.properties entry when one exists.
//
// Entries are extracted into a fresh private temporary directory that is
// removed before returning, so results never come from a previous archive's
// leftovers and nothing is written to a shared, predictable location.
export function extractJarMetadata(jarPath) {
  if (typeof jarPath !== 'string' || !jarPath.endsWith('.jar')) return null;

  // `jar xf` runs with cwd set to the work directory, so a relative jarPath
  // must be made absolute first or the archive would not be found.
  const archive = path.resolve(jarPath);
  let workDir = null;
  try {
    const listing = execFileSync('jar', ['tf', archive], { encoding: 'utf8', timeout: 5000 });
    const entries = listing.split(/\r?\n/).filter(Boolean);
    if (!entries.includes('META-INF/MANIFEST.MF')) return null;

    workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agentic-security-jar-'));
    execFileSync('jar', ['xf', archive, 'META-INF/MANIFEST.MF'], { timeout: 5000, cwd: workDir });
    const manifestPath = path.join(workDir, 'META-INF', 'MANIFEST.MF');
    if (!fs.existsSync(manifestPath)) return null;

    // Split on CRLF as well as LF: with a bare '\n' split every line of a
    // CRLF manifest keeps a trailing '\r', which `.` and `$` do not accept,
    // so no attribute would ever match.
    const attrs = {};
    for (const line of fs.readFileSync(manifestPath, 'utf8').split(/\r?\n/)) {
      const m = line.match(/^([A-Za-z-]+):\s*(.+)$/);
      if (m) attrs[m[1].toLowerCase()] = m[2].trim();
    }

    let groupId = attrs['implementation-vendor-id'] || '';
    let artifactId = attrs['implementation-title'] || path.basename(jarPath, '.jar');
    let version = attrs['implementation-version'] || attrs['bundle-version'] || 'unknown';

    // Entry names come from the archive and are untrusted: keep only those
    // that resolve to a location inside the work directory.
    const insideWorkDir = (entry) => path.resolve(workDir, entry).startsWith(workDir + path.sep);
    const pomEntries = entries.filter((e) => e.includes('pom.properties') && insideWorkDir(e));
    if (pomEntries.length) {
      try {
        execFileSync('jar', ['xf', archive, '--', ...pomEntries], { timeout: 5000, cwd: workDir });
        for (const entry of pomEntries) {
          const propsPath = path.join(workDir, entry);
          if (!fs.existsSync(propsPath)) continue;
          for (const line of fs.readFileSync(propsPath, 'utf8').split(/\r?\n/)) {
            const eq = line.indexOf('=');
            if (eq === -1) continue;
            // Take everything after the FIRST '=' so values containing '='
            // are not truncated.
            const value = line.slice(eq + 1).trim();
            if (line.startsWith('groupId=')) groupId = value;
            if (line.startsWith('artifactId=')) artifactId = value;
            if (line.startsWith('version=')) version = value;
          }
          break; // only the first pom.properties found is used
        }
      } catch { /* pom extraction optional */ }
    }

    return {
      name: artifactId,
      version,
      group: groupId,
      ecosystem: 'maven',
      filePath: jarPath,
      scope: 'required',
      purl: `pkg:maven/${groupId}/${artifactId}@${version}`,
      isUnpinned: false,
      _source: 'jar-manifest',
    };
  } catch {
    return null;
  } finally {
    if (workDir) {
      try { fs.rmSync(workDir, { recursive: true, force: true }); } catch { /* best-effort cleanup */ }
    }
  }
}
71
+
72
// List the module dependencies embedded in a Go binary by parsing the output
// of `go version -m <binPath>`.
//
// binPath - path to the candidate binary.
//
// Returns one component object per `dep <module> <version>` line, with the
// leading 'v' removed from `version` but kept in `purl`. Returns [] for a
// falsy path or when the `go` tool is missing, fails, or exceeds the 5 s
// timeout.
export function extractGoBuildInfo(binPath) {
  if (!binPath) return [];

  let report;
  try {
    report = execFileSync('go', ['version', '-m', binPath], { encoding: 'utf8', timeout: 5000 });
  } catch {
    return [];
  }

  const depLine = /^\s*dep\s+([\w./-]+)\s+(v[\d.]+(?:-[\w.]+)?)/;
  return report.split('\n').flatMap((row) => {
    const hit = row.match(depLine);
    if (!hit) return [];
    const [, modulePath, tag] = hit;
    return [{
      name: modulePath,
      version: tag.replace(/^v/, ''),
      group: '',
      ecosystem: 'golang',
      filePath: binPath,
      scope: 'required',
      purl: `pkg:golang/${modulePath}@${tag}`,
      isUnpinned: false,
      _source: 'go-buildinfo',
    }];
  });
}
96
+
97
// Collect SCA components from compiled artifacts under `scanRoot`.
//
// fileContents - accepted for signature compatibility; not used here.
// scanRoot     - directory to scan; defaults to '.'.
//
// Returns [] unless binary SCA is enabled (see isBinaryScaEnabled). When
// enabled, returns components from
//   - up to 20 '*.jar' files anywhere under scanRoot, and
//   - up to 10 executable regular files directly inside scanRoot whose names
//     contain no '.', each inspected with extractGoBuildInfo.
// Paths containing 'node_modules' are ignored. Any filesystem error is treated
// as "nothing found".
export function scanBinaryArtifacts(fileContents, scanRoot) {
  if (!isBinaryScaEnabled()) return [];

  const MAX_JARS = 20;
  const MAX_GO_BINARIES = 10;
  const root = scanRoot || '.';
  const components = [];

  // Walk the tree once and reuse the listing for both artifact kinds.
  let entries;
  try {
    entries = fs.readdirSync(root, { recursive: true })
      .filter((f) => !f.includes('node_modules'));
  } catch {
    return components; // root missing or unreadable
  }

  for (const jar of entries.filter((f) => f.endsWith('.jar')).slice(0, MAX_JARS)) {
    const meta = extractJarMetadata(path.join(root, jar));
    if (meta) components.push(meta);
  }

  // Top-level, extension-less names only. The cap is applied to files that
  // actually pass the executable check, so directories such as `src` or `bin`
  // no longer use up the budget before any binary is looked at.
  let inspected = 0;
  for (const name of entries) {
    if (inspected >= MAX_GO_BINARIES) break;
    if (name.includes('.') || /[\\/]/.test(name)) continue;
    const fp = path.join(root, name);
    try {
      const info = fs.statSync(fp);
      if (!info.isFile() || !(info.mode & 0o111)) continue;
      inspected += 1;
      components.push(...extractGoBuildInfo(fp));
    } catch { /* skip entries that cannot be stat'ed */ }
  }

  return components;
}
@@ -0,0 +1,107 @@
1
+ // LLM-assisted vulnerable function extraction for SCA findings.
2
+ //
3
+ // For CVEs without OSV ecosystem_specific data or GHSA fix commits,
4
+ // uses an LLM to extract vulnerable function names from the CVE description.
5
+ //
6
+ // Gated behind AGENTIC_SECURITY_LLM_SCA=1 (opt-in).
7
+ // Uses the same LLM endpoint config as the SAST validator.
8
+
9
+ import * as crypto from 'node:crypto';
10
+ import * as fs from 'node:fs';
11
+ import * as path from 'node:path';
12
+
13
// Directory holding cached LLM answers:
//   $XDG_CONFIG_HOME/agentic-security/llm-sca-cache      when XDG_CONFIG_HOME is set,
//   $HOME/.config/agentic-security/llm-sca-cache         otherwise
// ($HOME falls back to '/tmp' when unset). Resolved once at module load.
const CACHE_DIR = path.join(
  ...(process.env.XDG_CONFIG_HOME
    ? [process.env.XDG_CONFIG_HOME]
    : [process.env.HOME || '/tmp', '.config']),
  'agentic-security',
  'llm-sca-cache',
);
16
+
17
// Cache file stem for an advisory id: the first 16 hex characters of
// SHA-256('sca-fn:' + osvId).
function _cacheKey(osvId) {
  const hasher = crypto.createHash('sha256');
  hasher.update(`sca-fn:${osvId}`);
  return hasher.digest('hex').substring(0, 16);
}
20
+
21
// Load the cached entry for `osvId` from CACHE_DIR.
// Returns the parsed JSON value, or null when the file is missing, unreadable,
// or not valid JSON.
function _readCache(osvId) {
  try {
    const entryPath = path.join(CACHE_DIR, `${_cacheKey(osvId)}.json`);
    const serialized = fs.readFileSync(entryPath, 'utf8');
    return JSON.parse(serialized);
  } catch {
    return null;
  }
}
27
+
28
// Persist `data` as JSON for `osvId` under CACHE_DIR, creating the directory
// when needed. Best-effort: every failure is ignored.
function _writeCache(osvId, data) {
  try {
    fs.mkdirSync(CACHE_DIR, { recursive: true });
    const entryPath = path.join(CACHE_DIR, `${_cacheKey(osvId)}.json`);
    const serialized = JSON.stringify(data);
    fs.writeFileSync(entryPath, serialized);
  } catch {
    /* cache write failure is non-fatal */
  }
}
34
+
35
// Opt-in switch for LLM-assisted SCA: true only when the environment variable
// AGENTIC_SECURITY_LLM_SCA is exactly the string '1'.
export function isLlmScaEnabled() {
  const { AGENTIC_SECURITY_LLM_SCA: flag } = process.env;
  return flag === '1';
}
38
+
39
// Read the LLM connection settings from the environment.
// Returns { endpoint, apiKey, model } when AGENTIC_SECURITY_LLM_ENDPOINT is
// set (model defaults to 'unknown'; apiKey may be undefined), otherwise null.
function _endpointConfig() {
  const env = process.env;
  if (!env.AGENTIC_SECURITY_LLM_ENDPOINT) return null;
  return {
    endpoint: env.AGENTIC_SECURITY_LLM_ENDPOINT,
    apiKey: env.AGENTIC_SECURITY_LLM_API_KEY,
    model: env.AGENTIC_SECURITY_LLM_MODEL || 'unknown',
  };
}
45
+
46
// POST `prompt` to the configured LLM endpoint and pull the first flat JSON
// object containing a "functions" array out of the response text.
//
// prompt - text sent as the `prompt` field of the JSON request body.
// config - { endpoint, apiKey, model }; apiKey, when present, is sent as a
//          Bearer token.
//
// Resolves to the parsed object, or null when the response status is not OK,
// no such object is found in the body, or the candidate text is not valid
// JSON. The request is aborted after 15 s; network and abort errors reject.
async function _askLlm(prompt, config) {
  const { endpoint, apiKey, model } = config;
  const headers = apiKey
    ? { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` }
    : { 'Content-Type': 'application/json' };

  const response = await fetch(endpoint, {
    method: 'POST',
    headers,
    body: JSON.stringify({ prompt, model }),
    signal: AbortSignal.timeout(15000),
  });
  if (!response.ok) return null;

  const body = await response.text();
  const [candidate] = body.match(/\{[^{}]*"functions"\s*:\s*\[[^\]]*\][^{}]*\}/) || [];
  if (candidate === undefined) return null;

  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
61
+
62
// Ask the configured LLM which functions are vulnerable for supply-chain
// findings that have no function-level data yet.
//
// supplyChain - array of finding objects (null/undefined is treated as empty).
//               Only entries with type 'vulnerable_dep', no osvVulnFunctions,
//               a truthy noKnownCallSite and a description are considered.
// opts        - reserved; currently unused.
//
// Resolves to the entries that were enriched. Enriched entries are mutated in
// place: `osvVulnFunctions` is set and `_llmFunctionExtracted` becomes true.
// Resolves to [] when the feature is disabled or no endpoint is configured.
//
// Answers (including empty ones) are cached per advisory id. At most
// BATCH_LIMIT LLM requests are made per call; cache hits do not count against
// that budget, so candidates beyond the first BATCH_LIMIT are reached on later
// runs once earlier ones are cached. Entries without an `osvId` are never
// cached, because they would all share one cache key and receive each other's
// results.
export async function extractVulnFunctionsViaLLM(supplyChain, opts = {}) {
  if (!isLlmScaEnabled()) return [];
  const config = _endpointConfig();
  if (!config) return [];

  const BATCH_LIMIT = 20;
  const enriched = [];
  const candidates = (supplyChain || []).filter(sc =>
    sc.type === 'vulnerable_dep' &&
    (!sc.osvVulnFunctions || !sc.osvVulnFunctions.length) &&
    sc.noKnownCallSite &&
    sc.description
  );

  const attach = (sc, fns) => {
    sc.osvVulnFunctions = fns;
    sc._llmFunctionExtracted = true;
    enriched.push(sc);
  };

  let llmCalls = 0;
  for (const sc of candidates) {
    const cacheable = Boolean(sc.osvId);

    const cached = cacheable ? _readCache(sc.osvId) : null;
    if (cached) {
      if (Array.isArray(cached.functions) && cached.functions.length) attach(sc, cached.functions);
      continue;
    }

    // Budget exhausted: keep looping so remaining cache hits are still served.
    if (llmCalls >= BATCH_LIMIT) continue;
    llmCalls += 1;

    // NOTE(review): `sc.ecosystem` is assumed to be present on findings from
    // non-npm ecosystems — confirm against the producer. When absent, the
    // prompt is identical to the previous hard-coded "npm" wording.
    const prompt = `Given security advisory ${sc.osvId || ''} (${sc.cveAliases?.[0] || ''}) affecting ${sc.ecosystem || 'npm'} package "${sc.name}" version ${sc.version}:\n\nDescription: ${sc.description.slice(0, 500)}\n\nWhat specific exported function(s) in this package are vulnerable? Return ONLY a JSON object: { "functions": ["functionName1", "functionName2"] }\n\nIf you cannot determine the specific functions, return: { "functions": [] }`;

    try {
      const result = await _askLlm(prompt, config);
      const answered = Boolean(result) && Array.isArray(result.functions);
      const fns = answered
        ? result.functions.filter(f => typeof f === 'string' && f.length > 0 && f.length < 100)
        : [];
      if (cacheable) {
        _writeCache(sc.osvId, { functions: fns, model: config.model, extractedAt: new Date().toISOString() });
      }
      if (fns.length) attach(sc, fns);
    } catch {
      // LLM call failure — skip, don't cache (may be transient)
    }
  }
  return enriched;
}
@@ -0,0 +1,118 @@
1
+ // Python package function extraction via the CST parser.
2
+ //
3
+ // Locates an installed Python package in site-packages or .venv,
4
+ // parses its source files via the Python CST parser, and returns
5
+ // a map of exported function names. Used by markUsedVulnFunctions
6
+ // to validate that OSV-named vulnerable functions actually exist
7
+ // in the installed version.
8
+
9
+ import * as fs from 'node:fs';
10
+ import * as path from 'node:path';
11
+ import { execFileSync } from 'node:child_process';
12
+ import { parsePythonFilesBatch, probePythonAvailable } from '../ir/parser-py-cst.js';
13
+
14
// Virtual-environment directory names probed under the scan root, in order.
const VENV_DIRS = ['.venv', 'venv', '.env', 'env'];

// Locate a site-packages directory for the project being scanned.
//
// scanRoot - project directory; defaults to '.'.
//
// Looks for <scanRoot>/<venv>/lib/python*/site-packages for each name in
// VENV_DIRS and returns the first one that exists. If none is found, asks
// `python3` for site.getsitepackages()[0] (5 s timeout) and returns that path
// when it exists. Returns null otherwise.
function _findSitePackages(scanRoot) {
  for (const vdir of VENV_DIRS) {
    const lib = path.join(scanRoot || '.', vdir, 'lib');
    if (!fs.existsSync(lib)) continue;

    // `lib` may be a regular file or unreadable; treat that like "not a venv"
    // and keep probing instead of letting the exception reach the caller.
    let pydirs;
    try {
      pydirs = fs.readdirSync(lib).filter((d) => d.startsWith('python'));
    } catch {
      continue;
    }

    for (const pydir of pydirs) {
      const sp = path.join(lib, pydir, 'site-packages');
      if (fs.existsSync(sp)) return sp;
    }
  }

  // Fallback: ask python3 directly
  try {
    const out = execFileSync('python3', ['-c', 'import site; print(site.getsitepackages()[0])'], {
      encoding: 'utf8', timeout: 5000,
    }).trim();
    if (out && fs.existsSync(out)) return out;
  } catch { /* no python3 or no site-packages */ }
  return null;
}
37
+
38
// Resolve the directory of `packageName` inside `sitePackages`.
//
// Tries, in order: the name lower-cased with '-' replaced by '_', the name
// lower-cased, and the name as given. Returns the first candidate that exists
// and is a directory, or null (also when `sitePackages` is falsy).
function _findPackageDir(sitePackages, packageName) {
  if (!sitePackages) return null;

  const lowered = packageName.toLowerCase();
  const spellings = [packageName.replace(/-/g, '_').toLowerCase(), lowered, packageName];

  const hit = spellings
    .map((spelling) => path.join(sitePackages, spelling))
    .find((candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isDirectory());

  return hit === undefined ? null : hit;
}
52
+
53
// Read Python sources below `dir` (recursively).
//
// dir      - directory to walk.
// maxFiles - maximum number of '.py' paths taken from the listing (default 50).
//
// Returns [{ file, content }] where `file` is the path relative to `dir`.
// Files that cannot be read, or whose content is 1,000,000 characters or
// longer, are skipped. Returns [] when the directory cannot be listed.
function _readPyFilesFromDir(dir, maxFiles = 50) {
  let relPaths;
  try {
    relPaths = fs.readdirSync(dir, { recursive: true })
      .filter((name) => name.endsWith('.py'))
      .slice(0, maxFiles);
  } catch {
    return []; // dir not readable
  }

  const collected = [];
  for (const rel of relPaths) {
    let text;
    try {
      text = fs.readFileSync(path.join(dir, rel), 'utf8');
    } catch {
      continue; // skip unreadable files
    }
    if (text.length < 1_000_000) collected.push({ file: rel, content: text });
  }
  return collected;
}
71
+
72
// Build a map of the public functions defined by an installed Python package.
//
// packageName - distribution name to look up in site-packages.
// scanRoot    - project directory used to locate a virtual environment.
//
// Returns a Map from function name to { file, line, qid, params } for every
// parsed function whose name does not start with '_'. When the same name
// occurs more than once, the last one parsed wins. Returns null when the
// Python probe reports not-ok, the package directory is not found, it holds
// no readable '.py' files, or the batch parser does not return an array.
export function extractPythonPackageFunctions(packageName, scanRoot) {
  if (!probePythonAvailable().ok) return null;

  const pkgDir = _findPackageDir(_findSitePackages(scanRoot), packageName);
  if (!pkgDir) return null;

  const sources = _readPyFilesFromDir(pkgDir);
  if (sources.length === 0) return null;

  const parsed = parsePythonFilesBatch(sources);
  if (!Array.isArray(parsed)) return null;

  const publicFunctions = new Map();
  parsed.forEach((unit) => {
    const declared = (unit && unit.functions) || [];
    for (const fn of declared) {
      const isPublic = fn.name && !fn.name.startsWith('_');
      if (!isPublic) continue;
      publicFunctions.set(fn.name, {
        file: unit.file,
        line: fn.line,
        qid: fn.qid,
        params: fn.params,
      });
    }
  });
  return publicFunctions;
}
102
+
103
// Split OSV-reported function names into those found in the installed package
// and those that are not.
//
// packageName  - Python package to inspect.
// osvFunctions - function names, optionally dotted ('module.func').
// scanRoot     - project directory used to locate the installation.
//
// Returns { validated, missing }. A name counts as found when either its last
// dotted segment or the full name is a key of the package's function map;
// `validated` then receives the last segment, `missing` the original name.
// When the package cannot be inspected, every input name is returned as
// validated unchanged. Empty or missing input gives two empty arrays.
export function validateOsvFunctionsExist(packageName, osvFunctions, scanRoot) {
  if (!osvFunctions || !osvFunctions.length) return { validated: [], missing: [] };

  const known = extractPythonPackageFunctions(packageName, scanRoot);
  if (!known) return { validated: osvFunctions, missing: [] };

  const outcome = { validated: [], missing: [] };
  osvFunctions.forEach((qualified) => {
    const bare = qualified.includes('.') ? qualified.split('.').pop() : qualified;
    const present = known.has(bare) || known.has(qualified);
    if (present) {
      outcome.validated.push(bare);
    } else {
      outcome.missing.push(qualified);
    }
  });
  return outcome;
}
@@ -0,0 +1,144 @@
1
+ // Vendored / copied library detection via version-string fingerprinting.
2
+ //
3
+ // Detects library code copied into src/ that bypasses SCA. Uses version
4
+ // string patterns (_.VERSION, jQuery.fn.jquery, etc.) and characteristic
5
+ // function signatures to identify vendored libraries.
6
+
7
// Version-string fingerprints. For each library the patterns are tried in
// order and the first one that matches decides the result; `versionGroup`
// names the capture group holding the version, `version: null` marks a
// pattern that identifies the library without a version.
const VERSION_FINGERPRINTS = [
  { pkg: 'lodash', ecosystem: 'npm', patterns: [
    { re: /\b(?:lodash|_)\.VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
    { re: /\b__lodash_hash_undefined__\b/, version: null },
  ]},
  { pkg: 'jquery', ecosystem: 'npm', patterns: [
    { re: /jQuery\.fn\.jquery\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
    { re: /\bjQuery\.fn\.init\b/, version: null },
  ]},
  { pkg: 'underscore', ecosystem: 'npm', patterns: [
    { re: /\b_\.VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'moment', ecosystem: 'npm', patterns: [
    { re: /\bmoment\.version\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
    { re: /\bmoment\.(?:utc|parseZone|duration|locale)\b/, version: null },
  ]},
  { pkg: 'handlebars', ecosystem: 'npm', patterns: [
    { re: /\bHandlebars\.VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'backbone', ecosystem: 'npm', patterns: [
    { re: /\bBackbone\.VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'angular', ecosystem: 'npm', patterns: [
    { re: /\bangular\.version\s*=\s*\{[^}]*full\s*:\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'vue', ecosystem: 'npm', patterns: [
    { re: /\bVue\.version\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'react', ecosystem: 'npm', patterns: [
    { re: /\bReactVersion\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'dompurify', ecosystem: 'npm', patterns: [
    { re: /\bDOMPurify\.version\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'marked', ecosystem: 'npm', patterns: [
    { re: /\bmarked\.(?:defaults|setOptions|use|parse)\b[\s\S]{0,200}version\s*[:=]\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'axios', ecosystem: 'npm', patterns: [
    { re: /\baxios\.VERSION\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'socket.io-client', ecosystem: 'npm', patterns: [
    { re: /\bio\.protocol\s*=\s*(\d+)/, version: null },
  ]},
  { pkg: 'highlight.js', ecosystem: 'npm', patterns: [
    { re: /\bhljs\.versionString\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
  { pkg: 'chart.js', ecosystem: 'npm', patterns: [
    { re: /\bChart\.version\s*=\s*['"](\d+\.\d+\.\d+)['"]/, versionGroup: 1 },
  ]},
];

// Paths under these directories are never inspected.
const SKIP_DIRS = /(?:^|[/\\])(?:node_modules|vendor|dist|build|\.next|__pycache__|\.git)[/\\]/;

// Function-body signatures for copies that carry no version string. A file
// matches when it defines a function named `fn` and at least 60% of
// `bodyContains` occur in the 2000 characters starting at that definition.
// `paramMin` is carried as data only; it is not enforced.
//
// Built once at module load, with the definition regex precompiled. The
// `name = function` / `name: function` alternative is anchored with \b so
// that e.g. `reparse = function (` is not taken for a definition of `parse`.
const FUNCTION_BODY_SIGS = [
  { pkg: 'lodash', ecosystem: 'npm', fn: 'merge', paramMin: 1,
    bodyContains: ['assignValue', 'baseFor', 'isObject', 'baseMerge'] },
  { pkg: 'lodash', ecosystem: 'npm', fn: 'template', paramMin: 1,
    bodyContains: ['sourceURL', 'interpolate', 'evaluate', 'escape'] },
  { pkg: 'lodash', ecosystem: 'npm', fn: 'defaultsDeep', paramMin: 1,
    bodyContains: ['baseMerge', 'isMergeableObject', 'customDefaultsMerge'] },
  { pkg: 'jquery', ecosystem: 'npm', fn: 'ajax', paramMin: 1,
    bodyContains: ['XMLHttpRequest', 'ajaxSettings', 'crossDomain', 'responseFields'] },
  { pkg: 'handlebars', ecosystem: 'npm', fn: 'compile', paramMin: 1,
    bodyContains: ['templateSpec', 'container', 'invokePartial', 'blockParams'] },
  { pkg: 'marked', ecosystem: 'npm', fn: 'parse', paramMin: 1,
    bodyContains: ['Lexer', 'Parser', 'blockTokens', 'walkTokens'] },
  { pkg: 'ejs', ecosystem: 'npm', fn: 'render', paramMin: 1,
    bodyContains: ['includeFile', 'resolveInclude', 'rethrow', 'escapeFn'] },
  { pkg: 'moment', ecosystem: 'npm', fn: 'format', paramMin: 0,
    bodyContains: ['formatMoment', 'expandFormat', 'makeFormatFunction', 'localFormattingTokens'] },
  { pkg: 'underscore', ecosystem: 'npm', fn: 'template', paramMin: 1,
    bodyContains: ['interpolate', 'evaluate', 'escape', 'templateSettings'] },
  { pkg: 'minimist', ecosystem: 'npm', fn: 'parse', paramMin: 1,
    bodyContains: ['boolean', 'alias', 'default', 'stopEarly', 'unknown'] },
].map((sig) => ({
  ...sig,
  definitionRe: new RegExp(`(?:function\\s+${sig.fn}|(?:const|let|var)\\s+${sig.fn}\\s*=|\\b${sig.fn}\\s*[:=]\\s*function)\\s*\\(`, 'g'),
  required: Math.ceil(sig.bodyContains.length * 0.6),
}));

// Detect library code that was copied into the project tree.
//
// fileContents - object mapping file path to file text.
//
// Returns an array of { name, version, ecosystem, file, scope: 'vendored',
// isVendored: true } records, at most one per (library, file) pair. Records
// produced by the function-body pass additionally carry
// `_detectionMethod: 'function-body-signature'` and `_matchedKeywords`, and
// always have version 'unknown'. Returns [] for a non-object argument.
//
// Pass 1 applies VERSION_FINGERPRINTS to every file of at least 500
// characters. Pass 2 applies FUNCTION_BODY_SIGS to .js/.mjs/.cjs files of
// 200 to 500,000 characters. Both passes ignore SKIP_DIRS paths and
// non-string or empty contents.
export function detectVendoredLibraries(fileContents) {
  if (!fileContents || typeof fileContents !== 'object') return [];

  const detected = [];
  const seen = new Set();
  const files = Object.entries(fileContents)
    .filter(([fp, content]) => content && typeof content === 'string' && !SKIP_DIRS.test(fp));

  // Pass 1: version-string fingerprints.
  for (const [fp, content] of files) {
    if (content.length < 500) continue;

    for (const lib of VERSION_FINGERPRINTS) {
      const key = `${lib.pkg}:${fp}`;
      if (seen.has(key)) continue;

      for (const pat of lib.patterns) {
        const m = content.match(pat.re);
        if (!m) continue;
        const version = pat.versionGroup ? m[pat.versionGroup] : null;
        seen.add(key);
        detected.push({
          name: lib.pkg,
          version: version || 'unknown',
          ecosystem: lib.ecosystem,
          file: fp,
          scope: 'vendored',
          isVendored: true,
        });
        break; // first matching pattern wins
      }
    }
  }

  // Pass 2: function-body structural matching for minified/forked copies.
  for (const [fp, content] of files) {
    if (!/\.(?:js|mjs|cjs)$/i.test(fp)) continue;
    if (content.length < 200 || content.length > 500_000) continue;

    for (const sig of FUNCTION_BODY_SIGS) {
      const key = `${sig.pkg}:${fp}`;
      if (seen.has(key)) continue;

      // Look at every definition of the name, not just the first: an
      // unrelated function with the same name earlier in the file must not
      // hide the library's one. matchAll uses a clone of the shared /g regex.
      let matchCount = 0;
      for (const m of content.matchAll(sig.definitionRe)) {
        const bodyWindow = content.slice(m.index, m.index + 2000);
        matchCount = sig.bodyContains.filter((kw) => bodyWindow.includes(kw)).length;
        if (matchCount >= sig.required) break;
      }
      if (matchCount < sig.required) continue;

      seen.add(key);
      detected.push({
        name: sig.pkg,
        version: 'unknown',
        ecosystem: sig.ecosystem,
        file: fp,
        scope: 'vendored',
        isVendored: true,
        _detectionMethod: 'function-body-signature',
        _matchedKeywords: matchCount,
      });
    }
  }

  return detected;
}