muaddib-scanner 2.11.46 → 2.11.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.46",
3
+ "version": "2.11.48",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-05-25T15:19:42.839Z",
3
+ "timestamp": "2026-05-26T08:43:39.544Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -13,6 +13,16 @@ const IOCS_URL = 'https://github.com/DNSZLSK/muad-dib/releases/latest/download/i
13
13
  const HOME_DATA_DIR = path.join(os.homedir(), '.muaddib', 'data');
14
14
  const IOCS_PATH = path.join(HOME_DATA_DIR, 'iocs.json');
15
15
 
16
+ // Local bundled IOC file (committed in repo) — when present we don't need a network download.
17
+ // Loader (src/ioc/updater.js) reads this directly, so the home-cache download becomes a
18
+ // no-op for users who already have the source tree.
19
+ const LOCAL_BUNDLED_IOCS = path.join(__dirname, 'data', 'iocs.json');
20
+
21
+ // Per-process memoization: once download has failed (or been skipped), don't retry on the
22
+ // next scan within the same process. Eval runs hundreds of scans — without this we burn a
23
+ // 60s timeout per scan when the asset is missing.
24
+ let _ensureIocsResult = null;
25
+
16
26
  // Minimum file size to consider IOCs valid (1MB)
17
27
  const MIN_IOCS_SIZE = 1_000_000;
18
28
 
@@ -135,6 +145,9 @@ function downloadAndDecompress(url, destPath) {
135
145
  * @returns {Promise<boolean>} true if IOCs are available (cached or downloaded), false if download failed
136
146
  */
137
147
  async function ensureIOCs() {
148
+ // Per-process memoization — first scan decides, subsequent scans reuse the result.
149
+ if (_ensureIocsResult !== null) return _ensureIocsResult;
150
+
138
151
  try {
139
152
  // Create data directory if needed
140
153
  if (!fs.existsSync(HOME_DATA_DIR)) {
@@ -145,10 +158,21 @@ async function ensureIOCs() {
145
158
  if (fs.existsSync(IOCS_PATH)) {
146
159
  const stat = fs.statSync(IOCS_PATH);
147
160
  if (stat.size >= MIN_IOCS_SIZE) {
148
- return true;
161
+ return (_ensureIocsResult = true);
149
162
  }
150
163
  }
151
164
 
165
+ // Bundled-source fast path: dev installs and the npm tarball both ship src/ioc/data/iocs.json.
166
+ // When that file is present, the updater loader already merges it — no need to hit GitHub.
167
+ if (fs.existsSync(LOCAL_BUNDLED_IOCS)) {
168
+ try {
169
+ const stat = fs.statSync(LOCAL_BUNDLED_IOCS);
170
+ if (stat.size >= MIN_IOCS_SIZE) {
171
+ return (_ensureIocsResult = true);
172
+ }
173
+ } catch { /* fall through to download */ }
174
+ }
175
+
152
176
  // Offline / CI escape hatch: cache is empty/missing AND we don't want to
153
177
  // hit the network. Tests and air-gapped environments use this to avoid
154
178
  // 1-2s timeouts × N tests when the asset is unavailable. Same env var as
@@ -157,7 +181,7 @@ async function ensureIOCs() {
157
181
  // the cache check so a healthy cache still returns true even in offline
158
182
  // mode (otherwise tests that pre-populate the cache would falsely fail).
159
183
  if (process.env.MUADDIB_NO_REGISTRY_FETCH === '1') {
160
- return false;
184
+ return (_ensureIocsResult = false);
161
185
  }
162
186
 
163
187
  // Download IOCs (messages go to stderr to avoid contaminating JSON/SARIF stdout)
@@ -169,18 +193,23 @@ async function ensureIOCs() {
169
193
  if (stat.size < MIN_IOCS_SIZE) {
170
194
  try { fs.unlinkSync(IOCS_PATH); } catch {}
171
195
  process.stderr.write('[WARN] Downloaded IOC file is too small, using compact IOCs\n');
172
- return false;
196
+ return (_ensureIocsResult = false);
173
197
  }
174
198
 
175
199
  process.stderr.write('[MUADDIB] IOC database ready (' + Math.round(stat.size / 1024 / 1024) + ' MB)\n');
176
- return true;
200
+ return (_ensureIocsResult = true);
177
201
  } catch (err) {
178
202
  process.stderr.write('[WARN] Could not download IOC database: ' + err.message + '\n');
179
- process.stderr.write('[WARN] Continuing with YAML IOCs only (run "muaddib update" for full coverage)\n');
180
- return false;
203
+ process.stderr.write('[WARN] Continuing with bundled/YAML IOCs (run "muaddib update" for full coverage)\n');
204
+ return (_ensureIocsResult = false);
181
205
  }
182
206
  }
183
207
 
208
/**
 * Test-only hook: forget the memoized ensureIOCs() outcome.
 *
 * Resets the per-process `_ensureIocsResult` cache to `null`, so the next
 * ensureIOCs() call re-evaluates instead of returning the stored result.
 * Lets a test suite exercise several scenarios inside one process.
 */
function _resetEnsureIocsForTests() {
  _ensureIocsResult = null;
}
212
+
184
213
  module.exports = {
185
214
  ensureIOCs,
186
215
  downloadAndDecompress,
@@ -188,5 +217,6 @@ module.exports = {
188
217
  IOCS_URL,
189
218
  IOCS_PATH,
190
219
  HOME_DATA_DIR,
191
- MIN_IOCS_SIZE
220
+ MIN_IOCS_SIZE,
221
+ _resetEnsureIocsForTests
192
222
  };
@@ -125,7 +125,15 @@ async function execute(targetPath, options, pythonDeps, warnings) {
125
125
  const deobfuscateFn = options.noDeobfuscate ? null : deobfuscate;
126
126
 
127
127
  // Helper: yield to event loop so spinner can animate between sync operations
128
- const yieldThen = (fn) => new Promise(resolve => setImmediate(() => resolve(fn())));
128
// Defer `fn` to a setImmediate callback so the event loop gets a turn before
// the (synchronous) work runs, and expose the outcome as a promise.
// The try/catch matters: an exception thrown inside a setImmediate callback is
// not inside any promise/await frame, so without it the throw surfaces as an
// uncaught exception and takes the process down. With it, a synchronous throw
// from `fn` rejects the returned promise and the caller's try/catch sees it.
const yieldThen = (fn) => new Promise((resolve, reject) => {
  setImmediate(() => {
    try {
      resolve(fn());
    } catch (err) {
      reject(err);
    }
  });
});
129
137
 
130
138
  // Cross-file module graph analysis (before individual scanners)
131
139
  // Bounded: 5s timeout to prevent DoS on large/adversarial packages
@@ -11,6 +11,10 @@ const { debugLog } = require('../utils.js');
11
11
  const { getPackageMetadata } = require('../scanner/npm-registry.js');
12
12
  const { checkReleaseZero } = require('../scanner/release-zero.js');
13
13
  const { checkUnclaimedMaintainerEmail, checkCompromisedDomain } = require('../scanner/email-domain.js');
14
+ const { getPyPIPackageMetadata } = require('../scanner/pypi-registry.js');
15
+ const { runPyPIMaintainerChecks } = require('../scanner/pypi-maintainer.js');
16
+ const { checkPyPIReleaseZero } = require('../scanner/pypi-release-zero.js');
17
+ const { detectScannedPackageName } = require('../scanner/python.js');
14
18
 
15
19
  // Auto-sandbox compound trigger : optional out-of-tree dependency. Lazy-load
16
20
  // it so the pipeline still works when the file is absent (some dev machines
@@ -247,6 +251,56 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
247
251
  }
248
252
  }
249
253
 
254
+ // ───── PyPI side — same shape as the npm block above (v2.11.47, Phase 2) ─────
255
+ // Fetch PyPI registry metadata for the scanned package iff we can identify it
256
+ // (pyproject.toml / setup.py / setup.cfg). Same MUADDIB_NO_REGISTRY_FETCH
257
+ // master switch. Failure mode: silent (returns null), all downstream PyPI
258
+ // checks degrade gracefully.
259
+ if (
260
+ _pkgMeta &&
261
+ globalThis.process.env.MUADDIB_NO_REGISTRY_FETCH !== '1'
262
+ ) {
263
+ let pypiPackageName = null;
264
+ try {
265
+ pypiPackageName = detectScannedPackageName(targetPath);
266
+ } catch (err) {
267
+ debugLog('[PYPI-NAME] detect failed: ' + err.message);
268
+ }
269
+ if (pypiPackageName) {
270
+ try {
271
+ const pypiMeta = await getPyPIPackageMetadata(pypiPackageName);
272
+ if (pypiMeta) {
273
+ _pkgMeta.pypiPackageName = pypiPackageName;
274
+ _pkgMeta.pypiRegistryMeta = pypiMeta;
275
+ }
276
+ } catch (err) {
277
+ debugLog('[PYPI-REGISTRY-META] fetch failed for ' + pypiPackageName + ': ' + err.message);
278
+ }
279
+ }
280
+ }
281
+
282
+ if (_pkgMeta && _pkgMeta.pypiRegistryMeta) {
283
+ // PYPI-003 — release-zero (v0.x.x + age <30d). MEDIUM, composite-only.
284
+ try {
285
+ const rz = checkPyPIReleaseZero(_pkgMeta.pypiRegistryMeta.latest_version, _pkgMeta.pypiRegistryMeta);
286
+ if (rz) deduped.push(rz);
287
+ } catch (err) {
288
+ debugLog('[PYPI-RELEASE-ZERO] check failed: ' + err.message);
289
+ }
290
+ // MAINTAINER-005 + MAINTAINER-006, PyPI-flavoured (file → pyproject.toml,
291
+ // wording mentions PyPI). Same env opt-outs as npm
292
+ // (MUADDIB_EMAIL_DOMAIN_CHECK=0, MUADDIB_RDAP_CHECK=0).
293
+ try {
294
+ const pypiThreats = await runPyPIMaintainerChecks(
295
+ _pkgMeta.pypiPackageName,
296
+ _pkgMeta.pypiRegistryMeta
297
+ );
298
+ for (const t of pypiThreats) deduped.push(t);
299
+ } catch (err) {
300
+ debugLog('[PYPI-MAINTAINER] check failed: ' + err.message);
301
+ }
302
+ }
303
+
250
304
  // Cross-scanner compound: detached_process + suspicious_dataflow in same file
251
305
  // Catches cases where credential flow is detected by dataflow scanner, not AST scanner
252
306
  {
@@ -498,6 +498,13 @@ const PLAYBOOKS = {
498
498
  'qui ne tracent que les "import X" statiques. Inspecter les appels suivants au module dynamiquement ' +
499
499
  'importe — combine a exec/subprocess/fetch indique malveillance avec haute confiance.',
500
500
 
501
+ pypi_release_zero:
502
+ 'MEDIUM: Package PyPI v0.x.x publie il y a moins de 30 jours. Pattern de lure / ship-as-vulnerable. ' +
503
+ 'Composite-only: cette regle seule ne suffit pas, mais combinee avec PYAST-001 (cmdclass) ou un IOC ' +
504
+ 'string TrapDoor, c\'est un signal fort. Inspecter manuellement: qui est l\'auteur, est-ce que d\'autres ' +
505
+ 'packages aux noms similaires ont ete publies le meme jour, est-ce que le code source ressemble a une ' +
506
+ 'lib utile ou a un stub vide.',
507
+
501
508
  fork_exec_inline_interpreter:
502
509
  'HIGH: subprocess.X([<interpreter>, -e|-c, ...]) — fork-exec d\'un interpreteur inline ' +
503
510
  '(node -e, python -c, bash -c, ruby -e, perl -e, php -r, ...). Pattern transversal: Python ouvre ' +
@@ -224,6 +224,19 @@ const RULES = {
224
224
  ],
225
225
  mitre: 'T1195.002'
226
226
  },
227
+ pypi_release_zero: {
228
+ id: 'MUADDIB-PYPI-003',
229
+ name: 'PyPI Release-Zero (v0.x.x recently published)',
230
+ severity: 'MEDIUM',
231
+ confidence: 'medium',
232
+ domain: 'malware',
233
+ description: 'Package PyPI dont la latest_version est 0.x.x (release-zero PEP 440) ET dont la premiere publication est < 30 jours. Pattern de lure / ship-as-vulnerable / typosquat-staging. Equivalent PyPI de PKG-022 cote npm. Composite-only : la severite reste MEDIUM pour eviter les FPs sur les vrais projets early-stage.',
234
+ references: [
235
+ 'https://attack.mitre.org/techniques/T1195/002/',
236
+ 'https://peps.python.org/pep-0440/'
237
+ ],
238
+ mitre: 'T1195.002'
239
+ },
227
240
 
228
241
  // PYSRC-001 a 008 — Python source scanner (TrapDoor PyPI gap, v2.11.25).
229
242
  // python.js est manifest-only ; ast.js/dataflow.js sont JS-only ; ioc-strings.js
@@ -395,7 +395,7 @@ function collectImportTaint(ast, currentFile, graph, taintedExports, packagePath
395
395
  // const data = reader.getData() or const data = reader.data
396
396
  if (decl.init.type === 'MemberExpression' && decl.init.object.type === 'Identifier') {
397
397
  const modRef = localTaint['__module__' + decl.init.object.name];
398
- if (modRef) {
398
+ if (modRef && modRef.modTaint) {
399
399
  const propName = decl.init.property.name || decl.init.property.value;
400
400
  if (modRef.modTaint[propName] && modRef.modTaint[propName].tainted) {
401
401
  const t = modRef.modTaint[propName];
@@ -411,7 +411,7 @@ function collectImportTaint(ast, currentFile, graph, taintedExports, packagePath
411
411
  const callee = decl.init.callee;
412
412
  if (callee.object.type === 'Identifier') {
413
413
  const modRef = localTaint['__module__' + callee.object.name];
414
- if (modRef) {
414
+ if (modRef && modRef.modTaint) {
415
415
  const propName = callee.property.name || callee.property.value;
416
416
  if (modRef.modTaint[propName] && modRef.modTaint[propName].tainted) {
417
417
  const t = modRef.modTaint[propName];
@@ -474,7 +474,7 @@ function collectImportTaint(ast, currentFile, graph, taintedExports, packagePath
474
474
  const thisProp = decl.init.callee.object.property.name || decl.init.callee.object.property.value;
475
475
  const methodName = decl.init.callee.property.name || decl.init.callee.property.value;
476
476
  const modRef = thisRefs[thisProp];
477
- if (modRef && methodName && modRef.modTaint[methodName] && modRef.modTaint[methodName].tainted) {
477
+ if (modRef && methodName && modRef.modTaint && modRef.modTaint[methodName] && modRef.modTaint[methodName].tainted) {
478
478
  const t = modRef.modTaint[methodName];
479
479
  localTaint[decl.id.name] = {
480
480
  source: t.source,
@@ -0,0 +1,87 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * PyPI maintainer / metadata checks — wires the existing ecosystem-agnostic
5
+ * email-domain.js (MAINTAINER-005 MX + MAINTAINER-006 RDAP) onto PyPI
6
+ * metadata returned by pypi-registry.js.
7
+ *
8
+ * Created v2.11.47. Mirror of how npm calls these from processor.js, but
9
+ * for the PyPI side. We reuse the same threat types (`unclaimed_maintainer_email`,
10
+ * `compromised_email_domain`) — they're conceptually identical, only the
11
+ * registry context differs. We post-process the returned threats to swap the
12
+ * `file` field (package.json → pyproject.toml) and tweak the message wording
13
+ * so the operator sees "PyPI" not "npm" in the report.
14
+ */
15
+
16
+ const {
17
+ checkUnclaimedMaintainerEmail,
18
+ checkCompromisedDomain
19
+ } = require('./email-domain.js');
20
+
21
/**
 * Produce a PyPI-flavoured copy of a threat object.
 *
 * The copy has its `file` field set to `declarationFile` (or 'pyproject.toml'
 * when that is falsy) and, when `message` is a string, every standalone word
 * "npm" replaced by "PyPI" and "take over the account" expanded to
 * "take over the PyPI account". The input object is never mutated.
 *
 * @param {object} threat - Threat to adapt; non-object / falsy values are returned untouched.
 * @param {string} [declarationFile] - Path to show in the adapted threat's `file`.
 * @returns {object} A new adapted threat, or `threat` itself when it is not an object.
 */
function adaptThreatToPyPI(threat, declarationFile) {
  const isThreatObject = threat !== null && typeof threat === 'object';
  if (!isThreatObject) {
    return threat;
  }

  const shownFile = declarationFile || 'pyproject.toml';
  const result = { ...threat, file: shownFile };

  const original = threat.message;
  if (typeof original === 'string') {
    const renamedRegistry = original.replace(/\bnpm\b/g, 'PyPI');
    result.message = renamedRegistry.replace(/take over the account/g, 'take over the PyPI account');
  }

  return result;
}
35
+
36
/**
 * Run the maintainer-email checks against PyPI registry metadata.
 *
 * Calls checkUnclaimedMaintainerEmail and then checkCompromisedDomain (in that
 * order, sequentially) with a reduced `{ maintainer_emails, created_at }`
 * object, and passes every returned threat through adaptThreatToPyPI.
 * A check that throws or rejects contributes no threats; the error is
 * deliberately swallowed.
 *
 * @param {string} packageName - PyPI package name being scanned (not read by this function).
 * @param {object} pypiRegistryMeta - Metadata object; must carry a non-empty
 *   `maintainer_emails` array, otherwise the result is `[]`. `created_at` is forwarded as-is.
 * @param {object} [options] - `resolveMx` is forwarded to the MX check,
 *   `fetchRdap` to the RDAP check; `declarationFile` is the path written into
 *   each threat's `file` (defaults to 'pyproject.toml').
 * @returns {Promise<Array>} Adapted threats, MX results first, then RDAP results.
 */
async function runPyPIMaintainerChecks(packageName, pypiRegistryMeta, options = {}) {
  const metaIsObject = Boolean(pypiRegistryMeta) && typeof pypiRegistryMeta === 'object';
  if (!metaIsObject) return [];

  const emails = pypiRegistryMeta.maintainer_emails;
  if (!Array.isArray(emails) || emails.length === 0) return [];

  const shownFile = options.declarationFile || 'pyproject.toml';

  // Only these two fields are handed to the email-domain helpers.
  const helperMeta = {
    maintainer_emails: pypiRegistryMeta.maintainer_emails,
    created_at: pypiRegistryMeta.created_at
  };

  const collected = [];

  // Run one check, tolerate its failure, and append its adapted threats.
  const runCheck = async (invoke) => {
    let found = [];
    try {
      found = await invoke();
    } catch { /* silent — a failing check simply yields no threats */ }
    for (const item of found) collected.push(adaptThreatToPyPI(item, shownFile));
  };

  await runCheck(() => checkUnclaimedMaintainerEmail(helperMeta, {
    resolveMx: options.resolveMx
  }));
  await runCheck(() => checkCompromisedDomain(helperMeta, {
    fetchRdap: options.fetchRdap
  }));

  return collected;
}
82
+
83
+ module.exports = {
84
+ runPyPIMaintainerChecks,
85
+ // Exposed for unit tests
86
+ _internal: { adaptThreatToPyPI }
87
+ };
@@ -0,0 +1,239 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * PyPI registry metadata fetcher — mirror of `src/scanner/npm-registry.js`
5
+ * for the PyPI ecosystem. Closes the npm/PyPI asymmetry on the metadata axis.
6
+ *
7
+ * Created v2.11.47 to enable PyPI-side maintainer/email/release-zero
8
+ * detections (port of MAINTAINER-005/006/PKG-022 to PyPI).
9
+ *
10
+ * Architecture parity with npm-registry.js :
11
+ * - built-in fetch (no external dep)
12
+ * - 10s timeout, 3 retries with exponential backoff
13
+ * - 429 backoff respecting Retry-After
14
+ * - throttle via http-limiter.js semaphore (shared with npm — same MUAD'DIB
15
+ * self-DoS protection ; rate budget is global, ok since target hosts differ)
16
+ * - 5min in-process cache keyed by package name
17
+ * - returns null on any failure (never throws — pipeline safety)
18
+ * - gated upstream by `MUADDIB_NO_REGISTRY_FETCH === '1'` (same master switch)
19
+ *
20
+ * URL : https://pypi.org/pypi/<package>/json (PyPI's per-project JSON API; PEP 691 describes the separate /simple/ index API)
21
+ */
22
+
23
+ const { debugLog } = require('../utils.js');
24
+ const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
25
+
26
+ const PYPI_REGISTRY_URL = 'https://pypi.org/pypi';
27
+ const REQUEST_TIMEOUT = 10000; // 10 seconds
28
+ const MAX_RETRIES = 3;
29
+ const CACHE_TTL = 5 * 60 * 1000; // 5 minutes — mirror npm-registry
30
+
31
+ // PEP 503 normalized package name: lowercase letters / digits / `-` `_` `.`
32
+ // Case-insensitive on input, server normalizes.
33
+ const PYPI_PACKAGE_REGEX = /^[A-Za-z0-9_.-]{1,214}$/;
34
+
35
+ // In-process cache : Map<packageName, { fetchedAt: number, data: object | null }>
36
+ // Negative caching (data === null) is honored too — avoids repeat 404 hammering.
37
+ const _pypiMetadataCache = new Map();
38
+
39
/**
 * Build an abort signal that fires after `ms` milliseconds.
 *
 * Uses AbortSignal.timeout when the runtime provides it; otherwise falls back
 * to an AbortController driven by setTimeout.
 *
 * @param {number} ms - Timeout in milliseconds.
 * @returns {{ signal: AbortSignal, cleanup: Function }} `cleanup` clears the
 *   fallback timer; it is a no-op on the native path.
 */
function createTimeoutSignal(ms) {
  const hasNativeTimeout = typeof AbortSignal !== 'undefined' && AbortSignal.timeout;

  if (!hasNativeTimeout) {
    const ctrl = new AbortController();
    const handle = setTimeout(() => ctrl.abort(), ms);
    return {
      signal: ctrl.signal,
      cleanup: () => clearTimeout(handle)
    };
  }

  return {
    signal: AbortSignal.timeout(ms),
    cleanup: () => {}
  };
}
48
+
49
/**
 * GET `url` and return its parsed JSON body, retrying transient failures.
 *
 * Makes at most MAX_RETRIES attempts, each bounded by REQUEST_TIMEOUT:
 *   - network error / timeout: exponential backoff (1s, 2s, ... capped at 8s), then retry;
 *   - HTTP 429: wait `Retry-After` seconds (default 2s, capped at 30s), then retry;
 *   - HTTP 404, any other non-2xx status, or an unparsable body: return null immediately.
 * No wait is performed after the final attempt, since nothing follows it.
 * Fetch and response errors are caught and reported as null.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<object|null>} Parsed JSON, or null on any failure.
 */
async function fetchWithRetry(url) {
  const sleep = (ms) => new Promise(r => setTimeout(r, ms));

  // Consume the body of a response we are discarding; failures are only logged.
  const drain = async (response) => {
    try { await response.text(); } catch (e) { debugLog('pypi-registry: response drain failed:', e.message); }
  };

  let lastError = null;
  for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
    const isLastAttempt = attempt === MAX_RETRIES - 1;
    let response;
    const { signal, cleanup } = createTimeoutSignal(REQUEST_TIMEOUT);
    try {
      response = await fetch(url, { signal });
    } catch (err) {
      cleanup();
      lastError = err;
      if (!isLastAttempt) {
        await sleep(Math.min(1000 * Math.pow(2, attempt), 8000));
      }
      continue;
    }
    cleanup();

    if (response.status === 404) {
      await drain(response);
      return null;
    }

    if (response.status === 429) {
      await drain(response);
      // Remember the throttle so exhaustion is visible in the debug log.
      lastError = new Error('HTTP 429 (rate limited)');
      if (!isLastAttempt) {
        const retryAfter = Number.parseInt(response.headers.get('retry-after'), 10);
        // NaN / 0 / negative Retry-After values fall back to the 2s default.
        const delay = Math.min(retryAfter > 0 ? retryAfter * 1000 : 2000, 30000);
        await sleep(delay);
      }
      continue;
    }

    if (!response.ok) {
      await drain(response);
      return null;
    }

    try {
      return await response.json();
    } catch {
      return null;
    }
  }
  if (lastError) debugLog('pypi-registry: retries exhausted for ' + url + ': ' + lastError.message);
  return null;
}
94
+
95
/**
 * Collect the distinct e-mail addresses found in a PyPI `info` block.
 *
 * Reads the `author_email` and `maintainer_email` string fields (in that
 * order), splits each on commas, and takes one address per piece: the content
 * of a `<...>` wrapper when present, otherwise the first whitespace-free token
 * containing `@`. Addresses are lowercased, trimmed and de-duplicated in
 * first-seen order.
 *
 * @param {object} infoBlock - The `info` object of a PyPI JSON response.
 * @returns {string[]} Unique lowercased addresses; `[]` when `infoBlock` is not an object.
 */
function extractMaintainerEmails(infoBlock) {
  if (!infoBlock || typeof infoBlock !== 'object') return [];

  const unique = new Set();
  ['author_email', 'maintainer_email'].forEach((key) => {
    const value = infoBlock[key];
    if (typeof value !== 'string' || !value.includes('@')) return;

    value.split(',').forEach((chunk) => {
      // Prefer the "Name <email>" form, then fall back to a bare address.
      const hit = chunk.match(/<([^>]+@[^>]+)>/) || chunk.match(/([^\s<>]+@[^\s<>]+)/);
      if (hit && hit[1].includes('@')) {
        unique.add(hit[1].toLowerCase().trim());
      }
    });
  });

  return [...unique];
}
116
+
117
/**
 * Map each release version to its earliest upload timestamp.
 *
 * `releases` is an object keyed by version string whose values are arrays of
 * file entries. For each file the timestamp is `upload_time_iso_8601`, falling
 * back to `upload_time`; non-string timestamps are ignored. Timestamps are
 * compared as plain strings, and the smallest one is kept per version.
 * Versions with no file list, an empty list, or no usable timestamp are omitted.
 *
 * @param {object} releases - The `releases` object of a PyPI JSON response.
 * @returns {Object<string, string>} version → earliest timestamp string.
 */
function extractReleaseTimes(releases) {
  const byVersion = {};
  if (!releases || typeof releases !== 'object') return byVersion;

  Object.keys(releases).forEach((ver) => {
    const fileList = releases[ver];
    if (!Array.isArray(fileList) || fileList.length === 0) return;

    const stamps = fileList
      .map((entry) => (typeof entry === 'object' && entry
        ? (entry.upload_time_iso_8601 || entry.upload_time)
        : null))
      .filter((stamp) => typeof stamp === 'string');
    if (stamps.length === 0) return;

    const first = stamps.reduce((min, stamp) => (stamp < min ? stamp : min));
    if (first) byVersion[ver] = first;
  });

  return byVersion;
}
138
+
139
/**
 * Fetch and summarise PyPI registry metadata for one package.
 *
 * Returns null when the name fails PYPI_PACKAGE_REGEX, when the fetch yields
 * nothing usable (fetchWithRetry returned a non-object), or when a cached
 * null is still fresh. Results — positive and negative — are cached under the
 * lowercased name for CACHE_TTL.
 *
 * Derived fields:
 *   - created_at / latest_release_at: smallest / largest per-version timestamp
 *     (string comparison) from extractReleaseTimes;
 *   - age_days: whole days since created_at, or null when created_at is
 *     missing or not parseable as a date;
 *   - latest_version: `info.version` when it is a non-empty string, else null
 *     (no fallback is derived from the `releases` keys);
 *   - yanked: true only when the latest version has at least one file and
 *     every file is flagged `yanked === true`.
 *
 * @param {string} packageName
 * @returns {Promise<{
 *   created_at: string | null,
 *   latest_release_at: string | null,
 *   age_days: number | null,
 *   latest_version: string | null,
 *   version_count: number,
 *   maintainer_emails: string[],
 *   yanked: boolean,
 *   description: string,
 *   home_page: string | null,
 *   project_urls: object | null,
 *   releases: { [version: string]: string }
 * } | null>}
 */
async function getPyPIPackageMetadata(packageName) {
  if (typeof packageName !== 'string' || !PYPI_PACKAGE_REGEX.test(packageName)) return null;
  const normalized = packageName.toLowerCase();

  // Cache check (honors negative cache)
  const cached = _pypiMetadataCache.get(normalized);
  if (cached && (Date.now() - cached.fetchedAt) < CACHE_TTL) {
    return cached.data;
  }

  const url = PYPI_REGISTRY_URL + '/' + encodeURIComponent(packageName) + '/json';
  let raw;
  await acquireRegistrySlot();
  try {
    raw = await fetchWithRetry(url);
  } finally {
    // Always hand the slot back, even if the fetch helper throws.
    releaseRegistrySlot();
  }

  if (!raw || typeof raw !== 'object') {
    _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data: null });
    return null;
  }

  const info = raw.info || {};
  const releases = raw.releases || {};
  const releaseTimes = extractReleaseTimes(releases);

  // Earliest + latest publish dates across all release versions.
  let createdAt = null;
  let latestReleaseAt = null;
  for (const t of Object.values(releaseTimes)) {
    if (createdAt === null || t < createdAt) createdAt = t;
    if (latestReleaseAt === null || t > latestReleaseAt) latestReleaseAt = t;
  }

  // An unparsable timestamp gives NaN from getTime(); report that as "unknown"
  // (null) instead of leaking NaN into age comparisons downstream.
  const createdMs = createdAt ? new Date(createdAt).getTime() : NaN;
  const ageDays = Number.isFinite(createdMs)
    ? Math.floor((Date.now() - createdMs) / (1000 * 60 * 60 * 24))
    : null;

  // Latest version comes from info.version only.
  const latestVersion = (typeof info.version === 'string' && info.version) || null;

  // Yanked status of the latest version (read from the per-file "yanked" flag).
  // Require at least one file: Array.prototype.every is vacuously true on an
  // empty list, which would otherwise mark a file-less release as yanked.
  const latestFiles = latestVersion ? releases[latestVersion] : null;
  const yanked = Array.isArray(latestFiles) &&
    latestFiles.length > 0 &&
    latestFiles.every(f => f && f.yanked === true);

  const data = {
    created_at: createdAt,
    latest_release_at: latestReleaseAt,
    age_days: ageDays,
    latest_version: latestVersion,
    version_count: Object.keys(releaseTimes).length,
    maintainer_emails: extractMaintainerEmails(info),
    yanked,
    description: typeof info.summary === 'string' ? info.summary
      : (typeof info.description === 'string' ? info.description.slice(0, 1000) : ''),
    home_page: typeof info.home_page === 'string' && info.home_page ? info.home_page : null,
    project_urls: (info.project_urls && typeof info.project_urls === 'object') ? info.project_urls : null,
    releases: releaseTimes
  };

  _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data });
  return data;
}
227
+
228
+ module.exports = {
229
+ getPyPIPackageMetadata,
230
+ // Exposed for unit tests
231
+ _internal: {
232
+ PYPI_PACKAGE_REGEX,
233
+ extractMaintainerEmails,
234
+ extractReleaseTimes,
235
+ fetchWithRetry,
236
+ _pypiMetadataCache,
237
+ _resetCache: () => _pypiMetadataCache.clear()
238
+ }
239
+ };
@@ -0,0 +1,59 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * F2-PyPI — Release-Zero detection (PEP 440 variant of release-zero.js).
5
+ *
6
+ * Created v2.11.47 to mirror npm's release-zero.js for the PyPI ecosystem.
7
+ *
8
+ * Threat model: an attacker publishes a brand-new package with version 0.0.0
9
+ * (or any 0.x.x variant) as a lure or as a ship-as-vulnerable placeholder.
10
+ * On its own a v0.x.x is noise (many legit early-stage projects sit there
11
+ * forever), so we conjunction-gate with `age_days < 30`: a recently-published
12
+ * 0.x.x is suspicious; an abandoned 0.x.x from 2017 is not.
13
+ *
14
+ * PyPI-specific differences vs npm release-zero.js:
15
+ * - PEP 440 versions, not semver. The matcher accepts only versions whose
16
+ * release segment is zero in every component — 0, 0.0, 0.0.0, 0.0.0a1,
17
+ * 0.0.0.dev1, 0.0.0rc2, 0.0.0.post1 — so 0.1 or 0.1.0a1 do not match.
18
+ * - No `preinstall`/`postinstall` lifecycle hook concept in PyPI manifests.
19
+ * The functionally-equivalent vector — `setup.py cmdclass` override —
20
+ * is already covered by PYAST-001. We don't double-detect here.
21
+ * - Conjunction is just `recently published` (no script check).
22
+ */
23
+
24
// Matches a PEP 440 version whose release segment is 0 in every component.
// Accepts: 0 | 0.0 | 0.0.0 | 0.0.0.0
// Optional single suffix: a1 | b2 | c3 | rc3 | .dev1 | .post1
// Also allows an explicit zero epoch prefix `0!`.
// Versions such as 0.1 or 0.1.0 are NOT matched.
const PYPI_RELEASE_ZERO_RE = /^(?:0!)?0(?:\.0)*(?:[abc]\d+|rc\d+|\.dev\d+|\.post\d+)?$/i;

// A package first published fewer than this many days ago counts as "recent".
const RECENT_PUBLISH_THRESHOLD_DAYS = 30;

/**
 * Flag a PyPI package whose version is all-zero and which was first published recently.
 *
 * Returns null unless `version` matches PYPI_RELEASE_ZERO_RE and
 * `pypiRegistryMeta.age_days` is a finite number below
 * RECENT_PUBLISH_THRESHOLD_DAYS. A NaN age (e.g. computed from an unparsable
 * date) is treated as unknown and yields null.
 *
 * @param {string} version - PyPI version string from registry meta (latest_version).
 * @param {object} pypiRegistryMeta - { age_days: number | null, ... }.
 * @returns {object | null} threat object (type 'pypi_release_zero', severity 'MEDIUM') or null.
 */
function checkPyPIReleaseZero(version, pypiRegistryMeta) {
  if (typeof version !== 'string' || version.length === 0) return null;
  if (!PYPI_RELEASE_ZERO_RE.test(version)) return null;
  if (!pypiRegistryMeta || typeof pypiRegistryMeta !== 'object') return null;

  const ageDays = pypiRegistryMeta.age_days;
  // Number.isFinite rejects non-numbers AND NaN; `typeof NaN === 'number'` and
  // `NaN >= 30` is false, so a plain typeof check would let NaN through.
  if (!Number.isFinite(ageDays) || ageDays >= RECENT_PUBLISH_THRESHOLD_DAYS) return null;

  return {
    type: 'pypi_release_zero',
    severity: 'MEDIUM',
    message: 'PyPI package latest version is "' + version + '" (release-zero) and was first published only ' + ageDays + ' day(s) ago — possible lure / ship-as-vulnerable / typosquat-staging pattern. setup.py cmdclass install-time hooks are covered separately by PYAST-001.',
    file: 'pyproject.toml',
    count: 1,
    version,
    age_days: ageDays
  };
}
55
+
56
+ module.exports = {
57
+ checkPyPIReleaseZero,
58
+ _internal: { PYPI_RELEASE_ZERO_RE, RECENT_PUBLISH_THRESHOLD_DAYS }
59
+ };
@@ -433,10 +433,87 @@ function escapeRegex(str) {
433
433
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
434
434
  }
435
435
 
436
/**
 * Try to detect the NAME of the Python package located at `targetPath`
 * by reading its manifest files with regular expressions (no real TOML /
 * Python / INI parsing).
 *
 * Priority order :
 *   1. pyproject.toml [project] name        (quoted value)
 *   2. pyproject.toml [tool.poetry] name    (quoted value)
 *   3. setup.py       setup(name="...")     (quoted literal only)
 *   4. setup.cfg      [metadata] name = X   (rest of the line, trimmed)
 *
 * A missing or unreadable manifest is skipped.
 *
 * @param {string} targetPath - Directory containing the manifests.
 * @returns {string | null} The first name found, or null if none can be extracted.
 */
function detectScannedPackageName(targetPath) {
  // Read a manifest as UTF-8; a missing or unreadable file reads as empty.
  function readManifest(fileName) {
    const manifestPath = path.join(targetPath, fileName);
    if (!fs.existsSync(manifestPath)) return '';
    try { return fs.readFileSync(manifestPath, 'utf8'); } catch { return ''; }
  }

  // Return the text between the `[header]` line and the next line that starts
  // with `[` (or end of file); null when the header line is absent.
  function extractSection(content, header) {
    const startRe = new RegExp('^\\[' + header.replace(/[.[\]\\]/g, '\\$&') + '\\][^\\n]*\\n', 'm');
    const startMatch = content.match(startRe);
    if (!startMatch) return null;
    const start = startMatch.index + startMatch[0].length;
    const rest = content.slice(start);
    const nextSection = rest.search(/^\[/m);
    return nextSection === -1 ? rest : rest.slice(0, nextSection);
  }

  // Pull a quoted `name = "X"` value out of a TOML section, or null.
  function quotedName(section) {
    if (!section) return null;
    const m = section.match(/^\s*name\s*=\s*["']([^"']+)["']/m);
    return m ? m[1].trim() : null;
  }

  // Return the argument text of the first `setup(...)` call, up to its
  // MATCHING close paren. Parentheses inside string literals and `#` comments
  // are ignored, so nested calls such as open("README").read() no longer cut
  // the argument list short. If the scan cannot balance the parentheses, fall
  // back to the text up to the first `)`.
  function extractSetupArgs(content) {
    const open = /\bsetup\s*\(/.exec(content);
    if (!open) return null;
    const start = open.index + open[0].length;

    let depth = 1;
    let quote = null;
    for (let i = start; i < content.length; i++) {
      const ch = content[i];
      if (quote !== null) {
        if (ch === '\\') i++;            // skip the escaped character
        else if (ch === quote) quote = null;
      } else if (ch === '"' || ch === "'") {
        quote = ch;
      } else if (ch === '#') {
        const eol = content.indexOf('\n', i);
        if (eol === -1) break;
        i = eol;
      } else if (ch === '(') {
        depth++;
      } else if (ch === ')' && --depth === 0) {
        return content.slice(start, i);
      }
    }

    const firstClose = content.indexOf(')', start);
    return firstClose === -1 ? null : content.slice(start, firstClose);
  }

  // 1 + 2. pyproject.toml — PEP 621 [project], then [tool.poetry]
  const pyproject = readManifest('pyproject.toml');
  const projectName = quotedName(extractSection(pyproject, 'project'));
  if (projectName !== null) return projectName;
  const poetryName = quotedName(extractSection(pyproject, 'tool.poetry'));
  if (poetryName !== null) return poetryName;

  // 3. setup.py — first quoted name=... inside the setup(...) arguments
  const setupArgs = extractSetupArgs(readManifest('setup.py'));
  if (setupArgs !== null) {
    const nameMatch = setupArgs.match(/\bname\s*=\s*["']([^"']+)["']/);
    if (nameMatch) return nameMatch[1].trim();
  }

  // 4. setup.cfg [metadata] name = X
  const metaSection = extractSection(readManifest('setup.cfg'), 'metadata');
  if (metaSection) {
    const m = metaSection.match(/^\s*name\s*=\s*(.+?)\s*$/m);
    if (m) return m[1].trim();
  }

  return null;
}
511
+
436
512
  module.exports = {
437
513
  parseRequirementsTxt,
438
514
  parseSetupPy,
439
515
  parsePyprojectToml,
440
516
  detectPythonProject,
517
+ detectScannedPackageName,
441
518
  normalizePythonName
442
519
  };