@westbayberry/dg 1.0.52 → 1.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/README.md +5 -1
  2. package/dist/index.mjs +349 -168
  3. package/dist/packages/cli/src/alt-screen.js +36 -0
  4. package/dist/packages/cli/src/api.js +322 -0
  5. package/dist/packages/cli/src/auth.js +218 -0
  6. package/dist/packages/cli/src/bin.js +386 -0
  7. package/dist/packages/cli/src/config.js +228 -0
  8. package/dist/packages/cli/src/discover.js +126 -0
  9. package/dist/packages/cli/src/first-run.js +135 -0
  10. package/dist/packages/cli/src/hook.js +360 -0
  11. package/dist/packages/cli/src/lockfile.js +303 -0
  12. package/dist/packages/cli/src/npm-wrapper.js +218 -0
  13. package/dist/packages/cli/src/pip-wrapper.js +273 -0
  14. package/dist/packages/cli/src/sanitize.js +38 -0
  15. package/dist/packages/cli/src/scan-core.js +144 -0
  16. package/dist/packages/cli/src/setup-status.js +46 -0
  17. package/dist/packages/cli/src/static-output.js +625 -0
  18. package/dist/packages/cli/src/telemetry.js +141 -0
  19. package/dist/packages/cli/src/ui/App.js +137 -0
  20. package/dist/packages/cli/src/ui/InitApp.js +391 -0
  21. package/dist/packages/cli/src/ui/LoginApp.js +51 -0
  22. package/dist/packages/cli/src/ui/NpmWrapperApp.js +73 -0
  23. package/dist/packages/cli/src/ui/PipWrapperApp.js +72 -0
  24. package/dist/packages/cli/src/ui/components/ConfirmPrompt.js +24 -0
  25. package/dist/packages/cli/src/ui/components/DemoScanAnimation.js +26 -0
  26. package/dist/packages/cli/src/ui/components/DurationLine.js +7 -0
  27. package/dist/packages/cli/src/ui/components/ErrorView.js +30 -0
  28. package/dist/packages/cli/src/ui/components/FileSavePrompt.js +210 -0
  29. package/dist/packages/cli/src/ui/components/InteractiveResultsView.js +557 -0
  30. package/dist/packages/cli/src/ui/components/Mascot.js +33 -0
  31. package/dist/packages/cli/src/ui/components/ProgressBar.js +51 -0
  32. package/dist/packages/cli/src/ui/components/ProgressDots.js +35 -0
  33. package/dist/packages/cli/src/ui/components/ProjectSelector.js +60 -0
  34. package/dist/packages/cli/src/ui/components/ResultsView.js +105 -0
  35. package/dist/packages/cli/src/ui/components/ScanResultCard.js +54 -0
  36. package/dist/packages/cli/src/ui/components/ScoreHeader.js +142 -0
  37. package/dist/packages/cli/src/ui/components/SetupBanner.js +17 -0
  38. package/dist/packages/cli/src/ui/components/Spinner.js +11 -0
  39. package/dist/packages/cli/src/ui/hooks/useExpandAnimation.js +44 -0
  40. package/dist/packages/cli/src/ui/hooks/useInit.js +341 -0
  41. package/dist/packages/cli/src/ui/hooks/useLogin.js +121 -0
  42. package/dist/packages/cli/src/ui/hooks/useNpmWrapper.js +192 -0
  43. package/dist/packages/cli/src/ui/hooks/usePipWrapper.js +195 -0
  44. package/dist/packages/cli/src/ui/hooks/useScan.js +202 -0
  45. package/dist/packages/cli/src/ui/hooks/useTerminalSize.js +29 -0
  46. package/dist/packages/cli/src/update-check.js +152 -0
  47. package/dist/packages/cli/src/wizard-demo-data.js +63 -0
  48. package/dist/src/ecosystem.js +2 -0
  49. package/dist/src/lockfile/diff.js +38 -0
  50. package/dist/src/lockfile/parse_package_json.js +41 -0
  51. package/dist/src/lockfile/parse_package_lock.js +55 -0
  52. package/dist/src/lockfile/parse_pipfile_lock.js +69 -0
  53. package/dist/src/lockfile/parse_pnpm_lock.js +62 -0
  54. package/dist/src/lockfile/parse_poetry_lock.js +71 -0
  55. package/dist/src/lockfile/parse_requirements.js +83 -0
  56. package/dist/src/lockfile/parse_yarn_lock.js +66 -0
  57. package/dist/src/logger.js +21 -0
  58. package/dist/src/npm/h2pool.js +161 -0
  59. package/dist/src/npm/registry.js +299 -0
  60. package/dist/src/npm/tarball.js +274 -0
  61. package/dist/src/pypi/registry.js +299 -0
  62. package/dist/src/pypi/tarball.js +361 -0
  63. package/dist/src/types.js +2 -0
  64. package/package.json +6 -3
@@ -0,0 +1,299 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fetchPyPIMetadata = fetchPyPIMetadata;
4
+ exports.fetchPyPIDownloads = fetchPyPIDownloads;
5
+ exports.prefetchPyPIDownloadCounts = prefetchPyPIDownloadCounts;
6
+ exports.normalizePyPIName = normalizePyPIName;
7
+ const logger_1 = require("../logger");
8
+ const FETCH_TIMEOUT_MS = 30000;
9
+ const downloadCache = new Map();
10
+ const metadataCache = new Map();
11
+ const DOWNLOAD_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
12
+ const NULL_DOWNLOAD_CACHE_TTL_MS = 5 * 60 * 1000; // 5 min — retry sooner on failure
13
+ const METADATA_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
14
+ const MAX_CACHE_SIZE = 10000;
15
+ // --- Fetch functions ---
16
/**
 * Fetch full package metadata from the PyPI JSON API.
 *
 * Results are cached in-memory for METADATA_CACHE_TTL_MS, including
 * negative ("not found" / non-OK) results so repeated misses don't hammer
 * PyPI. Network/parse errors are logged and yield null without being
 * written to the cache.
 */
async function fetchPyPIMetadata(packageName) {
    const name = normalizePyPIName(packageName);
    const hit = metadataCache.get(name);
    if (hit && Date.now() - hit.fetchedAt < METADATA_CACHE_TTL_MS) {
        return hit.meta;
    }
    try {
        const response = await fetch(`https://pypi.org/pypi/${name}/json`, {
            headers: { "User-Agent": "dependency-guardian" },
            signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
        });
        if (!response.ok) {
            // Cache the miss too — repeated lookups for unknown packages are common.
            evictAndSet(metadataCache, name, { meta: null, fetchedAt: Date.now() });
            return null;
        }
        const raw = await response.json();
        const meta = { raw, common: mapToCommon(raw) };
        evictAndSet(metadataCache, name, { meta, fetchedAt: Date.now() });
        return meta;
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logger_1.logger.warning(`Failed to fetch PyPI metadata for ${packageName}: ${msg}`);
        return null;
    }
}
44
/**
 * Fetch recent download counts from pypistats.org.
 * Returns weekly downloads (last 7 days) or null on failure.
 *
 * Results are cached in-memory; failed (null) results use a shorter TTL so
 * transient outages get retried sooner. Up to 3 attempts are made, with
 * exponential backoff on HTTP 429 (pypistats rate limits aggressively) and
 * linear backoff on thrown fetch errors.
 */
async function fetchPyPIDownloads(packageName) {
    const normalizedName = normalizePyPIName(packageName);
    // Check in-memory cache — use shorter TTL for null entries so we retry sooner
    const cached = downloadCache.get(normalizedName);
    if (cached) {
        const ttl = cached.count !== null ? DOWNLOAD_CACHE_TTL_MS : NULL_DOWNLOAD_CACHE_TTL_MS;
        if (Date.now() - cached.fetchedAt < ttl) {
            return cached.count;
        }
    }
    const url = `https://pypistats.org/api/packages/${normalizedName}/recent`;
    const MAX_ATTEMPTS = 3;
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
        const canRetry = attempt < MAX_ATTEMPTS - 1;
        try {
            const response = await fetch(url, {
                headers: { "User-Agent": "dependency-guardian" },
                signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
            });
            if (response.status === 429) {
                // Rate limited — exponential backoff. Fixed: only sleep when a
                // retry will actually follow; previously the final attempt also
                // slept (up to 4s) for nothing before giving up.
                if (canRetry) {
                    await new Promise((r) => setTimeout(r, 1000 * Math.pow(2, attempt)));
                    continue;
                }
                break;
            }
            if (!response.ok) {
                evictAndSet(downloadCache, normalizedName, { count: null, fetchedAt: Date.now() });
                return null;
            }
            const data = (await response.json());
            const count = data?.data?.last_week ?? null;
            evictAndSet(downloadCache, normalizedName, { count, fetchedAt: Date.now() });
            return count;
        }
        catch {
            if (canRetry) {
                await new Promise((r) => setTimeout(r, 1000 * (attempt + 1)));
                continue;
            }
            evictAndSet(downloadCache, normalizedName, { count: null, fetchedAt: Date.now() });
            return null;
        }
    }
    // All attempts exhausted (rate-limited throughout) — cache the failure.
    evictAndSet(downloadCache, normalizedName, { count: null, fetchedAt: Date.now() });
    return null;
}
91
/**
 * Prefetch download counts for a batch of packages.
 * pypistats.org doesn't have a bulk API, so we fetch individually
 * with concurrency limiting to avoid rate limits.
 */
async function prefetchPyPIDownloadCounts(packageNames) {
    const CONCURRENCY = 3; // pypistats rate limits aggressively
    // Mirror fetchPyPIDownloads' TTL choice: failed (null) lookups expire after
    // the shorter NULL TTL. Previously this filter used the full 1h TTL for
    // every entry, so failed lookups were skipped by the prefetcher for an hour
    // even though fetchPyPIDownloads would have retried them after 5 minutes.
    const uncached = packageNames.filter((name) => {
        const cached = downloadCache.get(normalizePyPIName(name));
        if (!cached)
            return true;
        const ttl = cached.count !== null ? DOWNLOAD_CACHE_TTL_MS : NULL_DOWNLOAD_CACHE_TTL_MS;
        return Date.now() - cached.fetchedAt >= ttl;
    });
    for (let i = 0; i < uncached.length; i += CONCURRENCY) {
        const batch = uncached.slice(i, i + CONCURRENCY);
        await Promise.all(batch.map((name) => fetchPyPIDownloads(name)));
        // Small delay between batches to respect rate limits
        if (i + CONCURRENCY < uncached.length) {
            await new Promise((r) => setTimeout(r, 200));
        }
    }
}
112
// --- Helpers ---
/**
 * Insert `value` under `key`, evicting first if the cache is at capacity.
 * Pass 1 walks the map dropping expired entries, stopping once the cache is
 * below 80% full. Pass 2, if still at capacity, drops the oldest-inserted
 * 20% of keys (Map preserves insertion order).
 */
function evictAndSet(cache, key, value) {
    if (cache.size >= MAX_CACHE_SIZE) {
        const now = Date.now();
        const lowWater = MAX_CACHE_SIZE * 0.8;
        for (const [entryKey, entry] of cache) {
            if (now - entry.fetchedAt >= DOWNLOAD_CACHE_TTL_MS) {
                cache.delete(entryKey);
            }
            if (cache.size < lowWater)
                break;
        }
        // Expiry alone wasn't enough — evict by insertion order.
        if (cache.size >= MAX_CACHE_SIZE) {
            const oldestKeys = [...cache.keys()].slice(0, Math.floor(MAX_CACHE_SIZE * 0.2));
            for (const staleKey of oldestKeys) {
                cache.delete(staleKey);
            }
        }
    }
    cache.set(key, value);
}
136
/**
 * Normalize PyPI package name per PEP 503:
 * lowercase, replace [-_.] with hyphens, collapse runs.
 */
function normalizePyPIName(name) {
    const collapsed = name.replace(/[-_.]+/g, "-");
    return collapsed.toLowerCase();
}
143
/**
 * Best-effort extraction of a source-repository URL from PyPI `info`.
 * Scans project_urls for repo-ish labels first (in insertion order), then
 * falls back to home_page when it points at a known code host.
 * Returns undefined when nothing plausible is found.
 */
function extractRepoUrl(info) {
    const REPO_LABEL_HINTS = ["source", "repository", "github", "code"];
    const match = Object.entries(info.project_urls ?? {}).find(([label]) => {
        const lower = label.toLowerCase();
        return REPO_LABEL_HINTS.some((hint) => lower.includes(hint));
    });
    if (match) {
        return match[1];
    }
    const CODE_HOSTS = ["github.com", "gitlab.com", "bitbucket.org"];
    const homePage = info.home_page;
    if (homePage && CODE_HOSTS.some((host) => homePage.includes(host))) {
        return homePage;
    }
    return undefined;
}
165
/**
 * Build a maintainer list from PyPI `info`: the maintainer field when set,
 * plus the author when present and distinct from the maintainer.
 * Empty-string emails are normalized to undefined.
 */
function extractMaintainers(info) {
    const result = [];
    if (info.maintainer) {
        result.push({ name: info.maintainer, email: info.maintainer_email || undefined });
    }
    const authorIsDistinct = Boolean(info.author) && info.author !== info.maintainer;
    if (authorIsDistinct) {
        result.push({ name: info.author, email: info.author_email || undefined });
    }
    return result;
}
175
/**
 * Choose the preferred distribution URL for a release: wheel first, then
 * sdist, then whatever file comes first. Returns null for an empty list.
 */
function pickBestDistUrl(files) {
    for (const preferredType of ["bdist_wheel", "sdist"]) {
        const match = files.find((f) => f.packagetype === preferredType);
        if (match) {
            return match.url;
        }
    }
    return files[0]?.url ?? null;
}
185
/**
 * Map a raw PyPI JSON API response to the registry-agnostic common
 * metadata shape.
 *
 * Notes:
 * - requires_dist only describes the latest version, so dependencies are
 *   attached to that version alone.
 * - Fixed: checksum and upload time now come from the SAME file that
 *   `downloadUrl` points at. Previously they were read from `files[0]`,
 *   which could be a different dist (pickBestDistUrl prefers the wheel),
 *   making the later sha256 verification against the downloaded artifact
 *   fail spuriously.
 */
function mapToCommon(raw) {
    const info = raw.info;
    const versions = {};
    const time = {};
    let earliestTimestamp = null;
    for (const [ver, files] of Object.entries(raw.releases)) {
        if (files.length === 0)
            continue;
        const downloadUrl = pickBestDistUrl(files);
        if (!downloadUrl)
            continue;
        // Use the file actually selected for download, so the checksum we
        // report matches the artifact the tarball pipeline will verify.
        const chosenFile = files.find((f) => f.url === downloadUrl) ?? files[0];
        const checksum = chosenFile?.digests?.sha256 ?? undefined;
        // Extract dependencies from requires_dist (only available on latest version)
        const deps = {};
        if (ver === info.version && info.requires_dist) {
            for (const req of info.requires_dist) {
                const match = req.match(/^([a-zA-Z0-9][-a-zA-Z0-9_.]*)/);
                if (match) {
                    deps[normalizePyPIName(match[1])] = req;
                }
            }
        }
        versions[ver] = {
            downloadUrl,
            checksum,
            dependencies: Object.keys(deps).length > 0 ? deps : undefined,
            repository: extractRepoUrl(info)
                ? { type: "git", url: extractRepoUrl(info) }
                : undefined,
            maintainers: extractMaintainers(info),
        };
        // Build time map from upload timestamps (ISO 8601 strings compare lexically)
        const uploadTime = chosenFile?.upload_time_iso_8601;
        if (uploadTime) {
            time[ver] = uploadTime;
            if (!earliestTimestamp || uploadTime < earliestTimestamp) {
                earliestTimestamp = uploadTime;
            }
        }
    }
    if (earliestTimestamp) {
        time.created = earliestTimestamp;
    }
    const keywords = info.keywords
        ? info.keywords.split(/[,\s]+/).filter(Boolean)
        : undefined;
    // Extract license from metadata — try PEP 639 first, then legacy fields
    let license;
    if (info.license_expression && info.license_expression.trim().length > 0) {
        // PEP 639 SPDX expression — most reliable when present (e.g. numpy, pandas)
        license = info.license_expression.trim();
    }
    else if (info.license && info.license !== "UNKNOWN" && info.license.trim().length > 0) {
        if (info.license.length > 200) {
            // Long text is often full license body pasted verbatim — try classifiers first
            const classifierLicense = classifierToSpdx(info.classifiers);
            license = classifierLicense ?? "(long-text)";
        }
        else {
            license = info.license.trim();
        }
    }
    else {
        license = classifierToSpdx(info.classifiers) ?? undefined;
    }
    return {
        name: info.name,
        versions,
        time: Object.keys(time).length > 0 ? time : undefined,
        maintainers: extractMaintainers(info),
        description: info.summary || info.description || undefined,
        keywords,
        homepage: info.home_page || undefined,
        repositoryUrl: extractRepoUrl(info),
        license,
    };
}
262
// Trove license classifier -> SPDX identifier lookup table.
// Keys must match the classifier strings exactly as PyPI reports them.
const CLASSIFIER_TO_SPDX = {
    "License :: OSI Approved :: MIT License": "MIT",
    "License :: OSI Approved :: Apache Software License": "Apache-2.0",
    "License :: OSI Approved :: BSD License": "BSD-3-Clause",
    "License :: OSI Approved :: ISC License (ISCL)": "ISC",
    "License :: OSI Approved :: GNU General Public License v2 (GPLv2)": "GPL-2.0-only",
    "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)": "GPL-2.0-or-later",
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)": "GPL-3.0-only",
    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)": "GPL-3.0-or-later",
    "License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)": "LGPL-2.0-only",
    "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)": "LGPL-2.0-or-later",
    "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)": "LGPL-3.0-only",
    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)": "LGPL-3.0-or-later",
    "License :: OSI Approved :: GNU Affero General Public License v3": "AGPL-3.0-only",
    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)": "AGPL-3.0-or-later",
    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)": "MPL-2.0",
    "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)": "MPL-1.1",
    "License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)": "EPL-1.0",
    "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)": "EPL-2.0",
    "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)": "EUPL-1.2",
    "License :: OSI Approved :: The Unlicense (Unlicense)": "Unlicense",
    "License :: OSI Approved :: Python Software Foundation License": "PSF-2.0",
    "License :: OSI Approved :: Zope Public License": "ZPL-2.0",
    "License :: OSI Approved :: Academic Free License (AFL)": "AFL-3.0",
    "License :: OSI Approved :: Artistic License": "Artistic-2.0",
    "License :: OSI Approved :: Boost Software License 1.0 (BSL-1.0)": "BSL-1.0",
    "License :: OSI Approved :: zlib/libpng License": "Zlib",
};
290
/**
 * Map the first recognized trove license classifier to its SPDX id.
 * Returns null when `classifiers` is missing or none of them match.
 */
function classifierToSpdx(classifiers) {
    if (!classifiers) {
        return null;
    }
    for (const classifier of classifiers) {
        const spdx = CLASSIFIER_TO_SPDX[classifier];
        if (spdx) {
            return spdx;
        }
    }
    return null;
}
"use strict";
// PyPI distribution downloader/extractor with zip-bomb and path-traversal
// hardening. The var __* helpers below are TypeScript-emitted CommonJS
// interop shims — do not edit by hand.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.downloadAndExtractPyPI = downloadAndExtractPyPI;
exports.cleanupPyPI = cleanupPyPI;
const logger_1 = require("../logger");
const crypto = __importStar(require("crypto"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const os = __importStar(require("os"));
const https = __importStar(require("https"));
const tar = __importStar(require("tar"));
const yauzl = __importStar(require("yauzl"));
// Size caps — same env vars as npm so ops can tune both pipelines together.
// Defaults sized to cover ML / native-binary packages like torch/tensorflow,
// which routinely exceed 200 MB per wheel. Streaming download + per-entry
// extraction means memory stays bounded regardless of these caps.
const MAX_DOWNLOAD_BYTES = (() => {
    const mb = parseInt(process.env.MAX_TARBALL_MB || "2000", 10);
    return (Number.isFinite(mb) ? mb : 2000) * 1024 * 1024;
})();
const MAX_EXTRACTED_BYTES = (() => {
    const mb = parseInt(process.env.MAX_EXTRACTED_MB || "4000", 10);
    return (Number.isFinite(mb) ? mb : 4000) * 1024 * 1024;
})();
const MAX_EXTRACTED_FILES = parseInt(process.env.MAX_EXTRACTED_FILES || "25000", 10);
// Maximum redirect hops followed by downloadToFile.
const MAX_REDIRECTS = 5;
// Download URLs are restricted to official PyPI hosts (see validateDownloadUrl).
const ALLOWED_HOSTS = new Set([
    "files.pythonhosted.org",
    "pypi.org",
    "pypi.python.org",
]);
// tar entry types rejected during extraction (symlinks, hardlinks, device nodes).
const BLOCKED_ENTRY_TYPES = new Set([
    "SymbolicLink", "Link", "CharacterDevice", "BlockDevice", "FIFO",
]);
// Shared keep-alive agent for all PyPI downloads.
const downloadAgent = new https.Agent({
    keepAlive: true,
    maxSockets: 50,
    maxFreeSockets: 10,
});
// Persistent tarball cache — set TARBALL_CACHE_PATH to enable
const PYPI_CACHE_DIR = process.env.TARBALL_CACHE_PATH
    ? path.join(process.env.TARBALL_CACHE_PATH, "pypi")
    : "";
77
/**
 * Filename used for the persistent archive cache: slashes in the package
 * name become "__", extension mirrors the source URL (.whl vs .tar.gz).
 */
function pypiCacheKey(packageName, version, url) {
    const extension = url.endsWith(".whl") ? ".whl" : ".tar.gz";
    const safeName = packageName.replace(/[/]/g, "__");
    return safeName + "-" + version + extension;
}
81
/**
 * Look up a previously cached archive for packageName@version.
 * Returns the cached file's path, or null when caching is disabled
 * (no TARBALL_CACHE_PATH) or no cached copy exists.
 */
function getCachedPyPI(packageName, version, url) {
    if (!PYPI_CACHE_DIR) {
        return null;
    }
    const candidate = path.join(PYPI_CACHE_DIR, pypiCacheKey(packageName, version, url));
    try {
        return fs.existsSync(candidate) ? candidate : null;
    }
    catch {
        // Filesystem probe failed — treat as a cache miss.
        return null;
    }
}
92
/**
 * Best-effort: copy a downloaded archive into the persistent cache.
 * No-op when caching is disabled; filesystem errors are swallowed so a
 * failed cache write never fails the scan.
 */
function savePyPIToCache(archivePath, packageName, version, url) {
    if (!PYPI_CACHE_DIR) {
        return;
    }
    try {
        fs.mkdirSync(PYPI_CACHE_DIR, { recursive: true });
        const destination = path.join(PYPI_CACHE_DIR, pypiCacheKey(packageName, version, url));
        fs.copyFileSync(archivePath, destination);
    }
    catch { /* best effort */ }
}
101
+ }
102
/** Defense-in-depth: verify resolved path stays within the extraction directory. */
function isContainedPath(base, entryName) {
    const resolvedBase = path.resolve(base);
    const resolvedEntry = path.resolve(path.join(base, entryName));
    if (resolvedEntry === resolvedBase) {
        return true;
    }
    return resolvedEntry.startsWith(resolvedBase + path.sep);
}
107
+ }
108
/**
 * Download and extract a PyPI distribution (.tar.gz or .whl).
 * .whl files are ZIP archives; .tar.gz are standard tarballs.
 *
 * Returns { packageDir, rootTmpDir, archivePath } on success, or null on
 * any failure (download error, sha256 mismatch, extraction limit tripped).
 * On failure the temp dir is cleaned up here; on success the caller owns
 * rootTmpDir and should eventually remove it (see cleanupPyPI).
 */
async function downloadAndExtractPyPI(downloadUrl, packageName, version, expectedSha256) {
    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "dg-pypi-"));
    const isWheel = downloadUrl.endsWith(".whl");
    const ext = isWheel ? ".whl" : ".tar.gz";
    const archivePath = path.join(tmpDir, `pkg${ext}`);
    const extractDir = path.join(tmpDir, "extracted");
    fs.mkdirSync(extractDir, { recursive: true });
    try {
        // Reuse the persistent archive cache when available, else download
        // (validated + size-capped in downloadToFile) and populate the cache.
        const cached = getCachedPyPI(packageName, version, downloadUrl);
        if (cached) {
            fs.copyFileSync(cached, archivePath);
        }
        else {
            await downloadToFile(downloadUrl, archivePath);
            savePyPIToCache(archivePath, packageName, version, downloadUrl);
        }
        if (expectedSha256) {
            // Stream the archive through sha256 so large files never load into memory.
            const hash = crypto.createHash("sha256");
            const stream = fs.createReadStream(archivePath);
            for await (const chunk of stream)
                hash.update(chunk);
            const actual = hash.digest("hex");
            if (actual !== expectedSha256) {
                logger_1.logger.warning(`Hash mismatch for PyPI ${packageName}@${version}: expected ${expectedSha256}, got ${actual}`);
                cleanupPyPI(tmpDir);
                return null;
            }
        }
        if (isWheel) {
            await extractWheel(archivePath, extractDir, packageName, version);
        }
        else {
            await extractTarGz(archivePath, extractDir, packageName, version);
        }
        // Find the package root directory: sdists typically unpack into a
        // single "<name>-<version>/" folder — return that instead of the wrapper.
        const entries = fs.readdirSync(extractDir);
        if (entries.length === 1) {
            const singleDir = path.join(extractDir, entries[0]);
            if (fs.statSync(singleDir).isDirectory()) {
                return { packageDir: singleDir, rootTmpDir: tmpDir, archivePath };
            }
        }
        return { packageDir: extractDir, rootTmpDir: tmpDir, archivePath };
    }
    catch (err) {
        // Any failure path removes the temp dir before reporting null.
        const msg = err instanceof Error ? err.message : String(err);
        logger_1.logger.warning(`Failed to download/extract PyPI ${packageName}@${version}: ${msg}`);
        cleanupPyPI(tmpDir);
        return null;
    }
}
163
/**
 * Extract a .tar.gz sdist with per-entry validation: blocks link/device
 * entry types, path traversal, and enforces extracted-size and file-count
 * caps during extraction. Throws when a cap trips.
 */
async function extractTarGz(archivePath, extractDir, packageName, version) {
    let extractedBytes = 0;
    let extractedFiles = 0;
    let extractionAborted = false;
    await tar.extract({
        file: archivePath,
        cwd: extractDir,
        filter: (entryPath, entry) => {
            if (extractionAborted)
                return false;
            if ("type" in entry && BLOCKED_ENTRY_TYPES.has(entry.type)) {
                logger_1.logger.warning(`Blocked ${entry.type} entry in ${packageName}@${version}: ${entryPath}`);
                return false;
            }
            if (entryPath.startsWith("/") || entryPath.includes("..")) {
                logger_1.logger.warning(`Blocked path traversal in PyPI ${packageName}@${version}: ${entryPath}`);
                return false;
            }
            // Defense-in-depth: verify resolved path stays within extractDir
            if (!isContainedPath(extractDir, entryPath)) {
                logger_1.logger.warning(`Blocked path escape in PyPI ${packageName}@${version}: ${entryPath}`);
                return false;
            }
            extractedBytes += entry.size;
            if (extractedBytes > MAX_EXTRACTED_BYTES) {
                extractionAborted = true;
                // Fixed: report the actual configured cap (MAX_EXTRACTED_MB env,
                // default 4000MB) instead of the stale hard-coded "200MB".
                logger_1.logger.warning(`Extracted content for PyPI ${packageName}@${version} exceeds ${Math.floor(MAX_EXTRACTED_BYTES / 1024 / 1024)}MB — aborting`);
                return false;
            }
            extractedFiles++;
            if (extractedFiles > MAX_EXTRACTED_FILES) {
                extractionAborted = true;
                logger_1.logger.warning(`File count for PyPI ${packageName}@${version} exceeds ${MAX_EXTRACTED_FILES} — aborting`);
                return false;
            }
            return true;
        },
    });
    if (extractionAborted) {
        throw new Error("Extraction aborted due to size/file limits");
    }
}
205
/**
 * Extract a .whl file (ZIP archive) using yauzl with per-entry validation.
 * Blocks symlinks, path traversal, and enforces size/file-count limits
 * during extraction (not post-extraction). Rejects when a limit trips.
 */
async function extractWheel(archivePath, extractDir, packageName, version) {
    return new Promise((resolve, reject) => {
        yauzl.open(archivePath, { lazyEntries: true }, (err, zipfile) => {
            if (err)
                return reject(err);
            let extractedBytes = 0;
            let extractedFiles = 0;
            zipfile.readEntry();
            zipfile.on("entry", (entry) => {
                // Block path traversal
                if (entry.fileName.startsWith("/") || entry.fileName.includes("..")) {
                    logger_1.logger.warning(`Blocked path traversal in PyPI ${packageName}@${version}: ${entry.fileName}`);
                    zipfile.readEntry();
                    return;
                }
                // Block symlinks (Unix external file attributes: 0xA000 = symlink)
                const mode = (entry.externalFileAttributes >> 16) & 0xFFFF;
                if ((mode & 0xF000) === 0xA000) {
                    logger_1.logger.warning(`Blocked symlink in PyPI ${packageName}@${version}: ${entry.fileName}`);
                    zipfile.readEntry();
                    return;
                }
                // Defense-in-depth: verify resolved path stays within extractDir
                if (!isContainedPath(extractDir, entry.fileName)) {
                    logger_1.logger.warning(`Blocked path escape in PyPI ${packageName}@${version}: ${entry.fileName}`);
                    zipfile.readEntry();
                    return;
                }
                extractedFiles++;
                if (extractedFiles > MAX_EXTRACTED_FILES) {
                    zipfile.close();
                    return reject(new Error(`File count for PyPI ${packageName}@${version} exceeds ${MAX_EXTRACTED_FILES}`));
                }
                // Directory entry — create and continue
                if (entry.fileName.endsWith("/")) {
                    fs.mkdirSync(path.join(extractDir, entry.fileName), { recursive: true });
                    zipfile.readEntry();
                    return;
                }
                // Extract file — track actual bytes written (not metadata) to prevent
                // zip bombs with spoofed uncompressedSize headers.
                zipfile.openReadStream(entry, (err, readStream) => {
                    if (err)
                        return reject(err);
                    const outPath = path.join(extractDir, entry.fileName);
                    fs.mkdirSync(path.dirname(outPath), { recursive: true });
                    const writeStream = fs.createWriteStream(outPath);
                    readStream.on("data", (chunk) => {
                        extractedBytes += chunk.length;
                        if (extractedBytes > MAX_EXTRACTED_BYTES) {
                            readStream.destroy();
                            writeStream.destroy();
                            zipfile.close();
                            // Fixed: report the actual configured cap (MAX_EXTRACTED_MB
                            // env, default 4000MB) instead of the stale "200MB".
                            reject(new Error(`Extracted wheel for PyPI ${packageName}@${version} exceeds ${Math.floor(MAX_EXTRACTED_BYTES / 1024 / 1024)}MB`));
                        }
                    });
                    readStream.pipe(writeStream);
                    writeStream.on("close", () => zipfile.readEntry());
                    writeStream.on("error", reject);
                });
            });
            zipfile.on("end", resolve);
            zipfile.on("error", reject);
        });
    });
}
276
/**
 * Reject download URLs that are malformed, non-HTTPS, or point outside the
 * official PyPI host allowlist. Throws on any violation; returns nothing.
 */
function validateDownloadUrl(url) {
    let parsed = null;
    try {
        parsed = new URL(url);
    }
    catch {
        parsed = null;
    }
    if (parsed === null) {
        throw new Error(`Invalid PyPI download URL: ${url}`);
    }
    if (parsed.protocol !== "https:") {
        throw new Error(`PyPI download URL must use HTTPS: ${url}`);
    }
    if (!ALLOWED_HOSTS.has(parsed.hostname)) {
        throw new Error(`PyPI download host not allowed: ${parsed.hostname}`);
    }
}
291
/**
 * Stream `url` to `destPath` over HTTPS, with host allow-listing
 * (validateDownloadUrl), limited redirect following, and a hard byte cap
 * (MAX_DOWNLOAD_BYTES). Resolves when the file is fully written; rejects on
 * validation failure, HTTP errors, size overrun, or stream errors.
 *
 * Fixes vs previous version:
 * - Follows all standard redirect codes (301/302/303/307/308), not just
 *   301/302, and resolves relative Location headers against the current URL.
 * - Handles 'error' on the response stream; previously a mid-transfer
 *   connection drop left the promise pending forever.
 */
function downloadToFile(url, destPath, redirectsRemaining = MAX_REDIRECTS) {
    return new Promise((resolve, reject) => {
        try {
            validateDownloadUrl(url);
        }
        catch (err) {
            reject(err);
            return;
        }
        https
            .get(url, { agent: downloadAgent }, (response) => {
            const status = response.statusCode;
            const isRedirect = status === 301 || status === 302 || status === 303 ||
                status === 307 || status === 308;
            if (isRedirect && response.headers.location) {
                if (redirectsRemaining <= 0) {
                    reject(new Error("Too many redirects for PyPI download"));
                    return;
                }
                response.resume();
                // Location may be relative — resolve against the current URL.
                // The redirect target is re-validated by the recursive call.
                let nextUrl;
                try {
                    nextUrl = new URL(response.headers.location, url).toString();
                }
                catch {
                    reject(new Error(`Invalid redirect location for PyPI download: ${response.headers.location}`));
                    return;
                }
                downloadToFile(nextUrl, destPath, redirectsRemaining - 1)
                    .then(resolve)
                    .catch(reject);
                return;
            }
            if (status !== 200) {
                reject(new Error(`HTTP ${status} for ${url}`));
                return;
            }
            let bytesReceived = 0;
            let aborted = false;
            const file = fs.createWriteStream(destPath);
            response.on("data", (chunk) => {
                bytesReceived += chunk.length;
                if (!aborted && bytesReceived > MAX_DOWNLOAD_BYTES) {
                    aborted = true;
                    response.destroy();
                    file.destroy();
                    try {
                        fs.unlinkSync(destPath);
                    }
                    catch { }
                    reject(new Error(`PyPI download exceeds ${MAX_DOWNLOAD_BYTES / 1024 / 1024}MB limit — aborting`));
                }
            });
            // A connection error on the response stream must settle the promise.
            response.on("error", (err) => {
                if (!aborted) {
                    aborted = true;
                    file.destroy();
                    try {
                        fs.unlinkSync(destPath);
                    }
                    catch { }
                    reject(err);
                }
            });
            response.pipe(file);
            file.on("finish", () => {
                if (!aborted) {
                    file.close();
                    resolve();
                }
            });
            file.on("error", (err) => {
                if (!aborted) {
                    try {
                        fs.unlinkSync(destPath);
                    }
                    catch { }
                    reject(err);
                }
            });
        })
            .on("error", reject);
    });
}
354
/**
 * Remove a temp download/extraction directory (recursive, forced).
 * Errors are deliberately swallowed — failing to clean a temp dir must
 * never fail the scan.
 */
function cleanupPyPI(dir) {
    try {
        fs.rmSync(dir, { force: true, recursive: true });
    }
    catch {
        // Non-fatal
    }
}
"use strict";
// Type-only module: TypeScript erases all declarations at compile time,
// leaving just the ESM interop marker.
Object.defineProperty(exports, "__esModule", { value: true });