muaddib-scanner 2.4.13 → 2.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/iocs/builtin.yaml +131 -131
- package/iocs/packages.yaml +276 -276
- package/package.json +2 -3
- package/src/canary-tokens.js +184 -184
- package/src/ioc/bootstrap.js +181 -181
- package/src/ioc/yaml-loader.js +223 -223
- package/src/maintainer-change.js +224 -224
- package/src/output-formatter.js +192 -192
- package/src/publish-anomaly.js +206 -206
- package/src/report.js +230 -230
- package/src/sarif.js +96 -96
- package/src/scanner/ai-config.js +183 -183
- package/src/scanner/dependencies.js +223 -223
- package/src/scanner/hash.js +118 -118
- package/src/scanner/npm-registry.js +128 -128
- package/src/scanner/python.js +442 -442
- package/src/shared/analyze-helper.js +49 -49
- package/src/temporal-analysis.js +260 -260
- package/src/temporal-runner.js +139 -139
- package/src/utils.js +327 -327
- package/src/watch.js +55 -55
package/src/scanner/python.js
CHANGED
|
@@ -1,442 +1,442 @@
|
|
|
1
|
-
const fs = require('fs');
|
|
2
|
-
const path = require('path');
|
|
3
|
-
|
|
4
|
-
// ============================================
|
|
5
|
-
// REQUIREMENTS.TXT PARSER
|
|
6
|
-
// ============================================
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Parse a requirements.txt file into dependency objects.
|
|
10
|
-
* Supports: pinned (==), minimum (>=), compatible (~=), no version, extras ([extra]),
|
|
11
|
-
* comments (#), blank lines, and recursive includes (-r file.txt).
|
|
12
|
-
*
|
|
13
|
-
* @param {string} filePath - Absolute path to requirements.txt
|
|
14
|
-
* @param {Set} [visited] - Already-visited files (cycle protection)
|
|
15
|
-
* @returns {Array<{name: string, version: string, file: string}>}
|
|
16
|
-
*/
|
|
17
|
-
function parseRequirementsTxt(filePath, visited, projectRoot) {
|
|
18
|
-
if (!fs.existsSync(filePath)) return [];
|
|
19
|
-
|
|
20
|
-
if (!visited) visited = new Set();
|
|
21
|
-
const resolved = path.resolve(filePath);
|
|
22
|
-
if (visited.has(resolved)) return [];
|
|
23
|
-
visited.add(resolved);
|
|
24
|
-
|
|
25
|
-
const content = fs.readFileSync(filePath, 'utf8');
|
|
26
|
-
const lines = content.split(/\r?\n/);
|
|
27
|
-
const deps = [];
|
|
28
|
-
const relFile = filePath;
|
|
29
|
-
|
|
30
|
-
for (const rawLine of lines) {
|
|
31
|
-
const line = rawLine.trim();
|
|
32
|
-
|
|
33
|
-
// Skip blanks and comments
|
|
34
|
-
if (!line || line.startsWith('#')) continue;
|
|
35
|
-
|
|
36
|
-
// Recursive include: -r other.txt or --requirement other.txt
|
|
37
|
-
const includeMatch = line.match(/^(?:-r|--requirement)\s+(.+)$/);
|
|
38
|
-
if (includeMatch) {
|
|
39
|
-
const includePath = path.resolve(path.dirname(filePath), includeMatch[1].trim());
|
|
40
|
-
// Path traversal guard: ensure included file stays within the project root
|
|
41
|
-
// Use case-insensitive comparison on Windows (PY-01)
|
|
42
|
-
// PY-001: Derive rootDir once at top-level, pass it down for all recursive calls
|
|
43
|
-
const rootDir = projectRoot || path.resolve(path.dirname(filePath));
|
|
44
|
-
const isWithin = process.platform === 'win32'
|
|
45
|
-
? includePath.toLowerCase().startsWith(rootDir.toLowerCase())
|
|
46
|
-
: includePath.startsWith(rootDir);
|
|
47
|
-
if (!isWithin) continue;
|
|
48
|
-
const included = parseRequirementsTxt(includePath, visited, rootDir);
|
|
49
|
-
deps.push(...included);
|
|
50
|
-
continue;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
// Skip options lines (-i, --index-url, -f, --find-links, -e, etc.)
|
|
54
|
-
if (line.startsWith('-')) continue;
|
|
55
|
-
|
|
56
|
-
// Parse dependency line
|
|
57
|
-
const parsed = parseRequirementLine(line);
|
|
58
|
-
if (parsed) {
|
|
59
|
-
deps.push({ name: parsed.name, version: parsed.version, file: relFile });
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
return deps;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* Parse a single requirements line into name + version.
|
|
68
|
-
* Handles: package==1.0, package>=1.0, package~=1.0, package!=1.0,
|
|
69
|
-
* package<=1.0, package<1.0, package>1.0, package[extra]==1.0,
|
|
70
|
-
* package (no version), inline comments (# ...), environment markers (; ...)
|
|
71
|
-
*
|
|
72
|
-
* @param {string} line - A single requirement line
|
|
73
|
-
* @returns {{name: string, version: string}|null}
|
|
74
|
-
*/
|
|
75
|
-
function parseRequirementLine(line) {
|
|
76
|
-
// Strip inline comments
|
|
77
|
-
let clean = line.split('#')[0].trim();
|
|
78
|
-
if (!clean) return null;
|
|
79
|
-
|
|
80
|
-
// Strip environment markers (e.g. ; python_version >= "3.6")
|
|
81
|
-
clean = clean.split(';')[0].trim();
|
|
82
|
-
if (!clean) return null;
|
|
83
|
-
|
|
84
|
-
// Match: name[extras] operator version
|
|
85
|
-
// Operators: ==, >=, <=, ~=, !=, >, <
|
|
86
|
-
const match = clean.match(/^([a-zA-Z0-9_][a-zA-Z0-9._-]*)(?:\[([^\]]*)\])?\s*(==|>=|<=|~=|!=|>|<)\s*([^\s,;]+)/);
|
|
87
|
-
if (match) {
|
|
88
|
-
return {
|
|
89
|
-
name: normalizePythonName(match[1]),
|
|
90
|
-
version: match[3] + match[4]
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// No version specified: just a package name (possibly with extras)
|
|
95
|
-
const nameMatch = clean.match(/^([a-zA-Z0-9_][a-zA-Z0-9._-]*)(?:\[([^\]]*)\])?$/);
|
|
96
|
-
if (nameMatch) {
|
|
97
|
-
return {
|
|
98
|
-
name: normalizePythonName(nameMatch[1]),
|
|
99
|
-
version: '*'
|
|
100
|
-
};
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
return null;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// ============================================
|
|
107
|
-
// SETUP.PY PARSER
|
|
108
|
-
// ============================================
|
|
109
|
-
|
|
110
|
-
/**
|
|
111
|
-
* Parse a setup.py file to extract install_requires dependencies.
|
|
112
|
-
* Uses regex-based extraction (not full Python AST).
|
|
113
|
-
*
|
|
114
|
-
* @param {string} filePath - Absolute path to setup.py
|
|
115
|
-
* @returns {Array<{name: string, version: string, file: string}>}
|
|
116
|
-
*/
|
|
117
|
-
function parseSetupPy(filePath) {
|
|
118
|
-
if (!fs.existsSync(filePath)) return [];
|
|
119
|
-
|
|
120
|
-
let content;
|
|
121
|
-
try {
|
|
122
|
-
content = fs.readFileSync(filePath, 'utf8');
|
|
123
|
-
} catch {
|
|
124
|
-
return [];
|
|
125
|
-
}
|
|
126
|
-
const deps = [];
|
|
127
|
-
|
|
128
|
-
// Match install_requires=[...] — handles multiline lists
|
|
129
|
-
const installRequiresMatch = content.match(/install_requires\s*=\s*\[([^\]]*)\]/s);
|
|
130
|
-
if (installRequiresMatch) {
|
|
131
|
-
const listContent = installRequiresMatch[1];
|
|
132
|
-
const items = extractStringItems(listContent);
|
|
133
|
-
for (const item of items) {
|
|
134
|
-
const parsed = parseRequirementLine(item);
|
|
135
|
-
if (parsed) {
|
|
136
|
-
deps.push({ name: parsed.name, version: parsed.version, file: filePath });
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// Also check setup_requires
|
|
142
|
-
const setupRequiresMatch = content.match(/setup_requires\s*=\s*\[([^\]]*)\]/s);
|
|
143
|
-
if (setupRequiresMatch) {
|
|
144
|
-
const items = extractStringItems(setupRequiresMatch[1]);
|
|
145
|
-
for (const item of items) {
|
|
146
|
-
const parsed = parseRequirementLine(item);
|
|
147
|
-
if (parsed) {
|
|
148
|
-
deps.push({ name: parsed.name, version: parsed.version, file: filePath });
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
return deps;
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
/**
|
|
157
|
-
* Extract string literals from a Python list body.
|
|
158
|
-
* Handles both single-quoted and double-quoted strings.
|
|
159
|
-
* @param {string} listBody - Content between [ and ]
|
|
160
|
-
* @returns {string[]}
|
|
161
|
-
*/
|
|
162
|
-
function extractStringItems(listBody) {
|
|
163
|
-
const items = [];
|
|
164
|
-
const regex = /(?:'([^'\\]*(?:\\.[^'\\]*)*)'|"([^"\\]*(?:\\.[^"\\]*)*)")/g;
|
|
165
|
-
let match;
|
|
166
|
-
while ((match = regex.exec(listBody)) !== null) {
|
|
167
|
-
const value = (match[1] !== undefined ? match[1] : match[2]).trim();
|
|
168
|
-
if (value) items.push(value);
|
|
169
|
-
}
|
|
170
|
-
return items;
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
// ============================================
|
|
174
|
-
// PYPROJECT.TOML PARSER
|
|
175
|
-
// ============================================
|
|
176
|
-
|
|
177
|
-
/**
|
|
178
|
-
* Parse a pyproject.toml file to extract dependencies.
|
|
179
|
-
* Handles both PEP 621 ([project].dependencies) and Poetry ([tool.poetry.dependencies]).
|
|
180
|
-
* Uses a lightweight TOML parser (no external dependency).
|
|
181
|
-
*
|
|
182
|
-
* @param {string} filePath - Absolute path to pyproject.toml
|
|
183
|
-
* @returns {Array<{name: string, version: string, file: string}>}
|
|
184
|
-
*/
|
|
185
|
-
function parsePyprojectToml(filePath) {
|
|
186
|
-
if (!fs.existsSync(filePath)) return [];
|
|
187
|
-
|
|
188
|
-
let content;
|
|
189
|
-
try {
|
|
190
|
-
content = fs.readFileSync(filePath, 'utf8');
|
|
191
|
-
} catch {
|
|
192
|
-
return [];
|
|
193
|
-
}
|
|
194
|
-
const deps = [];
|
|
195
|
-
|
|
196
|
-
// --- PEP 621: [project] dependencies = [...] ---
|
|
197
|
-
const projectDeps = extractTomlArray(content, 'project', 'dependencies');
|
|
198
|
-
for (const item of projectDeps) {
|
|
199
|
-
const parsed = parseRequirementLine(item);
|
|
200
|
-
if (parsed) {
|
|
201
|
-
deps.push({ name: parsed.name, version: parsed.version, file: filePath });
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// --- Poetry: [tool.poetry.dependencies] ---
|
|
206
|
-
const poetryDeps = extractTomlTable(content, 'tool.poetry.dependencies');
|
|
207
|
-
for (const [name, value] of poetryDeps) {
|
|
208
|
-
// Skip python itself
|
|
209
|
-
if (name === 'python') continue;
|
|
210
|
-
const version = parsePoetryVersion(value);
|
|
211
|
-
deps.push({
|
|
212
|
-
name: normalizePythonName(name),
|
|
213
|
-
version: version,
|
|
214
|
-
file: filePath
|
|
215
|
-
});
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
return deps;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
/**
|
|
222
|
-
* Extract an array value from a TOML section.
|
|
223
|
-
* e.g., [project] dependencies = ["flask>=2.0", "requests"]
|
|
224
|
-
*
|
|
225
|
-
* @param {string} content - Full TOML content
|
|
226
|
-
* @param {string} section - Section name (e.g., "project")
|
|
227
|
-
* @param {string} key - Key name (e.g., "dependencies")
|
|
228
|
-
* @returns {string[]}
|
|
229
|
-
*/
|
|
230
|
-
function extractTomlArray(content, section, key) {
|
|
231
|
-
const lines = content.split(/\r?\n/);
|
|
232
|
-
let inSection = false;
|
|
233
|
-
let collecting = false;
|
|
234
|
-
let buffer = '';
|
|
235
|
-
|
|
236
|
-
for (const line of lines) {
|
|
237
|
-
const trimmed = line.trim();
|
|
238
|
-
|
|
239
|
-
// Detect section headers (both [section] and [[section]])
|
|
240
|
-
const sectionMatch = trimmed.match(/^\[{1,2}([^\]]+)\]{1,2}$/);
|
|
241
|
-
if (sectionMatch) {
|
|
242
|
-
if (collecting) break; // Finished collecting if we hit a new section
|
|
243
|
-
inSection = (sectionMatch[1].trim() === section);
|
|
244
|
-
continue;
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
if (!inSection) continue;
|
|
248
|
-
|
|
249
|
-
// Look for key = [...]
|
|
250
|
-
if (!collecting) {
|
|
251
|
-
const keyMatch = trimmed.match(new RegExp('^' + escapeRegex(key) + '\\s*=\\s*(.*)$'));
|
|
252
|
-
if (keyMatch) {
|
|
253
|
-
buffer = keyMatch[1].trim();
|
|
254
|
-
if (buffer.startsWith('[')) {
|
|
255
|
-
collecting = true;
|
|
256
|
-
if (buffer.includes(']')) {
|
|
257
|
-
// Single-line array
|
|
258
|
-
return extractStringItems(buffer);
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
} else {
|
|
263
|
-
buffer += ' ' + trimmed;
|
|
264
|
-
if (trimmed.includes(']')) {
|
|
265
|
-
return extractStringItems(buffer);
|
|
266
|
-
}
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
if (collecting) {
|
|
271
|
-
return extractStringItems(buffer);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
return [];
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
/**
|
|
278
|
-
* Extract key-value pairs from a TOML table section.
|
|
279
|
-
* e.g., [tool.poetry.dependencies]
|
|
280
|
-
* flask = "^2.0"
|
|
281
|
-
* requests = {version = "^2.28", optional = true}
|
|
282
|
-
*
|
|
283
|
-
* @param {string} content - Full TOML content
|
|
284
|
-
* @param {string} section - Dotted section name
|
|
285
|
-
* @returns {Array<[string, string]>} Array of [name, versionSpec] pairs
|
|
286
|
-
*/
|
|
287
|
-
function extractTomlTable(content, section) {
|
|
288
|
-
const lines = content.split(/\r?\n/);
|
|
289
|
-
let inSection = false;
|
|
290
|
-
const pairs = [];
|
|
291
|
-
|
|
292
|
-
for (const line of lines) {
|
|
293
|
-
const trimmed = line.trim();
|
|
294
|
-
|
|
295
|
-
// Detect section headers (both [section] and [[section]])
|
|
296
|
-
const sectionMatch = trimmed.match(/^\[{1,2}([^\]]+)\]{1,2}$/);
|
|
297
|
-
if (sectionMatch) {
|
|
298
|
-
const sectionName = sectionMatch[1].trim();
|
|
299
|
-
if (sectionName === section) {
|
|
300
|
-
inSection = true;
|
|
301
|
-
continue;
|
|
302
|
-
} else if (inSection) {
|
|
303
|
-
break; // New section, stop
|
|
304
|
-
}
|
|
305
|
-
continue;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
if (!inSection) continue;
|
|
309
|
-
if (!trimmed || trimmed.startsWith('#')) continue;
|
|
310
|
-
|
|
311
|
-
// Parse key = value (extended to support dots in package names)
|
|
312
|
-
const kvMatch = trimmed.match(/^([a-zA-Z0-9_.][a-zA-Z0-9_.-]*)\s*=\s*(.+)$/);
|
|
313
|
-
if (kvMatch) {
|
|
314
|
-
pairs.push([kvMatch[1].trim(), kvMatch[2].trim()]);
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
return pairs;
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
/**
|
|
322
|
-
* Parse a Poetry version specifier from TOML value.
|
|
323
|
-
* Handles: "^2.0", {version = "^2.0", optional = true}, ">=1.0,<2.0"
|
|
324
|
-
*
|
|
325
|
-
* @param {string} value - TOML value string
|
|
326
|
-
* @returns {string} Version spec or '*'
|
|
327
|
-
*/
|
|
328
|
-
function parsePoetryVersion(value) {
|
|
329
|
-
// Simple string: "^2.0" or ">=1.0"
|
|
330
|
-
const simpleMatch = value.match(/^["']([^"']+)["']$/);
|
|
331
|
-
if (simpleMatch) {
|
|
332
|
-
const ver = simpleMatch[1].trim();
|
|
333
|
-
return ver === '*' ? '*' : ver;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
// Inline table: {version = "^2.0", ...}
|
|
337
|
-
const tableMatch = value.match(/version\s*=\s*["']([^"']+)["']/);
|
|
338
|
-
if (tableMatch) {
|
|
339
|
-
const ver = tableMatch[1].trim();
|
|
340
|
-
return ver === '*' ? '*' : ver;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
return '*';
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
// ============================================
|
|
347
|
-
// MAIN DETECTION FUNCTION
|
|
348
|
-
// ============================================
|
|
349
|
-
|
|
350
|
-
/**
|
|
351
|
-
* Detect a Python project and parse all dependency files.
|
|
352
|
-
* Searches for: requirements.txt, requirements/*.txt, setup.py, pyproject.toml
|
|
353
|
-
*
|
|
354
|
-
* @param {string} targetPath - Path to the project root
|
|
355
|
-
* @returns {Array<{name: string, version: string, file: string}>} Deduplicated dependencies
|
|
356
|
-
*/
|
|
357
|
-
function detectPythonProject(targetPath) {
|
|
358
|
-
const deps = [];
|
|
359
|
-
|
|
360
|
-
const resolvedRoot = path.resolve(targetPath);
|
|
361
|
-
|
|
362
|
-
// 1. requirements.txt at root
|
|
363
|
-
const reqTxt = path.join(targetPath, 'requirements.txt');
|
|
364
|
-
if (fs.existsSync(reqTxt)) {
|
|
365
|
-
deps.push(...parseRequirementsTxt(reqTxt, undefined, resolvedRoot));
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
// 2. requirements/*.txt (common pattern: requirements/dev.txt, requirements/prod.txt)
|
|
369
|
-
const reqDir = path.join(targetPath, 'requirements');
|
|
370
|
-
if (fs.existsSync(reqDir) && fs.statSync(reqDir).isDirectory()) {
|
|
371
|
-
const files = fs.readdirSync(reqDir);
|
|
372
|
-
for (const file of files) {
|
|
373
|
-
if (file.endsWith('.txt')) {
|
|
374
|
-
const reqFile = path.join(reqDir, file);
|
|
375
|
-
deps.push(...parseRequirementsTxt(reqFile, undefined, resolvedRoot));
|
|
376
|
-
}
|
|
377
|
-
}
|
|
378
|
-
}
|
|
379
|
-
|
|
380
|
-
// 3. setup.py
|
|
381
|
-
const setupPy = path.join(targetPath, 'setup.py');
|
|
382
|
-
if (fs.existsSync(setupPy)) {
|
|
383
|
-
deps.push(...parseSetupPy(setupPy));
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
// 4. pyproject.toml
|
|
387
|
-
const pyproject = path.join(targetPath, 'pyproject.toml');
|
|
388
|
-
if (fs.existsSync(pyproject)) {
|
|
389
|
-
deps.push(...parsePyprojectToml(pyproject));
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
// Deduplicate by name (keep first occurrence, which has highest priority file)
|
|
393
|
-
return deduplicateDeps(deps);
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
/**
|
|
397
|
-
* Deduplicate dependencies by name, keeping the first occurrence.
|
|
398
|
-
* @param {Array<{name: string, version: string, file: string}>} deps
|
|
399
|
-
* @returns {Array<{name: string, version: string, file: string}>}
|
|
400
|
-
*/
|
|
401
|
-
function deduplicateDeps(deps) {
|
|
402
|
-
const seen = new Set();
|
|
403
|
-
const result = [];
|
|
404
|
-
for (const dep of deps) {
|
|
405
|
-
if (!seen.has(dep.name)) {
|
|
406
|
-
seen.add(dep.name);
|
|
407
|
-
result.push(dep);
|
|
408
|
-
}
|
|
409
|
-
}
|
|
410
|
-
return result;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
// ============================================
|
|
414
|
-
// UTILITIES
|
|
415
|
-
// ============================================
|
|
416
|
-
|
|
417
|
-
/**
|
|
418
|
-
* Normalize a Python package name.
|
|
419
|
-
* PEP 503: all comparisons should be case-insensitive, with hyphens/underscores/periods equivalent.
|
|
420
|
-
* @param {string} name
|
|
421
|
-
* @returns {string}
|
|
422
|
-
*/
|
|
423
|
-
function normalizePythonName(name) {
|
|
424
|
-
return name.toLowerCase().replace(/[-_.]+/g, '-');
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
/**
|
|
428
|
-
* Escape special regex characters in a string.
|
|
429
|
-
* @param {string} str
|
|
430
|
-
* @returns {string}
|
|
431
|
-
*/
|
|
432
|
-
function escapeRegex(str) {
|
|
433
|
-
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
module.exports = {
|
|
437
|
-
parseRequirementsTxt,
|
|
438
|
-
parseSetupPy,
|
|
439
|
-
parsePyprojectToml,
|
|
440
|
-
detectPythonProject,
|
|
441
|
-
normalizePythonName
|
|
442
|
-
};
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
|
|
4
|
+
// ============================================
|
|
5
|
+
// REQUIREMENTS.TXT PARSER
|
|
6
|
+
// ============================================
|
|
7
|
+
|
|
8
|
+
/**
 * Tell whether `candidate` is `root` itself or a path located underneath it.
 * Uses path.relative so that sibling directories sharing a name prefix
 * (e.g. /proj vs /proj-evil) are not mistaken for children.
 *
 * @param {string} candidate - Absolute path to test
 * @param {string} root - Absolute path of the containing directory
 * @returns {boolean}
 */
function isPathWithinRoot(candidate, root) {
  const rel = path.relative(root, candidate);
  if (rel === '') return true;
  // path.relative yields an absolute path when no relative route exists
  // (e.g. a different drive on Windows).
  if (path.isAbsolute(rel)) return false;
  return rel !== '..' && !rel.startsWith('..' + path.sep);
}

/**
 * Parse a requirements.txt file into dependency objects.
 * Supports: pinned (==), minimum (>=), compatible (~=), no version, extras ([extra]),
 * comments (#), blank lines, and recursive includes
 * (-r file.txt, --requirement file.txt, --requirement=file.txt).
 *
 * Includes that resolve outside the project root are ignored, and a file that
 * cannot be read (missing, a directory, permission denied) yields an empty list.
 *
 * @param {string} filePath - Path to requirements.txt
 * @param {Set<string>} [visited] - Already-visited files (cycle protection)
 * @param {string} [projectRoot] - Directory includes must stay within;
 *   defaults to the directory containing `filePath`
 * @returns {Array<{name: string, version: string, file: string}>}
 */
function parseRequirementsTxt(filePath, visited, projectRoot) {
  const seen = visited || new Set();
  const resolved = path.resolve(filePath);
  if (seen.has(resolved)) return [];

  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch {
    // Best effort: an unreadable requirements file contributes no dependencies.
    return [];
  }
  seen.add(resolved);

  // Resolve once so the containment check always compares absolute paths,
  // then hand the same root down to every recursive call.
  const rootDir = path.resolve(projectRoot || path.dirname(filePath));
  const deps = [];

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();

    // Skip blanks and comments
    if (!line || line.startsWith('#')) continue;

    // Recursive include: -r other.txt, --requirement other.txt, --requirement=other.txt
    const includeMatch = line.match(/^(?:-r\s+|--requirement(?:\s+|=))(.+)$/);
    if (includeMatch) {
      // Drop a trailing "  # comment" so it does not become part of the path.
      const target = includeMatch[1].replace(/\s+#.*$/, '').trim();
      const includePath = path.resolve(path.dirname(filePath), target);
      // Path traversal guard: the included file must stay within the project root.
      if (!isPathWithinRoot(includePath, rootDir)) continue;
      deps.push(...parseRequirementsTxt(includePath, seen, rootDir));
      continue;
    }

    // Skip options lines (-i, --index-url, -f, --find-links, -e, etc.)
    if (line.startsWith('-')) continue;

    const parsed = parseRequirementLine(line);
    if (parsed) {
      deps.push({ name: parsed.name, version: parsed.version, file: filePath });
    }
  }

  return deps;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Parse a single requirements line into name + version.
 * Handles: package==1.0, package>=1.0, package~=1.0, package!=1.0,
 * package<=1.0, package<1.0, package>1.0, package[extra]==1.0,
 * package (no version), package @ url (direct reference, reported as '*'),
 * inline comments (# ...), environment markers (; ...)
 *
 * Only the first version clause is kept: "pkg>=1.0,<2.0" yields ">=1.0".
 *
 * @param {string} line - A single requirement line
 * @returns {{name: string, version: string}|null} null when the line is not
 *   a recognizable requirement (or is not a string)
 */
function parseRequirementLine(line) {
  if (typeof line !== 'string') return null;

  // Strip inline comments, then environment markers (e.g. ; python_version >= "3.6")
  const clean = line.split('#')[0].split(';')[0].trim();
  if (!clean) return null;

  // name[extras] operator version — operators: ==, >=, <=, ~=, !=, >, <
  const versioned = clean.match(/^([a-zA-Z0-9_][a-zA-Z0-9._-]*)(?:\[([^\]]*)\])?\s*(==|>=|<=|~=|!=|>|<)\s*([^\s,;]+)/);
  if (versioned) {
    return {
      name: normalizePythonName(versioned[1]),
      version: versioned[3] + versioned[4]
    };
  }

  // Bare name (possibly with extras), or a direct reference "name @ url".
  // A direct reference carries no version specifier, so it is reported as '*'
  // rather than dropped; the package name is still what gets installed.
  const unversioned = clean.match(/^([a-zA-Z0-9_][a-zA-Z0-9._-]*)(?:\[([^\]]*)\])?(?:\s*@\s*\S.*)?$/);
  if (unversioned) {
    return {
      name: normalizePythonName(unversioned[1]),
      version: '*'
    };
  }

  return null;
}
|
|
105
|
+
|
|
106
|
+
// ============================================
|
|
107
|
+
// SETUP.PY PARSER
|
|
108
|
+
// ============================================
|
|
109
|
+
|
|
110
|
+
/**
 * Locate the first `keyword = [ ... ]` assignment in Python source and return
 * the text between the brackets with `#` comments removed.
 * Brackets and `#` characters inside quoted strings are ignored, so entries
 * such as 'requests[security]>=2.0' do not terminate the list early.
 *
 * @param {string} source - Python source text
 * @param {string} keyword - Keyword argument name (plain identifier)
 * @returns {string|null} List body, or null when no complete list is found
 */
function findPythonListBody(source, keyword) {
  const opener = new RegExp(keyword + '\\s*=\\s*\\[').exec(source);
  if (!opener) return null;

  let body = '';
  let depth = 1;
  let quote = null;
  for (let i = opener.index + opener[0].length; i < source.length; i++) {
    const ch = source[i];

    if (quote) {
      body += ch;
      if (ch === '\\' && i + 1 < source.length) {
        // Keep the escaped character and make sure it cannot close the string.
        body += source[++i];
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }

    if (ch === '#') {
      // Comment: skip to end of line so stray quotes in it are never paired.
      const eol = source.indexOf('\n', i);
      if (eol === -1) return null;
      i = eol - 1;
      continue;
    }

    if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === '[') {
      depth++;
    } else if (ch === ']') {
      depth--;
      if (depth === 0) return body;
    }
    body += ch;
  }

  return null;
}

/**
 * Parse a setup.py file to extract install_requires and setup_requires
 * dependencies.
 * Uses text scanning (not a full Python AST): only list literals written
 * directly after the keyword are understood.
 *
 * @param {string} filePath - Path to setup.py
 * @returns {Array<{name: string, version: string, file: string}>} Empty when
 *   the file cannot be read or declares no recognizable dependencies
 */
function parseSetupPy(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch {
    // Missing or unreadable setup.py: nothing to report.
    return [];
  }

  const deps = [];
  for (const keyword of ['install_requires', 'setup_requires']) {
    const listBody = findPythonListBody(content, keyword);
    if (listBody === null) continue;

    for (const item of extractStringItems(listBody)) {
      const parsed = parseRequirementLine(item);
      if (parsed) {
        deps.push({ name: parsed.name, version: parsed.version, file: filePath });
      }
    }
  }

  return deps;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Collect the contents of every quoted string found in a list body.
 * Both '...' and "..." literals are recognized, backslash escapes are kept
 * verbatim, and literals that are empty after trimming are dropped.
 *
 * @param {string} listBody - Content between [ and ]
 * @returns {string[]} Trimmed string contents, in order of appearance
 */
function extractStringItems(listBody) {
  const literalPattern = /(?:'([^'\\]*(?:\\.[^'\\]*)*)'|"([^"\\]*(?:\\.[^"\\]*)*)")/g;
  const found = [];

  for (let hit = literalPattern.exec(listBody); hit !== null; hit = literalPattern.exec(listBody)) {
    // Group 1 is set for single-quoted literals, group 2 for double-quoted ones.
    const raw = hit[1] === undefined ? hit[2] : hit[1];
    const text = raw.trim();
    if (text) {
      found.push(text);
    }
  }

  return found;
}
|
|
172
|
+
|
|
173
|
+
// ============================================
|
|
174
|
+
// PYPROJECT.TOML PARSER
|
|
175
|
+
// ============================================
|
|
176
|
+
|
|
177
|
+
/**
 * Read a pyproject.toml file and list the dependencies it declares.
 * Two layouts are understood: the PEP 621 `dependencies` array under
 * [project], and the Poetry table [tool.poetry.dependencies] (the `python`
 * entry is not a package and is left out).
 * Parsing relies on the lightweight TOML helpers in this file (no external dependency).
 *
 * @param {string} filePath - Absolute path to pyproject.toml
 * @returns {Array<{name: string, version: string, file: string}>} PEP 621
 *   entries first, then Poetry entries; empty when the file is missing or unreadable
 */
function parsePyprojectToml(filePath) {
  if (!fs.existsSync(filePath)) return [];

  let toml;
  try {
    toml = fs.readFileSync(filePath, 'utf8');
  } catch {
    return [];
  }

  // --- PEP 621: [project] dependencies = [...] ---
  const pep621 = [];
  extractTomlArray(toml, 'project', 'dependencies').forEach((spec) => {
    const requirement = parseRequirementLine(spec);
    if (requirement) {
      pep621.push({ name: requirement.name, version: requirement.version, file: filePath });
    }
  });

  // --- Poetry: [tool.poetry.dependencies] ---
  const poetry = [];
  extractTomlTable(toml, 'tool.poetry.dependencies').forEach(([pkgName, rawValue]) => {
    // The interpreter constraint is not a package.
    if (pkgName === 'python') return;
    const version = parsePoetryVersion(rawValue);
    poetry.push({ name: normalizePythonName(pkgName), version, file: filePath });
  });

  return pep621.concat(poetry);
}
|
|
220
|
+
|
|
221
|
+
/**
 * Scan one line (or line fragment) of a TOML array value.
 * Tracks bracket nesting outside of quoted strings, drops a trailing
 * `#` comment, and reports when the bracket that opened the array closes.
 * Strings are assumed not to span lines.
 *
 * @param {string} chunk - Text to scan
 * @param {number} depth - Bracket nesting depth carried over from earlier lines
 * @returns {{text: string, depth: number, closed: boolean}} The text up to the
 *   closing bracket or comment, the updated depth, and whether the array ended
 */
function scanTomlArrayChunk(chunk, depth) {
  let quote = null;
  for (let i = 0; i < chunk.length; i++) {
    const ch = chunk[i];

    if (quote) {
      if (ch === '\\' && quote === '"') {
        i++; // escaped character inside a basic string
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }

    if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === '#') {
      return { text: chunk.slice(0, i), depth, closed: false };
    } else if (ch === '[') {
      depth++;
    } else if (ch === ']') {
      depth--;
      if (depth === 0) {
        return { text: chunk.slice(0, i + 1), depth, closed: true };
      }
    }
  }
  return { text: chunk, depth, closed: false };
}

/**
 * Extract an array value from a TOML section.
 * e.g., [project] dependencies = ["flask>=2.0", "requests"]
 *
 * Single-line and multi-line arrays are supported. The end of the array is
 * the closing bracket found outside any quoted string, so entries containing
 * brackets (extras such as "requests[security]") do not cut the array short;
 * trailing `#` comments are ignored.
 *
 * @param {string} content - Full TOML content
 * @param {string} section - Section name (e.g., "project")
 * @param {string} key - Key name (e.g., "dependencies")
 * @returns {string[]} String items of the array; empty when the section or
 *   key is absent or the value is not an array
 */
function extractTomlArray(content, section, key) {
  const keyPattern = new RegExp('^' + escapeRegex(key) + '\\s*=\\s*(.*)$');
  let inSection = false;
  let collecting = false;
  let depth = 0;
  let buffer = '';

  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();

    // Detect section headers (both [section] and [[section]])
    const sectionMatch = trimmed.match(/^\[{1,2}([^\]]+)\]{1,2}$/);
    if (sectionMatch) {
      if (collecting) break; // Unterminated array: stop at the next section
      inSection = (sectionMatch[1].trim() === section);
      continue;
    }

    if (!inSection) continue;

    let chunk = trimmed;
    if (!collecting) {
      // Look for key = [...]
      const keyMatch = trimmed.match(keyPattern);
      if (!keyMatch) continue;
      chunk = keyMatch[1].trim();
      if (!chunk.startsWith('[')) continue;
      collecting = true;
    }

    const scan = scanTomlArrayChunk(chunk, depth);
    depth = scan.depth;
    buffer += (buffer ? ' ' : '') + scan.text;
    if (scan.closed) {
      return extractStringItems(buffer);
    }
  }

  // Array never closed: return whatever was gathered.
  return collecting ? extractStringItems(buffer) : [];
}
|
|
276
|
+
|
|
277
|
+
/**
 * Remove a trailing `#` comment from a single TOML line.
 * A `#` inside a quoted string is part of the value and is kept.
 *
 * @param {string} line - One line of TOML
 * @returns {string} The line without its comment
 */
function stripTomlLineComment(line) {
  let quote = null;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (quote) {
      if (ch === '\\' && quote === '"') {
        i++; // escaped character inside a basic string
      } else if (ch === quote) {
        quote = null;
      }
    } else if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === '#') {
      return line.slice(0, i);
    }
  }
  return line;
}

/**
 * Extract key-value pairs from a TOML table section.
 * e.g., [tool.poetry.dependencies]
 *       flask = "^2.0"
 *       requests = {version = "^2.28", optional = true}
 *
 * Comments (full-line or trailing) are removed before matching, so a value
 * never carries comment text. Bare keys and quoted keys ("zope.interface")
 * are both accepted. Reading stops at the first header after the section.
 *
 * @param {string} content - Full TOML content
 * @param {string} section - Dotted section name
 * @returns {Array<[string, string]>} Array of [name, rawValue] pairs, where
 *   rawValue is the unparsed TOML text to the right of `=`
 */
function extractTomlTable(content, section) {
  const pairs = [];
  let inSection = false;

  for (const line of content.split(/\r?\n/)) {
    const trimmed = stripTomlLineComment(line).trim();
    if (!trimmed) continue;

    // Detect section headers (both [section] and [[section]])
    const sectionMatch = trimmed.match(/^\[{1,2}([^\]]+)\]{1,2}$/);
    if (sectionMatch) {
      if (sectionMatch[1].trim() === section) {
        inSection = true;
      } else if (inSection) {
        break; // New section, stop
      }
      continue;
    }

    if (!inSection) continue;

    // key = value, where key is "quoted", 'quoted', or bare (dots allowed)
    const kvMatch = trimmed.match(/^(?:"([^"]+)"|'([^']+)'|([a-zA-Z0-9_.][a-zA-Z0-9_.-]*))\s*=\s*(.+)$/);
    if (kvMatch) {
      const keyName = kvMatch[1] || kvMatch[2] || kvMatch[3];
      pairs.push([keyName.trim(), kvMatch[4].trim()]);
    }
  }

  return pairs;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Parse a Poetry version specifier from a raw TOML value.
 * Handles: "^2.0", {version = "^2.0", optional = true}, ">=1.0,<2.0"
 *
 * A simple string may be followed by a trailing `# comment`. In an inline
 * table only the `version` key is read (keys merely ending in "version" are
 * ignored). Anything else — git/path/url dependencies, empty or non-string
 * input — is reported as '*'.
 *
 * @param {string} value - TOML value string
 * @returns {string} Version spec or '*'
 */
function parsePoetryVersion(value) {
  if (typeof value !== 'string') return '*';

  // Simple string: "^2.0" or ">=1.0", optionally followed by a comment
  const simpleMatch = value.trim().match(/^["']([^"']+)["']\s*(?:#.*)?$/);
  if (simpleMatch) {
    return simpleMatch[1].trim() || '*';
  }

  // Inline table: {version = "^2.0", ...}
  const tableMatch = value.match(/\bversion\s*=\s*["']([^"']+)["']/);
  if (tableMatch) {
    return tableMatch[1].trim() || '*';
  }

  return '*';
}
|
|
345
|
+
|
|
346
|
+
// ============================================
|
|
347
|
+
// MAIN DETECTION FUNCTION
|
|
348
|
+
// ============================================
|
|
349
|
+
|
|
350
|
+
/**
 * Tell whether a path exists and is a regular file (symlinks are followed).
 *
 * @param {string} candidate - Path to check
 * @returns {boolean} false when the path is missing, not a file, or cannot be inspected
 */
function isRegularFile(candidate) {
  try {
    return fs.statSync(candidate).isFile();
  } catch {
    return false;
  }
}

/**
 * Detect a Python project and parse all dependency files.
 * Searches for: requirements.txt, requirements/*.txt, setup.py, pyproject.toml
 *
 * Sources are read in that order and files under requirements/ are visited
 * in sorted name order, so the entry kept for a package declared in several
 * places does not depend on directory listing order. Entries that are not
 * regular files (e.g. a directory named "dev.txt") are skipped.
 *
 * @param {string} targetPath - Path to the project root
 * @returns {Array<{name: string, version: string, file: string}>} Deduplicated dependencies
 */
function detectPythonProject(targetPath) {
  const deps = [];
  const resolvedRoot = path.resolve(targetPath);

  // 1. requirements.txt at root
  const reqTxt = path.join(targetPath, 'requirements.txt');
  if (isRegularFile(reqTxt)) {
    deps.push(...parseRequirementsTxt(reqTxt, undefined, resolvedRoot));
  }

  // 2. requirements/*.txt (common pattern: requirements/dev.txt, requirements/prod.txt)
  const reqDir = path.join(targetPath, 'requirements');
  let entries;
  try {
    entries = fs.readdirSync(reqDir);
  } catch {
    // Missing, not a directory, or unreadable: no extra requirement files.
    entries = [];
  }
  for (const file of entries.filter((entry) => entry.endsWith('.txt')).sort()) {
    const reqFile = path.join(reqDir, file);
    if (isRegularFile(reqFile)) {
      deps.push(...parseRequirementsTxt(reqFile, undefined, resolvedRoot));
    }
  }

  // 3. setup.py
  const setupPy = path.join(targetPath, 'setup.py');
  if (isRegularFile(setupPy)) {
    deps.push(...parseSetupPy(setupPy));
  }

  // 4. pyproject.toml
  const pyproject = path.join(targetPath, 'pyproject.toml');
  if (isRegularFile(pyproject)) {
    deps.push(...parsePyprojectToml(pyproject));
  }

  // Deduplicate by name (keep first occurrence, which has highest priority file)
  return deduplicateDeps(deps);
}
|
|
395
|
+
|
|
396
|
+
/**
 * Collapse a dependency list so that each package name appears once.
 * When a name occurs several times, the entry that came first in `deps` wins
 * and the original relative order of the surviving entries is kept.
 *
 * @param {Array<{name: string, version: string, file: string}>} deps
 * @returns {Array<{name: string, version: string, file: string}>} New array; `deps` is not modified
 */
function deduplicateDeps(deps) {
  // A Map iterates in insertion order, which preserves first-seen ordering.
  const firstByName = new Map();
  for (const entry of deps) {
    if (firstByName.has(entry.name)) continue;
    firstByName.set(entry.name, entry);
  }
  return [...firstByName.values()];
}
|
|
412
|
+
|
|
413
|
+
// ============================================
|
|
414
|
+
// UTILITIES
|
|
415
|
+
// ============================================
|
|
416
|
+
|
|
417
|
+
/**
 * Produce the canonical form of a Python package name.
 * Following PEP 503, the name is lower-cased and every run of hyphens,
 * underscores, and periods becomes a single hyphen, so "Zope.Interface",
 * "zope_interface" and "zope-interface" all compare equal.
 *
 * @param {string} name - Package name as written in a dependency file
 * @returns {string} Normalized name
 */
function normalizePythonName(name) {
  const lowered = name.toLowerCase();
  const canonical = lowered.replace(/[-_.]+/g, '-');
  return canonical;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Make a string safe to embed in a regular expression source.
 * Each regex metacharacter (. * + ? ^ $ { } ( ) | [ ] \) is prefixed with a
 * backslash so that it matches itself literally.
 *
 * @param {string} str - Text to be matched literally
 * @returns {string} Escaped text
 */
function escapeRegex(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  const escaped = str.replace(metaCharacters, '\\$&');
  return escaped;
}
|
|
435
|
+
|
|
436
|
+
module.exports = {
|
|
437
|
+
parseRequirementsTxt,
|
|
438
|
+
parseSetupPy,
|
|
439
|
+
parsePyprojectToml,
|
|
440
|
+
detectPythonProject,
|
|
441
|
+
normalizePythonName
|
|
442
|
+
};
|