cve-sentinel 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,586 @@
1
+ """NPM/Yarn/PNPM dependency analyzer for JavaScript/TypeScript projects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ import yaml
11
+
12
+ from cve_sentinel.analyzers.base import (
13
+ AnalyzerRegistry,
14
+ BaseAnalyzer,
15
+ FileDetector,
16
+ Package,
17
+ )
18
+
19
+
20
+ class NpmAnalyzer(BaseAnalyzer):
21
+ """Analyzer for npm, yarn, and pnpm package managers.
22
+
23
+ Supports:
24
+ - package.json (Level 1: direct dependencies)
25
+ - package-lock.json (Level 2: transitive dependencies)
26
+ - yarn.lock (Level 2: transitive dependencies)
27
+ - pnpm-lock.yaml (Level 2: transitive dependencies)
28
+ """
29
+
30
@property
def ecosystem(self) -> str:
    """Name of the package ecosystem this analyzer reports packages under."""
    ecosystem_name = "npm"
    return ecosystem_name
34
+
35
@property
def manifest_patterns(self) -> List[str]:
    """Glob patterns for manifest files.

    Returns:
        A new list holding the built-in ``package.json`` pattern followed by
        any patterns supplied under the ``"manifests"`` key of the custom
        pattern mapping.
    """
    extra_patterns = self._custom_patterns.get("manifests", [])
    return ["package.json"] + extra_patterns
41
+
42
@property
def lock_patterns(self) -> List[str]:
    """Glob patterns for lock files.

    Returns:
        A new list holding the built-in npm, yarn and pnpm lock file names
        followed by any patterns supplied under the ``"locks"`` key of the
        custom pattern mapping.
    """
    extra_patterns = self._custom_patterns.get("locks", [])
    built_in = ["package-lock.json", "yarn.lock", "pnpm-lock.yaml"]
    return built_in + extra_patterns
48
+
49
def __init__(
    self,
    analysis_level: int = 2,
    custom_patterns: Optional[Dict[str, List[str]]] = None,
) -> None:
    """Set up the analyzer's depth, extra file patterns and file detector.

    Args:
        analysis_level: Analysis depth (1=manifest only, 2=include lock files)
        custom_patterns: Optional extra glob patterns, keyed by "manifests"
            and/or "locks". A missing or empty mapping means no extras.
    """
    self._file_detector = FileDetector()
    # Fall back to an empty mapping so the pattern properties can always .get().
    self._custom_patterns = custom_patterns if custom_patterns else {}
    self.analysis_level = analysis_level
63
+
64
def detect_files(self, path: Path) -> List[Path]:
    """Locate npm dependency files under a directory.

    Manifest patterns are always searched; lock file patterns are added only
    when the analysis level is 2 or higher.

    Args:
        path: Directory to search

    Returns:
        Whatever the file detector's ``find_files`` returns for the
        selected patterns.
    """
    wanted = list(self.manifest_patterns)
    if self.analysis_level >= 2:
        wanted += self.lock_patterns
    return self._file_detector.find_files(path, wanted)
78
+
79
def parse(self, file_path: Path) -> List[Package]:
    """Parse a dependency file with the parser matching its file name.

    Args:
        file_path: Path to the dependency file

    Returns:
        List of Package objects, or an empty list when the file name is not
        one of the four supported names.
    """
    # Exact file name -> name of the method that knows how to read it.
    parser_by_file_name = {
        "package.json": "_parse_package_json",
        "package-lock.json": "_parse_package_lock",
        "yarn.lock": "_parse_yarn_lock",
        "pnpm-lock.yaml": "_parse_pnpm_lock",
    }
    parser_name = parser_by_file_name.get(file_path.name)
    if parser_name is None:
        return []
    return getattr(self, parser_name)(file_path)
100
+
101
+ def _parse_package_json(self, file_path: Path) -> List[Package]:
102
+ """Parse package.json file.
103
+
104
+ Args:
105
+ file_path: Path to package.json
106
+
107
+ Returns:
108
+ List of direct dependency packages
109
+ """
110
+ packages: List[Package] = []
111
+ content = file_path.read_text(encoding="utf-8")
112
+
113
+ try:
114
+ data = json.loads(content)
115
+ except json.JSONDecodeError:
116
+ return packages
117
+
118
+ # Parse dependencies
119
+ deps = data.get("dependencies", {})
120
+ for name, version_spec in deps.items():
121
+ version = self._normalize_version(version_spec)
122
+ line_num = self._find_line_number(content, name, "dependencies")
123
+ packages.append(
124
+ Package(
125
+ name=name,
126
+ version=version,
127
+ ecosystem=self.ecosystem,
128
+ source_file=file_path,
129
+ source_line=line_num,
130
+ is_direct=True,
131
+ )
132
+ )
133
+
134
+ # Parse devDependencies
135
+ dev_deps = data.get("devDependencies", {})
136
+ for name, version_spec in dev_deps.items():
137
+ version = self._normalize_version(version_spec)
138
+ line_num = self._find_line_number(content, name, "devDependencies")
139
+ packages.append(
140
+ Package(
141
+ name=name,
142
+ version=version,
143
+ ecosystem=self.ecosystem,
144
+ source_file=file_path,
145
+ source_line=line_num,
146
+ is_direct=True,
147
+ )
148
+ )
149
+
150
+ return packages
151
+
152
+ def _parse_package_lock(self, file_path: Path) -> List[Package]:
153
+ """Parse package-lock.json file (v2/v3 format).
154
+
155
+ Args:
156
+ file_path: Path to package-lock.json
157
+
158
+ Returns:
159
+ List of transitive dependency packages
160
+ """
161
+ packages: List[Package] = []
162
+ content = file_path.read_text(encoding="utf-8")
163
+
164
+ try:
165
+ data = json.loads(content)
166
+ except json.JSONDecodeError:
167
+ return packages
168
+
169
+ # Get root package name to skip it
170
+ root_name = data.get("name", "")
171
+
172
+ # Handle v2/v3 format with "packages" key
173
+ packages_data = data.get("packages", {})
174
+ if packages_data:
175
+ for pkg_path, pkg_info in packages_data.items():
176
+ # Skip root package (empty path)
177
+ if not pkg_path:
178
+ continue
179
+
180
+ # Skip link: protocol packages
181
+ if pkg_info.get("link"):
182
+ continue
183
+
184
+ # Extract package name from path (e.g., "node_modules/@scope/pkg" -> "@scope/pkg")
185
+ name = self._extract_package_name_from_path(pkg_path)
186
+ if not name or name == root_name:
187
+ continue
188
+
189
+ version = pkg_info.get("version", "")
190
+ if not version:
191
+ continue
192
+
193
+ packages.append(
194
+ Package(
195
+ name=name,
196
+ version=version,
197
+ ecosystem=self.ecosystem,
198
+ source_file=file_path,
199
+ source_line=None,
200
+ is_direct=False,
201
+ )
202
+ )
203
+ else:
204
+ # Handle v1 format with "dependencies" key
205
+ deps = data.get("dependencies", {})
206
+ self._parse_lock_v1_deps(deps, file_path, packages, root_name)
207
+
208
+ return packages
209
+
210
+ def _parse_lock_v1_deps(
211
+ self,
212
+ deps: Dict[str, Any],
213
+ file_path: Path,
214
+ packages: List[Package],
215
+ root_name: str,
216
+ ) -> None:
217
+ """Recursively parse v1 lock file dependencies.
218
+
219
+ Args:
220
+ deps: Dependencies dict
221
+ file_path: Source file path
222
+ packages: List to append packages to
223
+ root_name: Root package name to skip
224
+ """
225
+ for name, info in deps.items():
226
+ if name == root_name:
227
+ continue
228
+
229
+ version = info.get("version", "")
230
+ if version:
231
+ packages.append(
232
+ Package(
233
+ name=name,
234
+ version=version,
235
+ ecosystem=self.ecosystem,
236
+ source_file=file_path,
237
+ source_line=None,
238
+ is_direct=False,
239
+ )
240
+ )
241
+
242
+ # Recursively parse nested dependencies
243
+ nested_deps = info.get("dependencies", {})
244
+ if nested_deps:
245
+ self._parse_lock_v1_deps(nested_deps, file_path, packages, root_name)
246
+
247
+ def _parse_yarn_lock(self, file_path: Path) -> List[Package]:
248
+ """Parse yarn.lock file.
249
+
250
+ Args:
251
+ file_path: Path to yarn.lock
252
+
253
+ Returns:
254
+ List of transitive dependency packages
255
+ """
256
+ packages: List[Package] = []
257
+ content = file_path.read_text(encoding="utf-8")
258
+
259
+ # Yarn.lock format:
260
+ # "package@^version", "package@~version":
261
+ # version "x.y.z"
262
+ # resolved "..."
263
+ # ...
264
+
265
+ # Match package blocks - handle both yarn v1 and berry formats
266
+ # Pattern for package name line (can have multiple specifiers)
267
+ current_names: List[str] = []
268
+ current_version: Optional[str] = None
269
+
270
+ lines = content.split("\n")
271
+ seen_packages: set = set()
272
+
273
+ for line in lines:
274
+ # Skip comments and empty lines
275
+ if line.startswith("#") or not line.strip():
276
+ continue
277
+
278
+ # Check if this is a package header line
279
+ if not line.startswith(" ") and not line.startswith("\t"):
280
+ # If we have a previous package, save it
281
+ if current_names and current_version:
282
+ for name in current_names:
283
+ pkg_key = (name, current_version)
284
+ if pkg_key not in seen_packages:
285
+ seen_packages.add(pkg_key)
286
+ packages.append(
287
+ Package(
288
+ name=name,
289
+ version=current_version,
290
+ ecosystem=self.ecosystem,
291
+ source_file=file_path,
292
+ source_line=None,
293
+ is_direct=False,
294
+ )
295
+ )
296
+
297
+ # Parse new package header
298
+ current_names = self._parse_yarn_header(line)
299
+ current_version = None
300
+
301
+ elif line.strip().startswith("version"):
302
+ # Extract version
303
+ match = re.match(r'\s+version\s+"?([^"]+)"?', line)
304
+ if match:
305
+ current_version = match.group(1)
306
+
307
+ # Don't forget the last package
308
+ if current_names and current_version:
309
+ for name in current_names:
310
+ pkg_key = (name, current_version)
311
+ if pkg_key not in seen_packages:
312
+ seen_packages.add(pkg_key)
313
+ packages.append(
314
+ Package(
315
+ name=name,
316
+ version=current_version,
317
+ ecosystem=self.ecosystem,
318
+ source_file=file_path,
319
+ source_line=None,
320
+ is_direct=False,
321
+ )
322
+ )
323
+
324
+ return packages
325
+
326
+ def _parse_yarn_header(self, line: str) -> List[str]:
327
+ """Parse yarn.lock package header line.
328
+
329
+ Args:
330
+ line: Header line (e.g., '"@scope/pkg@^1.0.0", "@scope/pkg@~1.0.0":')
331
+
332
+ Returns:
333
+ List of package names
334
+ """
335
+ names: List[str] = []
336
+
337
+ # Remove trailing colon
338
+ line = line.rstrip(":")
339
+
340
+ # Split by comma for multiple version specifiers
341
+ specs = re.split(r'",\s*"?', line)
342
+
343
+ for spec in specs:
344
+ # Clean up the spec
345
+ spec = spec.strip().strip('"').strip("'")
346
+ if not spec:
347
+ continue
348
+
349
+ # Extract package name from specifier (before @ version)
350
+ # Handle scoped packages: @scope/name@version
351
+ name = self._extract_package_name_from_spec(spec)
352
+ if name and name not in names:
353
+ names.append(name)
354
+
355
+ return names
356
+
357
def _parse_pnpm_lock(self, file_path: Path) -> List[Package]:
    """Parse pnpm-lock.yaml file.

    Two parts of the lock file are read:

    * the ``packages`` mapping, whose keys are package specifiers; entries
      flagged ``dev: true`` are skipped;
    * a root-level ``dependencies`` mapping, whose values are either a
      version string or a mapping with a ``version`` key.

    Malformed input is treated as "nothing to report": invalid YAML, a
    top-level value that is not a mapping, or a section that is not a
    mapping yield no packages from that part instead of raising.

    Args:
        file_path: Path to pnpm-lock.yaml

    Returns:
        List of transitive dependency packages
    """
    packages: List[Package] = []
    content = file_path.read_text(encoding="utf-8")

    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError:
        return packages

    # An empty file loads as None, and valid YAML may also be a list or scalar.
    if not isinstance(data, dict):
        return packages

    def strip_peer_suffix(version: Any) -> Any:
        """Drop a peer-dependency suffix such as "(react@18.2.0)" or "_react@17.0.2"."""
        # Only real versions are touched; link:/file: values may contain "_".
        if isinstance(version, str) and version[:1].isdigit():
            return re.split(r"[(_]", version, maxsplit=1)[0]
        return version

    # Keys of the "packages" mapping are specifiers such as "/pkg@1.0.0".
    pkgs = data.get("packages")
    if isinstance(pkgs, dict):
        for pkg_spec, pkg_info in pkgs.items():
            if not isinstance(pkg_spec, str):
                continue

            name, version = self._parse_pnpm_package_spec(pkg_spec)
            if not name or not version:
                continue

            # Skip entries the lock file flags as dev-only.
            if isinstance(pkg_info, dict) and pkg_info.get("dev") is True:
                continue

            packages.append(
                Package(
                    name=name,
                    version=version,
                    ecosystem=self.ecosystem,
                    source_file=file_path,
                    source_line=None,
                    is_direct=False,
                )
            )

    # Root-level "dependencies": plain version strings or {version: ...} maps.
    dependencies = data.get("dependencies")
    if isinstance(dependencies, dict):
        for name, version_info in dependencies.items():
            if isinstance(version_info, str):
                version = self._normalize_version(version_info)
            elif isinstance(version_info, dict):
                version = version_info.get("version", "")
            else:
                continue

            version = strip_peer_suffix(version)
            if version:
                packages.append(
                    Package(
                        name=name,
                        version=version,
                        ecosystem=self.ecosystem,
                        source_file=file_path,
                        source_line=None,
                        is_direct=False,
                    )
                )

    return packages
422
+
423
+ def _parse_pnpm_package_spec(self, spec: str) -> Tuple[Optional[str], Optional[str]]:
424
+ """Parse pnpm package specifier.
425
+
426
+ Args:
427
+ spec: Package specifier (e.g., "/@scope/pkg@1.0.0" or "/pkg@1.0.0")
428
+
429
+ Returns:
430
+ Tuple of (name, version) or (None, None) if parsing fails
431
+ """
432
+ # Remove leading slash
433
+ spec = spec.lstrip("/")
434
+
435
+ # Handle scoped packages: @scope/name@version
436
+ if spec.startswith("@"):
437
+ # @scope/name@version
438
+ match = re.match(r"(@[^/]+/[^@]+)@(.+)", spec)
439
+ if match:
440
+ return match.group(1), match.group(2)
441
+ else:
442
+ # name@version
443
+ match = re.match(r"([^@]+)@(.+)", spec)
444
+ if match:
445
+ return match.group(1), match.group(2)
446
+
447
+ return None, None
448
+
449
+ def _normalize_version(self, version_spec: str) -> str:
450
+ """Normalize version specifier to a clean version string.
451
+
452
+ Handles npm version specifiers: ^, ~, >=, <=, >, <, =, x, *, ||, etc.
453
+
454
+ Args:
455
+ version_spec: Version specifier (e.g., "^1.2.3", "~1.0.0", ">=2.0.0")
456
+
457
+ Returns:
458
+ Normalized version string
459
+ """
460
+ if not version_spec:
461
+ return ""
462
+
463
+ # Handle special protocols
464
+ if any(
465
+ version_spec.startswith(p)
466
+ for p in ["file:", "link:", "workspace:", "git:", "git+", "http:", "https:"]
467
+ ):
468
+ return version_spec
469
+
470
+ # Handle npm: prefix
471
+ if version_spec.startswith("npm:"):
472
+ # npm:@scope/pkg@version -> extract version
473
+ parts = version_spec.split("@")
474
+ if len(parts) >= 2:
475
+ return parts[-1]
476
+
477
+ # Remove common prefixes
478
+ version = re.sub(r"^[\^~>=<]+", "", version_spec)
479
+
480
+ # Handle range: take first version from "x.y.z - a.b.c"
481
+ if " - " in version:
482
+ version = version.split(" - ")[0].strip()
483
+
484
+ # Handle OR: take first version from "x.y.z || a.b.c"
485
+ if " || " in version:
486
+ version = version.split(" || ")[0].strip()
487
+ version = re.sub(r"^[\^~>=<]+", "", version)
488
+
489
+ # Handle space-separated (AND): take first
490
+ if " " in version:
491
+ version = version.split()[0].strip()
492
+ version = re.sub(r"^[\^~>=<]+", "", version)
493
+
494
+ return version.strip()
495
+
496
+ def _extract_package_name_from_path(self, pkg_path: str) -> Optional[str]:
497
+ """Extract package name from node_modules path.
498
+
499
+ Args:
500
+ pkg_path: Path like "node_modules/@scope/pkg" or "node_modules/pkg"
501
+
502
+ Returns:
503
+ Package name or None
504
+ """
505
+ # Handle nested node_modules (e.g., node_modules/a/node_modules/b)
506
+ parts = pkg_path.split("node_modules/")
507
+ if len(parts) < 2:
508
+ return None
509
+
510
+ # Get the last segment after node_modules/
511
+ last_part = parts[-1]
512
+
513
+ # Handle scoped packages
514
+ if last_part.startswith("@"):
515
+ # @scope/name
516
+ segments = last_part.split("/")
517
+ if len(segments) >= 2:
518
+ return f"{segments[0]}/{segments[1]}"
519
+ else:
520
+ # Regular package - just get the first directory name
521
+ return last_part.split("/")[0]
522
+
523
+ return None
524
+
525
+ def _extract_package_name_from_spec(self, spec: str) -> Optional[str]:
526
+ """Extract package name from version specifier.
527
+
528
+ Args:
529
+ spec: Specifier like "@scope/pkg@^1.0.0" or "pkg@~2.0.0"
530
+
531
+ Returns:
532
+ Package name or None
533
+ """
534
+ # Handle scoped packages
535
+ if spec.startswith("@"):
536
+ # @scope/name@version - find the second @
537
+ match = re.match(r"(@[^/]+/[^@]+)@", spec)
538
+ if match:
539
+ return match.group(1)
540
+ # Might be just @scope/name without version
541
+ if "/" in spec and "@" not in spec[1:]:
542
+ return spec
543
+ else:
544
+ # name@version
545
+ at_idx = spec.find("@")
546
+ if at_idx > 0:
547
+ return spec[:at_idx]
548
+ # Might be just name without version
549
+ return spec if spec else None
550
+
551
+ return None
552
+
553
+ def _find_line_number(self, content: str, package_name: str, section: str) -> Optional[int]:
554
+ """Find line number of a package in package.json.
555
+
556
+ Args:
557
+ content: File content
558
+ package_name: Package name to find
559
+ section: Section name (dependencies, devDependencies)
560
+
561
+ Returns:
562
+ Line number (1-indexed) or None
563
+ """
564
+ lines = content.split("\n")
565
+ in_section = False
566
+
567
+ # Escape special regex characters in package name
568
+ escaped_name = re.escape(package_name)
569
+
570
+ for i, line in enumerate(lines, start=1):
571
+ if f'"{section}"' in line or f"'{section}'" in line:
572
+ in_section = True
573
+ elif in_section:
574
+ # Check if we've exited the section (closing brace at same indent or less)
575
+ if re.match(r"^\s*\}", line):
576
+ in_section = False
577
+ elif re.search(rf'["\']({escaped_name})["\']', line):
578
+ return i
579
+
580
+ return None
581
+
582
+
583
+ # Register the analyzer
584
def register() -> None:
    """Add a default-configured ``NpmAnalyzer`` to the analyzer registry.

    The registry is the object returned by ``AnalyzerRegistry.get_instance()``.
    """
    registry = AnalyzerRegistry.get_instance()
    registry.register(NpmAnalyzer())