apisec-code-bolt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. apisec_code_bolt/__init__.py +42 -0
  2. apisec_code_bolt/__main__.py +11 -0
  3. apisec_code_bolt/analysis/__init__.py +96 -0
  4. apisec_code_bolt/analysis/analyzer.py +2309 -0
  5. apisec_code_bolt/analysis/binding_tracker.py +341 -0
  6. apisec_code_bolt/analysis/call_graph.py +1197 -0
  7. apisec_code_bolt/analysis/call_graph_types.py +332 -0
  8. apisec_code_bolt/analysis/call_resolver.py +988 -0
  9. apisec_code_bolt/analysis/capability_tagger.py +322 -0
  10. apisec_code_bolt/analysis/config_scanner.py +197 -0
  11. apisec_code_bolt/analysis/data_flow.py +1883 -0
  12. apisec_code_bolt/analysis/dependency_extractor.py +959 -0
  13. apisec_code_bolt/analysis/flow_analysis.py +1406 -0
  14. apisec_code_bolt/analysis/hof_catalog.py +61 -0
  15. apisec_code_bolt/analysis/integration_detector.py +1399 -0
  16. apisec_code_bolt/analysis/literal_scanner.py +300 -0
  17. apisec_code_bolt/analysis/path_normalizer.py +55 -0
  18. apisec_code_bolt/analysis/read_site_detector.py +310 -0
  19. apisec_code_bolt/analysis/request_patterns.py +162 -0
  20. apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
  21. apisec_code_bolt/analysis/sink_evidence.py +333 -0
  22. apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
  23. apisec_code_bolt/cli/__init__.py +5 -0
  24. apisec_code_bolt/cli/exit_codes.py +17 -0
  25. apisec_code_bolt/cli/main.py +1069 -0
  26. apisec_code_bolt/cloud/__init__.py +1 -0
  27. apisec_code_bolt/cloud/apisec_client.py +118 -0
  28. apisec_code_bolt/cloud/client.py +255 -0
  29. apisec_code_bolt/core/__init__.py +75 -0
  30. apisec_code_bolt/core/config.py +528 -0
  31. apisec_code_bolt/core/credentials.py +65 -0
  32. apisec_code_bolt/core/discovery.py +433 -0
  33. apisec_code_bolt/core/log_format.py +115 -0
  34. apisec_code_bolt/core/manifest.py +1009 -0
  35. apisec_code_bolt/core/repo.py +280 -0
  36. apisec_code_bolt/core/state.py +59 -0
  37. apisec_code_bolt/core/telemetry.py +451 -0
  38. apisec_code_bolt/core/types.py +587 -0
  39. apisec_code_bolt/fingerprinting/__init__.py +1 -0
  40. apisec_code_bolt/frameworks/__init__.py +29 -0
  41. apisec_code_bolt/frameworks/_jwt_common.py +50 -0
  42. apisec_code_bolt/frameworks/auth_helpers.py +437 -0
  43. apisec_code_bolt/frameworks/base.py +608 -0
  44. apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
  45. apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
  46. apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
  47. apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
  48. apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
  49. apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
  50. apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
  51. apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
  52. apisec_code_bolt/frameworks/java/__init__.py +6 -0
  53. apisec_code_bolt/frameworks/java/_annotations.py +167 -0
  54. apisec_code_bolt/frameworks/java/_constraints.py +128 -0
  55. apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
  56. apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
  57. apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
  58. apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
  59. apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
  60. apisec_code_bolt/frameworks/js/__init__.py +8 -0
  61. apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
  62. apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
  63. apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
  64. apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
  65. apisec_code_bolt/frameworks/python/__init__.py +19 -0
  66. apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
  67. apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
  68. apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
  69. apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
  70. apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
  71. apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
  72. apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
  73. apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
  74. apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
  75. apisec_code_bolt/parsing/__init__.py +62 -0
  76. apisec_code_bolt/parsing/base.py +554 -0
  77. apisec_code_bolt/parsing/csharp/__init__.py +5 -0
  78. apisec_code_bolt/parsing/csharp/language_services.py +203 -0
  79. apisec_code_bolt/parsing/csharp/literals.py +72 -0
  80. apisec_code_bolt/parsing/csharp/parser.py +1158 -0
  81. apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
  82. apisec_code_bolt/parsing/js/__init__.py +5 -0
  83. apisec_code_bolt/parsing/js/language_services.py +118 -0
  84. apisec_code_bolt/parsing/js/parser.py +622 -0
  85. apisec_code_bolt/parsing/jvm/__init__.py +7 -0
  86. apisec_code_bolt/parsing/jvm/language_services.py +270 -0
  87. apisec_code_bolt/parsing/jvm/parser.py +774 -0
  88. apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
  89. apisec_code_bolt/parsing/python/__init__.py +150 -0
  90. apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
  91. apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
  92. apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
  93. apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
  94. apisec_code_bolt/parsing/python/expression_utils.py +221 -0
  95. apisec_code_bolt/parsing/python/extraction_types.py +271 -0
  96. apisec_code_bolt/parsing/python/language_services.py +487 -0
  97. apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
  98. apisec_code_bolt/parsing/python/parser.py +719 -0
  99. apisec_code_bolt/parsing/python/path_resolver.py +576 -0
  100. apisec_code_bolt/parsing/python/router_registry.py +806 -0
  101. apisec_code_bolt/parsing/python/type_resolver.py +730 -0
  102. apisec_code_bolt/parsing/python/visitors.py +1544 -0
  103. apisec_code_bolt/parsing/services.py +544 -0
  104. apisec_code_bolt/query/__init__.py +1 -0
  105. apisec_code_bolt/query/ast_cache.py +182 -0
  106. apisec_code_bolt/query/executor.py +283 -0
  107. apisec_code_bolt/query/handlers.py +832 -0
  108. apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
  109. apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
  110. apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
  111. apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,959 @@
1
+ """
2
+ Dependency extraction from project dependency files.
3
+
4
+ Parses requirements.txt, Pipfile, pyproject.toml, setup.cfg, setup.py,
5
+ package.json, go.mod, Cargo.toml, *.csproj, pom.xml, and build.gradle(.kts)
6
+ to produce a unified dependency model.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import configparser
12
+ import json
13
+ import logging
14
+ import re
15
+ from pathlib import Path
16
+
17
+ from ..core.manifest import (
18
+ DependenciesModel,
19
+ ImportModel,
20
+ LocationModel,
21
+ PackageDependencyModel,
22
+ )
23
+ from ..parsing.base import ParsedFile
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class DependencyExtractor:
29
+ """Extracts package dependencies and internal imports from a project."""
30
+
31
# Fixed-name dependency files mapped to the *name* of the method that parses
# them. Names (not bound methods) are stored because this table is declared
# before the methods exist; callers resolve them with getattr().
_DEP_FILE_PARSERS: dict[str, str] = {
    "requirements.txt": "_parse_requirements_txt",
    "Pipfile": "_parse_pipfile",
    "pyproject.toml": "_parse_pyproject_toml",
    "setup.cfg": "_parse_setup_cfg",
    "setup.py": "_parse_setup_py",
    "package.json": "_parse_package_json",
    "go.mod": "_parse_go_mod",
    "Cargo.toml": "_parse_cargo_toml",
}

# Each parser handles exactly one ecosystem; tag packages authoritatively at
# the source so the reasoning engine never has to guess from source_file. (F-6)
# Values match OSV ecosystem names (reasoning engine's Ecosystem enum).
# Keys are parser method names, including parsers that are not in
# _DEP_FILE_PARSERS because their files are found by globbing.
_ECOSYSTEM_BY_PARSER: dict[str, str] = {
    "_parse_requirements_txt": "PyPI",
    "_parse_pipfile": "PyPI",
    "_parse_pyproject_toml": "PyPI",
    "_parse_setup_cfg": "PyPI",
    "_parse_setup_py": "PyPI",
    "_parse_package_json": "npm",
    "_parse_go_mod": "Go",
    "_parse_cargo_toml": "crates.io",
    "_parse_csproj": "NuGet",
    "_parse_pom": "Maven",
    "_parse_gradle": "Maven",
}

# Directory names that are never treated as a source of dependency files:
# virtualenvs, VCS/tool caches, build output, and test/example/doc trees.
_SKIP_DIRS = frozenset(
    {
        ".venv",
        "venv",
        "node_modules",
        ".git",
        "__pycache__",
        ".tox",
        ".nox",
        ".mypy_cache",
        ".pytest_cache",
        "dist",
        "build",
        "test",
        "tests",
        "example",
        "examples",
        "docs",
    }
)

# Requirement line: group 1 is the distribution name (with an optional
# "[extras]" suffix), group 2 is everything after it (the constraint text).
_REQ_VERSION_RE = re.compile(
    r"^([A-Za-z0-9][\w.*-]*(?:\[[^\]]*\])?)\s*(.*)",
)

# Exact npm/semver pin: 1.2.3, 1.2.3-rc.1, 1.2.3+build, 1.2.3-rc.1+build —
# NOT ^1.2.3, ~1.2.3, 1.2.x, 1.x, or hyphen/OR ranges (which also start
# with a digit). Pre-release and build metadata are each optional.
_EXACT_SEMVER_RE = re.compile(r"^\d+\.\d+\.\d+(?:-[0-9A-Za-z.\-]+)?(?:\+[0-9A-Za-z.\-]+)?$")

# setup.py keyword lists; group 1 is the text between "[" and the first "]".
_SETUP_PY_INSTALL_REQUIRES = re.compile(
    r"install_requires\s*=\s*\[([^\]]*)\]",
    re.DOTALL,
)
_SETUP_PY_TESTS_REQUIRE = re.compile(
    r"tests_require\s*=\s*\[([^\]]*)\]",
    re.DOTALL,
)

# go.mod: body of a parenthesised "require ( ... )" block (non-greedy).
_GO_MOD_REQUIRE_BLOCK = re.compile(
    r"require\s*\((.*?)\)",
    re.DOTALL,
)
# go.mod: single-line "require <module> <vX.Y.Z>" statement.
_GO_MOD_SINGLE = re.compile(
    r"require\s+([\w./\-]+)\s+(v[\w.\-+]+)",
)

# Gradle dependency configurations we recognise (Groovy + Kotlin DSL).
_GRADLE_CONFIGS = (
    r"implementation|api|compileOnly|compileOnlyApi|runtimeOnly|"
    r"testImplementation|testCompileOnly|testRuntimeOnly|"
    r"androidTestImplementation|annotationProcessor|kapt|ksp|"
    r"developmentOnly|providedCompile|providedRuntime|classpath"
)
# String notation: implementation 'g:a:v' / implementation("g:a:v") (version optional)
# Groups: 1 = configuration, 2 = group, 3 = artifact, 4 = version (may be None).
_GRADLE_STRING_DEP = re.compile(
    r"\b(" + _GRADLE_CONFIGS + r")\s*\(?\s*['\"]"
    r"([\w.\-]+):([\w.\-]+)(?::([^'\"]+))?['\"]"
)
# Map notation: implementation group: 'g', name: 'a', version: 'v' (version optional)
# Groups are numbered the same way as in _GRADLE_STRING_DEP.
_GRADLE_MAP_DEP = re.compile(
    r"\b(" + _GRADLE_CONFIGS + r")\s*\(?\s*"
    r"group:\s*['\"]([\w.\-]+)['\"]\s*,\s*name:\s*['\"]([\w.\-]+)['\"]"
    r"(?:\s*,\s*version:\s*['\"]([^'\"]+)['\"])?"
)
# A literal, exact Gradle/Maven version: starts with a digit, no dynamic
# (+), range ([](),), or variable ($) markers.
_EXACT_JVM_VERSION_RE = re.compile(r"^\d[\w.\-]*$")
127
+
128
+ def __init__(
129
+ self,
130
+ project_root: Path,
131
+ parsed_files: dict[Path, ParsedFile],
132
+ ) -> None:
133
+ self._project_root = project_root
134
+ self._parsed_files = parsed_files
135
+
136
def extract(self) -> DependenciesModel:
    """Build the dependency model for the whole project.

    Dependency files are visited in a fixed order: known files at the
    project root, ``requirements-*`` / ``requirements_*`` variants, the
    ``requirements/`` directory, workspace members, then recursive
    ``*.csproj``, ``pom.xml`` and Gradle build files. Packages are
    de-duplicated on ``(lower-cased name, source_file)`` keeping the first
    occurrence, and imports are copied from every successfully parsed file.

    Returns:
        A ``DependenciesModel`` with the packages and the collected imports.
    """
    root = self._project_root
    collected: list[PackageDependencyModel] = []
    req_variant_patterns = ("requirements-*.txt", "requirements_*.txt")

    def scan_known_files(directory: Path) -> None:
        # Run every fixed-name parser whose file exists in *directory*.
        for filename, method_name in self._DEP_FILE_PARSERS.items():
            candidate = directory / filename
            if not candidate.exists():
                continue
            parse = getattr(self, method_name)
            collected.extend(self._tag(parse(candidate), method_name))

    def scan_requirements(req_path: Path) -> None:
        # Parse one pip requirements file and tag its packages.
        parsed = self._parse_requirements_txt(req_path)
        collected.extend(self._tag(parsed, "_parse_requirements_txt"))

    # 1. Known dependency files directly in the project root.
    scan_known_files(root)

    # 2. requirements-*.txt / requirements_*.txt variants in the root.
    for pattern in req_variant_patterns:
        for variant in sorted(root.glob(pattern)):
            if variant.name != "requirements.txt":  # handled in step 1
                scan_requirements(variant)

    # 3. requirements/ subdirectory (*.txt and *.in files).
    req_dir = root / "requirements"
    if req_dir.is_dir():
        for entry in sorted(req_dir.glob("*")):
            if entry.suffix in (".txt", ".in") and entry.is_file():
                scan_requirements(entry)

    # 3b. Workspace members / shallow subdirectories.
    for member_dir in self._find_workspace_members(root):
        scan_known_files(member_dir)
        for pattern in req_variant_patterns:
            for variant in sorted(member_dir.glob(pattern)):
                scan_requirements(variant)

    # 3c. .csproj files (NuGet / .NET), found recursively from the root.
    for csproj in sorted(root.rglob("*.csproj")):
        collected.extend(self._tag(self._parse_csproj(csproj), "_parse_csproj"))

    # 3d. Maven and Gradle build files, found recursively but ignoring
    #     anything below a skipped (build-output / vendored) directory.
    for pom in sorted(root.rglob("pom.xml")):
        if not self._under_skip_dir(pom, root):
            collected.extend(self._tag(self._parse_pom(pom), "_parse_pom"))
    for pattern in ("build.gradle", "build.gradle.kts"):
        for gradle in sorted(root.rglob(pattern)):
            if not self._under_skip_dir(gradle, root):
                collected.extend(self._tag(self._parse_gradle(gradle), "_parse_gradle"))

    # 4. De-duplicate; dicts keep insertion order, so the first entry wins.
    unique: dict[tuple[str, str], PackageDependencyModel] = {}
    for pkg in collected:
        unique.setdefault((pkg.name.lower(), pkg.source_file), pkg)
    packages = list(unique.values())

    # 5. Copy imports from successfully parsed files; a missing location
    #    is recorded as line 0.
    imports: list[ImportModel] = [
        ImportModel(
            module=imp.module,
            names=imp.names,
            alias=imp.alias,
            is_relative=imp.is_relative,
            location=LocationModel(
                file=str(parsed_file.path),
                line=imp.location.line if imp.location else 0,
            ),
        )
        for parsed_file in self._parsed_files.values()
        if parsed_file.success
        for imp in parsed_file.imports
    ]

    return DependenciesModel(packages=packages, internal_imports=imports)
232
+
233
+ def _tag(
234
+ self, packages: list[PackageDependencyModel], method_name: str
235
+ ) -> list[PackageDependencyModel]:
236
+ """Stamp the ecosystem of the parser that produced these packages. (F-6)"""
237
+ ecosystem = self._ECOSYSTEM_BY_PARSER.get(method_name)
238
+ for pkg in packages:
239
+ pkg.ecosystem = ecosystem
240
+ return packages
241
+
242
+ def _under_skip_dir(self, path: Path, root: Path) -> bool:
243
+ """True if *path* sits under a build-output or vendored directory."""
244
+ try:
245
+ parts = path.relative_to(root).parts[:-1] # exclude the filename
246
+ except ValueError:
247
+ parts = path.parts[:-1]
248
+ return any(part in self._SKIP_DIRS or part == "target" for part in parts)
249
+
250
+ # -- workspace member discovery --------------------------------------------
251
+
252
+ def _find_workspace_members(self, root: Path) -> list[Path]:
253
+ """Return directories that may contain their own dependency files.
254
+
255
+ Checks for uv workspace (``[tool.uv.workspace.members]``), Poetry
256
+ workspaces (``[tool.poetry.packages]``), and falls back to a shallow
257
+ depth-1 glob for any immediate subdirectory containing a recognised
258
+ dep file.
259
+ """
260
+
261
+ root_pyproject = root / "pyproject.toml"
262
+ member_dirs: list[Path] = []
263
+
264
+ if root_pyproject.exists():
265
+ try:
266
+ import tomllib
267
+
268
+ data = tomllib.loads(root_pyproject.read_text(errors="replace"))
269
+ except Exception:
270
+ data = {}
271
+
272
+ member_globs: list[str] = []
273
+
274
+ uv_members = data.get("tool", {}).get("uv", {}).get("workspace", {}).get("members", [])
275
+ if uv_members:
276
+ member_globs.extend(uv_members)
277
+
278
+ poetry_packages = data.get("tool", {}).get("poetry", {}).get("packages", [])
279
+ for pkg in poetry_packages:
280
+ if isinstance(pkg, dict) and "include" in pkg:
281
+ member_globs.append(pkg["include"])
282
+
283
+ for pattern in member_globs:
284
+ clean = pattern.rstrip("/").rstrip("*").rstrip("/")
285
+ if not clean:
286
+ continue
287
+ for candidate in sorted(root.glob(clean)):
288
+ if candidate.is_dir() and candidate.name not in self._SKIP_DIRS:
289
+ member_dirs.append(candidate)
290
+
291
+ if not member_dirs:
292
+ for child in sorted(root.iterdir()):
293
+ if not child.is_dir() or child.name.startswith("."):
294
+ continue
295
+ if child.name in self._SKIP_DIRS:
296
+ continue
297
+ has_dep_file = any((child / fname).exists() for fname in self._DEP_FILE_PARSERS)
298
+ if has_dep_file:
299
+ member_dirs.append(child)
300
+
301
+ return member_dirs
302
+
303
+ # -- requirements.txt / requirements-*.txt ---------------------------------
304
+
305
def _parse_requirements_txt(self, path: Path) -> list[PackageDependencyModel]:
    """Parse requirements.txt (or any requirements-*.txt variant).

    Comments, blank lines, option lines (``-r``, ``--hash`` …) and bare
    URLs are skipped. ``version`` is set only for an exact ``==`` / ``===``
    pin; everything else keeps ``version=None`` with the raw constraint
    preserved in ``version_constraint``.

    Args:
        path: The requirements file to read.

    Returns:
        One package per requirement line; empty on read/parse failure
        (a warning is logged).
    """
    source_name = path.name
    lowered = source_name.lower()
    # A file is treated as dev-only when its name mentions dev or test.
    is_dev = "dev" in lowered or "test" in lowered
    packages: list[PackageDependencyModel] = []

    try:
        for line in path.read_text(errors="replace").splitlines():
            line = line.split("#", 1)[0].strip()
            if not line:
                continue
            # Skip flags (-r, --hash, --index-url, etc.) and URLs
            if line.startswith(("-", "http://", "https://")):
                continue

            m = self._REQ_VERSION_RE.match(line)
            if not m:
                continue
            raw_name, constraint = m.group(1), m.group(2).strip()
            # Strip extras bracket from the name for the canonical name
            name = raw_name.split("[")[0].strip()
            if not name:
                continue

            # Only "==" (and PEP 440 arbitrary-equality "===") is an exact
            # pin. Floors/ranges (~=, >=, >, <) leave version=None so the
            # matcher never confirms a CVE against a version the resolver
            # may not have installed. (F-1)
            version: str | None = None
            if constraint.startswith("==="):
                pinned, allow_wildcard = constraint[3:], True
            elif constraint.startswith("=="):
                pinned, allow_wildcard = constraint[2:], False
            else:
                pinned, allow_wildcard = "", False
            if pinned:
                # Keep only the version token: stop at a further clause
                # (","), an environment marker (";"), trailing options
                # such as "--hash=…", or pip-compile's "\" continuation.
                token = re.split(r"[,;\s\\]", pinned.strip(), maxsplit=1)[0]
                # "==1.4.*" is prefix matching, not an exact pin.
                if token and (allow_wildcard or "*" not in token):
                    version = token

            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=version,
                    version_constraint=constraint or None,
                    is_dev=is_dev,
                    source_file=source_name,
                )
            )
    except Exception as e:
        # Deliberately best-effort: one bad file must not abort extraction.
        logger.warning("Failed to parse %s: %s", path.name, e)

    return packages
356
+
357
+ # -- Pipfile ---------------------------------------------------------------
358
+
359
def _parse_pipfile(self, path: Path) -> list[PackageDependencyModel]:
    """Parse a Pipfile (TOML format) — [packages] and [dev-packages].

    ``version`` is set only for an exact ``==`` pin; wildcard pins such as
    ``==3.2.*`` and all ranges leave it ``None``. A ``*`` spec produces no
    constraint.

    Args:
        path: The Pipfile to read.

    Returns:
        One package per entry; empty when ``tomllib`` is unavailable or
        the file cannot be parsed (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        import tomllib
    except ImportError:
        logger.debug("tomllib unavailable, skipping Pipfile")
        return packages

    try:
        data = tomllib.loads(path.read_text(errors="replace"))
    except Exception as e:
        logger.warning("Failed to parse Pipfile: %s", e)
        return packages

    def exact_pin(spec: object) -> str | None:
        # Return the bare version for an exact "==" pin, else None.
        if not isinstance(spec, str):
            return None
        text = spec.strip()
        if not text.startswith("=="):
            return None
        token = re.split(r"[,;\s]", text.lstrip("=").strip(), maxsplit=1)[0]
        # "==1.4.*" is prefix matching, not an exact pin.
        if not token or "*" in token:
            return None
        return token

    for section, is_dev in (("packages", False), ("dev-packages", True)):
        entries = data.get(section, {})
        if not isinstance(entries, dict):
            # e.g. `packages = "..."`; nothing sensible to extract.
            continue
        for name, spec in entries.items():
            version: str | None = None
            constraint: str | None = None
            if isinstance(spec, str):
                constraint = spec if spec != "*" else None
                version = exact_pin(spec)
            elif isinstance(spec, dict):
                ver = spec.get("version", "*")
                constraint = ver if ver != "*" else None
                version = exact_pin(ver)

            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=version,
                    version_constraint=constraint,
                    is_dev=is_dev,
                    source_file="Pipfile",
                )
            )

    return packages
399
+
400
+ # -- pyproject.toml --------------------------------------------------------
401
+
402
def _parse_pyproject_toml(self, path: Path) -> list[PackageDependencyModel]:
    """Parse pyproject.toml — PEP 621 and Poetry formats.

    Reads ``[project].dependencies``, ``[project.optional-dependencies]``,
    ``[tool.poetry.dependencies]``, ``[tool.poetry.group.*.dependencies]``
    and the legacy ``[tool.poetry.dev-dependencies]``. Optional/group
    names dev, test, testing, tests, lint and docs are flagged as dev.
    For Poetry entries an exact ``==`` pin populates ``version`` with the
    bare version number.

    Args:
        path: The pyproject.toml to read.

    Returns:
        Packages in the order listed above; empty when ``tomllib`` is
        unavailable or the file cannot be parsed (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        import tomllib
    except ImportError:
        logger.debug("tomllib unavailable, skipping pyproject.toml")
        return packages

    try:
        data = tomllib.loads(path.read_text(errors="replace"))
    except Exception as e:
        logger.warning("Failed to parse pyproject.toml: %s", e)
        return packages

    dev_groups = {"dev", "test", "testing", "tests", "lint", "docs"}
    # A PEP 508 name ends at whitespace, extras "[", a parenthesised
    # specifier "(", a version operator, a marker ";" or a direct-URL "@".
    name_end = re.compile(r"[\s\[(><=!~;@]")

    def table(parent: object, key: str) -> dict:
        # Non-table values are treated as empty rather than crashing.
        value = parent.get(key) if isinstance(parent, dict) else None
        return value if isinstance(value, dict) else {}

    def add_pep508(dep: object, is_dev: bool | None) -> None:
        # Record one PEP 508 requirement string.
        if not isinstance(dep, str):
            return
        text = dep.strip()
        name = name_end.split(text, maxsplit=1)[0]
        if not name:
            return
        # Omit is_dev for runtime deps so the model's own default applies.
        extra = {} if is_dev is None else {"is_dev": is_dev}
        packages.append(
            PackageDependencyModel(
                name=name,
                version_constraint=text,
                source_file="pyproject.toml",
                **extra,
            )
        )

    def add_poetry(entries: dict, is_dev: bool | None) -> None:
        # Record every entry of one Poetry dependency table.
        for name, spec in entries.items():
            if name.lower() == "python":
                continue  # interpreter constraint, not a package
            constraint = self._poetry_constraint(spec)
            version: str | None = None
            if isinstance(constraint, str) and constraint.strip().startswith("=="):
                # Store the bare version, not the "==" operator.
                pinned = constraint.strip().lstrip("=").strip()
                if pinned and "*" not in pinned and "," not in pinned:
                    version = pinned
            extra = {} if is_dev is None else {"is_dev": is_dev}
            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=version,
                    version_constraint=constraint,
                    source_file="pyproject.toml",
                    **extra,
                )
            )

    project = table(data, "project")

    # PEP 621 — [project] dependencies
    runtime = project.get("dependencies", [])
    for dep in runtime if isinstance(runtime, list) else []:
        add_pep508(dep, None)

    # PEP 621 — [project.optional-dependencies]
    for group, deps in table(project, "optional-dependencies").items():
        is_dev = group.lower() in dev_groups
        for dep in deps if isinstance(deps, list) else []:
            add_pep508(dep, is_dev)

    poetry = table(table(data, "tool"), "poetry")

    # Poetry — [tool.poetry.dependencies]
    add_poetry(table(poetry, "dependencies"), None)

    # Poetry — [tool.poetry.group.*.dependencies]
    for group_name, group_data in table(poetry, "group").items():
        add_poetry(table(group_data, "dependencies"), group_name.lower() in dev_groups)

    # Poetry — legacy [tool.poetry.dev-dependencies]
    add_poetry(table(poetry, "dev-dependencies"), True)

    return packages
492
+
493
+ @staticmethod
494
+ def _poetry_constraint(spec: str | dict) -> str | None:
495
+ """Normalise a Poetry version spec to a constraint string."""
496
+ if isinstance(spec, str):
497
+ return spec if spec != "*" else None
498
+ if isinstance(spec, dict):
499
+ v = spec.get("version", "*")
500
+ return v if v != "*" else None
501
+ return None
502
+
503
+ # -- setup.cfg -------------------------------------------------------------
504
+
505
def _parse_setup_cfg(self, path: Path) -> list[PackageDependencyModel]:
    """Parse setup.cfg [options] install_requires and extras_require.

    Extras groups named dev, test, testing, tests, lint or docs are
    flagged as dev. The full requirement string is kept as
    ``version_constraint``; ``version`` is not populated.

    Args:
        path: The setup.cfg to read.

    Returns:
        Packages from ``install_requires`` followed by those from
        ``[options.extras_require]``; empty if the file cannot be parsed.
    """
    packages: list[PackageDependencyModel] = []
    cfg = configparser.ConfigParser()

    try:
        cfg.read(str(path), encoding="utf-8")
    except Exception as e:
        logger.warning("Failed to parse setup.cfg: %s", e)
        return packages

    dev_groups = {"dev", "test", "testing", "tests", "lint", "docs"}
    name_end = re.compile(r"[\[><=!~;]")
    # A comma starts a new requirement only when a name follows it;
    # a comma followed by an operator ("pkg>=1,<2") belongs to one
    # requirement's specifier and must not be split.
    next_requirement = re.compile(r",(?=\s*[A-Za-z])")

    def add(dep_str: str, is_dev: bool | None) -> None:
        # Record one requirement string.
        name = name_end.split(dep_str, maxsplit=1)[0].strip()
        if not name:
            return
        # Omit is_dev for install_requires so the model default applies.
        extra = {} if is_dev is None else {"is_dev": is_dev}
        packages.append(
            PackageDependencyModel(
                name=name,
                version_constraint=dep_str.strip(),
                source_file="setup.cfg",
                **extra,
            )
        )

    try:
        install_requires = self._cfg_multiline(cfg, "options", "install_requires")
    except configparser.Error as e:
        # e.g. a stray "%" makes value interpolation fail on access.
        logger.warning("Failed to parse setup.cfg: %s", e)
        install_requires = []
    for dep_str in install_requires:
        add(dep_str, None)

    # extras_require sections (e.g. [options.extras_require] dev = ...)
    if cfg.has_section("options.extras_require"):
        for group in cfg.options("options.extras_require"):
            is_dev = group.lower() in dev_groups
            try:
                raw = cfg.get("options.extras_require", group, fallback="")
            except configparser.Error as e:
                logger.warning("Failed to parse setup.cfg: %s", e)
                continue
            for line in raw.splitlines():
                for dep_str in next_requirement.split(line):
                    if dep_str.strip():
                        add(dep_str, is_dev)

    return packages
545
+
546
+ @staticmethod
547
+ def _cfg_multiline(cfg: configparser.ConfigParser, section: str, key: str) -> list[str]:
548
+ """Read a setup.cfg multiline value as a list of non-empty strings."""
549
+ raw = cfg.get(section, key, fallback="")
550
+ return [line.strip() for line in raw.splitlines() if line.strip()]
551
+
552
+ @staticmethod
553
+ def _split_cfg_list(raw: str) -> list[str]:
554
+ """Split a comma-or-newline-separated setup.cfg value."""
555
+ items: list[str] = []
556
+ for part in raw.replace("\n", ",").split(","):
557
+ part = part.strip()
558
+ if part:
559
+ items.append(part)
560
+ return items
561
+
562
+ # -- setup.py (regex) ------------------------------------------------------
563
+
564
def _parse_setup_py(self, path: Path) -> list[PackageDependencyModel]:
    """Best-effort extraction of install_requires / tests_require from setup.py.

    The file is never executed. The class-level regexes locate the opening
    ``[`` of each list; the list body is then read up to its matching
    ``]``, ignoring brackets inside string literals, so extras such as
    ``"requests[security]"`` do not cut the list short. Only the first
    occurrence of each keyword is used, and only literal strings are
    picked up. ``tests_require`` entries are flagged as dev.

    Args:
        path: The setup.py to read.

    Returns:
        One package per string literal found; empty if the file cannot
        be read (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        content = path.read_text(errors="replace")
    except Exception as e:
        logger.warning("Failed to read setup.py: %s", e)
        return packages

    def list_body(start: int) -> str:
        # Text from *start* to the "]" that closes the list opened just
        # before it (or to end of file when the list never closes).
        quote = ""
        depth = 0
        pos = start
        end = len(content)
        while pos < end:
            ch = content[pos]
            if quote:
                if ch == "\\":
                    pos += 1  # skip the escaped character
                elif ch == quote:
                    quote = ""
            elif ch in "'\"":
                quote = ch
            elif ch == "#":
                # Comment: ignore quotes/brackets up to the line end.
                newline = content.find("\n", pos)
                pos = end if newline == -1 else newline
                continue
            elif ch == "[":
                depth += 1
            elif ch == "]":
                if depth == 0:
                    return content[start:pos]
                depth -= 1
            pos += 1
        return content[start:]

    for pattern, is_dev in (
        (self._SETUP_PY_INSTALL_REQUIRES, False),
        (self._SETUP_PY_TESTS_REQUIRE, True),
    ):
        m = pattern.search(content)
        if not m:
            continue
        # m.start(1) is the first character after the opening "[".
        for item in self._extract_string_literals(list_body(m.start(1))):
            name = re.split(r"[\[><=!~;]", item, maxsplit=1)[0].strip()
            if name:
                packages.append(
                    PackageDependencyModel(
                        name=name,
                        version_constraint=item.strip(),
                        is_dev=is_dev,
                        source_file="setup.py",
                    )
                )

    return packages
593
+
594
+ @staticmethod
595
+ def _extract_string_literals(text: str) -> list[str]:
596
+ """Pull quoted strings out of a Python list literal fragment."""
597
+ return re.findall(r"""['"]([^'"]+)['"]""", text)
598
+
599
+ # -- package.json (Node / JS / TS) -----------------------------------------
600
+
601
def _parse_package_json(self, path: Path) -> list[PackageDependencyModel]:
    """Read ``dependencies`` and ``devDependencies`` from a package.json.

    ``version`` is filled in only when the declared value is a strict
    exact semver (per ``_EXACT_SEMVER_RE``); ranges such as ``^1.2.3``,
    ``1.2.x`` or ``1.x`` keep ``version=None`` so they are not treated as
    installed. (F-2) Non-string values produce no constraint.

    Returns:
        One package per entry; empty if the file cannot be parsed (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        data = json.loads(path.read_text(errors="replace"))
    except Exception as e:
        logger.warning("Failed to parse package.json: %s", e)
        return packages

    sections = (("dependencies", False), ("devDependencies", True))
    for section, is_dev in sections:
        for name, ver in data.get(section, {}).items():
            is_text = isinstance(ver, str)
            trimmed = ver.strip() if is_text else None
            exact = trimmed if is_text and self._EXACT_SEMVER_RE.match(trimmed) else None
            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=exact,
                    version_constraint=ver if is_text else None,
                    is_dev=is_dev,
                    source_file="package.json",
                )
            )

    return packages
629
+
630
+ # -- go.mod ----------------------------------------------------------------
631
+
632
def _parse_go_mod(self, path: Path) -> list[PackageDependencyModel]:
    """Read module requirements from a go.mod file.

    Parenthesised ``require ( ... )`` blocks are processed first, then
    single-line ``require`` statements; a single-line entry is skipped
    when a package with the same name was already collected. The listed
    version is used as both ``version`` and ``version_constraint``.

    Returns:
        The collected packages; empty if the file cannot be read (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        content = path.read_text(errors="replace")
    except Exception as e:
        logger.warning("Failed to read go.mod: %s", e)
        return packages

    def requirement(module: str, ver: str) -> PackageDependencyModel:
        # Build the model for one "<module> <version>" pair.
        return PackageDependencyModel(
            name=module,
            version=ver,
            version_constraint=ver,
            source_file="go.mod",
        )

    # Multi-line blocks: one "<module> <version> [// comment]" per line.
    for block in self._GO_MOD_REQUIRE_BLOCK.findall(content):
        for raw_line in block.splitlines():
            fields = raw_line.split("//")[0].split()
            if len(fields) >= 2:
                packages.append(requirement(fields[0], fields[1]))

    # Single-line statements, unless the module is already known.
    for match in self._GO_MOD_SINGLE.finditer(content):
        module, ver = match.group(1), match.group(2)
        already_listed = any(existing.name == module for existing in packages)
        if not already_listed:
            packages.append(requirement(module, ver))

    return packages
672
+
673
+ # -- Cargo.toml (Rust) -----------------------------------------------------
674
+
675
def _parse_cargo_toml(self, path: Path) -> list[PackageDependencyModel]:
    """Read ``[dependencies]`` and ``[dev-dependencies]`` from Cargo.toml.

    A string spec is the constraint; a table spec contributes its
    ``version`` key. Cargo treats a bare ``1.2.3`` as ``^1.2.3``, so
    ``version`` is set only when the constraint starts with a single
    ``=``. (F-1)

    Returns:
        One package per entry; empty when ``tomllib`` is unavailable or
        the file cannot be parsed (logged).
    """
    packages: list[PackageDependencyModel] = []
    try:
        import tomllib
    except ImportError:
        logger.debug("tomllib unavailable, skipping Cargo.toml")
        return packages

    try:
        data = tomllib.loads(path.read_text(errors="replace"))
    except Exception as e:
        logger.warning("Failed to parse Cargo.toml: %s", e)
        return packages

    sections = (("dependencies", False), ("dev-dependencies", True))
    for section, is_dev in sections:
        for name, spec in data.get(section, {}).items():
            if isinstance(spec, str):
                constraint = spec
            elif isinstance(spec, dict):
                constraint = spec.get("version")
            else:
                constraint = None

            version: str | None = None
            if constraint:
                trimmed = constraint.lstrip()
                # Exactly one leading "=" marks an exact pin.
                if trimmed.startswith("=") and not trimmed.startswith("=="):
                    version = trimmed[1:].strip()

            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=version,
                    version_constraint=constraint,
                    is_dev=is_dev,
                    source_file="Cargo.toml",
                )
            )

    return packages
718
+
719
def _parse_csproj(self, path: Path) -> list[PackageDependencyModel]:
    """Parse a .csproj file and extract NuGet <PackageReference> entries.

    Handles both versioned and version-less references (the latter are
    common in Directory.Packages.props / central-package-management
    scenarios where the version is inherited from a parent props file).
    ``Version`` and ``PrivateAssets`` are read from either an attribute or a
    child element of the reference.

    A reference is flagged ``is_dev`` when its ItemGroup ``Label`` contains
    "test", when its name starts with a well-known test-tooling prefix, or
    when it declares ``PrivateAssets="all"``. The flag is decided per
    reference, so one test package does not mark its siblings as dev.

    Example XML:
        <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
        <PackageReference Include="Microsoft.AspNetCore.Authentication.JwtBearer" />

    Args:
        path: Location of the project file.

    Returns:
        One model per named PackageReference; an empty list when the file
        cannot be read or is not well-formed XML.
    """
    import xml.etree.ElementTree as ET

    packages: list[PackageDependencyModel] = []
    source_name = path.name

    # NOTE(review): xml.etree is not hardened against maliciously crafted
    # XML; confirm that is acceptable for the repositories being scanned.
    try:
        tree = ET.parse(path)
    except (ET.ParseError, OSError) as exc:
        # An unreadable file is treated like a malformed one: log and skip.
        logger.warning("Failed to parse %s: %s", path, exc)
        return packages

    root_el = tree.getroot()
    # Strip XML namespace if present (MSBuild files sometimes have one)
    ns_prefix = ""
    if root_el.tag.startswith("{"):
        ns_prefix = root_el.tag[: root_el.tag.index("}") + 1]

    # Name prefixes of common test / coverage tooling packages.
    dev_name_prefixes = (
        "microsoft.net.test",
        "xunit",
        "nunit",
        "mstest",
        "moq",
        "fluentassertions",
        "coverlet",
        "bogus",
    )

    for item_group in root_el.iter(f"{ns_prefix}ItemGroup"):
        label = (item_group.get("Label") or "").lower()
        # Substring match: "test" also covers "tests" and "testing".
        group_is_dev = "test" in label

        for ref in item_group.iter(f"{ns_prefix}PackageReference"):
            name = (ref.get("Include") or ref.get("Update") or "").strip()
            if not name:
                continue

            version_str: str | None = ref.get("Version") or ref.get("VersionOverride") or None
            # Version may also be a child element: <Version>1.0</Version>
            if version_str is None:
                ver_el = ref.find(f"{ns_prefix}Version")
                if ver_el is not None and ver_el.text:
                    version_str = ver_el.text.strip()

            # PrivateAssets (private/tool-only, not a runtime dep) may be an
            # attribute or a child element, like Version.
            private_assets = (
                ref.get("PrivateAssets")
                or ref.findtext(f"{ns_prefix}PrivateAssets")
                or ""
            )

            # Decide per reference; the group label is only one of the signals.
            is_dev = (
                group_is_dev
                or name.lower().startswith(dev_name_prefixes)
                or private_assets.strip().lower() == "all"
            )

            packages.append(
                PackageDependencyModel(
                    name=name,
                    version=version_str,
                    version_constraint=version_str,
                    is_dev=is_dev,
                    source_file=source_name,
                )
            )

    return packages
807
+
808
+ # -- pom.xml (Maven / JVM) -------------------------------------------------
809
+
810
def _parse_pom(self, path: Path) -> list[PackageDependencyModel]:
    """Parse a Maven pom.xml.

    Reads ``<dependencies>``, resolving each version through ``<properties>``
    (``${prop}`` interpolation, incl. ``project.version``) and the local
    ``<dependencyManagement>`` block. ``project.version`` / ``project.groupId``
    fall back to the ``<parent>`` coordinates when the project omits them.
    Coordinates are stored as ``groupId:artifactId`` to match NVD/OSV. Only a
    literal version is a pin; ranges (``[1,2)``), dynamic versions, and
    unresolved property refs (e.g. versions inherited from an imported BOM)
    leave version=None with the raw value preserved in version_constraint.
    (F-1/F-4)

    Args:
        path: Location of the pom.xml.

    Returns:
        One model per ``<dependency>`` that has both a groupId and an
        artifactId; an empty list when the file cannot be read or parsed or
        has no top-level ``<dependencies>`` element.
    """
    import xml.etree.ElementTree as ET

    packages: list[PackageDependencyModel] = []
    # NOTE(review): xml.etree is not hardened against maliciously crafted
    # XML; confirm that is acceptable for the repositories being scanned.
    try:
        root_el = ET.parse(path).getroot()
    except (ET.ParseError, OSError) as exc:
        # An unreadable file is treated like a malformed one: log and skip.
        logger.warning("Failed to parse %s: %s", path, exc)
        return packages

    ns = root_el.tag[: root_el.tag.index("}") + 1] if root_el.tag.startswith("{") else ""

    def q(tag: str) -> str:
        """Qualify *tag* with the document's namespace, if it has one."""
        return f"{ns}{tag}"

    # Build the property table used for ${...} interpolation.
    props: dict[str, str] = {}
    parent_el = root_el.find(q("parent"))
    for tag in ("version", "groupId"):
        own = (root_el.findtext(q(tag)) or "").strip()
        inherited = ""
        if parent_el is not None:
            inherited = (parent_el.findtext(q(tag)) or "").strip()
        if inherited:
            props[f"project.parent.{tag}"] = inherited
        # Maven inherits groupId/version from <parent> when not declared.
        effective = own or inherited
        if effective:
            props[f"project.{tag}"] = effective
    props_el = root_el.find(q("properties"))
    if props_el is not None:
        for child in props_el:
            key = child.tag[len(ns) :] if ns else child.tag
            if child.text:
                props[key] = child.text.strip()

    prop_ref = re.compile(r"\$\{([^}]+)\}")

    def resolve(value: str | None) -> str | None:
        """Resolve a value that is a single ${prop} reference.

        Follows chains of references and stops on a cycle. Returns None when
        a referenced property is unknown or a ``${`` remains in the result.
        """
        if value is None:
            return None
        value = value.strip()
        seen: set[str] = set()
        m = prop_ref.fullmatch(value)
        while m and m.group(1) not in seen:
            seen.add(m.group(1))
            repl = props.get(m.group(1))
            if repl is None:
                return None
            value = repl.strip()
            m = prop_ref.fullmatch(value)
        return None if "${" in value else value

    # dependencyManagement gives versions for deps that omit one.
    managed: dict[str, str] = {}
    dm = root_el.find(q("dependencyManagement"))
    dm_deps = dm.find(q("dependencies")) if dm is not None else None
    if dm_deps is not None:
        for dep in dm_deps.findall(q("dependency")):
            raw_group = dep.findtext(q("groupId"))
            g = resolve(raw_group) or (raw_group or "").strip()
            a = (dep.findtext(q("artifactId")) or "").strip()
            v = resolve(dep.findtext(q("version")))
            if g and a and v:
                managed[f"{g}:{a}"] = v

    deps_el = root_el.find(q("dependencies"))
    if deps_el is None:
        return packages

    for dep in deps_el.findall(q("dependency")):
        group = (dep.findtext(q("groupId")) or "").strip()
        artifact = (dep.findtext(q("artifactId")) or "").strip()
        if not group or not artifact:
            continue
        group = resolve(group) or group
        name = f"{group}:{artifact}"

        raw_version = dep.findtext(q("version"))
        constraint = raw_version.strip() if raw_version and raw_version.strip() else None
        if constraint is not None:
            # An explicit version always wins over dependencyManagement, so
            # an unresolvable ${...} stays unresolved (version=None) instead
            # of silently taking the managed version.
            resolved = resolve(constraint)
        else:
            # No version declared: inherit it from dependencyManagement.
            resolved = managed.get(name)
            constraint = resolved

        version = resolved if resolved and self._EXACT_JVM_VERSION_RE.match(resolved) else None

        scope = (dep.findtext(q("scope")) or "").strip().lower()
        packages.append(
            PackageDependencyModel(
                name=name,
                version=version,
                version_constraint=constraint,
                is_dev=(scope == "test"),
                source_file=path.name,
            )
        )

    return packages
913
+
914
+ # -- build.gradle / build.gradle.kts (Gradle / JVM) ------------------------
915
+
916
def _parse_gradle(self, path: Path) -> list[PackageDependencyModel]:
    """Pull dependencies out of a Gradle build script with regexes (best effort).

    Both the Groovy and Kotlin DSLs are targeted: string notation
    (``implementation 'g:a:v'`` / ``implementation("g:a:v")``) and map
    notation (``group:`` / ``name:`` / ``version:``). Coordinates are stored
    as ``groupId:artifactId``. ``version`` is filled in only when the captured
    version is a literal; dynamic versions (``1.+``), ranges, and
    interpolated variables (``$ver``) leave version=None. (F-1/F-4)

    A ``(configuration, coordinate)`` pair is reported once; later matches
    of the same pair are ignored. An unreadable file yields an empty list.
    """
    try:
        text = path.read_text(errors="replace")
    except Exception as e:
        logger.warning("Failed to read %s: %s", path.name, e)
        return []

    found: list[PackageDependencyModel] = []
    recorded: set[tuple[str, str]] = set()

    # String-notation matches are processed first, then map-notation ones.
    for pattern in (self._GRADLE_STRING_DEP, self._GRADLE_MAP_DEP):
        for match in pattern.finditer(text):
            config, group, artifact, raw_version = match.group(1, 2, 3, 4)

            coordinate = f"{group}:{artifact}"
            if (config, coordinate) in recorded:
                continue
            recorded.add((config, coordinate))

            # Blank or missing version text means "no constraint".
            trimmed = raw_version.strip() if raw_version else ""
            constraint = trimmed or None

            version = None
            if constraint and self._EXACT_JVM_VERSION_RE.match(constraint):
                version = constraint

            config_lc = config.lower()
            found.append(
                PackageDependencyModel(
                    name=coordinate,
                    version=version,
                    version_constraint=constraint,
                    is_dev=config_lc.startswith("test") or "androidtest" in config_lc,
                    source_file=path.name,
                )
            )

    return found