cve_sentinel-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cve_sentinel/__init__.py +4 -0
- cve_sentinel/__main__.py +18 -0
- cve_sentinel/analyzers/__init__.py +19 -0
- cve_sentinel/analyzers/base.py +274 -0
- cve_sentinel/analyzers/go.py +186 -0
- cve_sentinel/analyzers/maven.py +291 -0
- cve_sentinel/analyzers/npm.py +586 -0
- cve_sentinel/analyzers/php.py +238 -0
- cve_sentinel/analyzers/python.py +435 -0
- cve_sentinel/analyzers/ruby.py +182 -0
- cve_sentinel/analyzers/rust.py +199 -0
- cve_sentinel/cli.py +517 -0
- cve_sentinel/config.py +347 -0
- cve_sentinel/fetchers/__init__.py +22 -0
- cve_sentinel/fetchers/nvd.py +544 -0
- cve_sentinel/fetchers/osv.py +719 -0
- cve_sentinel/matcher.py +496 -0
- cve_sentinel/reporter.py +549 -0
- cve_sentinel/scanner.py +513 -0
- cve_sentinel/scanners/__init__.py +13 -0
- cve_sentinel/scanners/import_scanner.py +1121 -0
- cve_sentinel/utils/__init__.py +5 -0
- cve_sentinel/utils/cache.py +61 -0
- cve_sentinel-0.1.2.dist-info/METADATA +454 -0
- cve_sentinel-0.1.2.dist-info/RECORD +28 -0
- cve_sentinel-0.1.2.dist-info/WHEEL +4 -0
- cve_sentinel-0.1.2.dist-info/entry_points.txt +2 -0
- cve_sentinel-0.1.2.dist-info/licenses/LICENSE +21 -0
cve_sentinel/scanners/import_scanner.py
@@ -0,0 +1,1121 @@
"""Import statement scanner for Level 3 analysis.

This module scans source code files to find import/require statements
and maps them to package names for vulnerability detection.
"""

from __future__ import annotations

import logging
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Pattern, Set, Type

logger = logging.getLogger(__name__)


@dataclass
class ImportReference:
    """Reference to an import statement in source code.

    Attributes:
        package_name: The name of the imported package.
        file_path: Path to the source file.
        line_number: Line number where the import occurs (1-indexed).
        import_statement: The full import statement text.
        ecosystem: The package ecosystem (npm, pypi, etc.).
    """

    package_name: str
    file_path: Path
    line_number: int
    import_statement: str
    ecosystem: str

    def to_dict(self) -> Dict:
        """Convert to dictionary for serialization."""
        return {
            "package_name": self.package_name,
            "file_path": str(self.file_path),
            "line_number": self.line_number,
            "import_statement": self.import_statement,
            "ecosystem": self.ecosystem,
        }


class BaseLanguageScanner(ABC):
    """Base class for language-specific import scanners."""

    # File extensions this scanner handles
    FILE_EXTENSIONS: List[str] = []

    # Ecosystem name for this scanner
    ECOSYSTEM: str = ""

    # Default exclude patterns
    DEFAULT_EXCLUDES: List[str] = [
        "node_modules",
        "vendor",
        ".git",
        "__pycache__",
        "venv",
        ".venv",
        "env",
        ".tox",
        "build",
        "dist",
        "target",
    ]

    def __init__(self, exclude_patterns: Optional[List[str]] = None) -> None:
        """Initialize the scanner.

        Args:
            exclude_patterns: Additional patterns to exclude from scanning.
        """
        self.exclude_patterns = self.DEFAULT_EXCLUDES.copy()
        if exclude_patterns:
            self.exclude_patterns.extend(exclude_patterns)

    def _should_exclude(self, path: Path) -> bool:
        """Check if a path should be excluded from scanning.

        Args:
            path: Path to check.

        Returns:
            True if the path should be excluded.
        """
        path_str = str(path)
        for pattern in self.exclude_patterns:
            if pattern in path_str:
                return True
        return False

    def scan_directory(
        self,
        directory: Path,
        max_file_size: int = 1024 * 1024,  # 1MB default
    ) -> List[ImportReference]:
        """Scan a directory for import statements.

        Args:
            directory: Directory to scan.
            max_file_size: Maximum file size in bytes to scan.

        Returns:
            List of ImportReference objects found.
        """
        references: List[ImportReference] = []

        if not directory.exists() or not directory.is_dir():
            return references

        for ext in self.FILE_EXTENSIONS:
            pattern = f"**/*{ext}"
            for file_path in directory.glob(pattern):
                if self._should_exclude(file_path):
                    continue

                if file_path.is_file():
                    # Check file size
                    try:
                        if file_path.stat().st_size > max_file_size:
                            logger.debug(f"Skipping large file: {file_path}")
                            continue
                    except OSError:
                        continue

                    file_refs = self.scan_file(file_path)
                    references.extend(file_refs)

        return references

    def scan_file(self, file_path: Path) -> List[ImportReference]:
        """Scan a single file for import statements.

        Args:
            file_path: Path to the file to scan.

        Returns:
            List of ImportReference objects found.
        """
        references: List[ImportReference] = []

        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError as e:
            logger.warning(f"Failed to read file {file_path}: {e}")
            return references

        lines = content.split("\n")
        for line_num, line in enumerate(lines, start=1):
            packages = self._extract_packages(line)
            for pkg_name, statement in packages:
                references.append(
                    ImportReference(
                        package_name=pkg_name,
                        file_path=file_path,
                        line_number=line_num,
                        import_statement=statement.strip(),
                        ecosystem=self.ECOSYSTEM,
                    )
                )

        return references

    @abstractmethod
    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from a line of code.

        Args:
            line: A single line of source code.

        Returns:
            List of tuples (package_name, import_statement).
        """
        pass


class JavaScriptScanner(BaseLanguageScanner):
    """Scanner for JavaScript/TypeScript import statements."""

    FILE_EXTENSIONS = [".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"]
    ECOSYSTEM = "npm"

    # Patterns for JavaScript/TypeScript imports
    # import ... from 'package'
    IMPORT_FROM_PATTERN: Pattern = re.compile(
        r"""import\s+(?:(?:\{[^}]*\}|\*\s+as\s+\w+|\w+)(?:\s*,\s*(?:\{[^}]*\}|\*\s+as\s+\w+|\w+))*\s+from\s+)?['"]([^'"]+)['"]"""
    )
    # require('package')
    REQUIRE_PATTERN: Pattern = re.compile(r"""require\s*\(\s*['"]([^'"]+)['"]\s*\)""")
    # import('package') - dynamic import
    DYNAMIC_IMPORT_PATTERN: Pattern = re.compile(r"""import\s*\(\s*['"]([^'"]+)['"]\s*\)""")

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from JavaScript/TypeScript import statements."""
        results: List[tuple] = []

        # Check for import ... from 'package' or import 'package'
        for match in self.IMPORT_FROM_PATTERN.finditer(line):
            pkg_path = match.group(1)
            pkg_name = self._normalize_package_name(pkg_path)
            if pkg_name:
                results.append((pkg_name, line))

        # Check for require('package')
        for match in self.REQUIRE_PATTERN.finditer(line):
            pkg_path = match.group(1)
            pkg_name = self._normalize_package_name(pkg_path)
            if pkg_name:
                results.append((pkg_name, line))

        # Check for import('package') - dynamic import
        for match in self.DYNAMIC_IMPORT_PATTERN.finditer(line):
            pkg_path = match.group(1)
            pkg_name = self._normalize_package_name(pkg_path)
            if pkg_name:
                results.append((pkg_name, line))

        return results

    def _normalize_package_name(self, pkg_path: str) -> Optional[str]:
        """Normalize a package path to a package name.

        Handles:
        - Scoped packages: @scope/package -> @scope/package
        - Subpath imports: package/subpath -> package
        - Relative imports: ./local -> None (excluded)
        - Node built-ins: node:fs -> None (excluded)

        Args:
            pkg_path: The raw package path from the import.

        Returns:
            Normalized package name or None if it should be excluded.
        """
        # Exclude relative imports
        if pkg_path.startswith(".") or pkg_path.startswith("/"):
            return None

        # Exclude node: protocol (built-ins)
        if pkg_path.startswith("node:"):
            return None

        # Handle scoped packages (@scope/package)
        if pkg_path.startswith("@"):
            parts = pkg_path.split("/")
            if len(parts) >= 2:
                # Return @scope/package, ignore subpaths
                return f"{parts[0]}/{parts[1]}"
            return pkg_path

        # Regular package - get first part before /
        parts = pkg_path.split("/")
        return parts[0]


class PythonScanner(BaseLanguageScanner):
    """Scanner for Python import statements."""

    FILE_EXTENSIONS = [".py"]
    ECOSYSTEM = "pypi"

    # Patterns for Python imports
    # import package or import package as alias
    IMPORT_PATTERN: Pattern = re.compile(r"""^import\s+([\w.]+)""")
    # from package import ... or from package.sub import ...
    FROM_IMPORT_PATTERN: Pattern = re.compile(r"""^from\s+([\w.]+)\s+import\s+""")

    # Standard library modules to exclude
    STDLIB_MODULES: Set[str] = {
        "abc",
        "aifc",
        "argparse",
        "array",
        "ast",
        "asyncio",
        "atexit",
        "base64",
        "bdb",
        "binascii",
        "binhex",
        "bisect",
        "builtins",
        "bz2",
        "calendar",
        "cgi",
        "cgitb",
        "chunk",
        "cmath",
        "cmd",
        "code",
        "codecs",
        "codeop",
        "collections",
        "colorsys",
        "compileall",
        "concurrent",
        "configparser",
        "contextlib",
        "contextvars",
        "copy",
        "copyreg",
        "cProfile",
        "crypt",
        "csv",
        "ctypes",
        "curses",
        "dataclasses",
        "datetime",
        "dbm",
        "decimal",
        "difflib",
        "dis",
        "distutils",
        "doctest",
        "email",
        "encodings",
        "enum",
        "errno",
        "faulthandler",
        "fcntl",
        "filecmp",
        "fileinput",
        "fnmatch",
        "fractions",
        "ftplib",
        "functools",
        "gc",
        "getopt",
        "getpass",
        "gettext",
        "glob",
        "graphlib",
        "grp",
        "gzip",
        "hashlib",
        "heapq",
        "hmac",
        "html",
        "http",
        "imaplib",
        "imghdr",
        "imp",
        "importlib",
        "inspect",
        "io",
        "ipaddress",
        "itertools",
        "json",
        "keyword",
        "lib2to3",
        "linecache",
        "locale",
        "logging",
        "lzma",
        "mailbox",
        "mailcap",
        "marshal",
        "math",
        "mimetypes",
        "mmap",
        "modulefinder",
        "multiprocessing",
        "netrc",
        "nis",
        "nntplib",
        "numbers",
        "operator",
        "optparse",
        "os",
        "ossaudiodev",
        "pathlib",
        "pdb",
        "pickle",
        "pickletools",
        "pipes",
        "pkgutil",
        "platform",
        "plistlib",
        "poplib",
        "posix",
        "posixpath",
        "pprint",
        "profile",
        "pstats",
        "pty",
        "pwd",
        "py_compile",
        "pyclbr",
        "pydoc",
        "queue",
        "quopri",
        "random",
        "re",
        "readline",
        "reprlib",
        "resource",
        "rlcompleter",
        "runpy",
        "sched",
        "secrets",
        "select",
        "selectors",
        "shelve",
        "shlex",
        "shutil",
        "signal",
        "site",
        "smtpd",
        "smtplib",
        "sndhdr",
        "socket",
        "socketserver",
        "spwd",
        "sqlite3",
        "ssl",
        "stat",
        "statistics",
        "string",
        "stringprep",
        "struct",
        "subprocess",
        "sunau",
        "symtable",
        "sys",
        "sysconfig",
        "syslog",
        "tabnanny",
        "tarfile",
        "telnetlib",
        "tempfile",
        "termios",
        "test",
        "textwrap",
        "threading",
        "time",
        "timeit",
        "tkinter",
        "token",
        "tokenize",
        "trace",
        "traceback",
        "tracemalloc",
        "tty",
        "turtle",
        "turtledemo",
        "types",
        "typing",
        "unicodedata",
        "unittest",
        "urllib",
        "uu",
        "uuid",
        "venv",
        "warnings",
        "wave",
        "weakref",
        "webbrowser",
        "winreg",
        "winsound",
        "wsgiref",
        "xdrlib",
        "xml",
        "xmlrpc",
        "zipapp",
        "zipfile",
        "zipimport",
        "zlib",
        "_thread",
    }

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from Python import statements."""
        results: List[tuple] = []
        line_stripped = line.strip()

        # Check for 'import package'
        match = self.IMPORT_PATTERN.match(line_stripped)
        if match:
            module_path = match.group(1)
            pkg_name = self._normalize_package_name(module_path)
            if pkg_name:
                results.append((pkg_name, line))

        # Check for 'from package import ...'
        match = self.FROM_IMPORT_PATTERN.match(line_stripped)
        if match:
            module_path = match.group(1)
            pkg_name = self._normalize_package_name(module_path)
            if pkg_name:
                results.append((pkg_name, line))

        return results

    def _normalize_package_name(self, module_path: str) -> Optional[str]:
        """Normalize a module path to a package name.

        Args:
            module_path: The module path from the import (e.g., 'package.submodule').

        Returns:
            Package name or None if it's a standard library module.
        """
        # Get the top-level package
        parts = module_path.split(".")
        top_level = parts[0]

        # Exclude standard library modules
        if top_level in self.STDLIB_MODULES:
            return None

        # Exclude relative imports (shouldn't match our pattern, but safety check)
        if top_level.startswith("_") and top_level != "_":
            return None

        return top_level


class GoScanner(BaseLanguageScanner):
    """Scanner for Go import statements."""

    FILE_EXTENSIONS = [".go"]
    ECOSYSTEM = "go"

    # Patterns for Go imports
    # import "package"
    SINGLE_IMPORT_PATTERN: Pattern = re.compile(r"""^\s*import\s+(?:\w+\s+)?["']([^"']+)["']""")
    # import ( "package" ) - inside block
    BLOCK_IMPORT_PATTERN: Pattern = re.compile(r"""^\s*(?:\w+\s+)?["']([^"']+)["']""")

    # Standard library prefixes to exclude
    STDLIB_PREFIXES: List[str] = [
        "archive/",
        "bufio",
        "bytes",
        "compress/",
        "container/",
        "context",
        "crypto/",
        "database/",
        "debug/",
        "embed",
        "encoding/",
        "errors",
        "expvar",
        "flag",
        "fmt",
        "go/",
        "hash/",
        "html/",
        "image/",
        "index/",
        "io",
        "log/",
        "math/",
        "mime/",
        "net/",
        "os",
        "path/",
        "plugin",
        "reflect",
        "regexp",
        "runtime",
        "sort",
        "strconv",
        "strings",
        "sync",
        "syscall",
        "testing",
        "text/",
        "time",
        "unicode",
        "unsafe",
    ]

    def __init__(self, exclude_patterns: Optional[List[str]] = None) -> None:
        super().__init__(exclude_patterns)
        self._in_import_block = False

    def scan_file(self, file_path: Path) -> List[ImportReference]:
        """Override to handle import blocks."""
        references: List[ImportReference] = []

        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError as e:
            logger.warning(f"Failed to read file {file_path}: {e}")
            return references

        lines = content.split("\n")
        in_import_block = False

        for line_num, line in enumerate(lines, start=1):
            stripped = line.strip()

            # Check for start of import block
            if stripped.startswith("import ("):
                in_import_block = True
                continue

            # Check for end of import block
            if in_import_block and stripped == ")":
                in_import_block = False
                continue

            # Process imports
            if in_import_block:
                # Inside import block
                match = self.BLOCK_IMPORT_PATTERN.match(line)
                if match:
                    pkg_path = match.group(1)
                    pkg_name = self._normalize_package_name(pkg_path)
                    if pkg_name:
                        references.append(
                            ImportReference(
                                package_name=pkg_name,
                                file_path=file_path,
                                line_number=line_num,
                                import_statement=line.strip(),
                                ecosystem=self.ECOSYSTEM,
                            )
                        )
            else:
                # Single import
                match = self.SINGLE_IMPORT_PATTERN.match(line)
                if match:
                    pkg_path = match.group(1)
                    pkg_name = self._normalize_package_name(pkg_path)
                    if pkg_name:
                        references.append(
                            ImportReference(
                                package_name=pkg_name,
                                file_path=file_path,
                                line_number=line_num,
                                import_statement=line.strip(),
                                ecosystem=self.ECOSYSTEM,
                            )
                        )

        return references

    def _extract_packages(self, line: str) -> List[tuple]:
        """Not used for Go - overridden scan_file instead."""
        return []

    def _normalize_package_name(self, pkg_path: str) -> Optional[str]:
        """Normalize a Go import path to a package identifier.

        Args:
            pkg_path: The import path (e.g., 'github.com/user/repo/pkg').

        Returns:
            The module path or None if it's a standard library package.
        """
        # Exclude standard library
        for prefix in self.STDLIB_PREFIXES:
            if pkg_path == prefix.rstrip("/") or pkg_path.startswith(prefix):
                return None

        # For external packages, return the full module path
        # Go modules typically use the first 3 parts: host/user/repo
        if "/" in pkg_path:
            parts = pkg_path.split("/")
            if len(parts) >= 3:
                return "/".join(parts[:3])
            return pkg_path

        return None


class JavaScanner(BaseLanguageScanner):
    """Scanner for Java import statements."""

    FILE_EXTENSIONS = [".java"]
    ECOSYSTEM = "maven"

    # Pattern for Java imports
    # import package.Class; or import package.*;
    IMPORT_PATTERN: Pattern = re.compile(r"""^\s*import\s+(?:static\s+)?([\w.]+)(?:\.\*)?;""")

    # Java standard library and common internal packages to exclude
    STDLIB_PACKAGES: List[str] = [
        "java.",
        "javax.",
        "sun.",
        "com.sun.",
        "jdk.",
    ]

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from Java import statements."""
        results: List[tuple] = []

        match = self.IMPORT_PATTERN.match(line.strip())
        if match:
            import_path = match.group(1)
            pkg_name = self._normalize_package_name(import_path)
            if pkg_name:
                results.append((pkg_name, line))

        return results

    def _normalize_package_name(self, import_path: str) -> Optional[str]:
        """Normalize a Java import path to a package identifier.

        Args:
            import_path: The import path (e.g., 'org.apache.commons.lang3.StringUtils').

        Returns:
            Group:Artifact format or None if standard library.
        """
        # Exclude standard library packages
        for prefix in self.STDLIB_PACKAGES:
            if import_path.startswith(prefix):
                return None

        # Java package naming convention typically uses reversed domain
        # We return the first 2-3 parts as package identifier
        parts = import_path.split(".")
        if len(parts) >= 2:
            # Common pattern: org.groupid.artifactid or com.groupid.artifactid
            return ".".join(parts[: min(3, len(parts))])

        return None


class RubyScanner(BaseLanguageScanner):
    """Scanner for Ruby require statements."""

    FILE_EXTENSIONS = [".rb"]
    ECOSYSTEM = "rubygems"

    # Patterns for Ruby requires
    # require 'package' or require "package"
    REQUIRE_PATTERN: Pattern = re.compile(r"""^\s*require\s+['"]([^'"]+)['"]""")
    # require_relative should be excluded
    REQUIRE_RELATIVE_PATTERN: Pattern = re.compile(r"""^\s*require_relative\s+""")

    # Ruby standard library modules to exclude
    STDLIB_MODULES: Set[str] = {
        "abbrev",
        "base64",
        "benchmark",
        "bigdecimal",
        "cgi",
        "cmath",
        "coverage",
        "csv",
        "date",
        "dbm",
        "debug",
        "delegate",
        "digest",
        "drb",
        "english",
        "erb",
        "etc",
        "extmk",
        "fcntl",
        "fiddle",
        "fileutils",
        "find",
        "forwardable",
        "gdbm",
        "getoptlong",
        "io",
        "ipaddr",
        "irb",
        "json",
        "logger",
        "matrix",
        "minitest",
        "mkmf",
        "monitor",
        "mutex_m",
        "net",
        "nkf",
        "objspace",
        "observer",
        "open-uri",
        "open3",
        "openssl",
        "optparse",
        "ostruct",
        "pathname",
        "pp",
        "prettyprint",
        "prime",
        "pstore",
        "psych",
        "pty",
        "racc",
        "rake",
        "rdoc",
        "readline",
        "reline",
        "resolv",
        "resolv-replace",
        "rexml",
        "rinda",
        "ripper",
        "rss",
        "rubygems",
        "scanf",
        "sdbm",
        "securerandom",
        "set",
        "shellwords",
        "singleton",
        "socket",
        "stringio",
        "strscan",
        "syslog",
        "tempfile",
        "thwait",
        "time",
        "timeout",
        "tmpdir",
        "tracer",
        "tsort",
        "un",
        "unicode_normalize",
        "uri",
        "weakref",
        "webrick",
        "yaml",
        "zlib",
    }

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from Ruby require statements."""
        results: List[tuple] = []
        stripped = line.strip()

        # Skip require_relative
        if self.REQUIRE_RELATIVE_PATTERN.match(stripped):
            return results

        match = self.REQUIRE_PATTERN.match(stripped)
        if match:
            gem_path = match.group(1)
            pkg_name = self._normalize_package_name(gem_path)
            if pkg_name:
                results.append((pkg_name, line))

        return results

    def _normalize_package_name(self, gem_path: str) -> Optional[str]:
        """Normalize a gem path to a package name.

        Args:
            gem_path: The required path (e.g., 'rails' or 'active_support/core_ext').

        Returns:
            Gem name or None if it's a standard library module.
        """
        # Get the top-level gem name
        parts = gem_path.split("/")
        gem_name = parts[0]

        # Exclude standard library
        if gem_name in self.STDLIB_MODULES:
            return None

        return gem_name


class RustScanner(BaseLanguageScanner):
    """Scanner for Rust use statements."""

    FILE_EXTENSIONS = [".rs"]
    ECOSYSTEM = "crates.io"

    # Patterns for Rust
    # use crate::...; or use package::...;
    USE_PATTERN: Pattern = re.compile(r"""^\s*use\s+([\w]+)(?:::|;)""")
    # extern crate package;
    EXTERN_CRATE_PATTERN: Pattern = re.compile(r"""^\s*extern\s+crate\s+([\w]+)""")

    # Rust standard library and internal crates to exclude
    STDLIB_CRATES: Set[str] = {
        "std",
        "core",
        "alloc",
        "proc_macro",
        "test",
        "crate",
        "self",
        "super",
    }

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract crate names from Rust use/extern statements."""
        results: List[tuple] = []
        stripped = line.strip()

        # Check for 'use crate::...'
        match = self.USE_PATTERN.match(stripped)
        if match:
            crate_name = match.group(1)
            if crate_name not in self.STDLIB_CRATES:
                results.append((crate_name, line))

        # Check for 'extern crate ...'
        match = self.EXTERN_CRATE_PATTERN.match(stripped)
        if match:
            crate_name = match.group(1)
            if crate_name not in self.STDLIB_CRATES:
                results.append((crate_name, line))

        return results

    def _normalize_package_name(self, crate_name: str) -> Optional[str]:
        """Normalize a crate name."""
        if crate_name in self.STDLIB_CRATES:
            return None
        return crate_name


class PHPScanner(BaseLanguageScanner):
    """Scanner for PHP use statements."""

    FILE_EXTENSIONS = [".php"]
    ECOSYSTEM = "packagist"

    # Patterns for PHP
    # use Namespace\Class;
    USE_PATTERN: Pattern = re.compile(r"""^\s*use\s+([\w\\]+)""")

    # PHP internal namespaces to exclude
    INTERNAL_NAMESPACES: List[str] = [
        "Exception",
        "Error",
        "Throwable",
        "Iterator",
        "Generator",
        "Closure",
        "stdClass",
        "DateTime",
        "DateTimeImmutable",
        "DateInterval",
        "DatePeriod",
        "DateTimeZone",
    ]

    def _extract_packages(self, line: str) -> List[tuple]:
        """Extract package names from PHP use statements."""
        results: List[tuple] = []
        stripped = line.strip()

        # Skip require/include statements (vendor autoload)
        if "require" in stripped or "include" in stripped:
            return results

        match = self.USE_PATTERN.match(stripped)
        if match:
            namespace = match.group(1)
            pkg_name = self._normalize_package_name(namespace)
            if pkg_name:
                results.append((pkg_name, line))

        return results

    def _normalize_package_name(self, namespace: str) -> Optional[str]:
        """Normalize a PHP namespace to a package name.

        Args:
            namespace: The use namespace (e.g., 'Symfony\\Component\\HttpFoundation').

        Returns:
            Vendor/package format or None if internal.
        """
        # Replace backslashes with forward slashes
        namespace = namespace.replace("\\", "/")
        parts = namespace.split("/")

        # Skip internal PHP classes
        if parts[0] in self.INTERNAL_NAMESPACES:
            return None

        # Packagist convention: vendor/package
        if len(parts) >= 2:
            return f"{parts[0].lower()}/{parts[1].lower()}"

        return None


class ImportScanner:
    """Main import scanner that coordinates language-specific scanners."""

    # Mapping of ecosystems to their scanners
    SCANNERS: Dict[str, Type[BaseLanguageScanner]] = {
        "npm": JavaScriptScanner,
        "pypi": PythonScanner,
        "go": GoScanner,
        "maven": JavaScanner,
        "rubygems": RubyScanner,
        "crates.io": RustScanner,
        "packagist": PHPScanner,
    }

    # File extension to ecosystem mapping
    EXTENSION_MAP: Dict[str, str] = {
        ".js": "npm",
        ".jsx": "npm",
        ".ts": "npm",
        ".tsx": "npm",
        ".mjs": "npm",
        ".cjs": "npm",
        ".py": "pypi",
        ".go": "go",
        ".java": "maven",
        ".rb": "rubygems",
        ".rs": "crates.io",
        ".php": "packagist",
    }

    def __init__(
        self,
        ecosystems: Optional[List[str]] = None,
        exclude_patterns: Optional[List[str]] = None,
    ) -> None:
        """Initialize the import scanner.

        Args:
            ecosystems: List of ecosystems to scan for. If None, all are enabled.
            exclude_patterns: Additional patterns to exclude from scanning.
        """
        self.exclude_patterns = exclude_patterns
        self.active_scanners: Dict[str, BaseLanguageScanner] = {}

        # Initialize scanners for requested ecosystems
        target_ecosystems = ecosystems or list(self.SCANNERS.keys())
        for eco in target_ecosystems:
            if eco in self.SCANNERS:
                self.active_scanners[eco] = self.SCANNERS[eco](exclude_patterns)

    def scan_directory(
        self,
        directory: Path,
        max_file_size: int = 1024 * 1024,
    ) -> Dict[str, List[ImportReference]]:
        """Scan a directory for import statements across all languages.

        Args:
            directory: Directory to scan.
            max_file_size: Maximum file size in bytes to scan.

        Returns:
            Dictionary mapping ecosystems to their import references.
        """
        results: Dict[str, List[ImportReference]] = {}

        for ecosystem, scanner in self.active_scanners.items():
            refs = scanner.scan_directory(directory, max_file_size)
            if refs:
                results[ecosystem] = refs

        return results

    def scan_file(self, file_path: Path) -> List[ImportReference]:
        """Scan a single file for import statements.

        Args:
            file_path: Path to the file to scan.

        Returns:
            List of ImportReference objects found.
        """
        ext = file_path.suffix.lower()
        ecosystem = self.EXTENSION_MAP.get(ext)

        if ecosystem and ecosystem in self.active_scanners:
            return self.active_scanners[ecosystem].scan_file(file_path)

        return []

    def get_imports_for_package(
        self,
        package_name: str,
        references: List[ImportReference],
    ) -> List[ImportReference]:
        """Filter import references for a specific package.

        Args:
            package_name: Package name to filter for.
            references: List of all import references.

        Returns:
            List of references for the specified package.
        """
        return [ref for ref in references if ref.package_name == package_name]

    @staticmethod
    def get_supported_extensions() -> List[str]:
        """Get list of supported file extensions."""
        return list(ImportScanner.EXTENSION_MAP.keys())

    @staticmethod
    def get_supported_ecosystems() -> List[str]:
        """Get list of supported ecosystems."""
        return list(ImportScanner.SCANNERS.keys())


def get_scanner_for_ecosystem(ecosystem: str) -> Optional[BaseLanguageScanner]:
    """Get a scanner instance for a specific ecosystem.

    Args:
        ecosystem: The ecosystem name.

    Returns:
        Scanner instance or None if not supported.
    """
    scanner_class = ImportScanner.SCANNERS.get(ecosystem)
    if scanner_class:
        return scanner_class()
    return None
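
For orientation, below is a minimal usage sketch of the coordinating ImportScanner class shown above. It is not part of the published package; it only exercises the public API defined in this module, and the project path and exclude pattern are hypothetical placeholders.

from pathlib import Path

from cve_sentinel.scanners.import_scanner import ImportScanner

# Scan only Python and JavaScript/TypeScript sources under ./my-project (hypothetical path),
# skipping anything under a "tests" directory in addition to the built-in excludes.
scanner = ImportScanner(ecosystems=["pypi", "npm"], exclude_patterns=["tests"])
results = scanner.scan_directory(Path("my-project"))

for ecosystem, refs in results.items():
    for ref in refs:
        # Each ImportReference records which third-party package was imported and where.
        print(ecosystem, ref.package_name, f"{ref.file_path}:{ref.line_number}")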