ca9 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ca9/__init__.py +1 -0
- ca9/analysis/__init__.py +0 -0
- ca9/analysis/ast_scanner.py +182 -0
- ca9/analysis/coverage_reader.py +75 -0
- ca9/analysis/vuln_matcher.py +371 -0
- ca9/cli.py +245 -0
- ca9/config.py +25 -0
- ca9/engine.py +196 -0
- ca9/models.py +86 -0
- ca9/parsers/__init__.py +26 -0
- ca9/parsers/base.py +11 -0
- ca9/parsers/dependabot.py +48 -0
- ca9/parsers/pip_audit.py +50 -0
- ca9/parsers/snyk.py +48 -0
- ca9/parsers/trivy.py +42 -0
- ca9/py.typed +0 -0
- ca9/report.py +234 -0
- ca9/scanner.py +256 -0
- ca9/version.py +60 -0
- ca9-0.1.1.dist-info/METADATA +269 -0
- ca9-0.1.1.dist-info/RECORD +24 -0
- ca9-0.1.1.dist-info/WHEEL +4 -0
- ca9-0.1.1.dist-info/entry_points.txt +2 -0
- ca9-0.1.1.dist-info/licenses/LICENSE +374 -0
ca9/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.1"
|
ca9/analysis/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import importlib.metadata
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Distribution names whose importable module name cannot be derived by simply
# swapping "-" for "_". Keys are lowercase and hyphen-separated; values are
# lowercase because the matching helpers in this package lowercase the import
# names and file paths they compare against.
PYPI_TO_IMPORT: dict[str, str] = {
    "beautifulsoup4": "bs4",
    "dateutil": "dateutil",
    "django-rest-framework": "rest_framework",
    "djangorestframework": "rest_framework",
    "elasticsearch-dsl": "elasticsearch_dsl",
    "google-api-python-client": "googleapiclient",
    "google-auth": "google.auth",
    "google-cloud-storage": "google.cloud.storage",
    "jinja2": "jinja2",
    "msgpack-python": "msgpack",
    "opencv-python": "cv2",
    "opencv-python-headless": "cv2",
    "pillow": "pil",
    "protobuf": "google.protobuf",
    "pyasn1": "pyasn1",
    "pycryptodome": "crypto",
    "pyjwt": "jwt",
    "pymongo": "pymongo",
    "pyopenssl": "openssl",
    "python-dateutil": "dateutil",
    "python-dotenv": "dotenv",
    "python-jose": "jose",
    "python-multipart": "multipart",
    "pyyaml": "yaml",
    "scikit-learn": "sklearn",
    "sentry-sdk": "sentry_sdk",
    "setuptools": "setuptools",
    "typing-extensions": "typing_extensions",
    "websocket-client": "websocket",
}

# Runs of "-", "_" and "." are interchangeable in distribution names.
_NAME_SEPARATORS_RE = re.compile(r"[-_.]+")


def pypi_to_import_name(package_name: str) -> str:
    """Return the (lowercase) import name for a PyPI distribution name.

    The name is first looked up in ``PYPI_TO_IMPORT`` using its normalized
    spelling (lowercase, separator runs collapsed to ``-``), so that
    ``python_dateutil`` and ``Python.Dateutil`` resolve like
    ``python-dateutil``. Unknown names fall back to the lowercased input with
    ``-`` replaced by ``_``; dots are kept so namespace-style names such as
    ``zope.interface`` stay importable paths.
    """
    lower = package_name.lower()
    normalized = _NAME_SEPARATORS_RE.sub("-", lower)
    mapped = PYPI_TO_IMPORT.get(normalized)
    if mapped is not None:
        return mapped
    return lower.replace("-", "_")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def collect_imports_from_source(source: str) -> set[str]:
    """Return every absolute module path imported by *source*.

    ``import a.b`` contributes ``"a.b"``; ``from a import b`` contributes both
    ``"a"`` and ``"a.b"`` (star imports contribute only the module).
    Relative imports (``from .x import y``) are skipped: they refer to modules
    of the scanned project itself, not to installed packages.

    Returns an empty set when the source cannot be parsed.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # ValueError: older interpreters reject NUL bytes this way.
        return set()

    imports: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.update(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.level or not node.module:
                continue
            imports.add(node.module)
            imports.update(
                f"{node.module}.{alias.name}" for alias in node.names if alias.name != "*"
            )
    return imports
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Directory names that are never scanned for project imports.
_EXCLUDED_DIRS = {
    ".venv",
    "venv",
    ".env",
    "env",
    "node_modules",
    ".git",
    "__pycache__",
    ".tox",
    ".nox",
    ".eggs",
    ".mypy_cache",
    "site-packages",
    "dist-packages",
}


def collect_imports_from_repo(repo_path: Path) -> set[str]:
    """Return the union of imports found in every ``.py`` file under *repo_path*.

    Directories named in ``_EXCLUDED_DIRS`` are pruned during the walk, so
    large trees such as virtualenvs or ``node_modules`` are never descended
    into. Unreadable files are skipped.
    """
    import os

    all_imports: set[str] = set()
    for dirpath, dirnames, filenames in os.walk(repo_path):
        # In-place edit tells os.walk not to descend into excluded directories.
        dirnames[:] = [name for name in dirnames if name not in _EXCLUDED_DIRS]
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            try:
                source = Path(dirpath, filename).read_text(encoding="utf-8", errors="ignore")
            except OSError:
                continue
            all_imports.update(collect_imports_from_source(source))
    return all_imports
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def is_package_imported(package_name: str, repo_imports: set[str]) -> bool:
    """Tell whether any entry of *repo_imports* refers to *package_name*.

    The distribution name is translated with ``pypi_to_import_name`` and
    compared case-insensitively. An import counts when it equals the import
    name, lies below it (``name.sub``), or is a parent package of it.
    """
    wanted = pypi_to_import_name(package_name).lower()
    wanted_prefix = wanted + "."

    def _related(candidate: str) -> bool:
        seen = candidate.lower()
        return (
            seen == wanted
            or seen.startswith(wanted_prefix)
            or wanted.startswith(seen + ".")
        )

    return any(_related(imp) for imp in repo_imports)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def is_submodule_imported(
    submodule_paths: tuple[str, ...],
    repo_imports: set[str],
) -> tuple[bool, str | None]:
    """Look for an import that touches one of *submodule_paths*.

    Comparison is case-insensitive. An import matches a submodule when it is
    the submodule itself, something beneath it, or a dotted parent package of
    it (a bare top-level import such as ``django`` does not count as a parent).

    Returns ``(True, matching_import)`` for the first hit, else ``(False, None)``.
    """
    for wanted in map(str.lower, submodule_paths):
        wanted_prefix = wanted + "."
        for imp in repo_imports:
            seen = imp.lower()
            if seen == wanted or seen.startswith(wanted_prefix):
                return True, imp
            # Parent-package imports only count when they are dotted themselves.
            if "." in seen and wanted.startswith(seen + "."):
                return True, imp
    return False, None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
_REQ_NAME_RE = re.compile(r"^([A-Za-z0-9]([A-Za-z0-9._-]*[A-Za-z0-9])?)")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _parse_requirement_name(req_str: str) -> str | None:
|
|
132
|
+
m = _REQ_NAME_RE.match(req_str.strip())
|
|
133
|
+
return m.group(1) if m else None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _get_direct_deps(package_name: str) -> list[str]:
|
|
137
|
+
try:
|
|
138
|
+
reqs = importlib.metadata.requires(package_name)
|
|
139
|
+
except importlib.metadata.PackageNotFoundError:
|
|
140
|
+
return []
|
|
141
|
+
if reqs is None:
|
|
142
|
+
return []
|
|
143
|
+
deps = []
|
|
144
|
+
for req_str in reqs:
|
|
145
|
+
if "extra ==" in req_str or "extra==" in req_str:
|
|
146
|
+
continue
|
|
147
|
+
name = _parse_requirement_name(req_str)
|
|
148
|
+
if name:
|
|
149
|
+
deps.append(name)
|
|
150
|
+
return deps
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def resolve_transitive_deps(repo_imports: set[str]) -> dict[str, str]:
    """Map each transitive dependency to the directly imported distribution that pulls it in.

    A distribution is "directly imported" when ``is_package_imported`` says one
    of *repo_imports* refers to it. Starting from each of those, declared
    requirements are followed recursively; every dependency reached that is
    not itself directly imported is recorded (lowercased) against the first
    root that reached it. Roots are visited in sorted order so the attribution
    is the same on every run.

    Returns an empty dict when installed distributions cannot be enumerated.
    """
    directly_imported: set[str] = set()
    try:
        for dist in importlib.metadata.distributions():
            try:
                name = dist.metadata["Name"]
            except Exception:
                # One distribution with unreadable metadata must not discard
                # the results for all the others.
                continue
            if name and is_package_imported(name, repo_imports):
                directly_imported.add(name)
    except Exception:
        # Best effort: without a usable environment there is nothing to resolve.
        return {}

    direct_lower = {n.lower() for n in directly_imported}

    transitive: dict[str, str] = {}
    visited: set[str] = set()

    def _walk(pkg_name: str, root: str) -> None:
        key = pkg_name.lower()
        if key in visited:
            return
        visited.add(key)
        for dep in _get_direct_deps(pkg_name):
            dep_lower = dep.lower()
            if dep_lower not in direct_lower and dep_lower not in transitive:
                transitive[dep_lower] = root
            # Keep descending even through directly imported packages.
            _walk(dep, root)

    for pkg in sorted(directly_imported):
        _walk(pkg, pkg)

    return transitive
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from ca9.analysis.ast_scanner import pypi_to_import_name
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_coverage(coverage_path: Path) -> dict:
    """Read a coverage JSON report from *coverage_path* and return the parsed data.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding. Raises ``OSError`` if the file cannot be read and
    ``json.JSONDecodeError`` if it is not valid JSON.
    """
    return json.loads(coverage_path.read_text(encoding="utf-8"))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_covered_files(coverage_data: dict) -> dict[str, list[int]]:
    """Return ``{file path: executed lines}`` for files with at least one executed line.

    Reads the ``"files"`` mapping of *coverage_data* and each entry's
    ``"executed_lines"`` list; entries with a missing or empty list are dropped.
    """
    per_file = coverage_data.get("files", {})
    return {
        path: lines
        for path, entry in per_file.items()
        if (lines := entry.get("executed_lines", []))
    }
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_package_executed(
    package_name: str,
    covered_files: dict[str, list[int]],
) -> tuple[bool, list[str]]:
    """Find covered files that belong to *package_name*.

    The import name (from ``pypi_to_import_name``) is turned into a path
    fragment, and every covered path is normalized to forward slashes and
    lowercase before matching. A file matches when it lies under
    ``site-packages/<fragment>``, or when its path ends with
    ``/<fragment>.py`` or ``/<fragment>/__init__.py``.

    Returns ``(any_match, matching_paths)`` with paths as given in
    *covered_files*.
    """
    fragment = pypi_to_import_name(package_name).replace(".", "/")
    inside_site_packages = (
        f"site-packages/{fragment}/",
        f"site-packages/{fragment}.py",
    )
    path_endings = (f"/{fragment}/__init__.py", f"/{fragment}.py")

    hits: list[str] = []
    for original_path in covered_files:
        candidate = original_path.replace("\\", "/").lower()
        in_site_packages = any(marker in candidate for marker in inside_site_packages)
        if in_site_packages or candidate.endswith(path_endings):
            hits.append(original_path)

    return bool(hits), hits
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def is_submodule_executed(
    submodule_paths: tuple[str, ...],
    file_hints: tuple[str, ...],
    covered_files: dict[str, list[int]],
) -> tuple[bool, list[str]]:
    """Find covered files that belong to any of the given submodules.

    Each dotted submodule path becomes a lowercase path fragment; covered
    paths are normalized to forward slashes and lowercase before matching, so
    mixed-case module names such as ``PIL.Image`` still match. A file matches
    when its path contains ``/<fragment>/`` or ``/<fragment>.py``. Files that
    match no fragment are then checked against *file_hints* (bare file names,
    compared case-insensitively against the end of the path).

    Returns ``(any_match, matching_paths)`` with paths as given in
    *covered_files*, each listed at most once.
    """
    # Lowercase the fragments: they are compared against lowercased paths.
    fragments = [submod.replace(".", "/").lower() for submod in submodule_paths]
    hint_suffixes = tuple(f"/{hint.lower()}" for hint in file_hints)

    matching_files: list[str] = []
    for filepath in covered_files:
        normalized = filepath.replace("\\", "/").lower()

        in_submodule = any(
            f"/{fragment}/" in normalized or f"/{fragment}.py" in normalized
            for fragment in fragments
        )
        # Hints are only a fallback for files no fragment claimed.
        if in_submodule or normalized.endswith(hint_suffixes):
            matching_files.append(filepath)

    return bool(matching_files), matching_files
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import urllib.error
|
|
6
|
+
import urllib.request
|
|
7
|
+
|
|
8
|
+
from ca9.models import AffectedComponent, Vulnerability
|
|
9
|
+
|
|
10
|
+
_CURATED: dict[str, list[tuple[re.Pattern[str], tuple[str, ...], tuple[str, ...]]]] = {
|
|
11
|
+
"django": [
|
|
12
|
+
(re.compile(r"admin(?:docs)?", re.I), ("django.contrib.admin",), ()),
|
|
13
|
+
(re.compile(r"admindocs", re.I), ("django.contrib.admindocs",), ()),
|
|
14
|
+
(re.compile(r"(?:session|SESSION)", re.I), ("django.contrib.sessions",), ()),
|
|
15
|
+
(
|
|
16
|
+
re.compile(r"(?:auth(?:entication)?|password|login|logout)", re.I),
|
|
17
|
+
("django.contrib.auth",),
|
|
18
|
+
(),
|
|
19
|
+
),
|
|
20
|
+
(re.compile(r"QuerySet|aggregat|\.db\.models", re.I), ("django.db.models",), ()),
|
|
21
|
+
(re.compile(r"Truncat|utils\.text", re.I), ("django.utils.text",), ()),
|
|
22
|
+
(re.compile(r"utils\.encoding", re.I), ("django.utils.encoding",), ()),
|
|
23
|
+
(re.compile(r"multipart|MultiPartParser", re.I), ("django.http.multipartparser",), ()),
|
|
24
|
+
(re.compile(r"(?:template|Template)", re.I), ("django.template",), ()),
|
|
25
|
+
(re.compile(r"(?:GIS|Geo|GDAL|GeoJSON)", re.I), ("django.contrib.gis",), ()),
|
|
26
|
+
(re.compile(r"(?:syndication|feed)", re.I), ("django.contrib.syndication",), ()),
|
|
27
|
+
(re.compile(r"validators?\.URL|URLValidator", re.I), ("django.core.validators",), ()),
|
|
28
|
+
(
|
|
29
|
+
re.compile(r"FileUpload|UploadedFile|InMemoryUploadedFile", re.I),
|
|
30
|
+
("django.core.files",),
|
|
31
|
+
(),
|
|
32
|
+
),
|
|
33
|
+
(re.compile(r"(?:cache|caching)", re.I), ("django.core.cache",), ()),
|
|
34
|
+
],
|
|
35
|
+
"werkzeug": [
|
|
36
|
+
(re.compile(r"debug|Debug", re.I), ("werkzeug.debug",), ("debugger.py",)),
|
|
37
|
+
(re.compile(r"formparser|FormDataParser|multipart", re.I), ("werkzeug.formparser",), ()),
|
|
38
|
+
(re.compile(r"safe_join|utils", re.I), ("werkzeug.utils",), ()),
|
|
39
|
+
],
|
|
40
|
+
"jinja2": [
|
|
41
|
+
(re.compile(r"sandbox|Sandbox", re.I), ("jinja2.sandbox",), ("sandbox.py",)),
|
|
42
|
+
(re.compile(r"xmlattr|filters", re.I), ("jinja2.filters",), ()),
|
|
43
|
+
],
|
|
44
|
+
"pyyaml": [
|
|
45
|
+
(re.compile(r"yaml\.load|unsafe_load|FullLoader|UnsafeLoader", re.I), ("yaml",), ()),
|
|
46
|
+
],
|
|
47
|
+
"urllib3": [
|
|
48
|
+
(re.compile(r"CRLF|header.inject", re.I), ("urllib3",), ()),
|
|
49
|
+
(re.compile(r"proxy|CONNECT", re.I), ("urllib3",), ()),
|
|
50
|
+
],
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# Matches a GitHub commit URL. Group 1 is the "owner/repo" part, group 2 the
# commit SHA (7 to 40 lowercase hexadecimal characters).
_GITHUB_COMMIT_RE = re.compile(r"https://github\.com/([^/]+/[^/]+)/commit/([0-9a-f]{7,40})")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _fetch_commit_files(owner_repo: str, sha: str) -> list[str]:
    """Fetch the file paths changed by a GitHub commit.

    Queries ``https://api.github.com/repos/<owner_repo>/commits/<sha>`` with a
    10 second timeout and returns the ``filename`` of each entry in the
    response's ``files`` list.

    This is best effort: any network failure, undecodable or non-JSON body, or
    response of an unexpected shape yields an empty list instead of raising.
    """
    url = f"https://api.github.com/repos/{owner_repo}/commits/{sha}"
    req = urllib.request.Request(
        url,
        headers={
            "Accept": "application/json",
            "User-Agent": "ca9-scanner",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read().decode())
    except (OSError, ValueError):
        # URLError/HTTPError are OSError subclasses; JSONDecodeError and
        # UnicodeDecodeError are ValueError subclasses.
        return []

    # Error payloads may not be an object, or may lack a usable "files" list.
    if not isinstance(data, dict):
        return []
    files = data.get("files")
    if not isinstance(files, list):
        return []

    return [f["filename"] for f in files if isinstance(f, dict) and "filename" in f]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _file_paths_to_submodules(
|
|
75
|
+
file_paths: list[str],
|
|
76
|
+
import_name: str,
|
|
77
|
+
) -> list[str]:
|
|
78
|
+
prefix = import_name.replace(".", "/").lower()
|
|
79
|
+
submodules: set[str] = set()
|
|
80
|
+
|
|
81
|
+
for fp in file_paths:
|
|
82
|
+
if not fp.endswith(".py"):
|
|
83
|
+
continue
|
|
84
|
+
|
|
85
|
+
fp_lower = fp.lower()
|
|
86
|
+
|
|
87
|
+
basename = fp.rsplit("/", 1)[-1] if "/" in fp else fp
|
|
88
|
+
if basename.startswith("test_") or basename == "conftest.py":
|
|
89
|
+
continue
|
|
90
|
+
if "/tests/" in fp_lower or "/test/" in fp_lower:
|
|
91
|
+
continue
|
|
92
|
+
|
|
93
|
+
idx = fp_lower.find(prefix + "/")
|
|
94
|
+
if idx == -1:
|
|
95
|
+
if fp_lower == prefix + ".py" or fp_lower.endswith("/" + prefix + ".py"):
|
|
96
|
+
submodules.add(import_name)
|
|
97
|
+
continue
|
|
98
|
+
|
|
99
|
+
rel = fp[idx:]
|
|
100
|
+
rel = rel[:-3]
|
|
101
|
+
dotted = rel.replace("/", ".")
|
|
102
|
+
if dotted.endswith(".__init__"):
|
|
103
|
+
dotted = dotted[: -len(".__init__")]
|
|
104
|
+
|
|
105
|
+
if dotted:
|
|
106
|
+
submodules.add(dotted)
|
|
107
|
+
|
|
108
|
+
return sorted(submodules)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _match_commits(
|
|
112
|
+
vuln: Vulnerability,
|
|
113
|
+
) -> AffectedComponent | None:
|
|
114
|
+
if not vuln.references:
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
from ca9.analysis.ast_scanner import pypi_to_import_name
|
|
118
|
+
|
|
119
|
+
import_name = pypi_to_import_name(vuln.package_name)
|
|
120
|
+
|
|
121
|
+
all_submodules: set[str] = set()
|
|
122
|
+
file_hints: set[str] = set()
|
|
123
|
+
|
|
124
|
+
for ref in vuln.references:
|
|
125
|
+
m = _GITHUB_COMMIT_RE.search(ref)
|
|
126
|
+
if not m:
|
|
127
|
+
continue
|
|
128
|
+
|
|
129
|
+
owner_repo, sha = m.group(1), m.group(2)
|
|
130
|
+
changed_files = _fetch_commit_files(owner_repo, sha)
|
|
131
|
+
if not changed_files:
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
submodules = _file_paths_to_submodules(changed_files, import_name)
|
|
135
|
+
all_submodules.update(submodules)
|
|
136
|
+
|
|
137
|
+
for fp in changed_files:
|
|
138
|
+
if fp.endswith(".py"):
|
|
139
|
+
basename = fp.rsplit("/", 1)[-1] if "/" in fp else fp
|
|
140
|
+
if not basename.startswith("test_") and basename != "conftest.py":
|
|
141
|
+
file_hints.add(basename)
|
|
142
|
+
|
|
143
|
+
if all_submodules:
|
|
144
|
+
return AffectedComponent(
|
|
145
|
+
package_import_name=import_name,
|
|
146
|
+
submodule_paths=tuple(sorted(all_submodules)),
|
|
147
|
+
file_hints=tuple(sorted(file_hints)),
|
|
148
|
+
confidence="high",
|
|
149
|
+
extraction_source="commit_analysis",
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
return None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# Matches a backtick-quoted dotted identifier path with at least two segments,
# e.g. `pkg.module.name`; group 1 is the path without the backticks.
_DOTTED_PATH_RE = re.compile(r"`([a-zA-Z_]\w*(?:\.[a-zA-Z_]\w*)+)`")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _match_curated(
    package_name: str,
    text: str,
) -> AffectedComponent | None:
    """Match advisory *text* against the curated patterns for *package_name*.

    The package name is looked up (lowercased) in ``_CURATED``; the first
    entry whose pattern is found in *text* decides the result.

    Returns a high-confidence ``AffectedComponent``, or ``None`` when the
    package has no curated patterns or none of them matches.
    """
    key = package_name.lower()
    patterns = _CURATED.get(key)
    if not patterns:
        return None

    from ca9.analysis.ast_scanner import pypi_to_import_name

    import_name = pypi_to_import_name(package_name)

    hit = next((entry for entry in patterns if entry[0].search(text)), None)
    if hit is None:
        return None

    regex, submodule_paths, file_hints = hit
    return AffectedComponent(
        package_import_name=import_name,
        submodule_paths=submodule_paths,
        file_hints=file_hints,
        confidence="high",
        extraction_source=f"curated:{key}:{regex.pattern}",
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _extract_from_text(
    package_name: str,
    text: str,
) -> AffectedComponent | None:
    """Pick backtick-quoted dotted paths belonging to the package out of *text*.

    A path is kept when it starts (case-insensitively) with the package's
    import name followed by a dot.

    Returns a medium-confidence ``AffectedComponent`` listing the distinct
    paths in sorted order, or ``None`` when no such path is mentioned.
    """
    from ca9.analysis.ast_scanner import pypi_to_import_name

    import_name = pypi_to_import_name(package_name)
    wanted_prefix = import_name.lower() + "."

    mentioned = {
        path
        for path in _DOTTED_PATH_RE.findall(text)
        if path.lower().startswith(wanted_prefix)
    }
    if not mentioned:
        return None

    return AffectedComponent(
        package_import_name=import_name,
        submodule_paths=tuple(sorted(mentioned)),
        confidence="medium",
        extraction_source="regex:dotted_path",
    )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
_CLASS_NAME_RE = re.compile(r"\b([A-Z][a-z]+(?:[A-Z][a-z0-9]+)+)\b")
|
|
212
|
+
_GENERIC_NAMES = frozenset(
|
|
213
|
+
{
|
|
214
|
+
"JavaScript",
|
|
215
|
+
"TypeError",
|
|
216
|
+
"ValueError",
|
|
217
|
+
"KeyError",
|
|
218
|
+
"IndexError",
|
|
219
|
+
"RuntimeError",
|
|
220
|
+
"ImportError",
|
|
221
|
+
"AttributeError",
|
|
222
|
+
"HttpResponse",
|
|
223
|
+
"ContentType",
|
|
224
|
+
"StackOverflow",
|
|
225
|
+
"GitHub",
|
|
226
|
+
"PullRequest",
|
|
227
|
+
"ChangeLog",
|
|
228
|
+
"ReadOnly",
|
|
229
|
+
"ReleaseNotes",
|
|
230
|
+
}
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _find_package_source_dir(package_name: str) -> str | None:
    """Locate the installed source of *package_name* on disk.

    Resolves the top-level import name with ``importlib.util.find_spec``.
    For a package (origin is an ``__init__.py``) the containing directory is
    returned; otherwise the spec's origin is returned unchanged.

    Returns ``None`` when the module cannot be found, has no origin, or the
    lookup itself fails.
    """
    import importlib.util
    import os

    from ca9.analysis.ast_scanner import pypi_to_import_name

    import_name = pypi_to_import_name(package_name)
    top_level = import_name.split(".")[0]

    try:
        spec = importlib.util.find_spec(top_level)
    except (ImportError, ValueError):
        # find_spec raises ValueError for modules whose __spec__ is unset.
        return None
    if spec is None or spec.origin is None:
        return None

    origin = spec.origin
    if os.path.basename(origin) == "__init__.py":
        # os.path handles both "/" and the platform's native separator.
        return os.path.dirname(origin) or None
    return origin
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _scan_package_for_name(
|
|
254
|
+
source_dir: str,
|
|
255
|
+
class_name: str,
|
|
256
|
+
import_name: str,
|
|
257
|
+
) -> str | None:
|
|
258
|
+
import ast
|
|
259
|
+
import os
|
|
260
|
+
|
|
261
|
+
if source_dir.endswith(".py"):
|
|
262
|
+
try:
|
|
263
|
+
with open(source_dir, encoding="utf-8", errors="replace") as f:
|
|
264
|
+
tree = ast.parse(f.read(), filename=source_dir)
|
|
265
|
+
except (SyntaxError, OSError):
|
|
266
|
+
return None
|
|
267
|
+
for node in ast.walk(tree):
|
|
268
|
+
if (
|
|
269
|
+
isinstance(node, ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef)
|
|
270
|
+
and node.name == class_name
|
|
271
|
+
):
|
|
272
|
+
return import_name
|
|
273
|
+
return None
|
|
274
|
+
|
|
275
|
+
for dirpath, _dirnames, filenames in os.walk(source_dir):
|
|
276
|
+
for fname in filenames:
|
|
277
|
+
if not fname.endswith(".py"):
|
|
278
|
+
continue
|
|
279
|
+
fpath = os.path.join(dirpath, fname)
|
|
280
|
+
try:
|
|
281
|
+
with open(fpath, encoding="utf-8", errors="replace") as f:
|
|
282
|
+
source = f.read()
|
|
283
|
+
except OSError:
|
|
284
|
+
continue
|
|
285
|
+
|
|
286
|
+
if class_name not in source:
|
|
287
|
+
continue
|
|
288
|
+
|
|
289
|
+
try:
|
|
290
|
+
tree = ast.parse(source, filename=fpath)
|
|
291
|
+
except SyntaxError:
|
|
292
|
+
continue
|
|
293
|
+
|
|
294
|
+
for node in ast.walk(tree):
|
|
295
|
+
if (
|
|
296
|
+
isinstance(node, ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef)
|
|
297
|
+
and node.name == class_name
|
|
298
|
+
):
|
|
299
|
+
rel = fpath[len(source_dir) :]
|
|
300
|
+
if rel.startswith("/"):
|
|
301
|
+
rel = rel[1:]
|
|
302
|
+
if rel.endswith(".py"):
|
|
303
|
+
rel = rel[:-3]
|
|
304
|
+
dotted = rel.replace("/", ".")
|
|
305
|
+
if dotted.endswith(".__init__"):
|
|
306
|
+
dotted = dotted[:-9]
|
|
307
|
+
return f"{import_name}.{dotted}" if dotted else import_name
|
|
308
|
+
|
|
309
|
+
return None
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _resolve_class_names(
    package_name: str,
    text: str,
) -> AffectedComponent | None:
    """Resolve CamelCase names mentioned in *text* to modules of the package.

    Candidate names are the ``_CLASS_NAME_RE`` matches minus
    ``_GENERIC_NAMES``. Each candidate is searched for in the package's
    installed source; the modules that define one become the submodule paths.

    Returns a medium-confidence ``AffectedComponent``, or ``None`` when there
    are no candidates, the package source cannot be located, or no candidate
    is defined in it.
    """
    from ca9.analysis.ast_scanner import pypi_to_import_name

    import_name = pypi_to_import_name(package_name)

    candidates = set(_CLASS_NAME_RE.findall(text)) - _GENERIC_NAMES
    if not candidates:
        return None

    source_dir = _find_package_source_dir(package_name)
    if source_dir is None:
        return None

    lookups = (
        _scan_package_for_name(source_dir, candidate, import_name)
        for candidate in candidates
    )
    located = {module for module in lookups if module}
    if not located:
        return None

    return AffectedComponent(
        package_import_name=import_name,
        submodule_paths=tuple(sorted(located)),
        confidence="medium",
        extraction_source="class_name_resolution",
    )
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def extract_affected_component(vuln: Vulnerability) -> AffectedComponent:
    """Work out which part of a package *vuln* affects.

    Strategies are tried in order and the first one that yields a result wins:
    fix-commit analysis, curated patterns, backtick-quoted dotted paths, then
    class-name resolution. The text searched is the vulnerability's title and
    description joined by a space.

    When every strategy comes up empty, a low-confidence component covering
    the whole package is returned.
    """
    from ca9.analysis.ast_scanner import pypi_to_import_name

    text = f"{vuln.title} {vuln.description}"

    # Lambdas keep each strategy lazy: later ones run only if earlier ones fail.
    strategies = (
        lambda: _match_commits(vuln),
        lambda: _match_curated(vuln.package_name, text),
        lambda: _extract_from_text(vuln.package_name, text),
        lambda: _resolve_class_names(vuln.package_name, text),
    )
    for strategy in strategies:
        component = strategy()
        if component is not None:
            return component

    return AffectedComponent(
        package_import_name=pypi_to_import_name(vuln.package_name),
        confidence="low",
        extraction_source="fallback",
    )
|