cartography 0.94.0rc2__py3-none-any.whl → 0.95.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/cli.py +42 -24
- cartography/config.py +12 -8
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/scoped_analysis/semgrep_sca_risk_analysis.json +13 -13
- cartography/driftdetect/cli.py +1 -1
- cartography/graph/job.py +8 -1
- cartography/intel/aws/permission_relationships.py +6 -2
- cartography/intel/gcp/__init__.py +110 -23
- cartography/intel/kandji/__init__.py +1 -1
- cartography/intel/semgrep/__init__.py +9 -2
- cartography/intel/semgrep/dependencies.py +201 -0
- cartography/intel/semgrep/deployment.py +67 -0
- cartography/intel/semgrep/findings.py +126 -110
- cartography/intel/snipeit/__init__.py +30 -0
- cartography/intel/snipeit/asset.py +74 -0
- cartography/intel/snipeit/user.py +75 -0
- cartography/intel/snipeit/util.py +35 -0
- cartography/models/semgrep/dependencies.py +77 -0
- cartography/models/semgrep/findings.py +3 -1
- cartography/models/snipeit/__init__.py +0 -0
- cartography/models/snipeit/asset.py +81 -0
- cartography/models/snipeit/tenant.py +17 -0
- cartography/models/snipeit/user.py +49 -0
- cartography/sync.py +2 -2
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/LICENSE +1 -1
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/METADATA +3 -5
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/RECORD +30 -23
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/WHEEL +1 -1
- cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
- cartography/intel/crxcavator/__init__.py +0 -44
- cartography/intel/crxcavator/crxcavator.py +0 -329
- cartography-0.94.0rc2.dist-info/NOTICE +0 -4
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Callable
|
|
4
|
+
from typing import Dict
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
import neo4j
|
|
8
|
+
import requests
|
|
9
|
+
from requests.exceptions import HTTPError
|
|
10
|
+
from requests.exceptions import ReadTimeout
|
|
11
|
+
|
|
12
|
+
from cartography.client.core.tx import load
|
|
13
|
+
from cartography.graph.job import GraphJob
|
|
14
|
+
from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
|
|
15
|
+
from cartography.stats import get_stats_client
|
|
16
|
+
from cartography.util import merge_module_sync_metadata
|
|
17
|
+
from cartography.util import timeit
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
stat_handler = get_stats_client(__name__)
|
|
21
|
+
_PAGE_SIZE = 10000
|
|
22
|
+
_TIMEOUT = (60, 60)
|
|
23
|
+
_MAX_RETRIES = 3
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@timeit
def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystems: List[str]) -> List[Dict[str, Any]]:
    """
    Fetch every dependency for the given ecosystems within the given Semgrep deployment,
    following cursor-based pagination until the API reports no more pages.

    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
    param: ecosystems: One or more ecosystems to import dependencies from, e.g. "gomod" or "pypi".
        The list of supported ecosystems is defined here:
        https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion
    """
    endpoint = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }
    payload: dict[str, Any] = {
        "pageSize": _PAGE_SIZE,
        "dependencyFilter": {
            "ecosystem": ecosystems,
        },
    }

    collected: List[Dict[str, Any]] = []
    more_pages = True
    page_num = 0
    failure_count = 0

    logger.info(f"Retrieving Semgrep dependencies for deployment '{deployment_id}'.")
    while more_pages:
        try:
            resp = requests.post(endpoint, json=payload, headers=auth_headers, timeout=_TIMEOUT)
            resp.raise_for_status()
            data = resp.json()
        except (ReadTimeout, HTTPError):
            # Transient failure: retry the same page up to _MAX_RETRIES times in a row.
            logger.warning(f"Failed to retrieve Semgrep dependencies for page {page_num}. Retrying...")
            failure_count += 1
            if failure_count >= _MAX_RETRIES:
                raise
            continue
        more_pages = data.get("hasMore", False)
        logger.info(f"Processed page {page_num} of Semgrep dependencies.")
        collected.extend(data.get("dependencies", []))
        # A successful page resets the consecutive-failure counter.
        failure_count = 0
        page_num += 1
        payload["cursor"] = data.get("cursor")

    logger.info(f"Retrieved {len(collected)} Semgrep dependencies in {page_num} pages.")
    return collected
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transforms the raw dependencies response from Semgrep API into a list of dicts
    that can be used to create the Dependency nodes.

    Sample raw_dep as of November 2024:
    {
        "repositoryId": "123456",
        "definedAt": {
            "path": "go.mod",
            "startLine": "6",
            "endLine": "6",
            "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
            "committedAt": "1970-01-01T00:00:00Z",
            "startCol": "0",
            "endCol": "0"
        },
        "transitivity": "DIRECT",
        "package": {
            "name": "github.com/foo/bar",
            "versionSpecifier": "1.2.3"
        },
        "ecosystem": "gomod",
        "licenses": [],
        "pathToTransitivity": []
    }
    """
    deps = []
    for raw_dep in raw_deps:

        # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
        # but it's much simpler to just extract the URL from the definedAt field.
        repo_url = raw_dep["definedAt"]["url"].split("/blob/", 1)[0]

        name = raw_dep["package"]["name"]
        version = raw_dep["package"]["versionSpecifier"]
        # Renamed from `id` so as not to shadow the `id` builtin.
        dep_id = f"{name}|{version}"

        # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
        # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
        # If Semgrep eventually supports version specifiers, update this line accordingly.
        specifier = f"=={version}"

        deps.append({
            # existing dependency properties:
            "id": dep_id,
            "name": name,
            "specifier": specifier,
            "version": version,
            "repo_url": repo_url,

            # Semgrep-specific properties:
            "ecosystem": raw_dep["ecosystem"],
            "transitivity": raw_dep["transitivity"].lower(),
            "url": raw_dep["definedAt"]["url"],
        })

    return deps
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@timeit
def load_dependencies(
    neo4j_session: neo4j.Session,
    dependency_schema: Callable,
    dependencies: List[Dict],
    deployment_id: str,
    update_tag: int,
) -> None:
    """
    Ingest the given dependency dicts into the graph under the supplied schema.

    param: neo4j_session: The Neo4j session to load data with.
    param: dependency_schema: Zero-arg callable (schema class) producing the node schema to load.
    param: dependencies: Transformed dependency dicts (see transform_dependencies).
    param: deployment_id: The Semgrep deployment ID, attached as DEPLOYMENT_ID kwarg for the loader.
    param: update_tag: Timestamp used to mark the nodes as fresh for this sync run.
    """
    # Instantiate the schema once instead of twice (previously built separately
    # for the log message and for load()).
    schema = dependency_schema()
    logger.info(f"Loading {len(dependencies)} {schema.label} objects into the graph.")
    load(
        neo4j_session,
        schema,
        dependencies,
        lastupdated=update_tag,
        DEPLOYMENT_ID=deployment_id,
    )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """Remove stale SemgrepGoLibrary nodes and relationships from the graph."""
    logger.info("Running Semgrep Go Library cleanup job.")
    cleanup_job = GraphJob.from_node_schema(SemgrepGoLibrarySchema(), common_job_parameters)
    cleanup_job.run(neo4j_session)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@timeit
def sync_dependencies(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync Semgrep-reported dependencies (currently the Go ecosystem only) into the graph,
    then clean up stale nodes and record module-sync metadata.
    """
    # Guard clause: the deployment ID is populated by sync_deployment() earlier in the run.
    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
    if not deployment_id:
        logger.warning(
            "Missing Semgrep deployment ID, ensure that sync_deployment() has been called."
            "Skipping Semgrep dependencies sync job.",
        )
        return

    logger.info("Running Semgrep dependencies sync job.")

    # fetch and load dependencies for the Go ecosystem
    transformed_go_deps = transform_dependencies(
        get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"]),
    )
    load_dependencies(neo4j_session, SemgrepGoLibrarySchema, transformed_go_deps, deployment_id, update_tag)

    cleanup(neo4j_session, common_job_parameters)

    merge_module_sync_metadata(
        neo4j_session=neo4j_session,
        group_type='Semgrep',
        group_id=deployment_id,
        synced_type='SemgrepDependency',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
|
|
5
|
+
import neo4j
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
|
|
10
|
+
from cartography.stats import get_stats_client
|
|
11
|
+
from cartography.util import timeit
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
stat_handler = get_stats_client(__name__)
|
|
15
|
+
_TIMEOUT = (60, 60)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@timeit
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
    """
    Gets the deployment associated with the passed Semgrep App token.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }
    response = requests.get("https://semgrep.dev/api/v1/deployments", headers=headers, timeout=_TIMEOUT)
    response.raise_for_status()

    # NOTE(review): only the first deployment in the response is used — presumably a
    # token is scoped to a single deployment; confirm against the Semgrep API docs.
    first_deployment = response.json()["deployments"][0]
    return {
        "id": first_deployment["id"],
        "name": first_deployment["name"],
        "slug": first_deployment["slug"],
    }
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@timeit
def load_semgrep_deployment(
    neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
) -> None:
    """Write a single SemgrepDeployment node into the graph, tagged with update_tag."""
    logger.info(f"Loading SemgrepDeployment {deployment} into the graph.")
    load(neo4j_session, SemgrepDeploymentSchema(), [deployment], lastupdated=update_tag)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@timeit
def sync_deployment(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetch the Semgrep deployment for the token, load it into the graph, and expose
    its id/slug via common_job_parameters for downstream semgrep sync stages.
    """
    semgrep_deployment = get_deployment(semgrep_app_token)
    load_semgrep_deployment(neo4j_session, semgrep_deployment, update_tag)
    # Later sync jobs (dependencies, findings) read these keys back out.
    common_job_parameters["DEPLOYMENT_ID"] = semgrep_deployment["id"]
    common_job_parameters["DEPLOYMENT_SLUG"] = semgrep_deployment["slug"]
|
@@ -3,14 +3,14 @@ from typing import Any
|
|
|
3
3
|
from typing import Dict
|
|
4
4
|
from typing import List
|
|
5
5
|
from typing import Tuple
|
|
6
|
-
from urllib.error import HTTPError
|
|
7
6
|
|
|
8
7
|
import neo4j
|
|
9
8
|
import requests
|
|
9
|
+
from requests.exceptions import HTTPError
|
|
10
|
+
from requests.exceptions import ReadTimeout
|
|
10
11
|
|
|
11
12
|
from cartography.client.core.tx import load
|
|
12
13
|
from cartography.graph.job import GraphJob
|
|
13
|
-
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
|
|
14
14
|
from cartography.models.semgrep.findings import SemgrepSCAFindingSchema
|
|
15
15
|
from cartography.models.semgrep.locations import SemgrepSCALocationSchema
|
|
16
16
|
from cartography.stats import get_stats_client
|
|
@@ -20,88 +20,97 @@ from cartography.util import timeit
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
stat_handler = get_stats_client(__name__)
|
|
23
|
+
_PAGE_SIZE = 500
|
|
23
24
|
_TIMEOUT = (60, 60)
|
|
24
25
|
_MAX_RETRIES = 3
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
@timeit
|
|
28
|
-
def
|
|
29
|
-
"""
|
|
30
|
-
Gets the deployment associated with the passed Semgrep App token.
|
|
31
|
-
param: semgrep_app_token: The Semgrep App token to use for authentication.
|
|
32
|
-
"""
|
|
33
|
-
deployment = {}
|
|
34
|
-
deployment_url = "https://semgrep.dev/api/v1/deployments"
|
|
35
|
-
headers = {
|
|
36
|
-
"Content-Type": "application/json",
|
|
37
|
-
"Authorization": f"Bearer {semgrep_app_token}",
|
|
38
|
-
}
|
|
39
|
-
response = requests.get(deployment_url, headers=headers, timeout=_TIMEOUT)
|
|
40
|
-
response.raise_for_status()
|
|
41
|
-
|
|
42
|
-
data = response.json()
|
|
43
|
-
deployment["id"] = data["deployments"][0]["id"]
|
|
44
|
-
deployment["name"] = data["deployments"][0]["name"]
|
|
45
|
-
deployment["slug"] = data["deployments"][0]["slug"]
|
|
46
|
-
|
|
47
|
-
return deployment
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
@timeit
|
|
51
|
-
def get_sca_vulns(semgrep_app_token: str, deployment_id: str) -> List[Dict[str, Any]]:
|
|
29
|
+
def get_sca_vulns(semgrep_app_token: str, deployment_slug: str) -> List[Dict[str, Any]]:
|
|
52
30
|
"""
|
|
53
31
|
Gets the SCA vulns associated with the passed Semgrep App token and deployment id.
|
|
54
32
|
param: semgrep_app_token: The Semgrep App token to use for authentication.
|
|
55
|
-
param:
|
|
33
|
+
param: deployment_slug: The Semgrep deployment slug to use for retrieving SCA vulns.
|
|
56
34
|
"""
|
|
57
35
|
all_vulns = []
|
|
58
|
-
sca_url = f"https://semgrep.dev/api/v1/deployments/{
|
|
36
|
+
sca_url = f"https://semgrep.dev/api/v1/deployments/{deployment_slug}/findings"
|
|
59
37
|
has_more = True
|
|
60
|
-
|
|
61
|
-
page = 1
|
|
38
|
+
page = 0
|
|
62
39
|
retries = 0
|
|
63
40
|
headers = {
|
|
64
41
|
"Content-Type": "application/json",
|
|
65
42
|
"Authorization": f"Bearer {semgrep_app_token}",
|
|
66
43
|
}
|
|
67
44
|
|
|
68
|
-
request_data = {
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"
|
|
45
|
+
request_data: dict[str, Any] = {
|
|
46
|
+
"page": page,
|
|
47
|
+
"page_size": _PAGE_SIZE,
|
|
48
|
+
"issue_type": "sca",
|
|
49
|
+
"exposures": "reachable,always_reachable,conditionally_reachable,unreachable,unknown",
|
|
50
|
+
"ref": "_default",
|
|
51
|
+
"dedup": "true",
|
|
73
52
|
}
|
|
74
|
-
|
|
53
|
+
logger.info(f"Retrieving Semgrep SCA vulns for deployment '{deployment_slug}'.")
|
|
75
54
|
while has_more:
|
|
76
55
|
|
|
77
|
-
if cursor:
|
|
78
|
-
request_data.update({
|
|
79
|
-
"cursor": {
|
|
80
|
-
"vulnOffset": cursor["vulnOffset"],
|
|
81
|
-
"issueOffset": cursor["issueOffset"],
|
|
82
|
-
},
|
|
83
|
-
})
|
|
84
56
|
try:
|
|
85
|
-
response = requests.
|
|
57
|
+
response = requests.get(sca_url, params=request_data, headers=headers, timeout=_TIMEOUT)
|
|
86
58
|
response.raise_for_status()
|
|
87
59
|
data = response.json()
|
|
88
|
-
except HTTPError
|
|
60
|
+
except (ReadTimeout, HTTPError):
|
|
89
61
|
logger.warning(f"Failed to retrieve Semgrep SCA vulns for page {page}. Retrying...")
|
|
90
62
|
retries += 1
|
|
91
63
|
if retries >= _MAX_RETRIES:
|
|
92
|
-
raise
|
|
64
|
+
raise
|
|
93
65
|
continue
|
|
94
|
-
vulns = data["
|
|
95
|
-
|
|
96
|
-
has_more = data.get("hasMore", False)
|
|
66
|
+
vulns = data["findings"]
|
|
67
|
+
has_more = len(vulns) > 0
|
|
97
68
|
if page % 10 == 0:
|
|
98
|
-
logger.info(f"Processed {page}
|
|
69
|
+
logger.info(f"Processed page {page} of Semgrep SCA vulnerabilities.")
|
|
99
70
|
all_vulns.extend(vulns)
|
|
100
71
|
retries = 0
|
|
72
|
+
page += 1
|
|
73
|
+
request_data["page"] = page
|
|
101
74
|
|
|
75
|
+
logger.info(f"Retrieved {len(all_vulns)} Semgrep SCA vulns in {page} pages.")
|
|
102
76
|
return all_vulns
|
|
103
77
|
|
|
104
78
|
|
|
79
|
+
def _get_vuln_class(vuln: Dict) -> str:
|
|
80
|
+
vulnerability_classes = vuln["rule"].get("vulnerability_classes", [])
|
|
81
|
+
if vulnerability_classes:
|
|
82
|
+
return vulnerability_classes[0]
|
|
83
|
+
return "Other"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _determine_exposure(vuln: Dict[str, Any]) -> str | None:
|
|
87
|
+
# See Semgrep reachability types:
|
|
88
|
+
# https://semgrep.dev/docs/semgrep-supply-chain/overview#types-of-semgrep-supply-chain-findings
|
|
89
|
+
reachability_types = {
|
|
90
|
+
"NO REACHABILITY ANALYSIS": 2,
|
|
91
|
+
"UNREACHABLE": 2,
|
|
92
|
+
"REACHABLE": 0,
|
|
93
|
+
"ALWAYS REACHABLE": 0,
|
|
94
|
+
"CONDITIONALLY REACHABLE": 1,
|
|
95
|
+
}
|
|
96
|
+
reachable_flag = vuln["reachability"]
|
|
97
|
+
if reachable_flag and reachable_flag.upper() in reachability_types:
|
|
98
|
+
reach_score = reachability_types[reachable_flag.upper()]
|
|
99
|
+
if reach_score < reachability_types["UNREACHABLE"]:
|
|
100
|
+
return "REACHABLE"
|
|
101
|
+
else:
|
|
102
|
+
return "UNREACHABLE"
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _build_vuln_url(vuln: str) -> str | None:
|
|
107
|
+
if 'CVE' in vuln:
|
|
108
|
+
return f"https://nvd.nist.gov/vuln/detail/{vuln}"
|
|
109
|
+
if 'GHSA' in vuln:
|
|
110
|
+
return f"https://github.com/advisories/{vuln}"
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
|
|
105
114
|
def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, str]]]:
|
|
106
115
|
"""
|
|
107
116
|
Transforms the raw SCA vulns response from Semgrep API into a list of dicts
|
|
@@ -112,60 +121,60 @@ def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str,
|
|
|
112
121
|
for vuln in raw_vulns:
|
|
113
122
|
sca_vuln: Dict[str, Any] = {}
|
|
114
123
|
# Mandatory fields
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
sca_vuln["
|
|
120
|
-
sca_vuln["
|
|
121
|
-
sca_vuln["
|
|
122
|
-
sca_vuln["
|
|
123
|
-
sca_vuln["
|
|
124
|
-
sca_vuln["
|
|
125
|
-
|
|
124
|
+
repository_name = vuln["repository"]["name"]
|
|
125
|
+
rule_id = vuln["rule"]["name"]
|
|
126
|
+
vulnerability_class = _get_vuln_class(vuln)
|
|
127
|
+
package = vuln['found_dependency']['package']
|
|
128
|
+
sca_vuln["id"] = vuln["id"]
|
|
129
|
+
sca_vuln["repositoryName"] = repository_name
|
|
130
|
+
sca_vuln["branch"] = vuln["ref"]
|
|
131
|
+
sca_vuln["ruleId"] = rule_id
|
|
132
|
+
sca_vuln["title"] = package + ":" + vulnerability_class
|
|
133
|
+
sca_vuln["description"] = vuln["rule"]["message"]
|
|
134
|
+
sca_vuln["ecosystem"] = vuln["found_dependency"]["ecosystem"]
|
|
135
|
+
sca_vuln["severity"] = vuln["severity"].upper()
|
|
136
|
+
sca_vuln["reachability"] = vuln["reachability"].upper() # Check done to determine rechabilitity
|
|
137
|
+
sca_vuln["reachableIf"] = vuln["reachable_condition"].upper() if vuln["reachable_condition"] else None
|
|
138
|
+
sca_vuln["exposureType"] = _determine_exposure(vuln) # Determintes if reachable or unreachable
|
|
139
|
+
dependency = f"{package}|{vuln['found_dependency']['version']}"
|
|
126
140
|
sca_vuln["matchedDependency"] = dependency
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
141
|
+
dep_url = vuln["found_dependency"]["lockfile_line_url"]
|
|
142
|
+
if dep_url: # Lock file can be null, need to set
|
|
143
|
+
dep_file = dep_url.split("/")[-1].split("#")[0]
|
|
144
|
+
sca_vuln["dependencyFileLocation_path"] = dep_file
|
|
145
|
+
sca_vuln["dependencyFileLocation_url"] = dep_url
|
|
146
|
+
else:
|
|
147
|
+
if sca_vuln.get("location"):
|
|
148
|
+
sca_vuln["dependencyFileLocation_path"] = sca_vuln["location"]["file_path"]
|
|
149
|
+
sca_vuln["transitivity"] = vuln["found_dependency"]["transitivity"].upper()
|
|
150
|
+
if vuln.get("vulnerability_identifier"):
|
|
151
|
+
vuln_id = vuln["vulnerability_identifier"].upper()
|
|
152
|
+
sca_vuln["cveId"] = vuln_id
|
|
153
|
+
sca_vuln["ref_urls"] = [_build_vuln_url(vuln_id)]
|
|
154
|
+
if vuln.get('fix_recommendations') and len(vuln['fix_recommendations']) > 0:
|
|
155
|
+
fix = vuln['fix_recommendations'][0]
|
|
156
|
+
dep_fix = f"{fix['package']}|{fix['version']}"
|
|
137
157
|
sca_vuln["closestSafeDependency"] = dep_fix
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
sca_vuln["
|
|
141
|
-
sca_vuln["
|
|
142
|
-
|
|
143
|
-
|
|
158
|
+
sca_vuln["openedAt"] = vuln["created_at"]
|
|
159
|
+
sca_vuln["fixStatus"] = vuln["status"]
|
|
160
|
+
sca_vuln["triageStatus"] = vuln["triage_state"]
|
|
161
|
+
sca_vuln["confidence"] = vuln["confidence"]
|
|
162
|
+
usage = vuln.get("usage")
|
|
163
|
+
if usage:
|
|
144
164
|
usage_dict = {}
|
|
165
|
+
url = usage["location"]["url"]
|
|
145
166
|
usage_dict["SCA_ID"] = sca_vuln["id"]
|
|
146
|
-
usage_dict["findingId"] =
|
|
167
|
+
usage_dict["findingId"] = hash(url.split("github.com/")[-1])
|
|
147
168
|
usage_dict["path"] = usage["location"]["path"]
|
|
148
|
-
usage_dict["startLine"] = usage["location"]["
|
|
149
|
-
usage_dict["startCol"] = usage["location"]["
|
|
150
|
-
usage_dict["endLine"] = usage["location"]["
|
|
151
|
-
usage_dict["endCol"] = usage["location"]["
|
|
152
|
-
usage_dict["url"] =
|
|
169
|
+
usage_dict["startLine"] = usage["location"]["start_line"]
|
|
170
|
+
usage_dict["startCol"] = usage["location"]["start_col"]
|
|
171
|
+
usage_dict["endLine"] = usage["location"]["end_line"]
|
|
172
|
+
usage_dict["endCol"] = usage["location"]["end_col"]
|
|
173
|
+
usage_dict["url"] = url
|
|
153
174
|
usages.append(usage_dict)
|
|
154
175
|
vulns.append(sca_vuln)
|
|
155
|
-
return vulns, usages
|
|
156
176
|
|
|
157
|
-
|
|
158
|
-
@timeit
|
|
159
|
-
def load_semgrep_deployment(
|
|
160
|
-
neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
|
|
161
|
-
) -> None:
|
|
162
|
-
logger.info(f"Loading Semgrep deployment info {deployment} into the graph...")
|
|
163
|
-
load(
|
|
164
|
-
neo4j_session,
|
|
165
|
-
SemgrepDeploymentSchema(),
|
|
166
|
-
[deployment],
|
|
167
|
-
lastupdated=update_tag,
|
|
168
|
-
)
|
|
177
|
+
return vulns, usages
|
|
169
178
|
|
|
170
179
|
|
|
171
180
|
@timeit
|
|
@@ -175,7 +184,7 @@ def load_semgrep_sca_vulns(
|
|
|
175
184
|
deployment_id: str,
|
|
176
185
|
update_tag: int,
|
|
177
186
|
) -> None:
|
|
178
|
-
logger.info(f"Loading {len(vulns)}
|
|
187
|
+
logger.info(f"Loading {len(vulns)} SemgrepSCAFinding objects into the graph.")
|
|
179
188
|
load(
|
|
180
189
|
neo4j_session,
|
|
181
190
|
SemgrepSCAFindingSchema(),
|
|
@@ -192,7 +201,7 @@ def load_semgrep_sca_usages(
|
|
|
192
201
|
deployment_id: str,
|
|
193
202
|
update_tag: int,
|
|
194
203
|
) -> None:
|
|
195
|
-
logger.info(f"Loading {len(usages)}
|
|
204
|
+
logger.info(f"Loading {len(usages)} SemgrepSCALocation objects into the graph.")
|
|
196
205
|
load(
|
|
197
206
|
neo4j_session,
|
|
198
207
|
SemgrepSCALocationSchema(),
|
|
@@ -219,25 +228,32 @@ def cleanup(
|
|
|
219
228
|
|
|
220
229
|
|
|
221
230
|
@timeit
|
|
222
|
-
def
|
|
223
|
-
|
|
231
|
+
def sync_findings(
|
|
232
|
+
neo4j_session: neo4j.Session,
|
|
224
233
|
semgrep_app_token: str,
|
|
225
234
|
update_tag: int,
|
|
226
235
|
common_job_parameters: Dict[str, Any],
|
|
227
236
|
) -> None:
|
|
237
|
+
|
|
238
|
+
deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
|
|
239
|
+
deployment_slug = common_job_parameters.get("DEPLOYMENT_SLUG")
|
|
240
|
+
if not deployment_id or not deployment_slug:
|
|
241
|
+
logger.warning(
|
|
242
|
+
"Missing Semgrep deployment ID or slug, ensure that sync_deployment() has been called."
|
|
243
|
+
"Skipping SCA findings sync job.",
|
|
244
|
+
)
|
|
245
|
+
return
|
|
246
|
+
|
|
228
247
|
logger.info("Running Semgrep SCA findings sync job.")
|
|
229
|
-
|
|
230
|
-
deployment_id = semgrep_deployment["id"]
|
|
231
|
-
load_semgrep_deployment(neo4j_sesion, semgrep_deployment, update_tag)
|
|
232
|
-
common_job_parameters["DEPLOYMENT_ID"] = deployment_id
|
|
233
|
-
raw_vulns = get_sca_vulns(semgrep_app_token, deployment_id)
|
|
248
|
+
raw_vulns = get_sca_vulns(semgrep_app_token, deployment_slug)
|
|
234
249
|
vulns, usages = transform_sca_vulns(raw_vulns)
|
|
235
|
-
load_semgrep_sca_vulns(
|
|
236
|
-
load_semgrep_sca_usages(
|
|
237
|
-
run_scoped_analysis_job('semgrep_sca_risk_analysis.json',
|
|
238
|
-
|
|
250
|
+
load_semgrep_sca_vulns(neo4j_session, vulns, deployment_id, update_tag)
|
|
251
|
+
load_semgrep_sca_usages(neo4j_session, usages, deployment_id, update_tag)
|
|
252
|
+
run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_session, common_job_parameters)
|
|
253
|
+
|
|
254
|
+
cleanup(neo4j_session, common_job_parameters)
|
|
239
255
|
merge_module_sync_metadata(
|
|
240
|
-
neo4j_session=
|
|
256
|
+
neo4j_session=neo4j_session,
|
|
241
257
|
group_type='Semgrep',
|
|
242
258
|
group_id=deployment_id,
|
|
243
259
|
synced_type='SCA',
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import neo4j
|
|
4
|
+
|
|
5
|
+
from cartography.config import Config
|
|
6
|
+
from cartography.intel.snipeit import asset
|
|
7
|
+
from cartography.intel.snipeit import user
|
|
8
|
+
from cartography.stats import get_stats_client
|
|
9
|
+
from cartography.util import timeit
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
stat_handler = get_stats_client(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@timeit
def start_snipeit_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
    """
    Entry point for the SnipeIT intel module: sync users, then assets.
    Skips the sync entirely when base URI, token, or tenant ID is unconfigured.
    """
    required_settings = (config.snipeit_base_uri, config.snipeit_token, config.snipeit_tenant_id)
    if any(setting is None for setting in required_settings):
        logger.warning(
            "Required parameter(s) missing. Skipping sync.",
        )
        return

    common_job_parameters = {
        "UPDATE_TAG": config.update_tag,
        "TENANT_ID": config.snipeit_tenant_id,
    }

    # Ingest SnipeIT users and assets
    user.sync(neo4j_session, common_job_parameters, config.snipeit_base_uri, config.snipeit_token)
    asset.sync(neo4j_session, common_job_parameters, config.snipeit_base_uri, config.snipeit_token)
|