cartography 0.95.0rc1__py3-none-any.whl → 0.96.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/cli.py +15 -0
- cartography/client/core/tx.py +1 -1
- cartography/config.py +6 -2
- cartography/data/indexes.cypher +1 -2
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +16 -0
- cartography/data/jobs/cleanup/{github_users_cleanup.json → github_org_and_users_cleanup.json} +5 -0
- cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
- cartography/graph/querybuilder.py +4 -0
- cartography/intel/aws/apigateway.py +3 -3
- cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
- cartography/intel/aws/ec2/instances.py +2 -0
- cartography/intel/aws/ec2/network_acls.py +209 -0
- cartography/intel/aws/ec2/subnets.py +2 -0
- cartography/intel/aws/iam.py +4 -3
- cartography/intel/aws/identitycenter.py +307 -0
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/cve/__init__.py +1 -1
- cartography/intel/cve/feed.py +10 -7
- cartography/intel/github/repos.py +176 -27
- cartography/intel/github/users.py +156 -39
- cartography/intel/okta/users.py +2 -1
- cartography/intel/semgrep/__init__.py +9 -2
- cartography/intel/semgrep/dependencies.py +233 -0
- cartography/intel/semgrep/deployment.py +67 -0
- cartography/intel/semgrep/findings.py +22 -53
- cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
- cartography/models/aws/ec2/launch_configurations.py +55 -0
- cartography/models/aws/ec2/network_acl_rules.py +98 -0
- cartography/models/aws/ec2/network_acls.py +86 -0
- cartography/models/aws/identitycenter/__init__.py +0 -0
- cartography/models/aws/identitycenter/awsidentitycenter.py +44 -0
- cartography/models/aws/identitycenter/awspermissionset.py +84 -0
- cartography/models/aws/identitycenter/awsssouser.py +68 -0
- cartography/models/core/common.py +18 -1
- cartography/models/github/orgs.py +26 -0
- cartography/models/github/users.py +119 -0
- cartography/models/semgrep/dependencies.py +90 -0
- cartography-0.96.0.dist-info/METADATA +53 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/RECORD +43 -27
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/WHEEL +1 -1
- cartography-0.95.0rc1.dist-info/METADATA +0 -53
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/LICENSE +0 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Callable
|
|
4
|
+
from typing import Dict
|
|
5
|
+
from typing import List
|
|
6
|
+
|
|
7
|
+
import neo4j
|
|
8
|
+
import requests
|
|
9
|
+
from requests.exceptions import HTTPError
|
|
10
|
+
from requests.exceptions import ReadTimeout
|
|
11
|
+
|
|
12
|
+
from cartography.client.core.tx import load
|
|
13
|
+
from cartography.graph.job import GraphJob
|
|
14
|
+
from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
|
|
15
|
+
from cartography.models.semgrep.dependencies import SemgrepNpmLibrarySchema
|
|
16
|
+
from cartography.stats import get_stats_client
|
|
17
|
+
from cartography.util import merge_module_sync_metadata
|
|
18
|
+
from cartography.util import timeit
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)
# Sent as "pageSize" in the body of every dependencies request.
_PAGE_SIZE = 10000
# Passed as `timeout=` to requests.post; requests reads a 2-tuple as (connect, read) seconds.
_TIMEOUT = (60, 60)
# Number of consecutive failed attempts on one page before the error is re-raised.
_MAX_RETRIES = 3

# The keys in this dictionary must be in Semgrep's list of supported ecosystems, defined here:
# https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion
# Each value is the node schema class used to load and clean up dependencies of that ecosystem.
ECOSYSTEM_TO_SCHEMA: Dict = {
    'gomod': SemgrepGoLibrarySchema,
    'npm': SemgrepNpmLibrarySchema,
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def parse_and_validate_semgrep_ecosystems(ecosystems: str) -> List[str]:
    """
    Parses a comma-separated list of Semgrep ecosystems and validates every entry.

    Each entry is stripped and lower-cased, then checked against the keys of ECOSYSTEM_TO_SCHEMA.
    Repeated entries are returned only once (first occurrence wins) so that a caller iterating over
    the result does not fetch and load the same ecosystem twice.

    param: ecosystems: Comma-separated ecosystem names, e.g. "gomod,npm".
    return: The normalized ecosystem names in input order, without duplicates.
    raises: ValueError if any entry (including an empty one) is not a supported ecosystem.
    """
    validated_ecosystems: List[str] = []
    for ecosystem in ecosystems.split(','):
        ecosystem = ecosystem.strip().lower()

        if ecosystem not in ECOSYSTEM_TO_SCHEMA:
            valid_ecosystems: str = ','.join(ECOSYSTEM_TO_SCHEMA.keys())
            raise ValueError(
                f'Error parsing `semgrep-dependency-ecosystems`. You specified "{ecosystems}". '
                f'Please check that your input is formatted as comma-separated values, e.g. "gomod,npm". '
                f'Full list of supported ecosystems: {valid_ecosystems}.',
            )

        # Skip duplicates: syncing one ecosystem twice only repeats the same API calls and writes.
        if ecosystem not in validated_ecosystems:
            validated_ecosystems.append(ecosystem)
    return validated_ecosystems
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@timeit
def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystem: str) -> List[Dict[str, Any]]:
    """
    Gets all dependencies for the given ecosystem within the given Semgrep deployment ID.

    Pages through the deployment's dependencies endpoint, following the `cursor` returned with each
    page until the response reports `hasMore` as false.

    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
    param: ecosystem: The ecosystem to import dependencies from, e.g. "gomod" or "npm".
    return: The raw dependency dicts from every page, in the order received.
    raises: ReadTimeout or HTTPError once a single page has failed _MAX_RETRIES times in a row.
    raises: RuntimeError if a page reports more results but provides no cursor to continue from.
    """
    all_deps: List[Dict[str, Any]] = []
    deps_url = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
    has_more = True
    page = 0
    retries = 0
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }

    request_data: Dict[str, Any] = {
        "pageSize": _PAGE_SIZE,
        "dependencyFilter": {
            "ecosystem": [ecosystem],
        },
    }

    logger.info(f"Retrieving Semgrep {ecosystem} dependencies for deployment '{deployment_id}'.")
    while has_more:
        try:
            response = requests.post(deps_url, json=request_data, headers=headers, timeout=_TIMEOUT)
            response.raise_for_status()
            data = response.json()
        except (ReadTimeout, HTTPError):
            retries += 1
            if retries >= _MAX_RETRIES:
                # Only claim a retry when one will actually happen.
                logger.error(
                    f"Failed to retrieve Semgrep {ecosystem} dependencies for page {page}. "
                    f"Giving up after {retries} attempts.",
                )
                raise
            logger.warning(f"Failed to retrieve Semgrep {ecosystem} dependencies for page {page}. Retrying...")
            continue
        deps = data.get("dependencies", [])
        has_more = data.get("hasMore", False)
        logger.info(f"Processed page {page} of Semgrep {ecosystem} dependencies.")
        all_deps.extend(deps)
        # The retry budget applies per page, so reset it after every successful page.
        retries = 0
        page += 1
        cursor = data.get("cursor")
        if has_more and cursor is None:
            # Without a cursor the next request would repeat this page indefinitely. Fail loudly
            # rather than return partial data that a later cleanup job would treat as complete.
            raise RuntimeError(
                f"Semgrep reported more {ecosystem} dependencies after page {page - 1} "
                "but did not return a cursor.",
            )
        request_data["cursor"] = cursor

    logger.info(f"Retrieved {len(all_deps)} Semgrep {ecosystem} dependencies in {page} pages.")
    return all_deps
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transforms the raw dependencies response from Semgrep API into a list of dicts
    that can be used to create the Dependency nodes.

    param: raw_deps: Dependency dicts as returned by the Semgrep dependencies endpoint.
    return: One dict per raw dependency with the keys id, name, specifier, version, repo_url,
        ecosystem, transitivity and url.
    raises: KeyError if a raw dependency lacks "definedAt"/"url", "package"/"name",
        "package"/"versionSpecifier", "ecosystem" or "transitivity".
    """
    # Sample raw_dep as of November 2024:
    # {
    #     "repositoryId": "123456",
    #     "definedAt": {
    #         "path": "go.mod",
    #         "startLine": "6",
    #         "endLine": "6",
    #         "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
    #         "committedAt": "1970-01-01T00:00:00Z",
    #         "startCol": "0",
    #         "endCol": "0"
    #     },
    #     "transitivity": "DIRECT",
    #     "package": {
    #         "name": "github.com/foo/bar",
    #         "versionSpecifier": "1.2.3"
    #     },
    #     "ecosystem": "gomod",
    #     "licenses": [],
    #     "pathToTransitivity": []
    # },
    deps = []
    for raw_dep in raw_deps:

        # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
        # but it's much simpler to just extract the URL from the definedAt field.
        defined_at_url = raw_dep["definedAt"]["url"]
        repo_url = defined_at_url.split("/blob/", 1)[0]

        name = raw_dep["package"]["name"]
        version = raw_dep["package"]["versionSpecifier"]
        # Named dep_id rather than id so the builtin id() is not shadowed.
        dep_id = f"{name}|{version}"

        # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
        # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
        # If Semgrep eventually supports version specifiers, update this line accordingly.
        specifier = f"=={version}"

        deps.append({
            # existing dependency properties:
            "id": dep_id,
            "name": name,
            "specifier": specifier,
            "version": version,
            "repo_url": repo_url,

            # Semgrep-specific properties:
            "ecosystem": raw_dep["ecosystem"],
            "transitivity": raw_dep["transitivity"].lower(),
            "url": defined_at_url,
        })

    return deps
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@timeit
def load_dependencies(
    neo4j_session: neo4j.Session,
    dependency_schema: Callable,
    dependencies: List[Dict],
    deployment_id: str,
    update_tag: int,
) -> None:
    """
    Writes the transformed dependencies to the graph using the given node schema.

    param: neo4j_session: The Neo4j session to write with.
    param: dependency_schema: Zero-argument callable returning the node schema to load with.
    param: dependencies: Transformed dependency dicts to load.
    param: deployment_id: Passed to load() as the DEPLOYMENT_ID keyword argument.
    param: update_tag: Passed to load() as the lastupdated keyword argument.
    """
    node_label = dependency_schema().label
    logger.info(f"Loading {len(dependencies)} {node_label} objects into the graph.")

    load_kwargs = {
        "lastupdated": update_tag,
        "DEPLOYMENT_ID": deployment_id,
    }
    load(neo4j_session, dependency_schema(), dependencies, **load_kwargs)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    dependency_schema: Callable,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Builds a cleanup GraphJob from the given node schema and runs it.

    param: neo4j_session: The Neo4j session the job runs against.
    param: dependency_schema: Zero-argument callable returning the node schema to clean up.
    param: common_job_parameters: Parameters handed to GraphJob.from_node_schema.
    """
    node_label = dependency_schema().label
    logger.info(f"Running Semgrep Dependencies cleanup job for {node_label}.")

    cleanup_job = GraphJob.from_node_schema(dependency_schema(), common_job_parameters)
    cleanup_job.run(neo4j_session)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@timeit
def sync_dependencies(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    ecosystems_str: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetches, transforms, loads and cleans up Semgrep dependencies for each configured ecosystem,
    then records sync metadata for the deployment.

    The sync is skipped with a warning when common_job_parameters has no "DEPLOYMENT_ID" or when
    ecosystems_str is empty.

    param: neo4j_session: The Neo4j session to write with.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: ecosystems_str: Comma-separated ecosystems to sync, e.g. "gomod,npm".
    param: update_tag: The update tag for this sync run.
    param: common_job_parameters: Shared job parameters; "DEPLOYMENT_ID" is read from it.
    """
    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")

    # Work out why the sync cannot run, if it cannot; the deployment ID check comes first.
    skip_reason = None
    if not deployment_id:
        skip_reason = "Missing Semgrep deployment ID, ensure that sync_deployment() has been called. "
    elif not ecosystems_str:
        skip_reason = "Semgrep is not configured to import dependencies for any ecosystems, see docs to configure. "

    if skip_reason is not None:
        logger.warning(skip_reason + "Skipping Semgrep dependencies sync job.")
        return

    # We don't expect an error here since we've already validated the input in cli.py
    validated = parse_and_validate_semgrep_ecosystems(ecosystems_str)

    logger.info("Running Semgrep dependencies sync job.")

    for ecosystem_name in validated:
        node_schema = ECOSYSTEM_TO_SCHEMA[ecosystem_name]
        transformed = transform_dependencies(
            get_dependencies(semgrep_app_token, deployment_id, ecosystem_name),
        )
        load_dependencies(neo4j_session, node_schema, transformed, deployment_id, update_tag)
        cleanup(neo4j_session, node_schema, common_job_parameters)

    merge_module_sync_metadata(
        neo4j_session=neo4j_session,
        group_type='Semgrep',
        group_id=deployment_id,
        synced_type='SemgrepDependency',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
|
|
5
|
+
import neo4j
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
|
|
10
|
+
from cartography.stats import get_stats_client
|
|
11
|
+
from cartography.util import timeit
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)
# Passed as `timeout=` to requests.get; requests reads a 2-tuple as (connect, read) seconds.
_TIMEOUT = (60, 60)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@timeit
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
    """
    Gets the deployment associated with the passed Semgrep App token.

    Only the first entry of the response's "deployments" list is used.

    param: semgrep_app_token: The Semgrep App token to use for authentication.
    return: A dict holding the "id", "name" and "slug" of that deployment.
    """
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }
    response = requests.get(
        "https://semgrep.dev/api/v1/deployments",
        headers=auth_headers,
        timeout=_TIMEOUT,
    )
    response.raise_for_status()

    first_deployment = response.json()["deployments"][0]
    return {field: first_deployment[field] for field in ("id", "name", "slug")}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@timeit
def load_semgrep_deployment(
    neo4j_session: neo4j.Session,
    deployment: Dict[str, Any],
    update_tag: int,
) -> None:
    """
    Loads a single Semgrep deployment into the graph using SemgrepDeploymentSchema.

    param: neo4j_session: The Neo4j session to write with.
    param: deployment: The deployment dict to load.
    param: update_tag: Passed to load() as the lastupdated keyword argument.
    """
    logger.info(f"Loading SemgrepDeployment {deployment} into the graph.")

    # load() takes a list of dicts, so wrap the single deployment.
    deployment_rows = [deployment]
    load(neo4j_session, SemgrepDeploymentSchema(), deployment_rows, lastupdated=update_tag)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@timeit
def sync_deployment(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetches the Semgrep deployment, loads it into the graph, and publishes its ID and slug
    in common_job_parameters as "DEPLOYMENT_ID" and "DEPLOYMENT_SLUG".

    param: neo4j_session: The Neo4j session to write with.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: update_tag: The update tag for this sync run.
    param: common_job_parameters: Shared job parameters; mutated in place.
    """
    deployment = get_deployment(semgrep_app_token)

    # Read both values before loading so a missing key fails before anything is written.
    published = {
        "DEPLOYMENT_ID": deployment["id"],
        "DEPLOYMENT_SLUG": deployment["slug"],
    }

    load_semgrep_deployment(neo4j_session, deployment, update_tag)
    common_job_parameters.update(published)
|
|
@@ -11,7 +11,6 @@ from requests.exceptions import ReadTimeout
|
|
|
11
11
|
|
|
12
12
|
from cartography.client.core.tx import load
|
|
13
13
|
from cartography.graph.job import GraphJob
|
|
14
|
-
from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
|
|
15
14
|
from cartography.models.semgrep.findings import SemgrepSCAFindingSchema
|
|
16
15
|
from cartography.models.semgrep.locations import SemgrepSCALocationSchema
|
|
17
16
|
from cartography.stats import get_stats_client
|
|
@@ -26,29 +25,6 @@ _TIMEOUT = (60, 60)
|
|
|
26
25
|
_MAX_RETRIES = 3
|
|
27
26
|
|
|
28
27
|
|
|
29
|
-
@timeit
|
|
30
|
-
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
|
|
31
|
-
"""
|
|
32
|
-
Gets the deployment associated with the passed Semgrep App token.
|
|
33
|
-
param: semgrep_app_token: The Semgrep App token to use for authentication.
|
|
34
|
-
"""
|
|
35
|
-
deployment = {}
|
|
36
|
-
deployment_url = "https://semgrep.dev/api/v1/deployments"
|
|
37
|
-
headers = {
|
|
38
|
-
"Content-Type": "application/json",
|
|
39
|
-
"Authorization": f"Bearer {semgrep_app_token}",
|
|
40
|
-
}
|
|
41
|
-
response = requests.get(deployment_url, headers=headers, timeout=_TIMEOUT)
|
|
42
|
-
response.raise_for_status()
|
|
43
|
-
|
|
44
|
-
data = response.json()
|
|
45
|
-
deployment["id"] = data["deployments"][0]["id"]
|
|
46
|
-
deployment["name"] = data["deployments"][0]["name"]
|
|
47
|
-
deployment["slug"] = data["deployments"][0]["slug"]
|
|
48
|
-
|
|
49
|
-
return deployment
|
|
50
|
-
|
|
51
|
-
|
|
52
28
|
@timeit
|
|
53
29
|
def get_sca_vulns(semgrep_app_token: str, deployment_slug: str) -> List[Dict[str, Any]]:
|
|
54
30
|
"""
|
|
@@ -81,11 +57,11 @@ def get_sca_vulns(semgrep_app_token: str, deployment_slug: str) -> List[Dict[str
|
|
|
81
57
|
response = requests.get(sca_url, params=request_data, headers=headers, timeout=_TIMEOUT)
|
|
82
58
|
response.raise_for_status()
|
|
83
59
|
data = response.json()
|
|
84
|
-
except (ReadTimeout, HTTPError)
|
|
60
|
+
except (ReadTimeout, HTTPError):
|
|
85
61
|
logger.warning(f"Failed to retrieve Semgrep SCA vulns for page {page}. Retrying...")
|
|
86
62
|
retries += 1
|
|
87
63
|
if retries >= _MAX_RETRIES:
|
|
88
|
-
raise
|
|
64
|
+
raise
|
|
89
65
|
continue
|
|
90
66
|
vulns = data["findings"]
|
|
91
67
|
has_more = len(vulns) > 0
|
|
@@ -201,19 +177,6 @@ def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str,
|
|
|
201
177
|
return vulns, usages
|
|
202
178
|
|
|
203
179
|
|
|
204
|
-
@timeit
|
|
205
|
-
def load_semgrep_deployment(
|
|
206
|
-
neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
|
|
207
|
-
) -> None:
|
|
208
|
-
logger.info(f"Loading Semgrep deployment info {deployment} into the graph...")
|
|
209
|
-
load(
|
|
210
|
-
neo4j_session,
|
|
211
|
-
SemgrepDeploymentSchema(),
|
|
212
|
-
[deployment],
|
|
213
|
-
lastupdated=update_tag,
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
|
|
217
180
|
@timeit
|
|
218
181
|
def load_semgrep_sca_vulns(
|
|
219
182
|
neo4j_session: neo4j.Session,
|
|
@@ -221,7 +184,7 @@ def load_semgrep_sca_vulns(
|
|
|
221
184
|
deployment_id: str,
|
|
222
185
|
update_tag: int,
|
|
223
186
|
) -> None:
|
|
224
|
-
logger.info(f"Loading {len(vulns)}
|
|
187
|
+
logger.info(f"Loading {len(vulns)} SemgrepSCAFinding objects into the graph.")
|
|
225
188
|
load(
|
|
226
189
|
neo4j_session,
|
|
227
190
|
SemgrepSCAFindingSchema(),
|
|
@@ -238,7 +201,7 @@ def load_semgrep_sca_usages(
|
|
|
238
201
|
deployment_id: str,
|
|
239
202
|
update_tag: int,
|
|
240
203
|
) -> None:
|
|
241
|
-
logger.info(f"Loading {len(usages)}
|
|
204
|
+
logger.info(f"Loading {len(usages)} SemgrepSCALocation objects into the graph.")
|
|
242
205
|
load(
|
|
243
206
|
neo4j_session,
|
|
244
207
|
SemgrepSCALocationSchema(),
|
|
@@ -265,26 +228,32 @@ def cleanup(
|
|
|
265
228
|
|
|
266
229
|
|
|
267
230
|
@timeit
|
|
268
|
-
def
|
|
269
|
-
|
|
231
|
+
def sync_findings(
|
|
232
|
+
neo4j_session: neo4j.Session,
|
|
270
233
|
semgrep_app_token: str,
|
|
271
234
|
update_tag: int,
|
|
272
235
|
common_job_parameters: Dict[str, Any],
|
|
273
236
|
) -> None:
|
|
237
|
+
|
|
238
|
+
deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
|
|
239
|
+
deployment_slug = common_job_parameters.get("DEPLOYMENT_SLUG")
|
|
240
|
+
if not deployment_id or not deployment_slug:
|
|
241
|
+
logger.warning(
|
|
242
|
+
"Missing Semgrep deployment ID or slug, ensure that sync_deployment() has been called."
|
|
243
|
+
"Skipping SCA findings sync job.",
|
|
244
|
+
)
|
|
245
|
+
return
|
|
246
|
+
|
|
274
247
|
logger.info("Running Semgrep SCA findings sync job.")
|
|
275
|
-
semgrep_deployment = get_deployment(semgrep_app_token)
|
|
276
|
-
deployment_id = semgrep_deployment["id"]
|
|
277
|
-
deployment_slug = semgrep_deployment["slug"]
|
|
278
|
-
load_semgrep_deployment(neo4j_sesion, semgrep_deployment, update_tag)
|
|
279
|
-
common_job_parameters["DEPLOYMENT_ID"] = deployment_id
|
|
280
248
|
raw_vulns = get_sca_vulns(semgrep_app_token, deployment_slug)
|
|
281
249
|
vulns, usages = transform_sca_vulns(raw_vulns)
|
|
282
|
-
load_semgrep_sca_vulns(
|
|
283
|
-
load_semgrep_sca_usages(
|
|
284
|
-
run_scoped_analysis_job('semgrep_sca_risk_analysis.json',
|
|
285
|
-
|
|
250
|
+
load_semgrep_sca_vulns(neo4j_session, vulns, deployment_id, update_tag)
|
|
251
|
+
load_semgrep_sca_usages(neo4j_session, usages, deployment_id, update_tag)
|
|
252
|
+
run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_session, common_job_parameters)
|
|
253
|
+
|
|
254
|
+
cleanup(neo4j_session, common_job_parameters)
|
|
286
255
|
merge_module_sync_metadata(
|
|
287
|
-
neo4j_session=
|
|
256
|
+
neo4j_session=neo4j_session,
|
|
288
257
|
group_type='Semgrep',
|
|
289
258
|
group_id=deployment_id,
|
|
290
259
|
synced_type='SCA',
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
11
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class AutoScalingGroupNodeProperties(CartographyNodeProperties):
    """
    Properties of an AutoScalingGroup node, each mapped to a key of the source data via PropertyRef.

    `region` and `lastupdated` are declared with set_in_kwargs=True
    (presumably supplied as keyword arguments at load time rather than per record; confirm against PropertyRef).
    """
    # id and arn are both read from the same 'AutoScalingGroupARN' key.
    id: PropertyRef = PropertyRef('AutoScalingGroupARN')
    arn: PropertyRef = PropertyRef('AutoScalingGroupARN')
    capacityrebalance: PropertyRef = PropertyRef('CapacityRebalance')
    createdtime: PropertyRef = PropertyRef('CreatedTime')
    defaultcooldown: PropertyRef = PropertyRef('DefaultCooldown')
    desiredcapacity: PropertyRef = PropertyRef('DesiredCapacity')
    healthcheckgraceperiod: PropertyRef = PropertyRef('HealthCheckGracePeriod')
    healthchecktype: PropertyRef = PropertyRef('HealthCheckType')
    launchconfigurationname: PropertyRef = PropertyRef('LaunchConfigurationName')
    launchtemplatename: PropertyRef = PropertyRef('LaunchTemplateName')
    launchtemplateid: PropertyRef = PropertyRef('LaunchTemplateId')
    launchtemplateversion: PropertyRef = PropertyRef('LaunchTemplateVersion')
    maxinstancelifetime: PropertyRef = PropertyRef('MaxInstanceLifetime')
    maxsize: PropertyRef = PropertyRef('MaxSize')
    minsize: PropertyRef = PropertyRef('MinSize')
    name: PropertyRef = PropertyRef('AutoScalingGroupName')
    newinstancesprotectedfromscalein: PropertyRef = PropertyRef('NewInstancesProtectedFromScaleIn')
    region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
    status: PropertyRef = PropertyRef('Status')
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# EC2 to AutoScalingGroup
|
|
39
|
+
@dataclass(frozen=True)
class EC2InstanceToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the EC2Instance-to-AWSAccount relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class EC2InstanceToAWSAccount(CartographyRelSchema):
    """
    RESOURCE relationship (direction INWARD) between an EC2Instance and the AWSAccount
    whose `id` matches the 'AWS_ID' reference (declared with set_in_kwargs=True).
    """
    target_node_label: str = 'AWSAccount'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: EC2InstanceToAwsAccountRelProperties = EC2InstanceToAwsAccountRelProperties()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class EC2InstanceToAutoScalingGroupRelProperties(CartographyRelProperties):
    """Properties of the EC2Instance-to-AutoScalingGroup relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True)
class EC2InstanceToAutoScalingGroup(CartographyRelSchema):
    """
    MEMBER_AUTO_SCALE_GROUP relationship (direction OUTWARD) between an EC2Instance and the
    AutoScalingGroup whose `id` matches the record's 'AutoScalingGroupARN'.
    """
    target_node_label: str = 'AutoScalingGroup'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AutoScalingGroupARN')},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "MEMBER_AUTO_SCALE_GROUP"
    properties: EC2InstanceToAutoScalingGroupRelProperties = EC2InstanceToAutoScalingGroupRelProperties()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass(frozen=True)
class EC2InstanceAutoScalingGroupProperties(CartographyNodeProperties):
    """
    The EC2Instance properties used by EC2InstanceAutoScalingGroupSchema.

    `id` and `instanceid` are both read from 'InstanceId'; `instanceid` is flagged extra_index=True.
    """
    id: PropertyRef = PropertyRef('InstanceId')
    instanceid: PropertyRef = PropertyRef('InstanceId', extra_index=True)
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
    region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass(frozen=True)
class EC2InstanceAutoScalingGroupSchema(CartographyNodeSchema):
    """
    Node schema with label 'EC2Instance' carrying only EC2InstanceAutoScalingGroupProperties.

    Its sub-resource relationship targets AWSAccount and its one other relationship
    targets AutoScalingGroup.
    """
    label: str = 'EC2Instance'
    properties: EC2InstanceAutoScalingGroupProperties = EC2InstanceAutoScalingGroupProperties()
    sub_resource_relationship: EC2InstanceToAWSAccount = EC2InstanceToAWSAccount()
    other_relationships: OtherRelationships = OtherRelationships(
        [
            EC2InstanceToAutoScalingGroup(),
        ],
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# EC2Subnet to AutoScalingGroup
|
|
92
|
+
@dataclass(frozen=True)
class EC2SubnetToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the EC2Subnet-to-AWSAccount relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass(frozen=True)
class EC2SubnetToAWSAccount(CartographyRelSchema):
    """
    RESOURCE relationship (direction INWARD) between an EC2Subnet and the AWSAccount
    whose `id` matches the 'AWS_ID' reference (declared with set_in_kwargs=True).
    """
    target_node_label: str = 'AWSAccount'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: EC2SubnetToAwsAccountRelProperties = EC2SubnetToAwsAccountRelProperties()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass(frozen=True)
class EC2SubnetToAutoScalingGroupRelProperties(CartographyRelProperties):
    """Properties of the EC2Subnet-to-AutoScalingGroup relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass(frozen=True)
class EC2SubnetToAutoScalingGroup(CartographyRelSchema):
    """
    VPC_IDENTIFIER relationship (direction INWARD) between an EC2Subnet and the
    AutoScalingGroup whose `id` matches the record's 'AutoScalingGroupARN'.
    """
    target_node_label: str = 'AutoScalingGroup'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AutoScalingGroupARN')},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "VPC_IDENTIFIER"
    properties: EC2SubnetToAutoScalingGroupRelProperties = EC2SubnetToAutoScalingGroupRelProperties()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass(frozen=True)
class EC2SubnetAutoScalingGroupNodeProperties(CartographyNodeProperties):
    """
    The EC2Subnet properties used by EC2SubnetAutoScalingGroupSchema.

    `id` and `subnetid` are both read from 'VPCZoneIdentifier'; `subnetid` is flagged extra_index=True.
    """
    id: PropertyRef = PropertyRef('VPCZoneIdentifier')
    subnetid: PropertyRef = PropertyRef('VPCZoneIdentifier', extra_index=True)
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass(frozen=True)
class EC2SubnetAutoScalingGroupSchema(CartographyNodeSchema):
    """
    Node schema with label 'EC2Subnet' carrying only EC2SubnetAutoScalingGroupNodeProperties.

    Its sub-resource relationship targets AWSAccount and its one other relationship
    targets AutoScalingGroup.
    """
    label: str = 'EC2Subnet'
    properties: EC2SubnetAutoScalingGroupNodeProperties = EC2SubnetAutoScalingGroupNodeProperties()
    sub_resource_relationship: EC2SubnetToAWSAccount = EC2SubnetToAWSAccount()
    other_relationships: OtherRelationships = OtherRelationships(
        [
            EC2SubnetToAutoScalingGroup(),
        ],
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# AutoScalingGroup
|
|
144
|
+
@dataclass(frozen=True)
class AutoScalingGroupToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the AutoScalingGroup-to-AWSAccount relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass(frozen=True)
class AutoScalingGroupToAWSAccount(CartographyRelSchema):
    """
    RESOURCE relationship (direction INWARD) between an AutoScalingGroup and the AWSAccount
    whose `id` matches the 'AWS_ID' reference (declared with set_in_kwargs=True).
    """
    target_node_label: str = 'AWSAccount'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: AutoScalingGroupToAwsAccountRelProperties = AutoScalingGroupToAwsAccountRelProperties()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass(frozen=True)
class AutoScalingGroupToLaunchTemplateRelProperties(CartographyRelProperties):
    """Properties of the AutoScalingGroup-to-LaunchTemplate relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@dataclass(frozen=True)
class AutoScalingGroupToLaunchTemplate(CartographyRelSchema):
    """
    HAS_LAUNCH_TEMPLATE relationship (direction OUTWARD) between an AutoScalingGroup and the
    LaunchTemplate whose `id` matches the record's 'LaunchTemplateId'.
    """
    target_node_label: str = 'LaunchTemplate'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('LaunchTemplateId')},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "HAS_LAUNCH_TEMPLATE"
    properties: AutoScalingGroupToLaunchTemplateRelProperties = AutoScalingGroupToLaunchTemplateRelProperties()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass(frozen=True)
class AutoScalingGroupToLaunchConfigurationRelProperties(CartographyRelProperties):
    """Properties of the AutoScalingGroup-to-LaunchConfiguration relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@dataclass(frozen=True)
class AutoScalingGroupToLaunchConfiguration(CartographyRelSchema):
    """
    HAS_LAUNCH_CONFIG relationship (direction OUTWARD) between an AutoScalingGroup and the
    LaunchConfiguration whose `name` matches the record's 'LaunchConfigurationName'.
    """
    target_node_label: str = 'LaunchConfiguration'
    # Matches on the target's `name`, not its `id`.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'name': PropertyRef('LaunchConfigurationName')},
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "HAS_LAUNCH_CONFIG"
    properties: AutoScalingGroupToLaunchConfigurationRelProperties = (
        AutoScalingGroupToLaunchConfigurationRelProperties()
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass(frozen=True)
class AutoScalingGroupSchema(CartographyNodeSchema):
    """
    Node schema with label 'AutoScalingGroup' using AutoScalingGroupNodeProperties.

    Its sub-resource relationship targets AWSAccount; its other relationships target
    LaunchTemplate and LaunchConfiguration.
    """
    label: str = 'AutoScalingGroup'
    properties: AutoScalingGroupNodeProperties = AutoScalingGroupNodeProperties()
    sub_resource_relationship: AutoScalingGroupToAWSAccount = AutoScalingGroupToAWSAccount()
    other_relationships: OtherRelationships = OtherRelationships(
        [
            AutoScalingGroupToLaunchTemplate(),
            AutoScalingGroupToLaunchConfiguration(),
        ],
    )
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class LaunchConfigurationNodeProperties(CartographyNodeProperties):
    """
    Properties of a LaunchConfiguration node, each mapped to a key of the source data via PropertyRef.

    `id` and `arn` are both read from 'LaunchConfigurationARN'. `region` and `lastupdated` are
    declared with set_in_kwargs=True.
    """
    id: PropertyRef = PropertyRef('LaunchConfigurationARN')
    arn: PropertyRef = PropertyRef('LaunchConfigurationARN')
    # The annotation is required: @dataclass only turns annotated class attributes into fields,
    # so without it created_time is invisible to anything that enumerates the dataclass fields.
    created_time: PropertyRef = PropertyRef('CreatedTime')
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
    image_id: PropertyRef = PropertyRef('ImageId')
    key_name: PropertyRef = PropertyRef('KeyName')
    name: PropertyRef = PropertyRef('LaunchConfigurationName')
    security_groups: PropertyRef = PropertyRef('SecurityGroups')
    instance_type: PropertyRef = PropertyRef('InstanceType')
    kernel_id: PropertyRef = PropertyRef('KernelId')
    ramdisk_id: PropertyRef = PropertyRef('RamdiskId')
    instance_monitoring_enabled: PropertyRef = PropertyRef('InstanceMonitoringEnabled')
    spot_price: PropertyRef = PropertyRef('SpotPrice')
    iam_instance_profile: PropertyRef = PropertyRef('IamInstanceProfile')
    ebs_optimized: PropertyRef = PropertyRef('EbsOptimized')
    associate_public_ip_address: PropertyRef = PropertyRef('AssociatePublicIpAddress')
    placement_tenancy: PropertyRef = PropertyRef('PlacementTenancy')
    region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class LaunchConfigurationToAwsAccountRelProperties(CartographyRelProperties):
    """Properties of the LaunchConfiguration-to-AWSAccount relationship; only `lastupdated` is declared."""
    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class LaunchConfigurationToAwsAccount(CartographyRelSchema):
    """
    RESOURCE relationship (direction INWARD) between a LaunchConfiguration and the AWSAccount
    whose `id` matches the 'AWS_ID' reference (declared with set_in_kwargs=True).
    """
    target_node_label: str = 'AWSAccount'
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: LaunchConfigurationToAwsAccountRelProperties = LaunchConfigurationToAwsAccountRelProperties()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class LaunchConfigurationSchema(CartographyNodeSchema):
    """
    Node schema with label 'LaunchConfiguration' using LaunchConfigurationNodeProperties.

    Its sub-resource relationship targets AWSAccount; no other relationships are declared here.
    """
    label: str = 'LaunchConfiguration'
    properties: LaunchConfigurationNodeProperties = LaunchConfigurationNodeProperties()
    sub_resource_relationship: LaunchConfigurationToAwsAccount = LaunchConfigurationToAwsAccount()
|