cartography 0.94.0rc3__py3-none-any.whl → 0.95.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic.
- cartography/cli.py +42 -24
- cartography/config.py +12 -8
- cartography/data/indexes.cypher +0 -2
- cartography/driftdetect/cli.py +1 -1
- cartography/graph/job.py +8 -1
- cartography/intel/aws/permission_relationships.py +6 -2
- cartography/intel/gcp/__init__.py +110 -23
- cartography/intel/kandji/__init__.py +1 -1
- cartography/intel/semgrep/__init__.py +9 -2
- cartography/intel/semgrep/dependencies.py +201 -0
- cartography/intel/semgrep/deployment.py +67 -0
- cartography/intel/semgrep/findings.py +22 -53
- cartography/intel/snipeit/__init__.py +30 -0
- cartography/intel/snipeit/asset.py +74 -0
- cartography/intel/snipeit/user.py +75 -0
- cartography/intel/snipeit/util.py +35 -0
- cartography/models/semgrep/dependencies.py +77 -0
- cartography/models/snipeit/__init__.py +0 -0
- cartography/models/snipeit/asset.py +81 -0
- cartography/models/snipeit/tenant.py +17 -0
- cartography/models/snipeit/user.py +49 -0
- cartography/sync.py +2 -2
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/LICENSE +1 -1
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/METADATA +3 -5
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/RECORD +28 -21
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/WHEEL +1 -1
- cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
- cartography/intel/crxcavator/__init__.py +0 -44
- cartography/intel/crxcavator/crxcavator.py +0 -329
- cartography-0.94.0rc3.dist-info/NOTICE +0 -4
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.94.0rc3.dist-info → cartography-0.95.0.dist-info}/top_level.txt +0 -0
cartography/cli.py
CHANGED
@@ -220,23 +220,6 @@ class CLI:
                 ' If not specified, cartography by default will run all AWS sync modules available.'
             ),
         )
-        parser.add_argument(
-            '--crxcavator-api-base-uri',
-            type=str,
-            default='https://api.crxcavator.io/v1',
-            help=(
-                'Base URI for the CRXcavator API. Defaults to public API endpoint.'
-            ),
-        )
-        parser.add_argument(
-            '--crxcavator-api-key-env-var',
-            type=str,
-            default=None,
-            help=(
-                'The name of an environment variable containing a key with which to auth to the CRXcavator API. '
-                'Required if you are using the CRXcavator intel module. Ignored otherwise.'
-            ),
-        )
         parser.add_argument(
             '--analysis-job-directory',
             type=str,

@@ -541,6 +524,28 @@ class CLI:
                 'Required if you are using the Semgrep intel module. Ignored otherwise.'
             ),
         )
+        parser.add_argument(
+            '--snipeit-base-uri',
+            type=str,
+            default=None,
+            help=(
+                'Your SnipeIT base URI'
+                'Required if you are using the SnipeIT intel module. Ignored otherwise.'
+            ),
+        )
+        parser.add_argument(
+            '--snipeit-token-env-var',
+            type=str,
+            default=None,
+            help='The name of an environment variable containing token with which to authenticate to SnipeIT.',
+        )
+        parser.add_argument(
+            '--snipeit-tenant-id',
+            type=str,
+            default=None,
+            help='An ID for the SnipeIT tenant.',
+        )
+
         return parser

     def main(self, argv: str) -> int:

@@ -604,13 +609,6 @@ class CLI:
         else:
             config.okta_api_key = None

-        # CRXcavator config
-        if config.crxcavator_api_base_uri and config.crxcavator_api_key_env_var:
-            logger.debug(f"Reading API key for CRXcavator from env variable {config.crxcavator_api_key_env_var}.")
-            config.crxcavator_api_key = os.environ.get(config.crxcavator_api_key_env_var)
-        else:
-            config.crxcavator_api_key = None
-
         # GitHub config
         if config.github_config_env_var:
             logger.debug(f"Reading config string for GitHub from environment variable {config.github_config_env_var}")

@@ -744,6 +742,26 @@ class CLI:
         else:
             config.cve_api_key = None

+        # SnipeIT config
+        if config.snipeit_base_uri:
+            if config.snipeit_token_env_var:
+                logger.debug(
+                    "Reading SnipeIT API token from environment variable '%s'.",
+                    config.snipeit_token_env_var,
+                )
+                config.snipeit_token = os.environ.get(config.snipeit_token_env_var)
+            elif os.environ.get('SNIPEIT_TOKEN'):
+                logger.debug(
+                    "Reading SnipeIT API token from environment variable 'SNIPEIT_TOKEN'.",
+                )
+                config.snipeit_token = os.environ.get('SNIPEIT_TOKEN')
+            else:
+                logger.warning("A SnipeIT base URI was provided but a token was not.")
+                config.kandji_token = None
+            else:
+        else:
+            logger.warning("A SnipeIT base URI was not provided.")
+            config.snipeit_base_uri = None
+
         # Run cartography
         try:
             return cartography.sync.run_with_config(self.sync, config)
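A note on the SnipeIT block above: token resolution prefers the environment variable named by --snipeit-token-env-var and falls back to SNIPEIT_TOKEN; also, the no-token branch assigns config.kandji_token = None, which reads like a carry-over from the similar Kandji block. A minimal standalone sketch of the intended lookup order (not cartography's API):

import os
from typing import Optional


def resolve_snipeit_token(token_env_var: Optional[str]) -> Optional[str]:
    # Prefer the user-supplied variable name, then the SNIPEIT_TOKEN default.
    if token_env_var:
        return os.environ.get(token_env_var)
    return os.environ.get('SNIPEIT_TOKEN')


# e.g. with SNIPEIT_TOKEN=abc123 set: resolve_snipeit_token(None) == 'abc123'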
cartography/config.py
CHANGED
@@ -43,10 +43,6 @@ class Config:
     :param azure_client_secret: Client Secret for connecting in a Service Principal Authentication approach. Optional.
     :type aws_requested_syncs: str
     :param aws_requested_syncs: Comma-separated list of AWS resources to sync. Optional.
-    :type crxcavator_api_base_uri: str
-    :param crxcavator_api_base_uri: URI for CRXcavator API. Optional.
-    :type crxcavator_api_key: str
-    :param crxcavator_api_key: Auth key for CRXcavator API. Optional.
     :type analysis_job_directory: str
     :param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
     :type oci_sync_all_profiles: bool

@@ -111,6 +107,12 @@ class Config:
     :param duo_api_hostname: The Duo api hostname, e.g. "api-abc123.duosecurity.com". Optional.
     :param semgrep_app_token: The Semgrep api token. Optional.
     :type semgrep_app_token: str
+    :type snipeit_base_uri: string
+    :param snipeit_base_uri: SnipeIT data provider base URI. Optional.
+    :type snipeit_token: string
+    :param snipeit_token: Token used to authenticate to the SnipeIT data provider. Optional.
+    :type snipeit_tenant_id: string
+    :param snipeit_tenant_id: Token used to authenticate to the SnipeIT data provider. Optional.
     """

     def __init__(

@@ -131,8 +133,6 @@ class Config:
         azure_client_secret=None,
         aws_requested_syncs=None,
         analysis_job_directory=None,
-        crxcavator_api_base_uri=None,
-        crxcavator_api_key=None,
         oci_sync_all_profiles=None,
         okta_org_id=None,
         okta_api_key=None,

@@ -170,6 +170,9 @@ class Config:
         duo_api_secret=None,
         duo_api_hostname=None,
         semgrep_app_token=None,
+        snipeit_base_uri=None,
+        snipeit_token=None,
+        snipeit_tenant_id=None,
     ):
         self.neo4j_uri = neo4j_uri
         self.neo4j_user = neo4j_user

@@ -187,8 +190,6 @@ class Config:
         self.azure_client_secret = azure_client_secret
         self.aws_requested_syncs = aws_requested_syncs
         self.analysis_job_directory = analysis_job_directory
-        self.crxcavator_api_base_uri = crxcavator_api_base_uri
-        self.crxcavator_api_key = crxcavator_api_key
         self.oci_sync_all_profiles = oci_sync_all_profiles
         self.okta_org_id = okta_org_id
         self.okta_api_key = okta_api_key

@@ -226,3 +227,6 @@ class Config:
         self.duo_api_secret = duo_api_secret
         self.duo_api_hostname = duo_api_hostname
         self.semgrep_app_token = semgrep_app_token
+        self.snipeit_base_uri = snipeit_base_uri
+        self.snipeit_token = snipeit_token
+        self.snipeit_tenant_id = snipeit_tenant_id
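With the additions above, the new settings can be passed as keyword arguments when constructing a Config object directly. A minimal sketch (all values are placeholders):

from cartography.config import Config

config = Config(
    neo4j_uri='bolt://localhost:7687',               # placeholder Neo4j endpoint
    snipeit_base_uri='https://snipeit.example.com',  # placeholder tenant URL
    snipeit_token='REDACTED',                        # placeholder token
    snipeit_tenant_id='example-tenant',              # placeholder tenant ID
)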
cartography/data/indexes.cypher
CHANGED
@@ -65,8 +65,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.accesskeyid);
 CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.arn);
 CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.lastupdated);
-CREATE INDEX IF NOT EXISTS FOR (n:ChromeExtension) ON (n.id);
-CREATE INDEX IF NOT EXISTS FOR (n:ChromeExtension) ON (n.lastupdated);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.id);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.instance_id);
 CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.lastupdated);
cartography/driftdetect/cli.py
CHANGED
@@ -30,7 +30,7 @@ class CLI:
                 'graph database and reports the deviations.'
             ),
             epilog='For more documentation please visit: '
-                   'https://github.
+                   'https://cartography-cncf.github.io/cartography/usage/drift-detect.html',
         )
         parser.add_argument(
             '-v',
cartography/graph/job.py
CHANGED
@@ -150,7 +150,14 @@ class GraphJob:
         )

         statements: List[GraphStatement] = [
-            GraphStatement(
+            GraphStatement(
+                query,
+                parameters=parameters,
+                iterative=True,
+                iterationsize=100,
+                parent_job_name=node_schema.label,
+                parent_job_sequence_num=idx,
+            ) for idx, query in enumerate(queries, start=1)
         ]

         return cls(

@@ -322,8 +322,12 @@ def cleanup_rpr(
    )

    statement = GraphStatement(
-        cleanup_rpr_query_template,
-
+        cleanup_rpr_query_template,
+        {'UPDATE_TAG': update_tag, 'AWS_ID': current_aws_id},
+        True,
+        1000,
+        parent_job_name=f"{relationship_name}:{node_label}",
+        parent_job_sequence_num=1,
    )
    statement.run(neo4j_session)
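Both hunks above thread parent_job_name and parent_job_sequence_num through GraphStatement so each statement can be attributed to its job and its position within it; enumerate(queries, start=1) makes the sequence 1-based. A toy illustration of that numbering (plain dicts stand in for GraphStatement):

queries = ['query-a', 'query-b']  # placeholder cleanup queries
statements = [
    {'query': q, 'parent_job_sequence_num': idx}
    for idx, q in enumerate(queries, start=1)
]
assert statements[0]['parent_job_sequence_num'] == 1
assert statements[1]['parent_job_sequence_num'] == 2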
cartography/intel/gcp/__init__.py
CHANGED

@@ -120,11 +120,11 @@ def _initialize_resources(credentials: GoogleCredentials) -> Resource:
     return Resources(
         crm_v1=_get_crm_resource_v1(credentials),
         crm_v2=_get_crm_resource_v2(credentials),
-        compute=_get_compute_resource(credentials),
-        storage=_get_storage_resource(credentials),
-        container=_get_container_resource(credentials),
         serviceusage=_get_serviceusage_resource(credentials),
-
+        compute=None,
+        container=None,
+        dns=None,
+        storage=None,
     )

@@ -159,12 +159,12 @@ def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set
         return set()


-def _sync_single_project(
+def _sync_single_project_compute(
     neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
     common_job_parameters: Dict,
 ) -> None:
     """
-    Handles graph sync for a single GCP project.
+    Handles graph sync for a single GCP project on Compute resources.
     :param neo4j_session: The Neo4j session
     :param resources: namedtuple of the GCP resource objects
     :param project_id: The project ID number to sync. See the `projectId` field in

@@ -175,14 +175,72 @@ def _sync_single_project(
     """
     # Determine the resources available on the project.
     enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    compute_cred = _get_compute_resource(get_gcp_credentials())
     if service_names.compute in enabled_services:
-        compute.sync(neo4j_session,
+        compute.sync(neo4j_session, compute_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_storage(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project on Storage resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    storage_cred = _get_storage_resource(get_gcp_credentials())
     if service_names.storage in enabled_services:
-        storage.sync_gcp_buckets(neo4j_session,
+        storage.sync_gcp_buckets(neo4j_session, storage_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_gke(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project GKE resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    container_cred = _get_container_resource(get_gcp_credentials())
     if service_names.gke in enabled_services:
-        gke.sync_gke_clusters(neo4j_session,
+        gke.sync_gke_clusters(neo4j_session, container_cred, project_id, gcp_update_tag, common_job_parameters)
+
+
+def _sync_single_project_dns(
+    neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> None:
+    """
+    Handles graph sync for a single GCP project DNS resources.
+    :param neo4j_session: The Neo4j session
+    :param resources: namedtuple of the GCP resource objects
+    :param project_id: The project ID number to sync. See the `projectId` field in
+    https://cloud.google.com/resource-manager/reference/rest/v1/projects
+    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
+    :param common_job_parameters: Other parameters sent to Neo4j
+    :return: Nothing
+    """
+    # Determine the resources available on the project.
+    enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
+    dns_cred = _get_dns_resource(get_gcp_credentials())
     if service_names.dns in enabled_services:
-        dns.sync(neo4j_session,
+        dns.sync(neo4j_session, dns_cred, project_id, gcp_update_tag, common_job_parameters)


 def _sync_multiple_projects(

@@ -203,26 +261,38 @@ def _sync_multiple_projects(
     """
     logger.info("Syncing %d GCP projects.", len(projects))
     crm.sync_gcp_projects(neo4j_session, projects, gcp_update_tag, common_job_parameters)
+    # Compute data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for Compute.", project_id)
+        _sync_single_project_compute(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)

+    # Storage data sync
     for project in projects:
         project_id = project['projectId']
-        logger.info("Syncing GCP project %s
-
+        logger.info("Syncing GCP project %s for Storage", project_id)
+        _sync_single_project_storage(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
+
+    # GKE data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for GKE", project_id)
+        _sync_single_project_gke(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
+
+    # DNS data sync
+    for project in projects:
+        project_id = project['projectId']
+        logger.info("Syncing GCP project %s for DNS", project_id)
+        _sync_single_project_dns(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)


 @timeit
-def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+def get_gcp_credentials() -> GoogleCredentials:
     """
-    Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
-    resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
-    context to all intel modules.
-    :param neo4j_session: The Neo4j session
-    :param config: A `cartography.config` object
-    :return: Nothing
+    Gets access tokens for GCP API access.
+    :param: None
+    :return: GoogleCredentials
     """
-    common_job_parameters = {
-        "UPDATE_TAG": config.update_tag,
-    }
     try:
         # Explicitly use Application Default Credentials.
         # See https://oauth2client.readthedocs.io/en/latest/source/

@@ -239,7 +309,24 @@ def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
             ),
             e,
         )
-        return
+    return credentials
+
+
+@timeit
+def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+    """
+    Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
+    resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
+    context to all intel modules.
+    :param neo4j_session: The Neo4j session
+    :param config: A `cartography.config` object
+    :return: Nothing
+    """
+    common_job_parameters = {
+        "UPDATE_TAG": config.update_tag,
+    }
+
+    credentials = get_gcp_credentials()

     resources = _initialize_resources(credentials)
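The restructuring above splits _sync_single_project into one function per service, each building its own API client from freshly obtained credentials, and _sync_multiple_projects now makes one pass over all projects per service instead of one pass per project. A toy mirror of that control flow (not cartography's API; the sync functions here are no-ops):

from typing import Callable, Dict, List


def sync_all(projects: List[str], service_syncs: Dict[str, Callable[[str], None]]) -> None:
    # One pass over every project per service, mirroring _sync_multiple_projects.
    for service, sync_fn in service_syncs.items():
        for project_id in projects:
            print(f"Syncing GCP project {project_id} for {service}")
            sync_fn(project_id)


sync_all(
    ['project-a', 'project-b'],
    {'Compute': lambda p: None, 'Storage': lambda p: None},
)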
cartography/intel/kandji/__init__.py
CHANGED

@@ -21,7 +21,7 @@ def start_kandji_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
     """
     if config.kandji_base_uri is None or config.kandji_token is None or config.kandji_tenant_id is None:
         logger.warning(
-            'Required parameter
+            'Required parameter missing. Skipping sync. '
             'See docs to configure.',
         )
         return
cartography/intel/semgrep/__init__.py
CHANGED

@@ -3,7 +3,9 @@ import logging
 import neo4j

 from cartography.config import Config
-from cartography.intel.semgrep.
+from cartography.intel.semgrep.dependencies import sync_dependencies
+from cartography.intel.semgrep.deployment import sync_deployment
+from cartography.intel.semgrep.findings import sync_findings
 from cartography.util import timeit

@@ -20,4 +22,9 @@ def start_semgrep_ingestion(
     if not config.semgrep_app_token:
         logger.info('Semgrep import is not configured - skipping this module. See docs to configure.')
         return
-
+
+    # sync_deployment must be called first since it populates common_job_parameters
+    # with the deployment ID and slug, which are required by the other sync functions
+    sync_deployment(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
+    sync_dependencies(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
+    sync_findings(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
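The ordering comment above is load-bearing: sync_dependencies (next file) returns early when common_job_parameters has no DEPLOYMENT_ID. A toy sketch of the contract, assuming sync_deployment stores the deployment ID in the shared dict as the comment says (the key name DEPLOYMENT_ID is confirmed by dependencies.py below; the value here is hypothetical):

from typing import Any, Dict


def fake_sync_deployment(common_job_parameters: Dict[str, Any]) -> None:
    # Stand-in for sync_deployment: record the deployment ID for later stages.
    common_job_parameters['DEPLOYMENT_ID'] = 'dep_123'  # hypothetical ID


params: Dict[str, Any] = {'UPDATE_TAG': 1700000000}
fake_sync_deployment(params)
assert params['DEPLOYMENT_ID'] == 'dep_123'  # dependencies/findings may now run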
cartography/intel/semgrep/dependencies.py
ADDED

@@ -0,0 +1,201 @@
+import logging
+from typing import Any
+from typing import Callable
+from typing import Dict
+from typing import List
+
+import neo4j
+import requests
+from requests.exceptions import HTTPError
+from requests.exceptions import ReadTimeout
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
+from cartography.stats import get_stats_client
+from cartography.util import merge_module_sync_metadata
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+stat_handler = get_stats_client(__name__)
+_PAGE_SIZE = 10000
+_TIMEOUT = (60, 60)
+_MAX_RETRIES = 3
+
+
+@timeit
+def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystems: List[str]) -> List[Dict[str, Any]]:
+    """
+    Gets all dependencies for the given ecosystems within the given Semgrep deployment ID.
+    param: semgrep_app_token: The Semgrep App token to use for authentication.
+    param: deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
+    param: ecosystems: One or more ecosystems to import dependencies from, e.g. "gomod" or "pypi".
+    The list of supported ecosystems is defined here:
+    https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion
+    """
+    all_deps = []
+    deps_url = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
+    has_more = True
+    page = 0
+    retries = 0
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {semgrep_app_token}",
+    }
+
+    request_data: dict[str, Any] = {
+        "pageSize": _PAGE_SIZE,
+        "dependencyFilter": {
+            "ecosystem": ecosystems,
+        },
+    }
+
+    logger.info(f"Retrieving Semgrep dependencies for deployment '{deployment_id}'.")
+    while has_more:
+        try:
+            response = requests.post(deps_url, json=request_data, headers=headers, timeout=_TIMEOUT)
+            response.raise_for_status()
+            data = response.json()
+        except (ReadTimeout, HTTPError):
+            logger.warning(f"Failed to retrieve Semgrep dependencies for page {page}. Retrying...")
+            retries += 1
+            if retries >= _MAX_RETRIES:
+                raise
+            continue
+        deps = data.get("dependencies", [])
+        has_more = data.get("hasMore", False)
+        logger.info(f"Processed page {page} of Semgrep dependencies.")
+        all_deps.extend(deps)
+        retries = 0
+        page += 1
+        request_data["cursor"] = data.get("cursor")
+
+    logger.info(f"Retrieved {len(all_deps)} Semgrep dependencies in {page} pages.")
+    return all_deps
+
+
+def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Transforms the raw dependencies response from Semgrep API into a list of dicts
+    that can be used to create the Dependency nodes.
+    """
+
+    """
+    sample raw_dep as of November 2024:
+    {
+        "repositoryId": "123456",
+        "definedAt": {
+            "path": "go.mod",
+            "startLine": "6",
+            "endLine": "6",
+            "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
+            "committedAt": "1970-01-01T00:00:00Z",
+            "startCol": "0",
+            "endCol": "0"
+        },
+        "transitivity": "DIRECT",
+        "package": {
+            "name": "github.com/foo/bar",
+            "versionSpecifier": "1.2.3"
+        },
+        "ecosystem": "gomod",
+        "licenses": [],
+        "pathToTransitivity": []
+    },
+    """
+    deps = []
+    for raw_dep in raw_deps:
+
+        # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
+        # but it's much simpler to just extract the URL from the definedAt field.
+        repo_url = raw_dep["definedAt"]["url"].split("/blob/", 1)[0]
+
+        name = raw_dep["package"]["name"]
+        version = raw_dep["package"]["versionSpecifier"]
+        id = f"{name}|{version}"
+
+        # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
+        # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
+        # If Semgrep eventually supports version specifiers, update this line accordingly.
+        specifier = f"=={version}"
+
+        deps.append({
+            # existing dependency properties:
+            "id": id,
+            "name": name,
+            "specifier": specifier,
+            "version": version,
+            "repo_url": repo_url,
+
+            # Semgrep-specific properties:
+            "ecosystem": raw_dep["ecosystem"],
+            "transitivity": raw_dep["transitivity"].lower(),
+            "url": raw_dep["definedAt"]["url"],
+        })
+
+    return deps
+
+
+@timeit
+def load_dependencies(
+    neo4j_session: neo4j.Session,
+    dependency_schema: Callable,
+    dependencies: List[Dict],
+    deployment_id: str,
+    update_tag: int,
+) -> None:
+    logger.info(f"Loading {len(dependencies)} {dependency_schema().label} objects into the graph.")
+    load(
+        neo4j_session,
+        dependency_schema(),
+        dependencies,
+        lastupdated=update_tag,
+        DEPLOYMENT_ID=deployment_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: Dict[str, Any],
+) -> None:
+    logger.info("Running Semgrep Go Library cleanup job.")
+    go_libraries_cleanup_job = GraphJob.from_node_schema(
+        SemgrepGoLibrarySchema(), common_job_parameters,
+    )
+    go_libraries_cleanup_job.run(neo4j_session)
+
+
+@timeit
+def sync_dependencies(
+    neo4j_session: neo4j.Session,
+    semgrep_app_token: str,
+    update_tag: int,
+    common_job_parameters: Dict[str, Any],
+) -> None:
+
+    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
+    if not deployment_id:
+        logger.warning(
+            "Missing Semgrep deployment ID, ensure that sync_deployment() has been called."
+            "Skipping Semgrep dependencies sync job.",
+        )
+        return
+
+    logger.info("Running Semgrep dependencies sync job.")
+
+    # fetch and load dependencies for the Go ecosystem
+    raw_go_deps = get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"])
+    go_deps = transform_dependencies(raw_go_deps)
+    load_dependencies(neo4j_session, SemgrepGoLibrarySchema, go_deps, deployment_id, update_tag)
+
+    cleanup(neo4j_session, common_job_parameters)
+
+    merge_module_sync_metadata(
+        neo4j_session=neo4j_session,
+        group_type='Semgrep',
+        group_id=deployment_id,
+        synced_type='SemgrepDependency',
+        update_tag=update_tag,
+        stat_handler=stat_handler,
+    )