cartography 0.109.0rc2__py3-none-any.whl → 0.110.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (46)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +14 -0
  3. cartography/config.py +4 -0
  4. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  5. cartography/intel/aws/cloudtrail_management_events.py +21 -0
  6. cartography/intel/aws/cognito.py +201 -0
  7. cartography/intel/aws/ecs.py +7 -1
  8. cartography/intel/aws/eventbridge.py +91 -0
  9. cartography/intel/aws/glue.py +64 -0
  10. cartography/intel/aws/kms.py +13 -1
  11. cartography/intel/aws/rds.py +105 -0
  12. cartography/intel/aws/resources.py +4 -0
  13. cartography/intel/aws/route53.py +3 -1
  14. cartography/intel/aws/s3.py +104 -0
  15. cartography/intel/github/__init__.py +21 -25
  16. cartography/intel/github/repos.py +4 -36
  17. cartography/intel/kubernetes/__init__.py +4 -0
  18. cartography/intel/kubernetes/rbac.py +464 -0
  19. cartography/intel/kubernetes/util.py +17 -0
  20. cartography/intel/trivy/__init__.py +73 -13
  21. cartography/intel/trivy/scanner.py +115 -92
  22. cartography/models/aws/cognito/__init__.py +0 -0
  23. cartography/models/aws/cognito/identity_pool.py +70 -0
  24. cartography/models/aws/cognito/user_pool.py +47 -0
  25. cartography/models/aws/ec2/security_groups.py +1 -1
  26. cartography/models/aws/ecs/services.py +17 -0
  27. cartography/models/aws/ecs/tasks.py +1 -0
  28. cartography/models/aws/eventbridge/__init__.py +0 -0
  29. cartography/models/aws/eventbridge/rule.py +77 -0
  30. cartography/models/aws/glue/job.py +69 -0
  31. cartography/models/aws/rds/event_subscription.py +146 -0
  32. cartography/models/aws/route53/dnsrecord.py +21 -0
  33. cartography/models/github/dependencies.py +1 -2
  34. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  35. cartography/models/kubernetes/clusterroles.py +52 -0
  36. cartography/models/kubernetes/rolebindings.py +119 -0
  37. cartography/models/kubernetes/roles.py +76 -0
  38. cartography/models/kubernetes/serviceaccounts.py +77 -0
  39. cartography/models/snipeit/asset.py +1 -0
  40. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/METADATA +3 -3
  41. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/RECORD +46 -31
  42. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  43. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/WHEEL +0 -0
  44. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/entry_points.txt +0 -0
  45. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/licenses/LICENSE +0 -0
  46. {cartography-0.109.0rc2.dist-info → cartography-0.110.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,464 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+ from kubernetes.client import V1ClusterRole
8
+ from kubernetes.client import V1ClusterRoleBinding
9
+ from kubernetes.client import V1Role
10
+ from kubernetes.client import V1RoleBinding
11
+ from kubernetes.client import V1ServiceAccount
12
+
13
+ from cartography.client.core.tx import load
14
+ from cartography.graph.job import GraphJob
15
+ from cartography.intel.kubernetes.util import get_epoch
16
+ from cartography.intel.kubernetes.util import k8s_paginate
17
+ from cartography.intel.kubernetes.util import K8sClient
18
+ from cartography.models.kubernetes.clusterrolebindings import (
19
+ KubernetesClusterRoleBindingSchema,
20
+ )
21
+ from cartography.models.kubernetes.clusterroles import KubernetesClusterRoleSchema
22
+ from cartography.models.kubernetes.rolebindings import KubernetesRoleBindingSchema
23
+ from cartography.models.kubernetes.roles import KubernetesRoleSchema
24
+ from cartography.models.kubernetes.serviceaccounts import KubernetesServiceAccountSchema
25
+ from cartography.util import timeit
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
@timeit
def get_service_accounts(k8s_client: K8sClient) -> List[V1ServiceAccount]:
    """
    Fetch ServiceAccounts across all namespaces.

    Hands the core API's list_service_account_for_all_namespaces call to
    k8s_paginate and returns whatever that helper produces.
    """
    list_call = k8s_client.core.list_service_account_for_all_namespaces
    return k8s_paginate(list_call)
34
+
35
+
36
@timeit
def get_roles(k8s_client: K8sClient) -> List[V1Role]:
    """
    Fetch Roles across all namespaces.

    Hands the RBAC API's list_role_for_all_namespaces call to k8s_paginate
    and returns whatever that helper produces.
    """
    list_call = k8s_client.rbac.list_role_for_all_namespaces
    return k8s_paginate(list_call)
40
+
41
+
42
@timeit
def get_role_bindings(k8s_client: K8sClient) -> List[V1RoleBinding]:
    """
    Fetch RoleBindings across all namespaces.

    Hands the RBAC API's list_role_binding_for_all_namespaces call to
    k8s_paginate and returns whatever that helper produces.
    """
    list_call = k8s_client.rbac.list_role_binding_for_all_namespaces
    return k8s_paginate(list_call)
46
+
47
+
48
@timeit
def get_cluster_roles(k8s_client: K8sClient) -> List[V1ClusterRole]:
    """
    Fetch the cluster's ClusterRoles.

    Hands the RBAC API's list_cluster_role call to k8s_paginate and returns
    whatever that helper produces.
    """
    list_call = k8s_client.rbac.list_cluster_role
    return k8s_paginate(list_call)
52
+
53
+
54
@timeit
def get_cluster_role_bindings(k8s_client: K8sClient) -> List[V1ClusterRoleBinding]:
    """
    Fetch the cluster's ClusterRoleBindings.

    Hands the RBAC API's list_cluster_role_binding call to k8s_paginate and
    returns whatever that helper produces.
    """
    list_call = k8s_client.rbac.list_cluster_role_binding
    return k8s_paginate(list_call)
58
+
59
+
60
def transform_service_accounts(
    service_accounts: List[V1ServiceAccount], cluster_name: str
) -> List[Dict[str, Any]]:
    """
    Convert Kubernetes ServiceAccounts into plain dictionaries.

    Every id has the form "<cluster_name>/<namespace>/<name>", so accounts
    with the same namespace and name in different clusters do not collide.
    The creation timestamp is passed through get_epoch; the remaining fields
    are copied from the object's metadata.
    """
    return [
        {
            "id": f"{cluster_name}/{meta.namespace}/{meta.name}",
            "name": meta.name,
            "namespace": meta.namespace,
            "uid": meta.uid,
            "creation_timestamp": get_epoch(meta.creation_timestamp),
            "resource_version": meta.resource_version,
        }
        for meta in (account.metadata for account in service_accounts)
    ]
80
+
81
+
82
def transform_roles(roles: List[V1Role], cluster_name: str) -> List[Dict[str, Any]]:
    """
    Convert Kubernetes Roles into plain dictionaries.

    The api groups, resources and verbs of all of a Role's rules are merged
    into three de-duplicated, sorted lists. An empty-string api group is
    recorded as "core". A Role whose rules are missing or empty yields three
    empty lists.
    """
    transformed: List[Dict[str, Any]] = []
    for role in roles:
        api_groups: set[str] = set()
        resources: set[str] = set()
        verbs: set[str] = set()

        # rules and each rule's fields may be None, hence the `or []` fallbacks.
        for rule in role.rules or []:
            for group in rule.api_groups or []:
                api_groups.add(group if group != "" else "core")
            resources.update(rule.resources or [])
            verbs.update(rule.verbs or [])

        meta = role.metadata
        transformed.append(
            {
                "id": f"{cluster_name}/{meta.namespace}/{meta.name}",
                "name": meta.name,
                "namespace": meta.namespace,
                "uid": meta.uid,
                "creation_timestamp": get_epoch(meta.creation_timestamp),
                "resource_version": meta.resource_version,
                # sorted() gives a stable order and turns the sets into lists,
                # which is the form handed on to neo4j.
                "api_groups": sorted(api_groups),
                "resources": sorted(resources),
                "verbs": sorted(verbs),
            }
        )
    return transformed
121
+
122
+
123
def transform_role_bindings(
    role_bindings: List[V1RoleBinding], cluster_name: str
) -> List[Dict[str, Any]]:
    """
    Convert Kubernetes RoleBindings into plain dictionaries.

    One dictionary is produced per RoleBinding that has at least one subject;
    bindings with no subjects are dropped. Subjects are split by kind
    (ServiceAccount, User, Group) into lists of cluster-prefixed ids; subjects
    of any other kind are ignored. role_id is always built from the binding's
    own namespace and the referenced role's name.
    """
    transformed: List[Dict[str, Any]] = []
    for binding in role_bindings:
        # A binding without subjects produces no node.
        if not binding.subjects:
            continue

        # Group subjects by kind in one pass, keeping their original order.
        subjects_by_kind: Dict[str, List[Any]] = {
            "ServiceAccount": [],
            "User": [],
            "Group": [],
        }
        for subject in binding.subjects:
            if subject.kind in subjects_by_kind:
                subjects_by_kind[subject.kind].append(subject)

        meta = binding.metadata
        role_ref = binding.role_ref
        transformed.append(
            {
                "id": f"{cluster_name}/{meta.namespace}/{meta.name}",
                "name": meta.name,
                "namespace": meta.namespace,
                "uid": meta.uid,
                "creation_timestamp": get_epoch(meta.creation_timestamp),
                "resource_version": meta.resource_version,
                "role_name": role_ref.name,
                "role_kind": role_ref.kind,
                "service_account_ids": [
                    f"{cluster_name}/{subject.namespace}/{subject.name}"
                    for subject in subjects_by_kind["ServiceAccount"]
                ],
                "user_ids": [
                    f"{cluster_name}/{subject.name}"
                    for subject in subjects_by_kind["User"]
                ],
                "group_ids": [
                    f"{cluster_name}/{subject.name}"
                    for subject in subjects_by_kind["Group"]
                ],
                "role_id": f"{cluster_name}/{meta.namespace}/{role_ref.name}",
            }
        )
    return transformed
171
+
172
+
173
def transform_cluster_roles(
    cluster_roles: List[V1ClusterRole], cluster_name: str
) -> List[Dict[str, Any]]:
    """
    Convert Kubernetes ClusterRoles into plain dictionaries.

    The api groups, resources and verbs of all of a ClusterRole's rules are
    merged into three de-duplicated, sorted lists. An empty-string api group
    is recorded as "core". Ids have the form "<cluster_name>/<name>".
    """
    transformed: List[Dict[str, Any]] = []
    for cluster_role in cluster_roles:
        api_groups: set[str] = set()
        resources: set[str] = set()
        verbs: set[str] = set()

        # rules and each rule's fields may be None, hence the `or []` fallbacks.
        for rule in cluster_role.rules or []:
            for group in rule.api_groups or []:
                api_groups.add(group if group != "" else "core")
            resources.update(rule.resources or [])
            verbs.update(rule.verbs or [])

        meta = cluster_role.metadata
        transformed.append(
            {
                "id": f"{cluster_name}/{meta.name}",
                "name": meta.name,
                "uid": meta.uid,
                "creation_timestamp": get_epoch(meta.creation_timestamp),
                "resource_version": meta.resource_version,
                # sorted() gives a stable order and turns the sets into lists,
                # which is the form handed on to neo4j.
                "api_groups": sorted(api_groups),
                "resources": sorted(resources),
                "verbs": sorted(verbs),
            }
        )
    return transformed
215
+
216
+
217
def transform_cluster_role_bindings(
    cluster_role_bindings: List[V1ClusterRoleBinding], cluster_name: str
) -> List[Dict[str, Any]]:
    """
    Convert Kubernetes ClusterRoleBindings into plain dictionaries.

    One dictionary is produced per ClusterRoleBinding that has at least one
    subject; bindings with no subjects are dropped. Subjects are split by kind
    (ServiceAccount, User, Group) into lists of cluster-prefixed ids; subjects
    of any other kind are ignored.
    """
    transformed: List[Dict[str, Any]] = []
    for binding in cluster_role_bindings:
        # A binding without subjects produces no node.
        if not binding.subjects:
            continue

        # Group subjects by kind in one pass, keeping their original order.
        subjects_by_kind: Dict[str, List[Any]] = {
            "ServiceAccount": [],
            "User": [],
            "Group": [],
        }
        for subject in binding.subjects:
            if subject.kind in subjects_by_kind:
                subjects_by_kind[subject.kind].append(subject)

        meta = binding.metadata
        role_ref = binding.role_ref
        transformed.append(
            {
                "id": f"{cluster_name}/{meta.name}",
                "name": meta.name,
                "uid": meta.uid,
                "creation_timestamp": get_epoch(meta.creation_timestamp),
                "resource_version": meta.resource_version,
                "role_name": role_ref.name,
                "role_kind": role_ref.kind,
                "service_account_ids": [
                    f"{cluster_name}/{subject.namespace}/{subject.name}"
                    for subject in subjects_by_kind["ServiceAccount"]
                ],
                "user_ids": [
                    f"{cluster_name}/{subject.name}"
                    for subject in subjects_by_kind["User"]
                ],
                "group_ids": [
                    f"{cluster_name}/{subject.name}"
                    for subject in subjects_by_kind["Group"]
                ],
                "role_id": f"{cluster_name}/{role_ref.name}",
            }
        )
    return transformed
264
+
265
+
266
@timeit
def load_service_accounts(
    session: neo4j.Session,
    service_accounts: List[Dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """
    Write transformed ServiceAccount dictionaries to the graph.

    Logs the record count, then calls load with
    KubernetesServiceAccountSchema, passing update_tag as lastupdated and the
    cluster identifiers as CLUSTER_ID and CLUSTER_NAME.
    """
    logger.info(f"Loading {len(service_accounts)} KubernetesServiceAccounts")
    schema = KubernetesServiceAccountSchema()
    load_kwargs: Dict[str, Any] = {
        "lastupdated": update_tag,
        "CLUSTER_ID": cluster_id,
        "CLUSTER_NAME": cluster_name,
    }
    load(session, schema, service_accounts, **load_kwargs)
283
+
284
+
285
@timeit
def load_roles(
    session: neo4j.Session,
    roles: List[Dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """
    Write transformed Role dictionaries to the graph.

    Logs the record count, then calls load with KubernetesRoleSchema, passing
    update_tag as lastupdated and the cluster identifiers as CLUSTER_ID and
    CLUSTER_NAME.
    """
    logger.info(f"Loading {len(roles)} KubernetesRoles")
    schema = KubernetesRoleSchema()
    load_kwargs: Dict[str, Any] = {
        "lastupdated": update_tag,
        "CLUSTER_ID": cluster_id,
        "CLUSTER_NAME": cluster_name,
    }
    load(session, schema, roles, **load_kwargs)
302
+
303
+
304
@timeit
def load_role_bindings(
    session: neo4j.Session,
    role_bindings: List[Dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """
    Write transformed RoleBinding dictionaries to the graph.

    Logs the record count, then calls load with KubernetesRoleBindingSchema,
    passing update_tag as lastupdated and the cluster identifiers as
    CLUSTER_ID and CLUSTER_NAME.
    """
    logger.info(f"Loading {len(role_bindings)} KubernetesRoleBindings")
    schema = KubernetesRoleBindingSchema()
    load_kwargs: Dict[str, Any] = {
        "lastupdated": update_tag,
        "CLUSTER_ID": cluster_id,
        "CLUSTER_NAME": cluster_name,
    }
    load(session, schema, role_bindings, **load_kwargs)
321
+
322
+
323
@timeit
def load_cluster_roles(
    session: neo4j.Session,
    cluster_roles: List[Dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """
    Write transformed ClusterRole dictionaries to the graph.

    Logs the record count, then calls load with KubernetesClusterRoleSchema,
    passing update_tag as lastupdated and the cluster identifiers as
    CLUSTER_ID and CLUSTER_NAME.
    """
    logger.info(f"Loading {len(cluster_roles)} KubernetesClusterRoles")
    schema = KubernetesClusterRoleSchema()
    load_kwargs: Dict[str, Any] = {
        "lastupdated": update_tag,
        "CLUSTER_ID": cluster_id,
        "CLUSTER_NAME": cluster_name,
    }
    load(session, schema, cluster_roles, **load_kwargs)
340
+
341
+
342
@timeit
def load_cluster_role_bindings(
    session: neo4j.Session,
    cluster_role_bindings: List[Dict[str, Any]],
    update_tag: int,
    cluster_id: str,
    cluster_name: str,
) -> None:
    """
    Write transformed ClusterRoleBinding dictionaries to the graph.

    Logs the record count, then calls load with
    KubernetesClusterRoleBindingSchema, passing update_tag as lastupdated and
    the cluster identifiers as CLUSTER_ID and CLUSTER_NAME.
    """
    logger.info(f"Loading {len(cluster_role_bindings)} KubernetesClusterRoleBindings")
    schema = KubernetesClusterRoleBindingSchema()
    load_kwargs: Dict[str, Any] = {
        "lastupdated": update_tag,
        "CLUSTER_ID": cluster_id,
        "CLUSTER_NAME": cluster_name,
    }
    load(session, schema, cluster_role_bindings, **load_kwargs)
359
+
360
+
361
@timeit
def cleanup(session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
    """
    Run the schema-derived cleanup job for each Kubernetes RBAC node type.

    For every RBAC schema, in the fixed order listed below, a GraphJob is
    built from the schema and common_job_parameters and run on the session.
    """
    logger.debug("Running cleanup job for Kubernetes RBAC resources")
    schema_classes = (
        KubernetesServiceAccountSchema,
        KubernetesRoleSchema,
        KubernetesRoleBindingSchema,
        KubernetesClusterRoleSchema,
        KubernetesClusterRoleBindingSchema,
    )
    for schema_class in schema_classes:
        job = GraphJob.from_node_schema(schema_class(), common_job_parameters)
        job.run(session)
388
+
389
+
390
@timeit
def sync_kubernetes_rbac(
    session: neo4j.Session,
    client: K8sClient,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync ServiceAccounts, Roles, RoleBindings, ClusterRoles and
    ClusterRoleBindings for one cluster.

    All five resource kinds are fetched first, then all are transformed, then
    all are loaded (roles and cluster roles before either kind of binding),
    and finally cleanup is run. The cluster id is read from
    common_job_parameters["CLUSTER_ID"]; the cluster name is client.name.
    """
    logger.info(f"Syncing Kubernetes RBAC resources for cluster {client.name}")

    # Fetch: namespace-scoped kinds first, then cluster-scoped kinds.
    raw_service_accounts = get_service_accounts(client)
    raw_roles = get_roles(client)
    raw_role_bindings = get_role_bindings(client)
    raw_cluster_roles = get_cluster_roles(client)
    raw_cluster_role_bindings = get_cluster_role_bindings(client)

    # Transform, in the same order as the fetches.
    service_account_records = transform_service_accounts(
        raw_service_accounts, client.name
    )
    role_records = transform_roles(raw_roles, client.name)
    role_binding_records = transform_role_bindings(raw_role_bindings, client.name)
    cluster_role_records = transform_cluster_roles(raw_cluster_roles, client.name)
    cluster_role_binding_records = transform_cluster_role_bindings(
        raw_cluster_role_bindings, client.name
    )

    # Keyword arguments common to every load_* call below.
    shared_load_args: Dict[str, Any] = {
        "session": session,
        "update_tag": update_tag,
        "cluster_id": common_job_parameters["CLUSTER_ID"],
        "cluster_name": client.name,
    }

    load_service_accounts(
        service_accounts=service_account_records, **shared_load_args
    )
    load_roles(roles=role_records, **shared_load_args)
    load_cluster_roles(cluster_roles=cluster_role_records, **shared_load_args)
    load_role_bindings(role_bindings=role_binding_records, **shared_load_args)
    load_cluster_role_bindings(
        cluster_role_bindings=cluster_role_binding_records, **shared_load_args
    )

    cleanup(session, common_job_parameters)
@@ -7,6 +7,7 @@ from kubernetes import config
7
7
  from kubernetes.client import ApiClient
8
8
  from kubernetes.client import CoreV1Api
9
9
  from kubernetes.client import NetworkingV1Api
10
+ from kubernetes.client import RbacAuthorizationV1Api
10
11
  from kubernetes.client import VersionApi
11
12
  from kubernetes.client.exceptions import ApiException
12
13
 
@@ -62,6 +63,21 @@ class K8VersionApiClient(VersionApi):
62
63
  super().__init__(api_client=api_client)
63
64
 
64
65
 
66
class K8RbacApiClient(RbacAuthorizationV1Api):
    """RbacAuthorizationV1Api client that remembers the name it was built for."""

    def __init__(
        self,
        name: str,
        config_file: str,
        api_client: ApiClient | None = None,
    ) -> None:
        """
        Args:
            name: Stored on the instance as ``self.name``; also used as the
                kubeconfig context when a client has to be created.
            config_file: Kubeconfig path used when a client has to be created.
            api_client: Existing client to reuse. When omitted (or falsy), a
                new one is built with config.new_client_from_config.
        """
        self.name = name
        # Only build a client from the kubeconfig when none was supplied.
        resolved_client = api_client or config.new_client_from_config(
            context=name,
            config_file=config_file,
        )
        super().__init__(api_client=resolved_client)
79
+
80
+
65
81
  class K8sClient:
66
82
  def __init__(
67
83
  self,
@@ -75,6 +91,7 @@ class K8sClient:
75
91
  self.core = K8CoreApiClient(self.name, self.config_file)
76
92
  self.networking = K8NetworkingApiClient(self.name, self.config_file)
77
93
  self.version = K8VersionApiClient(self.name, self.config_file)
94
+ self.rbac = K8RbacApiClient(self.name, self.config_file)
78
95
 
79
96
 
80
97
  def get_k8s_clients(kubeconfig: str) -> list[K8sClient]:
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import logging
2
3
  from typing import Any
3
4
 
@@ -8,7 +9,9 @@ from cartography.client.aws import list_accounts
8
9
  from cartography.client.aws.ecr import get_ecr_images
9
10
  from cartography.config import Config
10
11
  from cartography.intel.trivy.scanner import cleanup
12
+ from cartography.intel.trivy.scanner import get_json_files_in_dir
11
13
  from cartography.intel.trivy.scanner import get_json_files_in_s3
14
+ from cartography.intel.trivy.scanner import sync_single_image_from_file
12
15
  from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
16
  from cartography.stats import get_stats_client
14
17
  from cartography.util import timeit
@@ -39,13 +42,13 @@ def get_scan_targets(
39
42
 
40
43
 
41
44
  def _get_intersection(
42
- images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
45
+ image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
43
46
  ) -> list[tuple[str, str]]:
44
47
  """
45
48
  Get the intersection of ECR images in the graph and S3 scan results.
46
49
 
47
50
  Args:
48
- images_in_graph: Set of ECR images in the graph
51
+ image_uris: Set of ECR images in the graph
49
52
  json_files: Set of S3 object keys for JSON files
50
53
  trivy_s3_prefix: S3 prefix path containing scan results
51
54
 
@@ -60,7 +63,7 @@ def _get_intersection(
60
63
  # Remove the prefix and the .json suffix
61
64
  image_uri = s3_object_key[prefix_len:-5]
62
65
 
63
- if image_uri in images_in_graph:
66
+ if image_uri in image_uris:
64
67
  intersection.append((image_uri, s3_object_key))
65
68
 
66
69
  return intersection
@@ -90,12 +93,12 @@ def sync_trivy_aws_ecr_from_s3(
90
93
  f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
91
94
  )
92
95
 
93
- images_in_graph: set[str] = get_scan_targets(neo4j_session)
96
+ image_uris: set[str] = get_scan_targets(neo4j_session)
94
97
  json_files: set[str] = get_json_files_in_s3(
95
98
  trivy_s3_bucket, trivy_s3_prefix, boto3_session
96
99
  )
97
100
  intersection: list[tuple[str, str]] = _get_intersection(
98
- images_in_graph, json_files, trivy_s3_prefix
101
+ image_uris, json_files, trivy_s3_prefix
99
102
  )
100
103
 
101
104
  if len(intersection) == 0:
@@ -124,21 +127,79 @@ def sync_trivy_aws_ecr_from_s3(
124
127
  cleanup(neo4j_session, common_job_parameters)
125
128
 
126
129
 
130
@timeit
def sync_trivy_aws_ecr_from_dir(
    neo4j_session: Session,
    results_dir: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """Sync Trivy scan results from local files for AWS ECR images.

    Each JSON file returned by get_json_files_in_dir is opened to read its
    "ArtifactName". Files naming an image that is not among the scan targets
    from get_scan_targets are skipped, as are files that cannot be decoded or
    whose top-level JSON value is not an object. Every other file is handed
    to sync_single_image_from_file. cleanup runs once after all files.

    Args:
        neo4j_session: Neo4j session for database operations
        results_dir: Directory searched for Trivy JSON result files
        update_tag: Update tag passed to sync_single_image_from_file
        common_job_parameters: Parameters passed to cleanup

    Raises:
        ValueError: If no JSON result files are found in results_dir.
    """
    logger.info(f"Using Trivy scan results from {results_dir}")

    image_uris: set[str] = get_scan_targets(neo4j_session)
    json_files: set[str] = get_json_files_in_dir(results_dir)

    if not json_files:
        logger.error(
            f"Trivy sync was configured, but no json files were found in {results_dir}."
        )
        raise ValueError("No Trivy json results found on disk")

    logger.info(f"Processing {len(json_files)} local Trivy result files")

    for file_path in json_files:
        # Peek at the artifact name before doing the full sync. Keep the try
        # body limited to the read/parse step that can actually fail.
        try:
            with open(file_path, encoding="utf-8") as f:
                trivy_data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            logger.error(f"Failed to read artifact name from {file_path}: {e}")
            continue

        # Valid JSON that is not an object (e.g. a list or null) has no
        # ArtifactName; skip it instead of letting .get() raise
        # AttributeError and abort the remaining files.
        if not isinstance(trivy_data, dict):
            logger.error(
                f"Failed to read artifact name from {file_path}: "
                "top-level JSON value is not an object"
            )
            continue

        artifact_name = trivy_data.get("ArtifactName")
        if artifact_name and artifact_name not in image_uris:
            logger.debug(
                f"Skipping results for {artifact_name} since the image is not present in the graph"
            )
            continue

        # Either the image is in the graph, or the file carries no
        # ArtifactName; the latter is not filtered here and is left to
        # sync_single_image_from_file.
        sync_single_image_from_file(
            neo4j_session,
            file_path,
            update_tag,
        )

    cleanup(neo4j_session, common_job_parameters)
177
+
178
+
127
179
  @timeit
128
180
  def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
129
- """
130
- Start Trivy scan ingestion from S3.
181
+ """Start Trivy scan ingestion from S3 or local files.
131
182
 
132
183
  Args:
133
184
  neo4j_session: Neo4j session for database operations
134
- config: Configuration object containing S3 settings
185
+ config: Configuration object containing S3 or directory paths
135
186
  """
136
- # Check if S3 configuration is provided
137
- if not config.trivy_s3_bucket:
138
- logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
187
+ if not config.trivy_s3_bucket and not config.trivy_results_dir:
188
+ logger.info("Trivy configuration not provided. Skipping Trivy ingestion.")
189
+ return
190
+
191
+ if config.trivy_results_dir:
192
+ common_job_parameters = {
193
+ "UPDATE_TAG": config.update_tag,
194
+ }
195
+ sync_trivy_aws_ecr_from_dir(
196
+ neo4j_session,
197
+ config.trivy_results_dir,
198
+ config.update_tag,
199
+ common_job_parameters,
200
+ )
139
201
  return
140
202
 
141
- # Default to empty string if s3 prefix is not provided
142
203
  if config.trivy_s3_prefix is None:
143
204
  config.trivy_s3_prefix = ""
144
205
 
@@ -146,7 +207,6 @@ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
146
207
  "UPDATE_TAG": config.update_tag,
147
208
  }
148
209
 
149
- # Get ECR images to scan
150
210
  boto3_session = boto3.Session()
151
211
 
152
212
  sync_trivy_aws_ecr_from_s3(