cartography 0.108.0rc1__py3-none-any.whl → 0.109.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic; see the package's registry page for more details.

Files changed (81)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +14 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -17
  5. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  6. cartography/intel/aws/cloudtrail.py +17 -4
  7. cartography/intel/aws/cloudtrail_management_events.py +614 -16
  8. cartography/intel/aws/cloudwatch.py +73 -4
  9. cartography/intel/aws/ec2/subnets.py +37 -63
  10. cartography/intel/aws/ecr.py +55 -80
  11. cartography/intel/aws/elasticache.py +102 -79
  12. cartography/intel/aws/eventbridge.py +91 -0
  13. cartography/intel/aws/glue.py +117 -0
  14. cartography/intel/aws/identitycenter.py +71 -23
  15. cartography/intel/aws/kms.py +160 -200
  16. cartography/intel/aws/lambda_function.py +206 -190
  17. cartography/intel/aws/rds.py +243 -458
  18. cartography/intel/aws/resourcegroupstaggingapi.py +77 -18
  19. cartography/intel/aws/resources.py +4 -0
  20. cartography/intel/aws/route53.py +334 -332
  21. cartography/intel/aws/secretsmanager.py +62 -44
  22. cartography/intel/entra/groups.py +29 -1
  23. cartography/intel/gcp/__init__.py +10 -0
  24. cartography/intel/gcp/compute.py +19 -42
  25. cartography/intel/trivy/__init__.py +73 -13
  26. cartography/intel/trivy/scanner.py +115 -92
  27. cartography/models/aws/cloudtrail/management_events.py +95 -6
  28. cartography/models/aws/cloudtrail/trail.py +21 -0
  29. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  30. cartography/models/aws/ec2/subnets.py +65 -0
  31. cartography/models/aws/ecr/__init__.py +0 -0
  32. cartography/models/aws/ecr/image.py +41 -0
  33. cartography/models/aws/ecr/repository.py +72 -0
  34. cartography/models/aws/ecr/repository_image.py +95 -0
  35. cartography/models/aws/elasticache/__init__.py +0 -0
  36. cartography/models/aws/elasticache/cluster.py +65 -0
  37. cartography/models/aws/elasticache/topic.py +67 -0
  38. cartography/models/aws/eventbridge/__init__.py +0 -0
  39. cartography/models/aws/eventbridge/rule.py +77 -0
  40. cartography/models/aws/glue/__init__.py +0 -0
  41. cartography/models/aws/glue/connection.py +51 -0
  42. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  43. cartography/models/aws/kms/__init__.py +0 -0
  44. cartography/models/aws/kms/aliases.py +86 -0
  45. cartography/models/aws/kms/grants.py +65 -0
  46. cartography/models/aws/kms/keys.py +88 -0
  47. cartography/models/aws/lambda_function/__init__.py +0 -0
  48. cartography/models/aws/lambda_function/alias.py +74 -0
  49. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  50. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  51. cartography/models/aws/lambda_function/layer.py +72 -0
  52. cartography/models/aws/rds/__init__.py +0 -0
  53. cartography/models/aws/rds/cluster.py +89 -0
  54. cartography/models/aws/rds/instance.py +154 -0
  55. cartography/models/aws/rds/snapshot.py +108 -0
  56. cartography/models/aws/rds/subnet_group.py +101 -0
  57. cartography/models/aws/route53/__init__.py +0 -0
  58. cartography/models/aws/route53/dnsrecord.py +214 -0
  59. cartography/models/aws/route53/nameserver.py +63 -0
  60. cartography/models/aws/route53/subzone.py +40 -0
  61. cartography/models/aws/route53/zone.py +47 -0
  62. cartography/models/aws/secretsmanager/secret.py +106 -0
  63. cartography/models/entra/group.py +26 -0
  64. cartography/models/entra/user.py +6 -0
  65. cartography/models/gcp/compute/__init__.py +0 -0
  66. cartography/models/gcp/compute/vpc.py +50 -0
  67. cartography/util.py +8 -1
  68. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/METADATA +2 -2
  69. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/RECORD +73 -44
  70. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  71. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  72. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  73. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  74. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  75. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  76. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  77. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  78. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/WHEEL +0 -0
  79. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/entry_points.txt +0 -0
  80. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/licenses/LICENSE +0 -0
  81. {cartography-0.108.0rc1.dist-info → cartography-0.109.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ import neo4j
7
7
 
8
8
  from cartography.client.core.tx import load
9
9
  from cartography.graph.job import GraphJob
10
+ from cartography.models.aws.secretsmanager.secret import SecretsManagerSecretSchema
10
11
  from cartography.models.aws.secretsmanager.secret_version import (
11
12
  SecretsManagerSecretVersionSchema,
12
13
  )
@@ -14,7 +15,6 @@ from cartography.stats import get_stats_client
14
15
  from cartography.util import aws_handle_regions
15
16
  from cartography.util import dict_date_to_epoch
16
17
  from cartography.util import merge_module_sync_metadata
17
- from cartography.util import run_cleanup_job
18
18
  from cartography.util import timeit
19
19
 
20
20
  logger = logging.getLogger(__name__)
@@ -32,6 +32,37 @@ def get_secret_list(boto3_session: boto3.session.Session, region: str) -> List[D
32
32
  return secrets
33
33
 
34
34
 
35
+ def transform_secrets(
36
+ secrets: List[Dict],
37
+ ) -> List[Dict]:
38
+ """
39
+ Transform AWS Secrets Manager Secrets to match the data model.
40
+ """
41
+ transformed_data = []
42
+ for secret in secrets:
43
+ # Start with a copy of the original secret data
44
+ transformed = dict(secret)
45
+
46
+ # Convert date fields to epoch timestamps
47
+ transformed["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
48
+ transformed["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
49
+ transformed["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
50
+ transformed["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
51
+ transformed["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
52
+
53
+ # Flatten nested RotationRules.AutomaticallyAfterDays property
54
+ if "RotationRules" in secret and secret["RotationRules"]:
55
+ rotation_rules = secret["RotationRules"]
56
+ if "AutomaticallyAfterDays" in rotation_rules:
57
+ transformed["RotationRulesAutomaticallyAfterDays"] = rotation_rules[
58
+ "AutomaticallyAfterDays"
59
+ ]
60
+
61
+ transformed_data.append(transformed)
62
+
63
+ return transformed_data
64
+
65
+
35
66
  @timeit
36
67
  def load_secrets(
37
68
  neo4j_session: neo4j.Session,
@@ -40,48 +71,33 @@ def load_secrets(
40
71
  current_aws_account_id: str,
41
72
  aws_update_tag: int,
42
73
  ) -> None:
43
- ingest_secrets = """
44
- UNWIND $Secrets as secret
45
- MERGE (s:SecretsManagerSecret{id: secret.ARN})
46
- ON CREATE SET s.firstseen = timestamp()
47
- SET s.name = secret.Name, s.arn = secret.ARN, s.description = secret.Description,
48
- s.kms_key_id = secret.KmsKeyId, s.rotation_enabled = secret.RotationEnabled,
49
- s.rotation_lambda_arn = secret.RotationLambdaARN,
50
- s.rotation_rules_automatically_after_days = secret.RotationRules.AutomaticallyAfterDays,
51
- s.last_rotated_date = secret.LastRotatedDate, s.last_changed_date = secret.LastChangedDate,
52
- s.last_accessed_date = secret.LastAccessedDate, s.deleted_date = secret.DeletedDate,
53
- s.owning_service = secret.OwningService, s.created_date = secret.CreatedDate,
54
- s.primary_region = secret.PrimaryRegion, s.region = $Region,
55
- s.lastupdated = $aws_update_tag
56
- WITH s
57
- MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
58
- MERGE (owner)-[r:RESOURCE]->(s)
59
- ON CREATE SET r.firstseen = timestamp()
60
- SET r.lastupdated = $aws_update_tag
61
- """
62
- for secret in data:
63
- secret["LastRotatedDate"] = dict_date_to_epoch(secret, "LastRotatedDate")
64
- secret["LastChangedDate"] = dict_date_to_epoch(secret, "LastChangedDate")
65
- secret["LastAccessedDate"] = dict_date_to_epoch(secret, "LastAccessedDate")
66
- secret["DeletedDate"] = dict_date_to_epoch(secret, "DeletedDate")
67
- secret["CreatedDate"] = dict_date_to_epoch(secret, "CreatedDate")
68
-
69
- neo4j_session.run(
70
- ingest_secrets,
71
- Secrets=data,
74
+ """
75
+ Load transformed secrets into Neo4j using the data model.
76
+ Expects data to already be transformed by transform_secrets().
77
+ """
78
+ logger.info(f"Loading {len(data)} Secrets for region {region} into graph.")
79
+
80
+ # Load using the schema-based approach
81
+ load(
82
+ neo4j_session,
83
+ SecretsManagerSecretSchema(),
84
+ data,
85
+ lastupdated=aws_update_tag,
72
86
  Region=region,
73
- AWS_ACCOUNT_ID=current_aws_account_id,
74
- aws_update_tag=aws_update_tag,
87
+ AWS_ID=current_aws_account_id,
75
88
  )
76
89
 
77
90
 
78
91
  @timeit
79
92
  def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
80
- run_cleanup_job(
81
- "aws_import_secrets_cleanup.json",
82
- neo4j_session,
83
- common_job_parameters,
93
+ """
94
+ Run Secrets cleanup job using the data model.
95
+ """
96
+ logger.debug("Running Secrets cleanup job.")
97
+ cleanup_job = GraphJob.from_node_schema(
98
+ SecretsManagerSecretSchema(), common_job_parameters
84
99
  )
100
+ cleanup_job.run(neo4j_session)
85
101
 
86
102
 
87
103
  @timeit
@@ -121,8 +137,6 @@ def get_secret_versions(
121
137
 
122
138
  def transform_secret_versions(
123
139
  versions: List[Dict],
124
- region: str,
125
- aws_account_id: str,
126
140
  ) -> List[Dict]:
127
141
  """
128
142
  Transform AWS Secrets Manager Secret Versions to match the data model.
@@ -203,7 +217,15 @@ def sync(
203
217
  )
204
218
  secrets = get_secret_list(boto3_session, region)
205
219
 
206
- load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
220
+ transformed_secrets = transform_secrets(secrets)
221
+
222
+ load_secrets(
223
+ neo4j_session,
224
+ transformed_secrets,
225
+ region,
226
+ current_aws_account_id,
227
+ update_tag,
228
+ )
207
229
 
208
230
  all_versions = []
209
231
  for secret in secrets:
@@ -216,11 +238,7 @@ def sync(
216
238
  )
217
239
  all_versions.extend(versions)
218
240
 
219
- transformed_data = transform_secret_versions(
220
- all_versions,
221
- region,
222
- current_aws_account_id,
223
- )
241
+ transformed_data = transform_secret_versions(all_versions)
224
242
 
225
243
  load_secret_versions(
226
244
  neo4j_session,
@@ -59,10 +59,29 @@ async def get_group_members(
59
59
  return user_ids, group_ids
60
60
 
61
61
 
62
+ @timeit
63
+ async def get_group_owners(client: GraphServiceClient, group_id: str) -> list[str]:
64
+ """Get owner user IDs for a given group."""
65
+ owner_ids: list[str] = []
66
+ request_builder = client.groups.by_group_id(group_id).owners
67
+ page = await request_builder.get()
68
+ while page:
69
+ if page.value:
70
+ for obj in page.value:
71
+ odata_type = getattr(obj, "odata_type", "")
72
+ if odata_type == "#microsoft.graph.user":
73
+ owner_ids.append(obj.id)
74
+ if not page.odata_next_link:
75
+ break
76
+ page = await request_builder.with_url(page.odata_next_link).get()
77
+ return owner_ids
78
+
79
+
62
80
  def transform_groups(
63
81
  groups: list[Group],
64
82
  user_member_map: dict[str, list[str]],
65
83
  group_member_map: dict[str, list[str]],
84
+ group_owner_map: dict[str, list[str]],
66
85
  ) -> list[dict[str, Any]]:
67
86
  """Transform API responses into dictionaries for ingestion."""
68
87
  result: list[dict[str, Any]] = []
@@ -82,6 +101,7 @@ def transform_groups(
82
101
  "deleted_date_time": g.deleted_date_time,
83
102
  "member_ids": user_member_map.get(g.id, []),
84
103
  "member_group_ids": group_member_map.get(g.id, []),
104
+ "owner_ids": group_owner_map.get(g.id, []),
85
105
  }
86
106
  result.append(transformed)
87
107
  return result
@@ -134,6 +154,12 @@ async def sync_entra_groups(
134
154
 
135
155
  user_member_map: dict[str, list[str]] = {}
136
156
  group_member_map: dict[str, list[str]] = {}
157
+ group_owner_map: dict[str, list[str]] = {}
158
+
159
+ for group in groups:
160
+ owners = await get_group_owners(client, group.id)
161
+ group_owner_map[group.id] = owners
162
+
137
163
  for group in groups:
138
164
  try:
139
165
  users, subgroups = await get_group_members(client, group.id)
@@ -144,7 +170,9 @@ async def sync_entra_groups(
144
170
  user_member_map[group.id] = []
145
171
  group_member_map[group.id] = []
146
172
 
147
- transformed_groups = transform_groups(groups, user_member_map, group_member_map)
173
+ transformed_groups = transform_groups(
174
+ groups, user_member_map, group_member_map, group_owner_map
175
+ )
148
176
 
149
177
  load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
150
178
  load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
@@ -391,6 +391,7 @@ def _sync_multiple_projects(
391
391
  # Compute data sync
392
392
  for project in projects:
393
393
  project_id = project["projectId"]
394
+ common_job_parameters["PROJECT_ID"] = project_id
394
395
  logger.info("Syncing GCP project %s for Compute.", project_id)
395
396
  _sync_single_project_compute(
396
397
  neo4j_session,
@@ -399,10 +400,12 @@ def _sync_multiple_projects(
399
400
  gcp_update_tag,
400
401
  common_job_parameters,
401
402
  )
403
+ del common_job_parameters["PROJECT_ID"]
402
404
 
403
405
  # Storage data sync
404
406
  for project in projects:
405
407
  project_id = project["projectId"]
408
+ common_job_parameters["PROJECT_ID"] = project_id
406
409
  logger.info("Syncing GCP project %s for Storage", project_id)
407
410
  _sync_single_project_storage(
408
411
  neo4j_session,
@@ -411,10 +414,12 @@ def _sync_multiple_projects(
411
414
  gcp_update_tag,
412
415
  common_job_parameters,
413
416
  )
417
+ del common_job_parameters["PROJECT_ID"]
414
418
 
415
419
  # GKE data sync
416
420
  for project in projects:
417
421
  project_id = project["projectId"]
422
+ common_job_parameters["PROJECT_ID"] = project_id
418
423
  logger.info("Syncing GCP project %s for GKE", project_id)
419
424
  _sync_single_project_gke(
420
425
  neo4j_session,
@@ -423,10 +428,12 @@ def _sync_multiple_projects(
423
428
  gcp_update_tag,
424
429
  common_job_parameters,
425
430
  )
431
+ del common_job_parameters["PROJECT_ID"]
426
432
 
427
433
  # DNS data sync
428
434
  for project in projects:
429
435
  project_id = project["projectId"]
436
+ common_job_parameters["PROJECT_ID"] = project_id
430
437
  logger.info("Syncing GCP project %s for DNS", project_id)
431
438
  _sync_single_project_dns(
432
439
  neo4j_session,
@@ -435,14 +442,17 @@ def _sync_multiple_projects(
435
442
  gcp_update_tag,
436
443
  common_job_parameters,
437
444
  )
445
+ del common_job_parameters["PROJECT_ID"]
438
446
 
439
447
  # IAM data sync
440
448
  for project in projects:
441
449
  project_id = project["projectId"]
450
+ common_job_parameters["PROJECT_ID"] = project_id
442
451
  logger.info("Syncing GCP project %s for IAM", project_id)
443
452
  _sync_single_project_iam(
444
453
  neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
445
454
  )
455
+ del common_job_parameters["PROJECT_ID"]
446
456
 
447
457
 
448
458
  @timeit
@@ -14,6 +14,9 @@ import neo4j
14
14
  from googleapiclient.discovery import HttpError
15
15
  from googleapiclient.discovery import Resource
16
16
 
17
+ from cartography.client.core.tx import load
18
+ from cartography.graph.job import GraphJob
19
+ from cartography.models.gcp.compute.vpc import GCPVpcSchema
17
20
  from cartography.util import run_cleanup_job
18
21
  from cartography.util import timeit
19
22
 
@@ -600,48 +603,17 @@ def load_gcp_instances(
600
603
  @timeit
601
604
  def load_gcp_vpcs(
602
605
  neo4j_session: neo4j.Session,
603
- vpcs: List[Dict],
606
+ vpcs: list[dict[str, Any]],
604
607
  gcp_update_tag: int,
608
+ project_id: str,
605
609
  ) -> None:
606
- """
607
- Ingest VPCs to Neo4j
608
- :param neo4j_session: The Neo4j session object
609
- :param vpcs: List of VPCs to ingest
610
- :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
611
- :return: Nothing
612
- """
613
- query = """
614
- MERGE(p:GCPProject{id:$ProjectId})
615
- ON CREATE SET p.firstseen = timestamp()
616
- SET p.lastupdated = $gcp_update_tag
617
-
618
- MERGE(vpc:GCPVpc{id:$PartialUri})
619
- ON CREATE SET vpc.firstseen = timestamp(),
620
- vpc.partial_uri = $PartialUri
621
- SET vpc.self_link = $SelfLink,
622
- vpc.name = $VpcName,
623
- vpc.project_id = $ProjectId,
624
- vpc.auto_create_subnetworks = $AutoCreateSubnetworks,
625
- vpc.routing_config_routing_mode = $RoutingMode,
626
- vpc.description = $Description,
627
- vpc.lastupdated = $gcp_update_tag
628
-
629
- MERGE (p)-[r:RESOURCE]->(vpc)
630
- ON CREATE SET r.firstseen = timestamp()
631
- SET r.lastupdated = $gcp_update_tag
632
- """
633
- for vpc in vpcs:
634
- neo4j_session.run(
635
- query,
636
- ProjectId=vpc["project_id"],
637
- PartialUri=vpc["partial_uri"],
638
- SelfLink=vpc["self_link"],
639
- VpcName=vpc["name"],
640
- AutoCreateSubnetworks=vpc["auto_create_subnetworks"],
641
- RoutingMode=vpc["routing_config_routing_mode"],
642
- Description=vpc["description"],
643
- gcp_update_tag=gcp_update_tag,
644
- )
610
+ load(
611
+ neo4j_session,
612
+ GCPVpcSchema(),
613
+ vpcs,
614
+ PROJECT_ID=project_id,
615
+ LASTUPDATED=gcp_update_tag,
616
+ )
645
617
 
646
618
 
647
619
  @timeit
@@ -1159,6 +1131,12 @@ def cleanup_gcp_vpcs(neo4j_session: neo4j.Session, common_job_parameters: Dict)
1159
1131
  :param common_job_parameters: dict of other job parameters to pass to Neo4j
1160
1132
  :return: Nothing
1161
1133
  """
1134
+ GraphJob.from_node_schema(
1135
+ GCPVpcSchema(),
1136
+ common_job_parameters,
1137
+ ).run(neo4j_session)
1138
+
1139
+ # TODO: remove this once we refactor GCP instances and add the instance to vpc rel as an object
1162
1140
  run_cleanup_job(
1163
1141
  "gcp_compute_vpc_cleanup.json",
1164
1142
  neo4j_session,
@@ -1267,8 +1245,7 @@ def sync_gcp_vpcs(
1267
1245
  """
1268
1246
  vpc_res = get_gcp_vpcs(project_id, compute)
1269
1247
  vpcs = transform_gcp_vpcs(vpc_res)
1270
- load_gcp_vpcs(neo4j_session, vpcs, gcp_update_tag)
1271
- # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
1248
+ load_gcp_vpcs(neo4j_session, vpcs, gcp_update_tag, project_id)
1272
1249
  cleanup_gcp_vpcs(neo4j_session, common_job_parameters)
1273
1250
 
1274
1251
 
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import logging
2
3
  from typing import Any
3
4
 
@@ -8,7 +9,9 @@ from cartography.client.aws import list_accounts
8
9
  from cartography.client.aws.ecr import get_ecr_images
9
10
  from cartography.config import Config
10
11
  from cartography.intel.trivy.scanner import cleanup
12
+ from cartography.intel.trivy.scanner import get_json_files_in_dir
11
13
  from cartography.intel.trivy.scanner import get_json_files_in_s3
14
+ from cartography.intel.trivy.scanner import sync_single_image_from_file
12
15
  from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
16
  from cartography.stats import get_stats_client
14
17
  from cartography.util import timeit
@@ -39,13 +42,13 @@ def get_scan_targets(
39
42
 
40
43
 
41
44
  def _get_intersection(
42
- images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
45
+ image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
43
46
  ) -> list[tuple[str, str]]:
44
47
  """
45
48
  Get the intersection of ECR images in the graph and S3 scan results.
46
49
 
47
50
  Args:
48
- images_in_graph: Set of ECR images in the graph
51
+ image_uris: Set of ECR images in the graph
49
52
  json_files: Set of S3 object keys for JSON files
50
53
  trivy_s3_prefix: S3 prefix path containing scan results
51
54
 
@@ -60,7 +63,7 @@ def _get_intersection(
60
63
  # Remove the prefix and the .json suffix
61
64
  image_uri = s3_object_key[prefix_len:-5]
62
65
 
63
- if image_uri in images_in_graph:
66
+ if image_uri in image_uris:
64
67
  intersection.append((image_uri, s3_object_key))
65
68
 
66
69
  return intersection
@@ -90,12 +93,12 @@ def sync_trivy_aws_ecr_from_s3(
90
93
  f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
91
94
  )
92
95
 
93
- images_in_graph: set[str] = get_scan_targets(neo4j_session)
96
+ image_uris: set[str] = get_scan_targets(neo4j_session)
94
97
  json_files: set[str] = get_json_files_in_s3(
95
98
  trivy_s3_bucket, trivy_s3_prefix, boto3_session
96
99
  )
97
100
  intersection: list[tuple[str, str]] = _get_intersection(
98
- images_in_graph, json_files, trivy_s3_prefix
101
+ image_uris, json_files, trivy_s3_prefix
99
102
  )
100
103
 
101
104
  if len(intersection) == 0:
@@ -124,21 +127,79 @@ def sync_trivy_aws_ecr_from_s3(
124
127
  cleanup(neo4j_session, common_job_parameters)
125
128
 
126
129
 
130
+ @timeit
131
+ def sync_trivy_aws_ecr_from_dir(
132
+ neo4j_session: Session,
133
+ results_dir: str,
134
+ update_tag: int,
135
+ common_job_parameters: dict[str, Any],
136
+ ) -> None:
137
+ """Sync Trivy scan results from local files for AWS ECR images."""
138
+ logger.info(f"Using Trivy scan results from {results_dir}")
139
+
140
+ image_uris: set[str] = get_scan_targets(neo4j_session)
141
+ json_files: set[str] = get_json_files_in_dir(results_dir)
142
+
143
+ if not json_files:
144
+ logger.error(
145
+ f"Trivy sync was configured, but no json files were found in {results_dir}."
146
+ )
147
+ raise ValueError("No Trivy json results found on disk")
148
+
149
+ logger.info(f"Processing {len(json_files)} local Trivy result files")
150
+
151
+ for file_path in json_files:
152
+ # First, check if the image exists in the graph before syncing
153
+ try:
154
+ # Peek at the artifact name without processing the file
155
+ with open(file_path, encoding="utf-8") as f:
156
+ trivy_data = json.load(f)
157
+ artifact_name = trivy_data.get("ArtifactName")
158
+
159
+ if artifact_name and artifact_name not in image_uris:
160
+ logger.debug(
161
+ f"Skipping results for {artifact_name} since the image is not present in the graph"
162
+ )
163
+ continue
164
+
165
+ except (json.JSONDecodeError, KeyError) as e:
166
+ logger.error(f"Failed to read artifact name from {file_path}: {e}")
167
+ continue
168
+
169
+ # Now sync the file since we know the image exists in the graph
170
+ sync_single_image_from_file(
171
+ neo4j_session,
172
+ file_path,
173
+ update_tag,
174
+ )
175
+
176
+ cleanup(neo4j_session, common_job_parameters)
177
+
178
+
127
179
  @timeit
128
180
  def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
129
- """
130
- Start Trivy scan ingestion from S3.
181
+ """Start Trivy scan ingestion from S3 or local files.
131
182
 
132
183
  Args:
133
184
  neo4j_session: Neo4j session for database operations
134
- config: Configuration object containing S3 settings
185
+ config: Configuration object containing S3 or directory paths
135
186
  """
136
- # Check if S3 configuration is provided
137
- if not config.trivy_s3_bucket:
138
- logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
187
+ if not config.trivy_s3_bucket and not config.trivy_results_dir:
188
+ logger.info("Trivy configuration not provided. Skipping Trivy ingestion.")
189
+ return
190
+
191
+ if config.trivy_results_dir:
192
+ common_job_parameters = {
193
+ "UPDATE_TAG": config.update_tag,
194
+ }
195
+ sync_trivy_aws_ecr_from_dir(
196
+ neo4j_session,
197
+ config.trivy_results_dir,
198
+ config.update_tag,
199
+ common_job_parameters,
200
+ )
139
201
  return
140
202
 
141
- # Default to empty string if s3 prefix is not provided
142
203
  if config.trivy_s3_prefix is None:
143
204
  config.trivy_s3_prefix = ""
144
205
 
@@ -146,7 +207,6 @@ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
146
207
  "UPDATE_TAG": config.update_tag,
147
208
  }
148
209
 
149
- # Get ECR images to scan
150
210
  boto3_session = boto3.Session()
151
211
 
152
212
  sync_trivy_aws_ecr_from_s3(