cartography 0.109.0rc1__py3-none-any.whl → 0.110.0rc1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (58)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +22 -0
  3. cartography/config.py +13 -0
  4. cartography/data/indexes.cypher +0 -15
  5. cartography/intel/aws/cloudtrail_management_events.py +21 -0
  6. cartography/intel/aws/eventbridge.py +91 -0
  7. cartography/intel/aws/glue.py +117 -0
  8. cartography/intel/aws/identitycenter.py +71 -23
  9. cartography/intel/aws/kms.py +160 -200
  10. cartography/intel/aws/lambda_function.py +206 -190
  11. cartography/intel/aws/rds.py +243 -458
  12. cartography/intel/aws/resources.py +4 -0
  13. cartography/intel/aws/route53.py +334 -332
  14. cartography/intel/entra/__init__.py +43 -41
  15. cartography/intel/entra/applications.py +1 -2
  16. cartography/intel/entra/ou.py +1 -1
  17. cartography/intel/entra/resources.py +20 -0
  18. cartography/intel/trivy/__init__.py +73 -13
  19. cartography/intel/trivy/scanner.py +115 -92
  20. cartography/models/aws/eventbridge/__init__.py +0 -0
  21. cartography/models/aws/eventbridge/rule.py +77 -0
  22. cartography/models/aws/glue/__init__.py +0 -0
  23. cartography/models/aws/glue/connection.py +51 -0
  24. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  25. cartography/models/aws/kms/__init__.py +0 -0
  26. cartography/models/aws/kms/aliases.py +86 -0
  27. cartography/models/aws/kms/grants.py +65 -0
  28. cartography/models/aws/kms/keys.py +88 -0
  29. cartography/models/aws/lambda_function/__init__.py +0 -0
  30. cartography/models/aws/lambda_function/alias.py +74 -0
  31. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  32. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  33. cartography/models/aws/lambda_function/layer.py +72 -0
  34. cartography/models/aws/rds/__init__.py +0 -0
  35. cartography/models/aws/rds/cluster.py +89 -0
  36. cartography/models/aws/rds/instance.py +154 -0
  37. cartography/models/aws/rds/snapshot.py +108 -0
  38. cartography/models/aws/rds/subnet_group.py +101 -0
  39. cartography/models/aws/route53/__init__.py +0 -0
  40. cartography/models/aws/route53/dnsrecord.py +214 -0
  41. cartography/models/aws/route53/nameserver.py +63 -0
  42. cartography/models/aws/route53/subzone.py +40 -0
  43. cartography/models/aws/route53/zone.py +47 -0
  44. cartography/models/snipeit/asset.py +1 -0
  45. cartography/util.py +8 -1
  46. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/METADATA +2 -2
  47. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/RECORD +51 -32
  48. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  49. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  50. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  51. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  52. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  53. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  54. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  55. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/WHEEL +0 -0
  56. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/entry_points.txt +0 -0
  57. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/licenses/LICENSE +0 -0
  58. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,14 @@
1
1
  import asyncio
2
+ import datetime
2
3
  import logging
4
+ from traceback import TracebackException
5
+ from typing import Awaitable
6
+ from typing import Callable
3
7
 
4
8
  import neo4j
5
9
 
6
10
  from cartography.config import Config
7
- from cartography.intel.entra.applications import sync_entra_applications
8
- from cartography.intel.entra.groups import sync_entra_groups
9
- from cartography.intel.entra.ou import sync_entra_ous
10
- from cartography.intel.entra.users import sync_entra_users
11
+ from cartography.intel.entra.resources import RESOURCE_FUNCTIONS
11
12
  from cartography.util import timeit
12
13
 
13
14
  logger = logging.getLogger(__name__)
@@ -39,45 +40,46 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
39
40
  }
40
41
 
41
42
  async def main() -> None:
42
- # Run user sync
43
- await sync_entra_users(
44
- neo4j_session,
45
- config.entra_tenant_id,
46
- config.entra_client_id,
47
- config.entra_client_secret,
48
- config.update_tag,
49
- common_job_parameters,
50
- )
43
+ failed_stages = []
44
+ exception_tracebacks = []
51
45
 
52
- # Run group sync
53
- await sync_entra_groups(
54
- neo4j_session,
55
- config.entra_tenant_id,
56
- config.entra_client_id,
57
- config.entra_client_secret,
58
- config.update_tag,
59
- common_job_parameters,
60
- )
46
+ async def run_stage(name: str, func: Callable[..., Awaitable[None]]) -> None:
47
+ try:
48
+ await func(
49
+ neo4j_session,
50
+ config.entra_tenant_id,
51
+ config.entra_client_id,
52
+ config.entra_client_secret,
53
+ config.update_tag,
54
+ common_job_parameters,
55
+ )
56
+ except Exception as e:
57
+ if config.entra_best_effort_mode:
58
+ timestamp = datetime.datetime.now()
59
+ failed_stages.append(name)
60
+ exception_traceback = TracebackException.from_exception(e)
61
+ traceback_string = "".join(exception_traceback.format())
62
+ exception_tracebacks.append(
63
+ f"{timestamp} - Exception for stage {name}\n{traceback_string}"
64
+ )
65
+ logger.warning(
66
+ f"Caught exception syncing {name}. entra-best-effort-mode is on so we are continuing "
67
+ "on to the next Entra sync. All exceptions will be aggregated and re-logged at the end of the sync.",
68
+ exc_info=True,
69
+ )
70
+ else:
71
+ logger.error("Error during Entra sync", exc_info=True)
72
+ raise
61
73
 
62
- # Run OU sync
63
- await sync_entra_ous(
64
- neo4j_session,
65
- config.entra_tenant_id,
66
- config.entra_client_id,
67
- config.entra_client_secret,
68
- config.update_tag,
69
- common_job_parameters,
70
- )
74
+ for name, func in RESOURCE_FUNCTIONS:
75
+ await run_stage(name, func)
71
76
 
72
- # Run application sync
73
- await sync_entra_applications(
74
- neo4j_session,
75
- config.entra_tenant_id,
76
- config.entra_client_id,
77
- config.entra_client_secret,
78
- config.update_tag,
79
- common_job_parameters,
80
- )
77
+ if failed_stages:
78
+ logger.error(
79
+ f"Entra sync failed for the following stages: {', '.join(failed_stages)}. "
80
+ "See the logs for more details.",
81
+ )
82
+ raise Exception("\n".join(exception_tracebacks))
81
83
 
82
- # Execute both syncs in sequence
84
+ # Execute all syncs in sequence
83
85
  asyncio.run(main())
@@ -172,12 +172,11 @@ async def get_app_role_assignments(
172
172
  )
173
173
  continue
174
174
  except Exception as e:
175
- # Only catch truly unexpected errors - these should be rare
176
175
  logger.error(
177
176
  f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
178
177
  exc_info=True,
179
178
  )
180
- continue
179
+ raise
181
180
 
182
181
  logger.info(f"Retrieved {len(assignments)} app role assignments total")
183
182
  return assignments
@@ -43,7 +43,7 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
43
43
  current_request = None
44
44
  except Exception as e:
45
45
  logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
- current_request = None
46
+ raise
47
47
 
48
48
  return all_units
49
49
 
@@ -0,0 +1,20 @@
1
+ from cartography.intel.entra.applications import sync_entra_applications
2
+ from cartography.intel.entra.groups import sync_entra_groups
3
+ from cartography.intel.entra.ou import sync_entra_ous
4
+ from cartography.intel.entra.users import sync_entra_users
5
+
6
+ # This is a list so that we sync these resources in order.
7
+ # Data shape: [("resource_name", sync_function), ...]
8
+ # Each sync function will be called with the following arguments:
9
+ # - neo4j_session
10
+ # - config.entra_tenant_id
11
+ # - config.entra_client_id
12
+ # - config.entra_client_secret
13
+ # - config.update_tag
14
+ # - common_job_parameters
15
+ RESOURCE_FUNCTIONS = [
16
+ ("users", sync_entra_users),
17
+ ("groups", sync_entra_groups),
18
+ ("ous", sync_entra_ous),
19
+ ("applications", sync_entra_applications),
20
+ ]
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import logging
2
3
  from typing import Any
3
4
 
@@ -8,7 +9,9 @@ from cartography.client.aws import list_accounts
8
9
  from cartography.client.aws.ecr import get_ecr_images
9
10
  from cartography.config import Config
10
11
  from cartography.intel.trivy.scanner import cleanup
12
+ from cartography.intel.trivy.scanner import get_json_files_in_dir
11
13
  from cartography.intel.trivy.scanner import get_json_files_in_s3
14
+ from cartography.intel.trivy.scanner import sync_single_image_from_file
12
15
  from cartography.intel.trivy.scanner import sync_single_image_from_s3
13
16
  from cartography.stats import get_stats_client
14
17
  from cartography.util import timeit
@@ -39,13 +42,13 @@ def get_scan_targets(
39
42
 
40
43
 
41
44
  def _get_intersection(
42
- images_in_graph: set[str], json_files: set[str], trivy_s3_prefix: str
45
+ image_uris: set[str], json_files: set[str], trivy_s3_prefix: str
43
46
  ) -> list[tuple[str, str]]:
44
47
  """
45
48
  Get the intersection of ECR images in the graph and S3 scan results.
46
49
 
47
50
  Args:
48
- images_in_graph: Set of ECR images in the graph
51
+ image_uris: Set of ECR images in the graph
49
52
  json_files: Set of S3 object keys for JSON files
50
53
  trivy_s3_prefix: S3 prefix path containing scan results
51
54
 
@@ -60,7 +63,7 @@ def _get_intersection(
60
63
  # Remove the prefix and the .json suffix
61
64
  image_uri = s3_object_key[prefix_len:-5]
62
65
 
63
- if image_uri in images_in_graph:
66
+ if image_uri in image_uris:
64
67
  intersection.append((image_uri, s3_object_key))
65
68
 
66
69
  return intersection
@@ -90,12 +93,12 @@ def sync_trivy_aws_ecr_from_s3(
90
93
  f"Using Trivy scan results from s3://{trivy_s3_bucket}/{trivy_s3_prefix}"
91
94
  )
92
95
 
93
- images_in_graph: set[str] = get_scan_targets(neo4j_session)
96
+ image_uris: set[str] = get_scan_targets(neo4j_session)
94
97
  json_files: set[str] = get_json_files_in_s3(
95
98
  trivy_s3_bucket, trivy_s3_prefix, boto3_session
96
99
  )
97
100
  intersection: list[tuple[str, str]] = _get_intersection(
98
- images_in_graph, json_files, trivy_s3_prefix
101
+ image_uris, json_files, trivy_s3_prefix
99
102
  )
100
103
 
101
104
  if len(intersection) == 0:
@@ -124,21 +127,79 @@ def sync_trivy_aws_ecr_from_s3(
124
127
  cleanup(neo4j_session, common_job_parameters)
125
128
 
126
129
 
130
+ @timeit
131
+ def sync_trivy_aws_ecr_from_dir(
132
+ neo4j_session: Session,
133
+ results_dir: str,
134
+ update_tag: int,
135
+ common_job_parameters: dict[str, Any],
136
+ ) -> None:
137
+ """Sync Trivy scan results from local files for AWS ECR images."""
138
+ logger.info(f"Using Trivy scan results from {results_dir}")
139
+
140
+ image_uris: set[str] = get_scan_targets(neo4j_session)
141
+ json_files: set[str] = get_json_files_in_dir(results_dir)
142
+
143
+ if not json_files:
144
+ logger.error(
145
+ f"Trivy sync was configured, but no json files were found in {results_dir}."
146
+ )
147
+ raise ValueError("No Trivy json results found on disk")
148
+
149
+ logger.info(f"Processing {len(json_files)} local Trivy result files")
150
+
151
+ for file_path in json_files:
152
+ # First, check if the image exists in the graph before syncing
153
+ try:
154
+ # Peek at the artifact name without processing the file
155
+ with open(file_path, encoding="utf-8") as f:
156
+ trivy_data = json.load(f)
157
+ artifact_name = trivy_data.get("ArtifactName")
158
+
159
+ if artifact_name and artifact_name not in image_uris:
160
+ logger.debug(
161
+ f"Skipping results for {artifact_name} since the image is not present in the graph"
162
+ )
163
+ continue
164
+
165
+ except (json.JSONDecodeError, KeyError) as e:
166
+ logger.error(f"Failed to read artifact name from {file_path}: {e}")
167
+ continue
168
+
169
+ # Now sync the file since we know the image exists in the graph
170
+ sync_single_image_from_file(
171
+ neo4j_session,
172
+ file_path,
173
+ update_tag,
174
+ )
175
+
176
+ cleanup(neo4j_session, common_job_parameters)
177
+
178
+
127
179
  @timeit
128
180
  def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
129
- """
130
- Start Trivy scan ingestion from S3.
181
+ """Start Trivy scan ingestion from S3 or local files.
131
182
 
132
183
  Args:
133
184
  neo4j_session: Neo4j session for database operations
134
- config: Configuration object containing S3 settings
185
+ config: Configuration object containing S3 or directory paths
135
186
  """
136
- # Check if S3 configuration is provided
137
- if not config.trivy_s3_bucket:
138
- logger.info("Trivy S3 configuration not provided. Skipping Trivy ingestion.")
187
+ if not config.trivy_s3_bucket and not config.trivy_results_dir:
188
+ logger.info("Trivy configuration not provided. Skipping Trivy ingestion.")
189
+ return
190
+
191
+ if config.trivy_results_dir:
192
+ common_job_parameters = {
193
+ "UPDATE_TAG": config.update_tag,
194
+ }
195
+ sync_trivy_aws_ecr_from_dir(
196
+ neo4j_session,
197
+ config.trivy_results_dir,
198
+ config.update_tag,
199
+ common_job_parameters,
200
+ )
139
201
  return
140
202
 
141
- # Default to empty string if s3 prefix is not provided
142
203
  if config.trivy_s3_prefix is None:
143
204
  config.trivy_s3_prefix = ""
144
205
 
@@ -146,7 +207,6 @@ def start_trivy_ingestion(neo4j_session: Session, config: Config) -> None:
146
207
  "UPDATE_TAG": config.update_tag,
147
208
  }
148
209
 
149
- # Get ECR images to scan
150
210
  boto3_session = boto3.Session()
151
211
 
152
212
  sync_trivy_aws_ecr_from_s3(
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import os
3
4
  from typing import Any
4
5
 
5
6
  import boto3
@@ -127,6 +128,90 @@ def transform_scan_results(
127
128
  return findings_list, packages_list, fixes_list
128
129
 
129
130
 
131
+ def _parse_trivy_data(
132
+ trivy_data: dict, source: str
133
+ ) -> tuple[str | None, list[dict], str]:
134
+ """
135
+ Parse Trivy scan data and extract common fields.
136
+
137
+ Args:
138
+ trivy_data: Raw JSON Trivy data
139
+ source: Source identifier for error messages (file path or S3 URI)
140
+
141
+ Returns:
142
+ Tuple of (artifact_name, results, image_digest)
143
+ """
144
+ # Extract artifact name if present (only for file-based)
145
+ artifact_name = trivy_data.get("ArtifactName")
146
+
147
+ if "Results" not in trivy_data:
148
+ logger.error(
149
+ f"Scan data did not contain a `Results` key for {source}. This indicates a malformed scan result."
150
+ )
151
+ raise ValueError(f"Missing 'Results' key in scan data for {source}")
152
+
153
+ results = trivy_data["Results"]
154
+ if not results:
155
+ stat_handler.incr("image_scan_no_results_count")
156
+ logger.info(f"No vulnerabilities found for {source}")
157
+
158
+ if "Metadata" not in trivy_data or not trivy_data["Metadata"]:
159
+ raise ValueError(f"Missing 'Metadata' in scan data for {source}")
160
+
161
+ repo_digests = trivy_data["Metadata"].get("RepoDigests", [])
162
+ if not repo_digests:
163
+ raise ValueError(f"Missing 'RepoDigests' in scan metadata for {source}")
164
+
165
+ repo_digest = repo_digests[0]
166
+ if "@" not in repo_digest:
167
+ raise ValueError(f"Invalid repo digest format in {source}: {repo_digest}")
168
+
169
+ image_digest = repo_digest.split("@")[1]
170
+ if not image_digest:
171
+ raise ValueError(f"Empty image digest for {source}")
172
+
173
+ return artifact_name, results, image_digest
174
+
175
+
176
+ @timeit
177
+ def sync_single_image(
178
+ neo4j_session: Session,
179
+ trivy_data: dict,
180
+ source: str,
181
+ update_tag: int,
182
+ ) -> None:
183
+ """
184
+ Sync a single image's Trivy scan results to Neo4j.
185
+
186
+ Args:
187
+ neo4j_session: Neo4j session for database operations
188
+ trivy_data: Raw Trivy JSON data
189
+ source: Source identifier for logging (file path or image URI)
190
+ update_tag: Update tag for tracking
191
+ """
192
+ try:
193
+ _, results, image_digest = _parse_trivy_data(trivy_data, source)
194
+
195
+ # Transform all data in one pass
196
+ findings_list, packages_list, fixes_list = transform_scan_results(
197
+ results,
198
+ image_digest,
199
+ )
200
+
201
+ num_findings = len(findings_list)
202
+ stat_handler.incr("image_scan_cve_count", num_findings)
203
+
204
+ # Load the transformed data
205
+ load_scan_vulns(neo4j_session, findings_list, update_tag=update_tag)
206
+ load_scan_packages(neo4j_session, packages_list, update_tag=update_tag)
207
+ load_scan_fixes(neo4j_session, fixes_list, update_tag=update_tag)
208
+ stat_handler.incr("images_processed_count")
209
+
210
+ except Exception as e:
211
+ logger.error(f"Failed to process scan results for {source}: {e}")
212
+ raise
213
+
214
+
130
215
  @timeit
131
216
  def get_json_files_in_s3(
132
217
  s3_bucket: str, s3_prefix: str, boto3_session: boto3.Session
@@ -177,6 +262,18 @@ def get_json_files_in_s3(
177
262
  return results
178
263
 
179
264
 
265
+ @timeit
266
+ def get_json_files_in_dir(results_dir: str) -> set[str]:
267
+ """Return set of JSON file paths under a directory."""
268
+ results = set()
269
+ for root, _dirs, files in os.walk(results_dir):
270
+ for filename in files:
271
+ if filename.endswith(".json"):
272
+ results.add(os.path.join(root, filename))
273
+ logger.info(f"Found {len(results)} json files in {results_dir}")
274
+ return results
275
+
276
+
180
277
  @timeit
181
278
  def cleanup(neo4j_session: Session, common_job_parameters: dict[str, Any]) -> None:
182
279
  """
@@ -245,58 +342,6 @@ def load_scan_fixes(
245
342
  )
246
343
 
247
344
 
248
- @timeit
249
- def read_scan_results_from_s3(
250
- boto3_session: boto3.Session,
251
- s3_bucket: str,
252
- s3_object_key: str,
253
- image_uri: str,
254
- ) -> tuple[list[dict], str | None]:
255
- """
256
- Read and parse Trivy scan results from S3.
257
-
258
- Args:
259
- boto3_session: boto3 session for S3 operations
260
- s3_bucket: S3 bucket containing scan results
261
- s3_object_key: S3 object key for the scan results
262
- image_uri: ECR image URI (for logging purposes)
263
-
264
- Returns:
265
- Tuple of (list of scan result dictionaries from the "Results" key, image digest)
266
- """
267
- s3_client = boto3_session.client("s3")
268
-
269
- # Read JSON scan results from S3
270
- logger.debug(f"Reading scan results from S3: s3://{s3_bucket}/{s3_object_key}")
271
- response = s3_client.get_object(Bucket=s3_bucket, Key=s3_object_key)
272
- scan_data_json = response["Body"].read().decode("utf-8")
273
-
274
- # Parse JSON data
275
- trivy_data = json.loads(scan_data_json)
276
-
277
- # Extract results using the same logic as binary scanning
278
- if "Results" in trivy_data and trivy_data["Results"]:
279
- results = trivy_data["Results"]
280
- else:
281
- stat_handler.incr("image_scan_no_results_count")
282
- logger.warning(
283
- f"S3 scan data did not contain a `Results` key for URI = {image_uri}; continuing."
284
- )
285
- results = []
286
-
287
- image_digest = None
288
- if "Metadata" in trivy_data and trivy_data["Metadata"]:
289
- repo_digests = trivy_data["Metadata"].get("RepoDigests", [])
290
- if repo_digests:
291
- # Sample input: 000000000000.dkr.ecr.us-east-1.amazonaws.com/test-repository@sha256:88016
292
- # Sample output: sha256:88016
293
- repo_digest = repo_digests[0]
294
- if "@" in repo_digest:
295
- image_digest = repo_digest.split("@")[1]
296
-
297
- return results, image_digest
298
-
299
-
300
345
  @timeit
301
346
  def sync_single_image_from_s3(
302
347
  neo4j_session: Session,
@@ -317,47 +362,25 @@ def sync_single_image_from_s3(
317
362
  s3_object_key: S3 object key for this image's scan results
318
363
  boto3_session: boto3 session for S3 operations
319
364
  """
320
- try:
321
- # Read and parse scan results from S3
322
- results, image_digest = read_scan_results_from_s3(
323
- boto3_session,
324
- s3_bucket,
325
- s3_object_key,
326
- image_uri,
327
- )
328
- if not image_digest:
329
- logger.warning(f"No image digest found for {image_uri}; skipping over.")
330
- return
365
+ s3_client = boto3_session.client("s3")
331
366
 
332
- # Transform all data in one pass using existing function
333
- findings_list, packages_list, fixes_list = transform_scan_results(
334
- results,
335
- image_digest,
336
- )
367
+ logger.debug(f"Reading scan results from S3: s3://{s3_bucket}/{s3_object_key}")
368
+ response = s3_client.get_object(Bucket=s3_bucket, Key=s3_object_key)
369
+ scan_data_json = response["Body"].read().decode("utf-8")
337
370
 
338
- num_findings = len(findings_list)
339
- stat_handler.incr("image_scan_cve_count", num_findings)
371
+ trivy_data = json.loads(scan_data_json)
372
+ sync_single_image(neo4j_session, trivy_data, image_uri, update_tag)
340
373
 
341
- # Load the transformed data using existing functions
342
- load_scan_vulns(
343
- neo4j_session,
344
- findings_list,
345
- update_tag=update_tag,
346
- )
347
- load_scan_packages(
348
- neo4j_session,
349
- packages_list,
350
- update_tag=update_tag,
351
- )
352
- load_scan_fixes(
353
- neo4j_session,
354
- fixes_list,
355
- update_tag=update_tag,
356
- )
357
- stat_handler.incr("images_processed_count")
358
374
 
359
- except Exception as e:
360
- logger.error(
361
- f"Failed to process S3 scan results for {image_uri} from {s3_object_key}: {e}"
362
- )
363
- raise
375
+ @timeit
376
+ def sync_single_image_from_file(
377
+ neo4j_session: Session,
378
+ file_path: str,
379
+ update_tag: int,
380
+ ) -> None:
381
+ """Read a Trivy JSON file from disk and sync to Neo4j."""
382
+ logger.debug(f"Reading scan results from file: {file_path}")
383
+ with open(file_path, encoding="utf-8") as f:
384
+ trivy_data = json.load(f)
385
+
386
+ sync_single_image(neo4j_session, trivy_data, file_path, update_tag)
File without changes
@@ -0,0 +1,77 @@
1
+ from dataclasses import dataclass
2
+
3
+ from cartography.models.core.common import PropertyRef
4
+ from cartography.models.core.nodes import CartographyNodeProperties
5
+ from cartography.models.core.nodes import CartographyNodeSchema
6
+ from cartography.models.core.relationships import CartographyRelProperties
7
+ from cartography.models.core.relationships import CartographyRelSchema
8
+ from cartography.models.core.relationships import LinkDirection
9
+ from cartography.models.core.relationships import make_target_node_matcher
10
+ from cartography.models.core.relationships import OtherRelationships
11
+ from cartography.models.core.relationships import TargetNodeMatcher
12
+
13
+
14
+ @dataclass(frozen=True)
15
+ class EventBridgeRuleNodeProperties(CartographyNodeProperties):
16
+ id: PropertyRef = PropertyRef("Arn")
17
+ arn: PropertyRef = PropertyRef("Arn", extra_index=True)
18
+ name: PropertyRef = PropertyRef("Name")
19
+ region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
20
+ event_pattern: PropertyRef = PropertyRef("EventPattern")
21
+ state: PropertyRef = PropertyRef("State")
22
+ description: PropertyRef = PropertyRef("Description")
23
+ schedule_expression: PropertyRef = PropertyRef("ScheduleExpression")
24
+ role_arn: PropertyRef = PropertyRef("RoleArn")
25
+ managed_by: PropertyRef = PropertyRef("ManagedBy")
26
+ event_bus_name: PropertyRef = PropertyRef("EventBusName")
27
+ lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
28
+
29
+
30
+ @dataclass(frozen=True)
31
+ class EventBridgeRuleToAwsAccountRelProperties(CartographyRelProperties):
32
+ lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
33
+
34
+
35
+ @dataclass(frozen=True)
36
+ class EventBridgeRuleToAWSAccountRel(CartographyRelSchema):
37
+ target_node_label: str = "AWSAccount"
38
+ target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
39
+ {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
40
+ )
41
+ direction: LinkDirection = LinkDirection.INWARD
42
+ rel_label: str = "RESOURCE"
43
+ properties: EventBridgeRuleToAwsAccountRelProperties = (
44
+ EventBridgeRuleToAwsAccountRelProperties()
45
+ )
46
+
47
+
48
+ @dataclass(frozen=True)
49
+ class EventBridgeRuleToAWSRoleRelProperties(CartographyRelProperties):
50
+ lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
51
+
52
+
53
+ @dataclass(frozen=True)
54
+ class EventBridgeRuleToAWSRoleRel(CartographyRelSchema):
55
+ target_node_label: str = "AWSRole"
56
+ target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
57
+ {"arn": PropertyRef("RoleArn")},
58
+ )
59
+ direction: LinkDirection = LinkDirection.OUTWARD
60
+ rel_label: str = "ASSOCIATED_WITH"
61
+ properties: EventBridgeRuleToAWSRoleRelProperties = (
62
+ EventBridgeRuleToAWSRoleRelProperties()
63
+ )
64
+
65
+
66
+ @dataclass(frozen=True)
67
+ class EventBridgeRuleSchema(CartographyNodeSchema):
68
+ label: str = "EventBridgeRule"
69
+ properties: EventBridgeRuleNodeProperties = EventBridgeRuleNodeProperties()
70
+ sub_resource_relationship: EventBridgeRuleToAWSAccountRel = (
71
+ EventBridgeRuleToAWSAccountRel()
72
+ )
73
+ other_relationships: OtherRelationships = OtherRelationships(
74
+ [
75
+ EventBridgeRuleToAWSRoleRel(),
76
+ ]
77
+ )
File without changes
@@ -0,0 +1,51 @@
1
+ from dataclasses import dataclass
2
+
3
+ from cartography.models.core.common import PropertyRef
4
+ from cartography.models.core.nodes import CartographyNodeProperties
5
+ from cartography.models.core.nodes import CartographyNodeSchema
6
+ from cartography.models.core.relationships import CartographyRelProperties
7
+ from cartography.models.core.relationships import CartographyRelSchema
8
+ from cartography.models.core.relationships import LinkDirection
9
+ from cartography.models.core.relationships import make_target_node_matcher
10
+ from cartography.models.core.relationships import TargetNodeMatcher
11
+
12
+
13
+ @dataclass(frozen=True)
14
+ class GlueConnectionNodeProperties(CartographyNodeProperties):
15
+ id: PropertyRef = PropertyRef("Name")
16
+ arn: PropertyRef = PropertyRef("Name", extra_index=True)
17
+ region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
18
+ description: PropertyRef = PropertyRef("Description")
19
+ connection_type: PropertyRef = PropertyRef("ConnectionType")
20
+ status: PropertyRef = PropertyRef("Status")
21
+ status_reason: PropertyRef = PropertyRef("StatusReason")
22
+ authentication_type: PropertyRef = PropertyRef("AuthenticationType")
23
+ secret_arn: PropertyRef = PropertyRef("SecretArn")
24
+ lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class GlueConnectionToAwsAccountRelProperties(CartographyRelProperties):
29
+ lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
30
+
31
+
32
+ @dataclass(frozen=True)
33
+ class GlueConnectionToAWSAccountRel(CartographyRelSchema):
34
+ target_node_label: str = "AWSAccount"
35
+ target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
36
+ {"id": PropertyRef("AWS_ID", set_in_kwargs=True)},
37
+ )
38
+ direction: LinkDirection = LinkDirection.INWARD
39
+ rel_label: str = "RESOURCE"
40
+ properties: GlueConnectionToAwsAccountRelProperties = (
41
+ GlueConnectionToAwsAccountRelProperties()
42
+ )
43
+
44
+
45
+ @dataclass(frozen=True)
46
+ class GlueConnectionSchema(CartographyNodeSchema):
47
+ label: str = "GlueConnection"
48
+ properties: GlueConnectionNodeProperties = GlueConnectionNodeProperties()
49
+ sub_resource_relationship: GlueConnectionToAWSAccountRel = (
50
+ GlueConnectionToAWSAccountRel()
51
+ )