cartography 0.115.0__py3-none-any.whl → 0.116.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (28) hide show
  1. cartography/_version.py +2 -2
  2. cartography/client/core/tx.py +1 -1
  3. cartography/intel/aws/ecr_image_layers.py +664 -0
  4. cartography/intel/aws/resources.py +2 -0
  5. cartography/intel/azure/__init__.py +8 -0
  6. cartography/intel/azure/resource_groups.py +82 -0
  7. cartography/models/aws/ecr/image.py +21 -0
  8. cartography/models/aws/ecr/image_layer.py +107 -0
  9. cartography/models/azure/resource_groups.py +52 -0
  10. cartography/rules/README.md +1 -0
  11. cartography/rules/__init__.py +0 -0
  12. cartography/rules/cli.py +342 -0
  13. cartography/rules/data/__init__.py +0 -0
  14. cartography/rules/data/frameworks/__init__.py +12 -0
  15. cartography/rules/data/frameworks/mitre_attack/__init__.py +14 -0
  16. cartography/rules/data/frameworks/mitre_attack/requirements/__init__.py +0 -0
  17. cartography/rules/data/frameworks/mitre_attack/requirements/t1190_exploit_public_facing_application/__init__.py +135 -0
  18. cartography/rules/formatters.py +46 -0
  19. cartography/rules/runners.py +338 -0
  20. cartography/rules/spec/__init__.py +0 -0
  21. cartography/rules/spec/model.py +88 -0
  22. cartography/rules/spec/result.py +46 -0
  23. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/METADATA +18 -4
  24. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/RECORD +28 -11
  25. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/entry_points.txt +1 -0
  26. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/WHEEL +0 -0
  27. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/licenses/LICENSE +0 -0
  28. {cartography-0.115.0.dist-info → cartography-0.116.0.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.115.0'
32
- __version_tuple__ = version_tuple = (0, 115, 0)
31
+ __version__ = version = '0.116.0'
32
+ __version_tuple__ = version_tuple = (0, 116, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -42,7 +42,7 @@ def read_list_of_values_tx(
42
42
  Example usage:
43
43
  query = "MATCH (a:TestNode) RETURN a.name ORDER BY a.name"
44
44
 
45
- values = neo4j_session.read_transaction(read_list_of_values_tx, query)
45
+ values = neo4j_session.execute_read(read_list_of_values_tx, query)
46
46
 
47
47
  :param tx: A neo4j read transaction object
48
48
  :param query: A neo4j query string that returns a list of single values. For example,
@@ -0,0 +1,664 @@
1
+ """
2
+ ECR Image Layers module - fetches and syncs detailed container image layer information.
3
+
4
+ This is separate from the main ECR module to allow independent execution since layer
5
+ fetching can be significantly slower than basic ECR repository/image syncing.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ from typing import Any
12
+ from typing import Optional
13
+
14
+ import aioboto3
15
+ import boto3
16
+ import httpx
17
+ import neo4j
18
+ from botocore.exceptions import ClientError
19
+ from types_aiobotocore_ecr import ECRClient
20
+
21
+ from cartography.client.core.tx import load
22
+ from cartography.graph.job import GraphJob
23
+ from cartography.models.aws.ecr.image import ECRImageSchema
24
+ from cartography.models.aws.ecr.image_layer import ECRImageLayerSchema
25
+ from cartography.util import timeit
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ EMPTY_LAYER_DIFF_ID = (
31
+ "sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef"
32
+ )
33
+
34
+ # ECR manifest media types
35
+ ECR_DOCKER_INDEX_MT = "application/vnd.docker.distribution.manifest.list.v2+json"
36
+ ECR_DOCKER_MANIFEST_MT = "application/vnd.docker.distribution.manifest.v2+json"
37
+ ECR_OCI_INDEX_MT = "application/vnd.oci.image.index.v1+json"
38
+ ECR_OCI_MANIFEST_MT = "application/vnd.oci.image.manifest.v1+json"
39
+
40
+ ALL_ACCEPTED = [
41
+ ECR_OCI_INDEX_MT,
42
+ ECR_DOCKER_INDEX_MT,
43
+ ECR_OCI_MANIFEST_MT,
44
+ ECR_DOCKER_MANIFEST_MT,
45
+ ]
46
+
47
+ INDEX_MEDIA_TYPES = {ECR_OCI_INDEX_MT, ECR_DOCKER_INDEX_MT}
48
+ INDEX_MEDIA_TYPES_LOWER = {mt.lower() for mt in INDEX_MEDIA_TYPES}
49
+
50
+ # Media types that should be skipped when processing manifests
51
+ SKIP_CONFIG_MEDIA_TYPE_FRAGMENTS = {"buildkit", "attestation", "in-toto"}
52
+
53
+
54
def extract_repo_uri_from_image_uri(image_uri: str) -> str:
    """
    Extract repository URI from image URI by removing tag or digest.

    Handles digest references (any digest algorithm), tag references, and bare
    repository URIs, and is careful not to strip a registry port from an
    untagged URI.

    Examples:
        "repo@sha256:digest" -> "repo"
        "repo:tag" -> "repo"
        "repo" -> "repo"
        "registry:5000/repo" -> "registry:5000/repo"

    :param image_uri: Full image URI, optionally ending in a tag or digest.
    :return: The repository URI without any tag or digest suffix.
    """
    # Digest references use "@<algorithm>:<hex>"; everything before "@" is the
    # repository URI. (Checking "@" rather than "@sha256:" also covers other
    # digest algorithms.)
    if "@" in image_uri:
        return image_uri.split("@", 1)[0]
    # A trailing ":<tag>" never contains "/", whereas a registry port
    # ("host:5000/repo") always has a "/" after the colon — only strip the
    # former.
    prefix, sep, suffix = image_uri.rpartition(":")
    if sep and "/" not in suffix:
        return prefix
    return image_uri
69
+
70
+
71
def extract_platform_from_manifest(manifest_ref: dict) -> str:
    """Derive an "os/architecture[/variant]" string from a manifest list entry.

    Reads the entry's "platform" sub-dict; missing components fall back to
    "unknown" via _format_platform.
    """
    info = manifest_ref.get("platform", {})
    os_name = info.get("os")
    arch = info.get("architecture")
    variant = info.get("variant")
    return _format_platform(os_name, arch, variant)
79
+
80
+
81
+ def _format_platform(
82
+ os_name: Optional[str],
83
+ architecture: Optional[str],
84
+ variant: Optional[str] = None,
85
+ ) -> str:
86
+ components = [os_name or "unknown", architecture or "unknown"]
87
+ if variant:
88
+ components.append(variant)
89
+ return "/".join(components)
90
+
91
+
92
async def batch_get_manifest(
    ecr_client: ECRClient, repo: str, image_ref: str, accepted_media_types: list[str]
) -> tuple[dict, str]:
    """Get image manifest using batch_get_image API.

    :param ecr_client: aiobotocore ECR client.
    :param repo: ECR repository name (not the full URI).
    :param image_ref: Either a "sha256:..." digest or a tag name.
    :param accepted_media_types: Manifest media types to request from ECR.
    :return: (parsed manifest JSON, manifest media type); ({}, "") when the
        image does not exist or the response contains no images.
    :raises ClientError: For any AWS error other than ImageNotFoundException
        (e.g. throttling), so callers fail loudly instead of skipping silently.
    """
    try:
        resp = await ecr_client.batch_get_image(
            repositoryName=repo,
            # batch_get_image accepts either a digest or a tag identifier;
            # digests are recognizable by their "sha256:" prefix.
            imageIds=(
                [{"imageDigest": image_ref}]
                if image_ref.startswith("sha256:")
                else [{"imageTag": image_ref}]
            ),
            acceptedMediaTypes=accepted_media_types,
        )
    except ClientError as error:
        error_code = error.response.get("Error", {}).get("Code", "")
        if error_code == "ImageNotFoundException":
            # Missing images are tolerated (e.g. deleted between listing and
            # fetch); the caller treats ({}, "") as "skip this image".
            logger.warning(
                "Image %s:%s not found while fetching manifest", repo, image_ref
            )
            return {}, ""
        # Fail loudly on throttling or unexpected AWS errors
        logger.error(
            "Failed to get manifest for %s:%s due to AWS error %s",
            repo,
            image_ref,
            error_code,
        )
        raise
    except Exception:
        logger.exception(
            "Unexpected error fetching manifest for %s:%s", repo, image_ref
        )
        raise

    # A successful call can still return an empty "images" list; treat it the
    # same as a not-found image.
    if not resp.get("images"):
        logger.warning(f"No image found for {repo}:{image_ref}")
        return {}, ""

    manifest_json = json.loads(resp["images"][0]["imageManifest"])
    media_type = resp["images"][0].get("imageManifestMediaType", "")
    return manifest_json, media_type
134
+
135
+
136
async def get_blob_json_via_presigned(
    ecr_client: ECRClient,
    repo: str,
    digest: str,
    http_client: httpx.AsyncClient,
) -> dict:
    """Download and parse JSON blob using presigned URL.

    Requests a presigned download URL from ECR for the given layer/config
    digest, then fetches and JSON-decodes it over plain HTTP.

    :param ecr_client: aiobotocore ECR client.
    :param repo: ECR repository name.
    :param digest: Blob digest (e.g. an image config digest).
    :param http_client: Shared httpx client used to GET the presigned URL.
    :return: Parsed JSON body of the blob.
    :raises ClientError: If ECR refuses to issue a download URL.
    :raises httpx.HTTPError: On any HTTP failure (including non-2xx status).
    """
    try:
        url_response = await ecr_client.get_download_url_for_layer(
            repositoryName=repo,
            layerDigest=digest,
        )
    except ClientError as error:
        logger.error(
            "Failed to request download URL for layer %s in repo %s: %s",
            digest,
            repo,
            error.response.get("Error", {}).get("Code", "unknown"),
        )
        raise

    url = url_response["downloadUrl"]
    try:
        # 30s per-request timeout; raise_for_status() converts non-2xx
        # responses into httpx.HTTPError so they hit the except below.
        response = await http_client.get(url, timeout=30.0)
        response.raise_for_status()
    except httpx.HTTPError as error:
        logger.error(
            "HTTP error downloading blob %s for repo %s: %s",
            digest,
            repo,
            error,
        )
        raise

    return response.json()
171
+
172
+
173
async def _diff_ids_for_manifest(
    ecr_client: ECRClient,
    repo_name: str,
    manifest_doc: dict[str, Any],
    http_client: httpx.AsyncClient,
    platform_hint: Optional[str],
) -> dict[str, list[str]]:
    """Resolve the ordered layer diff_ids for a single (non-index) manifest.

    Downloads the manifest's config blob and reads rootfs.diff_ids from it.
    Returns a single-entry map of platform string -> ordered diff_ids, or {}
    when the manifest should be skipped (buildkit/attestation/in-toto
    artifacts) or yields no usable data.

    :param platform_hint: Platform string taken from the parent index entry,
        if any; when absent, the platform is read from the config blob itself.
    """
    config = manifest_doc.get("config", {})
    config_media_type = config.get("mediaType", "").lower()

    # Skip certain media types
    if any(
        skip_fragment in config_media_type
        for skip_fragment in SKIP_CONFIG_MEDIA_TYPE_FRAGMENTS
    ):
        return {}

    # A manifest whose layers are all in-toto attestations carries no real
    # filesystem layers — skip it too.
    layers = manifest_doc.get("layers", [])
    if layers and all(
        "in-toto" in layer.get("mediaType", "").lower() for layer in layers
    ):
        return {}

    cfg_digest = config.get("digest")
    if not cfg_digest:
        return {}

    cfg_json = await get_blob_json_via_presigned(
        ecr_client,
        repo_name,
        cfg_digest,
        http_client,
    )
    if not cfg_json:
        return {}

    # Docker API uses inconsistent casing - check for known variations
    rootfs = cfg_json.get("rootfs") or cfg_json.get("RootFS") or {}
    diff_ids = rootfs.get("diff_ids") or rootfs.get("DiffIDs") or []
    if not diff_ids:
        return {}

    if platform_hint:
        platform = platform_hint
    else:
        # Docker API uses inconsistent casing for platform components
        platform = _format_platform(
            cfg_json.get("os") or cfg_json.get("OS"),
            cfg_json.get("architecture") or cfg_json.get("Architecture"),
            cfg_json.get("variant") or cfg_json.get("Variant"),
        )

    return {platform: diff_ids}
226
+
227
+
228
def transform_ecr_image_layers(
    image_layers_data: dict[str, dict[str, list[str]]],
    image_digest_map: dict[str, str],
) -> tuple[list[dict], list[dict]]:
    """
    Transform image layer data into the shape expected by Neo4j ingestion.

    Builds a linked-list structure: each layer records the diff_ids that
    follow it (NEXT relationships) plus the image digests for which it is the
    first (HEAD) or last (TAIL) layer.

    :param image_layers_data: Map of image URI to platform to ordered diff_ids
    :param image_digest_map: Map of image URI to image digest
    :return: (layer dicts, membership dicts) ready for ingestion
    """
    layer_index: dict[str, dict[str, Any]] = {}
    membership_index: dict[str, dict[str, Any]] = {}

    for uri, platform_map in image_layers_data.items():
        # fetch_image_layers_async guarantees every uri in image_layers_data
        # has a corresponding digest.
        digest = image_digest_map[uri]
        first_chain: Optional[list[str]] = None

        for chain in platform_map.values():
            if not chain:
                continue
            # The first non-empty platform chain represents the image's
            # layer membership.
            if first_chain is None:
                first_chain = list(chain)

            last_index = len(chain) - 1
            for position, diff_id in enumerate(chain):
                entry = layer_index.get(diff_id)
                if entry is None:
                    entry = {
                        "diff_id": diff_id,
                        "is_empty": diff_id == EMPTY_LAYER_DIFF_ID,
                        "next_diff_ids": set(),
                        "head_image_ids": set(),
                        "tail_image_ids": set(),
                    }
                    layer_index[diff_id] = entry

                # NEXT edge to the following layer, if any.
                if position < last_index:
                    entry["next_diff_ids"].add(chain[position + 1])
                # HEAD/TAIL markers for this image.
                if position == 0:
                    entry["head_image_ids"].add(digest)
                if position == last_index:
                    entry["tail_image_ids"].add(digest)

        if first_chain:
            membership_index[digest] = {"layer_diff_ids": first_chain}

    # Sets were only for de-duplication; emit lists (dropping empty ones)
    # for Neo4j ingestion.
    layers: list[dict] = []
    for entry in layer_index.values():
        record: dict[str, Any] = {
            "diff_id": entry["diff_id"],
            "is_empty": entry["is_empty"],
        }
        for key in ("next_diff_ids", "head_image_ids", "tail_image_ids"):
            if entry[key]:
                record[key] = list(entry[key])
        layers.append(record)

    # Flatten memberships, attaching the imageDigest key.
    memberships = [
        {"imageDigest": digest, **data} for digest, data in membership_index.items()
    ]

    return layers, memberships
307
+
308
+
309
@timeit
def load_ecr_image_layers(
    neo4j_session: neo4j.Session,
    image_layers: list[dict],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """Load image layers into Neo4j.

    :param neo4j_session: Neo4j session.
    :param image_layers: Layer dicts produced by transform_ecr_image_layers.
    :param region: AWS region — used only in the log message here; it is not
        passed to the load call.
    :param current_aws_account_id: AWS account id, bound as AWS_ID.
    :param aws_update_tag: Sync tag stamped on nodes as lastupdated.
    """
    logger.info(
        f"Loading {len(image_layers)} image layers for region {region} into graph.",
    )

    load(
        neo4j_session,
        ECRImageLayerSchema(),
        image_layers,
        lastupdated=aws_update_tag,
        AWS_ID=current_aws_account_id,
    )
329
+
330
+
331
@timeit
def load_ecr_image_layer_memberships(
    neo4j_session: neo4j.Session,
    memberships: list[dict[str, Any]],
    region: str,
    current_aws_account_id: str,
    aws_update_tag: int,
) -> None:
    """Load image->layer membership records via the ECRImage schema.

    :param neo4j_session: Neo4j session.
    :param memberships: Dicts with "imageDigest" and "layer_diff_ids" keys, as
        produced by transform_ecr_image_layers.
    :param region: AWS region, bound as Region on the load.
    :param current_aws_account_id: AWS account id, bound as AWS_ID.
    :param aws_update_tag: Sync tag stamped on nodes as lastupdated.
    """
    load(
        neo4j_session,
        ECRImageSchema(),
        memberships,
        lastupdated=aws_update_tag,
        Region=region,
        AWS_ID=current_aws_account_id,
    )
347
+
348
+
349
async def fetch_image_layers_async(
    ecr_client: ECRClient,
    repo_images_list: list[dict],
    max_concurrent: int = 200,
) -> tuple[dict[str, dict[str, list[str]]], dict[str, str]]:
    """
    Fetch image layers for ECR images in parallel with caching and non-blocking I/O.

    :param ecr_client: aiobotocore ECR client used for all manifest/blob calls.
    :param repo_images_list: Dicts with required keys "uri", "imageDigest" and
        "repo_uri" (the caller, sync(), guarantees these are present).
    :param max_concurrent: Maximum simultaneous per-image fetches (semaphore size).
    :return: (map of image URI -> platform -> ordered diff_ids,
              map of image URI -> image digest)
    """
    image_layers_data: dict[str, dict[str, list[str]]] = {}
    image_digest_map: dict[str, str] = {}
    semaphore = asyncio.Semaphore(max_concurrent)

    # Cache for manifest fetches keyed by (repo_name, imageDigest)
    manifest_cache: dict[tuple[str, str], tuple[dict, str]] = {}
    # Lock for thread-safe cache access
    cache_lock = asyncio.Lock()
    # In-flight requests to coalesce duplicate fetches
    inflight: dict[tuple[str, str], asyncio.Task] = {}

    async def _fetch_and_cache_manifest(
        repo_name: str, digest_or_tag: str, accepted: list[str]
    ) -> tuple[dict, str]:
        """
        Fetch and cache manifest with double-checked locking and in-flight coalescing.
        """
        key = (repo_name, digest_or_tag)

        # Fast path: check cache without lock
        if key in manifest_cache:
            return manifest_cache[key]

        # Check for existing in-flight request
        task = inflight.get(key)
        if task is None:
            # Create new task for this manifest
            async def _do() -> tuple[dict, str]:
                # Fetch without holding the lock
                doc, mt = await batch_get_manifest(
                    ecr_client, repo_name, digest_or_tag, accepted
                )
                # Store result under lock (second check to avoid races):
                # setdefault keeps the first stored value if another task won.
                async with cache_lock:
                    return manifest_cache.setdefault(key, (doc, mt))

            task = asyncio.create_task(_do())
            inflight[key] = task

        try:
            return await task
        finally:
            # Clean up inflight entry (pop is idempotent across multiple
            # awaiters of the same coalesced task)
            inflight.pop(key, None)

    async def fetch_single_image_layers(
        repo_image: dict,
        http_client: httpx.AsyncClient,
    ) -> Optional[tuple[str, str, dict[str, list[str]]]]:
        """Fetch layers for a single image.

        Returns (uri, digest, platform->diff_ids) or None when the image
        yields no usable layer data.
        """
        async with semaphore:
            # Caller guarantees these fields exist in every repo_image
            uri = repo_image["uri"]
            digest = repo_image["imageDigest"]
            repo_uri = repo_image["repo_uri"]

            # Extract repository name: repo_uri is "<registry-host>/<repo-name>"
            parts = repo_uri.split("/", 1)
            if len(parts) != 2:
                raise ValueError(f"Unexpected ECR repository URI format: {repo_uri}")
            repo_name = parts[1]

            # Get manifest using optimized caching
            doc, media_type = await _fetch_and_cache_manifest(
                repo_name, digest, ALL_ACCEPTED
            )

            if not doc:
                return None

            manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
            platform_layers: dict[str, list[str]] = {}

            # Multi-arch index: fan out to each child (per-platform) manifest.
            if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:

                async def _process_child_manifest(
                    manifest_ref: dict,
                ) -> dict[str, list[str]]:
                    # Skip attestation manifests - these aren't real images
                    if (
                        manifest_ref.get("annotations", {}).get(
                            "vnd.docker.reference.type"
                        )
                        == "attestation-manifest"
                    ):
                        return {}

                    child_digest = manifest_ref.get("digest")
                    if not child_digest:
                        return {}

                    # Use optimized caching for child manifest
                    child_doc, _ = await _fetch_and_cache_manifest(
                        repo_name,
                        child_digest,
                        [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
                    )
                    if not child_doc:
                        return {}

                    platform_hint = extract_platform_from_manifest(manifest_ref)
                    return await _diff_ids_for_manifest(
                        ecr_client,
                        repo_name,
                        child_doc,
                        http_client,
                        platform_hint,
                    )

                # Process all child manifests in parallel
                child_tasks = [
                    _process_child_manifest(manifest_ref)
                    for manifest_ref in doc.get("manifests", [])
                ]
                child_results = await asyncio.gather(
                    *child_tasks, return_exceptions=True
                )

                # Merge results from successful child manifest processing.
                # NOTE(review): exceptions returned by gather(return_exceptions=True)
                # fail the isinstance check and are silently discarded — confirm
                # this best-effort behavior is intended (no log is emitted).
                for result in child_results:
                    if isinstance(result, dict):
                        platform_layers.update(result)
            else:
                # Single-platform manifest: resolve its config blob directly.
                diff_map = await _diff_ids_for_manifest(
                    ecr_client,
                    repo_name,
                    doc,
                    http_client,
                    None,
                )
                platform_layers.update(diff_map)

            if platform_layers:
                return uri, digest, platform_layers

            return None

    async with httpx.AsyncClient() as http_client:
        # Create tasks for all images
        tasks = [
            asyncio.create_task(
                fetch_single_image_layers(repo_image, http_client),
            )
            for repo_image in repo_images_list
        ]

        # Process with progress logging
        total = len(tasks)
        logger.info(
            f"Fetching layers for {total} images with {max_concurrent} concurrent connections..."
        )

        if not tasks:
            return image_layers_data, image_digest_map

        # Log roughly every 10% of progress, capped at every 100 images.
        progress_interval = max(1, min(100, total // 10 or 1))
        completed = 0

        for task in asyncio.as_completed(tasks):
            result = await task
            completed += 1

            if completed % progress_interval == 0 or completed == total:
                percent = (completed / total) * 100
                logger.info(
                    "Fetched layer metadata for %d/%d images (%.1f%%)",
                    completed,
                    total,
                    percent,
                )

            if result:
                uri, digest, layer_data = result
                if not digest:
                    raise ValueError(f"Empty digest returned for image {uri}")
                image_layers_data[uri] = layer_data
                image_digest_map[uri] = digest

        logger.info(
            f"Successfully fetched layers for {len(image_layers_data)}/{len(repo_images_list)} images"
        )
    return image_layers_data, image_digest_map
539
+
540
+
541
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
    """Run the ECRImageLayer cleanup job to remove stale nodes/relationships."""
    logger.debug("Running image layer cleanup job.")
    GraphJob.from_node_schema(ECRImageLayerSchema(), common_job_parameters).run(
        neo4j_session
    )
546
+
547
+
548
@timeit
def sync(
    neo4j_session: neo4j.Session,
    boto3_session: boto3.session.Session,
    regions: list[str],
    current_aws_account_id: str,
    update_tag: int,
    common_job_parameters: dict,
) -> None:
    """
    Sync ECR image layers. This fetches detailed layer information for ECR images
    that already exist in the graph.

    Prerequisites: Basic ECR data (repositories and images) must already be loaded
    via the 'ecr' module before running this.

    Layer fetching can be slow for accounts with many container images.

    :param neo4j_session: Neo4j session to read existing images and load layers.
    :param boto3_session: boto3 session whose credentials are reused for the
        async aioboto3 ECR client.
    :param regions: AWS regions to process.
    :param current_aws_account_id: Account whose images are synced.
    :param update_tag: Sync tag stamped on loaded nodes as lastupdated.
    :param common_job_parameters: Parameters forwarded to the cleanup job.
    """

    for region in regions:
        logger.info(
            "Syncing ECR image layers for region '%s' in account '%s'.",
            region,
            current_aws_account_id,
        )

        # Get ECR images from graph using standard client function
        # (imported lazily here rather than at module top).
        from cartography.client.aws.ecr import get_ecr_images

        ecr_images = get_ecr_images(neo4j_session, current_aws_account_id)

        # Filter by region and deduplicate by digest
        repo_images_list = []
        seen_digests = set()

        # Each row unpacks as (region, <unused>, uri, <unused>, digest).
        for region_name, _, uri, _, digest in ecr_images:
            if region_name == region and digest not in seen_digests:
                seen_digests.add(digest)
                repo_uri = extract_repo_uri_from_image_uri(uri)

                # Create digest-based URI for manifest fetching
                digest_uri = f"{repo_uri}@{digest}"

                repo_images_list.append(
                    {
                        "imageDigest": digest,
                        "uri": digest_uri,
                        "repo_uri": repo_uri,
                    }
                )

        logger.info(
            f"Found {len(repo_images_list)} distinct ECR image digests in graph for region {region}"
        )

        if not repo_images_list:
            logger.warning(
                f"No ECR images found in graph for region {region}. "
                f"Run 'ecr' sync first to populate basic ECR data."
            )
            continue

        # Fetch and load image layers using async ECR client
        # NOTE(review): this condition is always true — the empty case
        # already hit `continue` above.
        if repo_images_list:
            logger.info(
                f"Starting to fetch layers for {len(repo_images_list)} images..."
            )

            async def _fetch_with_async_client() -> (
                tuple[dict[str, dict[str, list[str]]], dict[str, str]]
            ):
                # Use credentials from the existing boto3 session
                credentials = boto3_session.get_credentials()
                session = aioboto3.Session(
                    aws_access_key_id=credentials.access_key,
                    aws_secret_access_key=credentials.secret_key,
                    aws_session_token=credentials.token,
                    region_name=region,
                )
                async with session.client("ecr") as ecr_client:
                    return await fetch_image_layers_async(ecr_client, repo_images_list)

            # Use get_event_loop() + run_until_complete() to avoid tearing down loop
            # NOTE(review): asyncio.get_event_loop() outside a running loop is
            # deprecated in modern Python — consider asyncio.run() or an
            # explicitly managed loop; verify against the supported versions.
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                # No event loop in current thread, create one
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)

            image_layers_data, image_digest_map = loop.run_until_complete(
                _fetch_with_async_client()
            )

            logger.info(
                f"Successfully fetched layers for {len(image_layers_data)} images"
            )
            layers, memberships = transform_ecr_image_layers(
                image_layers_data,
                image_digest_map,
            )
            load_ecr_image_layers(
                neo4j_session,
                layers,
                region,
                current_aws_account_id,
                update_tag,
            )
            load_ecr_image_layer_memberships(
                neo4j_session,
                memberships,
                region,
                current_aws_account_id,
                update_tag,
            )

    # Cleanup runs once after all regions have been processed.
    cleanup(neo4j_session, common_job_parameters)
@@ -14,6 +14,7 @@ from . import cognito
14
14
  from . import config
15
15
  from . import dynamodb
16
16
  from . import ecr
17
+ from . import ecr_image_layers
17
18
  from . import ecs
18
19
  from . import efs
19
20
  from . import eks
@@ -88,6 +89,7 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
88
89
  "ec2:volumes": sync_ebs_volumes,
89
90
  "ec2:snapshots": sync_ebs_snapshots,
90
91
  "ecr": ecr.sync,
92
+ "ecr:image_layers": ecr_image_layers.sync,
91
93
  "ecs": ecs.sync,
92
94
  "eks": eks.sync,
93
95
  "elasticache": elasticache.sync,