cartography 0.116.1__py3-none-any.whl → 0.118.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic; review the file changes below for details.

Files changed (70)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +11 -0
  3. cartography/client/core/tx.py +23 -2
  4. cartography/config.py +5 -0
  5. cartography/graph/job.py +6 -2
  6. cartography/graph/statement.py +4 -0
  7. cartography/intel/aws/__init__.py +1 -0
  8. cartography/intel/aws/apigateway.py +18 -5
  9. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  10. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  11. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  12. cartography/intel/aws/ec2/network_interfaces.py +4 -0
  13. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  14. cartography/intel/aws/ec2/tgw.py +11 -5
  15. cartography/intel/aws/ec2/volumes.py +1 -1
  16. cartography/intel/aws/ecr.py +202 -26
  17. cartography/intel/aws/ecr_image_layers.py +174 -21
  18. cartography/intel/aws/elasticsearch.py +13 -4
  19. cartography/intel/aws/identitycenter.py +93 -54
  20. cartography/intel/aws/inspector.py +26 -14
  21. cartography/intel/aws/permission_relationships.py +3 -3
  22. cartography/intel/aws/s3.py +26 -13
  23. cartography/intel/aws/ssm.py +3 -5
  24. cartography/intel/azure/__init__.py +16 -0
  25. cartography/intel/azure/compute.py +9 -4
  26. cartography/intel/azure/container_instances.py +95 -0
  27. cartography/intel/azure/cosmosdb.py +31 -15
  28. cartography/intel/azure/data_lake.py +124 -0
  29. cartography/intel/azure/sql.py +25 -12
  30. cartography/intel/azure/storage.py +19 -9
  31. cartography/intel/azure/subscription.py +3 -1
  32. cartography/intel/crowdstrike/spotlight.py +5 -2
  33. cartography/intel/entra/app_role_assignments.py +9 -2
  34. cartography/intel/gcp/__init__.py +26 -9
  35. cartography/intel/gcp/clients.py +8 -4
  36. cartography/intel/gcp/compute.py +39 -18
  37. cartography/intel/gcp/crm/folders.py +9 -3
  38. cartography/intel/gcp/crm/orgs.py +8 -3
  39. cartography/intel/gcp/crm/projects.py +14 -3
  40. cartography/intel/github/teams.py +3 -3
  41. cartography/intel/jamf/computers.py +7 -1
  42. cartography/intel/oci/iam.py +23 -9
  43. cartography/intel/oci/organizations.py +3 -1
  44. cartography/intel/oci/utils.py +28 -5
  45. cartography/intel/okta/awssaml.py +8 -7
  46. cartography/intel/pagerduty/escalation_policies.py +13 -6
  47. cartography/intel/pagerduty/schedules.py +9 -4
  48. cartography/intel/pagerduty/services.py +7 -3
  49. cartography/intel/pagerduty/teams.py +5 -2
  50. cartography/intel/pagerduty/users.py +3 -1
  51. cartography/intel/pagerduty/vendors.py +3 -1
  52. cartography/intel/trivy/__init__.py +109 -58
  53. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  54. cartography/models/aws/ecr/image.py +38 -1
  55. cartography/models/aws/ecr/repository_image.py +1 -1
  56. cartography/models/azure/container_instance.py +55 -0
  57. cartography/models/azure/data_lake_filesystem.py +51 -0
  58. cartography/rules/cli.py +8 -6
  59. cartography/rules/data/frameworks/mitre_attack/__init__.py +7 -1
  60. cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +317 -0
  61. cartography/rules/data/frameworks/mitre_attack/requirements/t1190_exploit_public_facing_application/__init__.py +1 -0
  62. cartography/rules/spec/model.py +13 -0
  63. cartography/sync.py +1 -1
  64. cartography/util.py +5 -1
  65. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/METADATA +5 -4
  66. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/RECORD +70 -65
  67. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/WHEEL +0 -0
  68. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/entry_points.txt +0 -0
  69. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/licenses/LICENSE +0 -0
  70. {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import logging
2
3
  from typing import Any
3
4
  from typing import Dict
@@ -18,6 +19,12 @@ from cartography.util import to_synchronous
18
19
 
19
20
  logger = logging.getLogger(__name__)
20
21
 
22
+ # Manifest list media types
23
+ MANIFEST_LIST_MEDIA_TYPES = {
24
+ "application/vnd.docker.distribution.manifest.list.v2+json",
25
+ "application/vnd.oci.image.index.v1+json",
26
+ }
27
+
21
28
 
22
29
  @timeit
23
30
  @aws_handle_regions
@@ -34,6 +41,84 @@ def get_ecr_repositories(
34
41
  return ecr_repositories
35
42
 
36
43
 
44
+ def _get_platform_specific_digests(
45
+ client: Any, repository_name: str, manifest_list_digest: str
46
+ ) -> tuple[List[Dict[str, Any]], set[str]]:
47
+ """
48
+ Fetch manifest list and extract platform-specific image digests and attestations.
49
+
50
+ Returns:
51
+ - List of all images (platform-specific + attestations) with digest, type, architecture, os, variant
52
+ - Set of ALL digests referenced in the manifest list
53
+ """
54
+ response = client.batch_get_image(
55
+ repositoryName=repository_name,
56
+ imageIds=[{"imageDigest": manifest_list_digest}],
57
+ acceptedMediaTypes=list(MANIFEST_LIST_MEDIA_TYPES),
58
+ )
59
+
60
+ if not response.get("images"):
61
+ raise ValueError(
62
+ f"No manifest list found for digest {manifest_list_digest} in repository {repository_name}"
63
+ )
64
+
65
+ # batch_get_image returns a single manifest list (hence [0])
66
+ # The manifests[] array inside contains all platform-specific images and attestations
67
+ manifest_json = json.loads(response["images"][0]["imageManifest"])
68
+ manifests = manifest_json.get("manifests", [])
69
+
70
+ if not manifests:
71
+ raise ValueError(
72
+ f"Manifest list {manifest_list_digest} has no manifests in repository {repository_name}"
73
+ )
74
+
75
+ all_images = []
76
+ all_referenced_digests = set()
77
+
78
+ for manifest_ref in manifests:
79
+ digest = manifest_ref.get("digest")
80
+ if not digest:
81
+ raise ValueError(
82
+ f"Manifest in list {manifest_list_digest} has no digest in repository {repository_name}"
83
+ )
84
+
85
+ all_referenced_digests.add(digest)
86
+
87
+ platform_info = manifest_ref.get("platform", {})
88
+ architecture = platform_info.get("architecture")
89
+ os_name = platform_info.get("os")
90
+
91
+ # Determine if this is an attestation
92
+ annotations = manifest_ref.get("annotations", {})
93
+ is_attestation = (
94
+ architecture == "unknown" and os_name == "unknown"
95
+ ) or annotations.get("vnd.docker.reference.type") == "attestation-manifest"
96
+
97
+ all_images.append(
98
+ {
99
+ "digest": digest,
100
+ "type": "attestation" if is_attestation else "image",
101
+ "architecture": architecture,
102
+ "os": os_name,
103
+ "variant": platform_info.get("variant"),
104
+ "attestation_type": (
105
+ annotations.get("vnd.docker.reference.type")
106
+ if is_attestation
107
+ else None
108
+ ),
109
+ "attests_digest": (
110
+ annotations.get("vnd.docker.reference.digest")
111
+ if is_attestation
112
+ else None
113
+ ),
114
+ "media_type": manifest_ref.get("mediaType"),
115
+ "artifact_media_type": manifest_ref.get("artifactType"),
116
+ }
117
+ )
118
+
119
+ return all_images, all_referenced_digests
120
+
121
+
37
122
  @timeit
38
123
  @aws_handle_regions
39
124
  def get_ecr_repository_images(
@@ -46,7 +131,11 @@ def get_ecr_repository_images(
46
131
  )
47
132
  client = boto3_session.client("ecr", region_name=region)
48
133
  list_paginator = client.get_paginator("list_images")
49
- ecr_repository_images: List[Dict] = []
134
+
135
+ # First pass: Collect all image details and track manifest list referenced digests
136
+ all_image_details: List[Dict] = []
137
+ manifest_list_referenced_digests: set[str] = set()
138
+
50
139
  for page in list_paginator.paginate(repositoryName=repository_name):
51
140
  image_ids = page["imageIds"]
52
141
  if not image_ids:
@@ -58,14 +147,37 @@ def get_ecr_repository_images(
58
147
  for response in describe_response:
59
148
  image_details = response["imageDetails"]
60
149
  for detail in image_details:
61
- tags = detail.get("imageTags") or []
62
- if tags:
63
- for tag in tags:
64
- image_detail = {**detail, "imageTag": tag}
65
- image_detail.pop("imageTags", None)
66
- ecr_repository_images.append(image_detail)
67
- else:
68
- ecr_repository_images.append({**detail})
150
+ # Check if this is a manifest list
151
+ media_type = detail.get("imageManifestMediaType")
152
+ if media_type in MANIFEST_LIST_MEDIA_TYPES:
153
+ # Fetch all images from manifest list (platform-specific + attestations)
154
+ manifest_list_digest = detail["imageDigest"]
155
+ manifest_images, all_digests = _get_platform_specific_digests(
156
+ client, repository_name, manifest_list_digest
157
+ )
158
+ detail["_manifest_images"] = manifest_images
159
+
160
+ # Track ALL digests so we don't create ECRRepositoryImages for them
161
+ manifest_list_referenced_digests.update(all_digests)
162
+
163
+ all_image_details.append(detail)
164
+
165
+ # Second pass: Only add images that should have ECRRepositoryImage nodes
166
+ ecr_repository_images: List[Dict] = []
167
+ for detail in all_image_details:
168
+ tags = detail.get("imageTags") or []
169
+ digest = detail.get("imageDigest")
170
+
171
+ if tags:
172
+ # Tagged images always get ECRRepositoryImage nodes (one per tag)
173
+ for tag in tags:
174
+ image_detail = {**detail, "imageTag": tag}
175
+ image_detail.pop("imageTags", None)
176
+ ecr_repository_images.append(image_detail)
177
+ elif digest not in manifest_list_referenced_digests:
178
+ # Untagged images only get nodes if they're NOT part of a manifest list
179
+ ecr_repository_images.append({**detail})
180
+
69
181
  return ecr_repository_images
70
182
 
71
183
 
@@ -91,52 +203,115 @@ def load_ecr_repositories(
91
203
 
92
204
 
93
205
  @timeit
94
- def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
206
+ def transform_ecr_repository_images(repo_data: Dict) -> tuple[List[Dict], List[Dict]]:
95
207
  """
96
- Ensure that we only load ECRImage nodes to the graph if they have a defined imageDigest field.
97
- Process repositories in a consistent order to handle overlapping image digests deterministically.
208
+ Transform ECR repository images into repo image list and ECR image list.
209
+ For manifest lists, creates ECR images for manifest list, platform-specific images, and attestations.
210
+
211
+ Returns:
212
+ - repo_images_list: List of ECRRepositoryImage nodes with imageDigests field (one-to-many)
213
+ - ecr_images_list: List of ECRImage nodes with type, architecture, os, variant fields
98
214
  """
99
215
  repo_images_list = []
216
+ ecr_images_dict: Dict[str, Dict] = {} # Deduplicate by digest
217
+
100
218
  # Sort repository URIs to ensure consistent processing order
101
219
  for repo_uri in sorted(repo_data.keys()):
102
220
  repo_images = repo_data[repo_uri]
103
221
  for img in repo_images:
104
222
  digest = img.get("imageDigest")
105
- if digest:
106
- tag = img.get("imageTag")
107
- uri = repo_uri + (f":{tag}" if tag else "")
108
- img["repo_uri"] = repo_uri
109
- img["uri"] = uri
110
- img["id"] = uri
111
- repo_images_list.append(img)
112
- else:
223
+ if not digest:
113
224
  logger.warning(
114
225
  "Repo %s has an image that has no imageDigest. Its tag is %s. Continuing on.",
115
226
  repo_uri,
116
227
  img.get("imageTag"),
117
228
  )
229
+ continue
230
+
231
+ tag = img.get("imageTag")
232
+ uri = repo_uri + (f":{tag}" if tag else "")
233
+
234
+ # Build ECRRepositoryImage node
235
+ repo_image = {
236
+ **img,
237
+ "repo_uri": repo_uri,
238
+ "uri": uri,
239
+ "id": uri,
240
+ }
241
+
242
+ # Check if this is a manifest list with images
243
+ manifest_images = img.get("_manifest_images")
244
+ if manifest_images:
245
+ # For manifest list: include manifest list digest + all referenced digests
246
+ all_digests = [digest] + [m["digest"] for m in manifest_images]
247
+ repo_image["imageDigests"] = all_digests
248
+
249
+ # Create ECRImage for the manifest list itself
250
+ if digest not in ecr_images_dict:
251
+ ecr_images_dict[digest] = {
252
+ "imageDigest": digest,
253
+ "type": "manifest_list",
254
+ "architecture": None,
255
+ "os": None,
256
+ "variant": None,
257
+ }
258
+
259
+ # Create ECRImage nodes for each image in the manifest list
260
+ for manifest_img in manifest_images:
261
+ manifest_digest = manifest_img["digest"]
262
+ if manifest_digest not in ecr_images_dict:
263
+ ecr_images_dict[manifest_digest] = {
264
+ "imageDigest": manifest_digest,
265
+ "type": manifest_img.get("type"),
266
+ "architecture": manifest_img.get("architecture"),
267
+ "os": manifest_img.get("os"),
268
+ "variant": manifest_img.get("variant"),
269
+ "attestation_type": manifest_img.get("attestation_type"),
270
+ "attests_digest": manifest_img.get("attests_digest"),
271
+ "media_type": manifest_img.get("media_type"),
272
+ "artifact_media_type": manifest_img.get(
273
+ "artifact_media_type"
274
+ ),
275
+ }
276
+ else:
277
+ # Regular image: single digest
278
+ repo_image["imageDigests"] = [digest]
279
+
280
+ # Create ECRImage for regular image
281
+ if digest not in ecr_images_dict:
282
+ ecr_images_dict[digest] = {
283
+ "imageDigest": digest,
284
+ "type": "image",
285
+ "architecture": None,
286
+ "os": None,
287
+ "variant": None,
288
+ }
289
+
290
+ # Remove internal field before returning
291
+ repo_image.pop("_manifest_images", None)
292
+ repo_images_list.append(repo_image)
118
293
 
119
- return repo_images_list
294
+ ecr_images_list = list(ecr_images_dict.values())
295
+ return repo_images_list, ecr_images_list
120
296
 
121
297
 
122
298
  @timeit
123
299
  def load_ecr_repository_images(
124
300
  neo4j_session: neo4j.Session,
125
301
  repo_images_list: List[Dict],
302
+ ecr_images_list: List[Dict],
126
303
  region: str,
127
304
  current_aws_account_id: str,
128
305
  aws_update_tag: int,
129
306
  ) -> None:
130
307
  logger.info(
131
- f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
308
+ f"Loading {len(ecr_images_list)} ECR images and {len(repo_images_list)} ECR repository images in {region} into graph.",
132
309
  )
133
- image_digests = {img["imageDigest"] for img in repo_images_list}
134
- ecr_images = [{"imageDigest": d} for d in image_digests]
135
310
 
136
311
  load(
137
312
  neo4j_session,
138
313
  ECRImageSchema(),
139
- ecr_images,
314
+ ecr_images_list,
140
315
  lastupdated=aws_update_tag,
141
316
  Region=region,
142
317
  AWS_ID=current_aws_account_id,
@@ -219,10 +394,11 @@ def sync(
219
394
  current_aws_account_id,
220
395
  update_tag,
221
396
  )
222
- repo_images_list = transform_ecr_repository_images(image_data)
397
+ repo_images_list, ecr_images_list = transform_ecr_repository_images(image_data)
223
398
  load_ecr_repository_images(
224
399
  neo4j_session,
225
400
  repo_images_list,
401
+ ecr_images_list,
226
402
  region,
227
403
  current_aws_account_id,
228
404
  update_tag,
@@ -170,6 +170,111 @@ async def get_blob_json_via_presigned(
170
170
  return response.json()
171
171
 
172
172
 
173
+ async def _extract_parent_image_from_attestation(
174
+ ecr_client: ECRClient,
175
+ repo_name: str,
176
+ attestation_manifest_digest: str,
177
+ http_client: httpx.AsyncClient,
178
+ ) -> Optional[dict[str, str]]:
179
+ """
180
+ Extract parent image information from an in-toto provenance attestation.
181
+
182
+ This function fetches an attestation manifest, downloads its in-toto layer,
183
+ and extracts the parent image reference from the SLSA provenance materials.
184
+
185
+ :param ecr_client: ECR client for fetching manifests and layers
186
+ :param repo_name: ECR repository name
187
+ :param attestation_manifest_digest: Digest of the attestation manifest
188
+ :param http_client: HTTP client for downloading blobs
189
+ :return: Dict with parent_image_uri and parent_image_digest, or None if no parent image found
190
+ """
191
+ try:
192
+ attestation_manifest, _ = await batch_get_manifest(
193
+ ecr_client,
194
+ repo_name,
195
+ attestation_manifest_digest,
196
+ [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
197
+ )
198
+
199
+ if not attestation_manifest:
200
+ logger.debug(
201
+ "No attestation manifest found for digest %s in repo %s",
202
+ attestation_manifest_digest,
203
+ repo_name,
204
+ )
205
+ return None
206
+
207
+ # Get the in-toto layer from the attestation manifest
208
+ layers = attestation_manifest.get("layers", [])
209
+ intoto_layer = next(
210
+ (
211
+ layer
212
+ for layer in layers
213
+ if "in-toto" in layer.get("mediaType", "").lower()
214
+ ),
215
+ None,
216
+ )
217
+
218
+ if not intoto_layer:
219
+ logger.debug(
220
+ "No in-toto layer found in attestation manifest %s",
221
+ attestation_manifest_digest,
222
+ )
223
+ return None
224
+
225
+ # Download the in-toto attestation blob
226
+ intoto_digest = intoto_layer.get("digest")
227
+ if not intoto_digest:
228
+ logger.debug("No digest found for in-toto layer")
229
+ return None
230
+
231
+ attestation_blob = await get_blob_json_via_presigned(
232
+ ecr_client,
233
+ repo_name,
234
+ intoto_digest,
235
+ http_client,
236
+ )
237
+
238
+ if not attestation_blob:
239
+ logger.debug("Failed to download attestation blob")
240
+ return None
241
+
242
+ # Extract parent image from SLSA provenance materials
243
+ materials = attestation_blob.get("predicate", {}).get("materials", [])
244
+ for material in materials:
245
+ uri = material.get("uri", "")
246
+ uri_l = uri.lower()
247
+ # Look for container image URIs that are NOT the dockerfile itself
248
+ is_container_ref = (
249
+ uri_l.startswith("pkg:docker/")
250
+ or uri_l.startswith("pkg:oci/")
251
+ or uri_l.startswith("oci://")
252
+ )
253
+ if is_container_ref and "dockerfile" not in uri_l:
254
+ digest_obj = material.get("digest", {})
255
+ sha256_digest = digest_obj.get("sha256")
256
+ if sha256_digest:
257
+ return {
258
+ "parent_image_uri": uri,
259
+ "parent_image_digest": f"sha256:{sha256_digest}",
260
+ }
261
+
262
+ logger.debug(
263
+ "No parent image found in attestation materials for %s",
264
+ attestation_manifest_digest,
265
+ )
266
+ return None
267
+
268
+ except Exception as e:
269
+ logger.warning(
270
+ "Error extracting parent image from attestation %s in repo %s: %s",
271
+ attestation_manifest_digest,
272
+ repo_name,
273
+ e,
274
+ )
275
+ return None
276
+
277
+
173
278
  async def _diff_ids_for_manifest(
174
279
  ecr_client: ECRClient,
175
280
  repo_name: str,
@@ -228,6 +333,7 @@ async def _diff_ids_for_manifest(
228
333
  def transform_ecr_image_layers(
229
334
  image_layers_data: dict[str, dict[str, list[str]]],
230
335
  image_digest_map: dict[str, str],
336
+ image_attestation_map: Optional[dict[str, dict[str, str]]] = None,
231
337
  ) -> tuple[list[dict], list[dict]]:
232
338
  """
233
339
  Transform image layer data into format suitable for Neo4j ingestion.
@@ -235,8 +341,11 @@ def transform_ecr_image_layers(
235
341
 
236
342
  :param image_layers_data: Map of image URI to platform to diff_ids
237
343
  :param image_digest_map: Map of image URI to image digest
344
+ :param image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
238
345
  :return: List of layer objects ready for ingestion
239
346
  """
347
+ if image_attestation_map is None:
348
+ image_attestation_map = {}
240
349
  layers_by_diff_id: dict[str, dict[str, Any]] = {}
241
350
  memberships_by_digest: dict[str, dict[str, Any]] = {}
242
351
 
@@ -278,10 +387,20 @@ def transform_ecr_image_layers(
278
387
  layer["tail_image_ids"].add(image_digest)
279
388
 
280
389
  if ordered_layers_for_image:
281
- memberships_by_digest[image_digest] = {
390
+ membership: dict[str, Any] = {
282
391
  "layer_diff_ids": ordered_layers_for_image,
283
392
  }
284
393
 
394
+ # Add attestation data if available for this image
395
+ if image_uri in image_attestation_map:
396
+ attestation = image_attestation_map[image_uri]
397
+ membership["parent_image_uri"] = attestation["parent_image_uri"]
398
+ membership["parent_image_digest"] = attestation["parent_image_digest"]
399
+ membership["from_attestation"] = True
400
+ membership["confidence"] = "explicit"
401
+
402
+ memberships_by_digest[image_digest] = membership
403
+
285
404
  # Convert sets back to lists for Neo4j ingestion
286
405
  layers = []
287
406
  for layer in layers_by_diff_id.values():
@@ -350,12 +469,18 @@ async def fetch_image_layers_async(
350
469
  ecr_client: ECRClient,
351
470
  repo_images_list: list[dict],
352
471
  max_concurrent: int = 200,
353
- ) -> tuple[dict[str, dict[str, list[str]]], dict[str, str]]:
472
+ ) -> tuple[dict[str, dict[str, list[str]]], dict[str, str], dict[str, dict[str, str]]]:
354
473
  """
355
474
  Fetch image layers for ECR images in parallel with caching and non-blocking I/O.
475
+
476
+ Returns:
477
+ - image_layers_data: Map of image URI to platform to diff_ids
478
+ - image_digest_map: Map of image URI to image digest
479
+ - image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
356
480
  """
357
481
  image_layers_data: dict[str, dict[str, list[str]]] = {}
358
482
  image_digest_map: dict[str, str] = {}
483
+ image_attestation_map: dict[str, dict[str, str]] = {}
359
484
  semaphore = asyncio.Semaphore(max_concurrent)
360
485
 
361
486
  # Cache for manifest fetches keyed by (repo_name, imageDigest)
@@ -402,8 +527,8 @@ async def fetch_image_layers_async(
402
527
  async def fetch_single_image_layers(
403
528
  repo_image: dict,
404
529
  http_client: httpx.AsyncClient,
405
- ) -> Optional[tuple[str, str, dict[str, list[str]]]]:
406
- """Fetch layers for a single image."""
530
+ ) -> Optional[tuple[str, str, dict[str, list[str]], Optional[dict[str, str]]]]:
531
+ """Fetch layers for a single image and extract attestation if present."""
407
532
  async with semaphore:
408
533
  # Caller guarantees these fields exist in every repo_image
409
534
  uri = repo_image["uri"]
@@ -426,24 +551,37 @@ async def fetch_image_layers_async(
426
551
 
427
552
  manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
428
553
  platform_layers: dict[str, list[str]] = {}
554
+ attestation_data: Optional[dict[str, str]] = None
429
555
 
430
556
  if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:
431
557
 
432
558
  async def _process_child_manifest(
433
559
  manifest_ref: dict,
434
- ) -> dict[str, list[str]]:
435
- # Skip attestation manifests - these aren't real images
560
+ ) -> tuple[dict[str, list[str]], Optional[dict[str, str]]]:
561
+ # Check if this is an attestation manifest
436
562
  if (
437
563
  manifest_ref.get("annotations", {}).get(
438
564
  "vnd.docker.reference.type"
439
565
  )
440
566
  == "attestation-manifest"
441
567
  ):
442
- return {}
568
+ # Extract base image from attestation
569
+ child_digest = manifest_ref.get("digest")
570
+ if child_digest:
571
+ attestation_info = (
572
+ await _extract_parent_image_from_attestation(
573
+ ecr_client,
574
+ repo_name,
575
+ child_digest,
576
+ http_client,
577
+ )
578
+ )
579
+ return {}, attestation_info
580
+ return {}, None
443
581
 
444
582
  child_digest = manifest_ref.get("digest")
445
583
  if not child_digest:
446
- return {}
584
+ return {}, None
447
585
 
448
586
  # Use optimized caching for child manifest
449
587
  child_doc, _ = await _fetch_and_cache_manifest(
@@ -452,16 +590,17 @@ async def fetch_image_layers_async(
452
590
  [ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
453
591
  )
454
592
  if not child_doc:
455
- return {}
593
+ return {}, None
456
594
 
457
595
  platform_hint = extract_platform_from_manifest(manifest_ref)
458
- return await _diff_ids_for_manifest(
596
+ diff_map = await _diff_ids_for_manifest(
459
597
  ecr_client,
460
598
  repo_name,
461
599
  child_doc,
462
600
  http_client,
463
601
  platform_hint,
464
602
  )
603
+ return diff_map, None
465
604
 
466
605
  # Process all child manifests in parallel
467
606
  child_tasks = [
@@ -474,8 +613,13 @@ async def fetch_image_layers_async(
474
613
 
475
614
  # Merge results from successful child manifest processing
476
615
  for result in child_results:
477
- if isinstance(result, dict):
478
- platform_layers.update(result)
616
+ if isinstance(result, tuple) and len(result) == 2:
617
+ layer_data, attest_data = result
618
+ if layer_data:
619
+ platform_layers.update(layer_data)
620
+ if attest_data and not attestation_data:
621
+ # Use first attestation found
622
+ attestation_data = attest_data
479
623
  else:
480
624
  diff_map = await _diff_ids_for_manifest(
481
625
  ecr_client,
@@ -487,7 +631,7 @@ async def fetch_image_layers_async(
487
631
  platform_layers.update(diff_map)
488
632
 
489
633
  if platform_layers:
490
- return uri, digest, platform_layers
634
+ return uri, digest, platform_layers, attestation_data
491
635
 
492
636
  return None
493
637
 
@@ -507,7 +651,7 @@ async def fetch_image_layers_async(
507
651
  )
508
652
 
509
653
  if not tasks:
510
- return image_layers_data, image_digest_map
654
+ return image_layers_data, image_digest_map, image_attestation_map
511
655
 
512
656
  progress_interval = max(1, min(100, total // 10 or 1))
513
657
  completed = 0
@@ -526,16 +670,22 @@ async def fetch_image_layers_async(
526
670
  )
527
671
 
528
672
  if result:
529
- uri, digest, layer_data = result
673
+ uri, digest, layer_data, attestation_data = result
530
674
  if not digest:
531
675
  raise ValueError(f"Empty digest returned for image {uri}")
532
676
  image_layers_data[uri] = layer_data
533
677
  image_digest_map[uri] = digest
678
+ if attestation_data:
679
+ image_attestation_map[uri] = attestation_data
534
680
 
535
681
  logger.info(
536
682
  f"Successfully fetched layers for {len(image_layers_data)}/{len(repo_images_list)} images"
537
683
  )
538
- return image_layers_data, image_digest_map
684
+ if image_attestation_map:
685
+ logger.info(
686
+ f"Found attestations with base image info for {len(image_attestation_map)} images"
687
+ )
688
+ return image_layers_data, image_digest_map, image_attestation_map
539
689
 
540
690
 
541
691
  def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
@@ -613,9 +763,11 @@ def sync(
613
763
  f"Starting to fetch layers for {len(repo_images_list)} images..."
614
764
  )
615
765
 
616
- async def _fetch_with_async_client() -> (
617
- tuple[dict[str, dict[str, list[str]]], dict[str, str]]
618
- ):
766
+ async def _fetch_with_async_client() -> tuple[
767
+ dict[str, dict[str, list[str]]],
768
+ dict[str, str],
769
+ dict[str, dict[str, str]],
770
+ ]:
619
771
  # Use credentials from the existing boto3 session
620
772
  credentials = boto3_session.get_credentials()
621
773
  session = aioboto3.Session(
@@ -635,8 +787,8 @@ def sync(
635
787
  loop = asyncio.new_event_loop()
636
788
  asyncio.set_event_loop(loop)
637
789
 
638
- image_layers_data, image_digest_map = loop.run_until_complete(
639
- _fetch_with_async_client()
790
+ image_layers_data, image_digest_map, image_attestation_map = (
791
+ loop.run_until_complete(_fetch_with_async_client())
640
792
  )
641
793
 
642
794
  logger.info(
@@ -645,6 +797,7 @@ def sync(
645
797
  layers, memberships = transform_ecr_image_layers(
646
798
  image_layers_data,
647
799
  image_digest_map,
800
+ image_attestation_map,
648
801
  )
649
802
  load_ecr_image_layers(
650
803
  neo4j_session,
@@ -8,6 +8,7 @@ import botocore.config
8
8
  import neo4j
9
9
  from policyuniverse.policy import Policy
10
10
 
11
+ from cartography.client.core.tx import run_write_query
11
12
  from cartography.intel.dns import ingest_dns_record_by_fqdn
12
13
  from cartography.util import aws_handle_regions
13
14
  from cartography.util import run_cleanup_job
@@ -95,7 +96,8 @@ def _load_es_domains(
95
96
  for d in domain_list:
96
97
  del d["ServiceSoftwareOptions"]
97
98
 
98
- neo4j_session.run(
99
+ run_write_query(
100
+ neo4j_session,
99
101
  ingest_records,
100
102
  Records=domain_list,
101
103
  AWS_ACCOUNT_ID=aws_account_id,
@@ -179,7 +181,8 @@ def _link_es_domain_vpc(
179
181
  groupList = vpc_data.get("SecurityGroupIds", [])
180
182
 
181
183
  if len(subnetList) > 0:
182
- neo4j_session.run(
184
+ run_write_query(
185
+ neo4j_session,
183
186
  ingest_subnet,
184
187
  DomainId=domain_id,
185
188
  SubnetList=subnetList,
@@ -187,7 +190,8 @@ def _link_es_domain_vpc(
187
190
  )
188
191
 
189
192
  if len(groupList) > 0:
190
- neo4j_session.run(
193
+ run_write_query(
194
+ neo4j_session,
191
195
  ingest_sec_groups,
192
196
  DomainId=domain_id,
193
197
  SecGroupList=groupList,
@@ -220,7 +224,12 @@ def _process_access_policy(
220
224
  if policy.is_internet_accessible():
221
225
  exposed_internet = True
222
226
 
223
- neo4j_session.run(tag_es, DomainId=domain_id, InternetExposed=exposed_internet)
227
+ run_write_query(
228
+ neo4j_session,
229
+ tag_es,
230
+ DomainId=domain_id,
231
+ InternetExposed=exposed_internet,
232
+ )
224
233
 
225
234
 
226
235
  @timeit