cartography 0.116.1__py3-none-any.whl → 0.118.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +11 -0
- cartography/client/core/tx.py +23 -2
- cartography/config.py +5 -0
- cartography/graph/job.py +6 -2
- cartography/graph/statement.py +4 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/apigateway.py +18 -5
- cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
- cartography/intel/aws/ec2/internet_gateways.py +4 -2
- cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
- cartography/intel/aws/ec2/network_interfaces.py +4 -0
- cartography/intel/aws/ec2/reserved_instances.py +3 -1
- cartography/intel/aws/ec2/tgw.py +11 -5
- cartography/intel/aws/ec2/volumes.py +1 -1
- cartography/intel/aws/ecr.py +202 -26
- cartography/intel/aws/ecr_image_layers.py +174 -21
- cartography/intel/aws/elasticsearch.py +13 -4
- cartography/intel/aws/identitycenter.py +93 -54
- cartography/intel/aws/inspector.py +26 -14
- cartography/intel/aws/permission_relationships.py +3 -3
- cartography/intel/aws/s3.py +26 -13
- cartography/intel/aws/ssm.py +3 -5
- cartography/intel/azure/__init__.py +16 -0
- cartography/intel/azure/compute.py +9 -4
- cartography/intel/azure/container_instances.py +95 -0
- cartography/intel/azure/cosmosdb.py +31 -15
- cartography/intel/azure/data_lake.py +124 -0
- cartography/intel/azure/sql.py +25 -12
- cartography/intel/azure/storage.py +19 -9
- cartography/intel/azure/subscription.py +3 -1
- cartography/intel/crowdstrike/spotlight.py +5 -2
- cartography/intel/entra/app_role_assignments.py +9 -2
- cartography/intel/gcp/__init__.py +26 -9
- cartography/intel/gcp/clients.py +8 -4
- cartography/intel/gcp/compute.py +39 -18
- cartography/intel/gcp/crm/folders.py +9 -3
- cartography/intel/gcp/crm/orgs.py +8 -3
- cartography/intel/gcp/crm/projects.py +14 -3
- cartography/intel/github/teams.py +3 -3
- cartography/intel/jamf/computers.py +7 -1
- cartography/intel/oci/iam.py +23 -9
- cartography/intel/oci/organizations.py +3 -1
- cartography/intel/oci/utils.py +28 -5
- cartography/intel/okta/awssaml.py +8 -7
- cartography/intel/pagerduty/escalation_policies.py +13 -6
- cartography/intel/pagerduty/schedules.py +9 -4
- cartography/intel/pagerduty/services.py +7 -3
- cartography/intel/pagerduty/teams.py +5 -2
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/trivy/__init__.py +109 -58
- cartography/models/aws/ec2/networkinterfaces.py +2 -0
- cartography/models/aws/ecr/image.py +38 -1
- cartography/models/aws/ecr/repository_image.py +1 -1
- cartography/models/azure/container_instance.py +55 -0
- cartography/models/azure/data_lake_filesystem.py +51 -0
- cartography/rules/cli.py +8 -6
- cartography/rules/data/frameworks/mitre_attack/__init__.py +7 -1
- cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +317 -0
- cartography/rules/data/frameworks/mitre_attack/requirements/t1190_exploit_public_facing_application/__init__.py +1 -0
- cartography/rules/spec/model.py +13 -0
- cartography/sync.py +1 -1
- cartography/util.py +5 -1
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/METADATA +5 -4
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/RECORD +70 -65
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/WHEEL +0 -0
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.116.1.dist-info → cartography-0.118.0.dist-info}/top_level.txt +0 -0
cartography/intel/aws/ecr.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
3
|
from typing import Any
|
|
3
4
|
from typing import Dict
|
|
@@ -18,6 +19,12 @@ from cartography.util import to_synchronous
|
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
22
|
+
# Manifest list media types
|
|
23
|
+
MANIFEST_LIST_MEDIA_TYPES = {
|
|
24
|
+
"application/vnd.docker.distribution.manifest.list.v2+json",
|
|
25
|
+
"application/vnd.oci.image.index.v1+json",
|
|
26
|
+
}
|
|
27
|
+
|
|
21
28
|
|
|
22
29
|
@timeit
|
|
23
30
|
@aws_handle_regions
|
|
@@ -34,6 +41,84 @@ def get_ecr_repositories(
|
|
|
34
41
|
return ecr_repositories
|
|
35
42
|
|
|
36
43
|
|
|
44
|
+
def _get_platform_specific_digests(
|
|
45
|
+
client: Any, repository_name: str, manifest_list_digest: str
|
|
46
|
+
) -> tuple[List[Dict[str, Any]], set[str]]:
|
|
47
|
+
"""
|
|
48
|
+
Fetch manifest list and extract platform-specific image digests and attestations.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
- List of all images (platform-specific + attestations) with digest, type, architecture, os, variant
|
|
52
|
+
- Set of ALL digests referenced in the manifest list
|
|
53
|
+
"""
|
|
54
|
+
response = client.batch_get_image(
|
|
55
|
+
repositoryName=repository_name,
|
|
56
|
+
imageIds=[{"imageDigest": manifest_list_digest}],
|
|
57
|
+
acceptedMediaTypes=list(MANIFEST_LIST_MEDIA_TYPES),
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
if not response.get("images"):
|
|
61
|
+
raise ValueError(
|
|
62
|
+
f"No manifest list found for digest {manifest_list_digest} in repository {repository_name}"
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# batch_get_image returns a single manifest list (hence [0])
|
|
66
|
+
# The manifests[] array inside contains all platform-specific images and attestations
|
|
67
|
+
manifest_json = json.loads(response["images"][0]["imageManifest"])
|
|
68
|
+
manifests = manifest_json.get("manifests", [])
|
|
69
|
+
|
|
70
|
+
if not manifests:
|
|
71
|
+
raise ValueError(
|
|
72
|
+
f"Manifest list {manifest_list_digest} has no manifests in repository {repository_name}"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
all_images = []
|
|
76
|
+
all_referenced_digests = set()
|
|
77
|
+
|
|
78
|
+
for manifest_ref in manifests:
|
|
79
|
+
digest = manifest_ref.get("digest")
|
|
80
|
+
if not digest:
|
|
81
|
+
raise ValueError(
|
|
82
|
+
f"Manifest in list {manifest_list_digest} has no digest in repository {repository_name}"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
all_referenced_digests.add(digest)
|
|
86
|
+
|
|
87
|
+
platform_info = manifest_ref.get("platform", {})
|
|
88
|
+
architecture = platform_info.get("architecture")
|
|
89
|
+
os_name = platform_info.get("os")
|
|
90
|
+
|
|
91
|
+
# Determine if this is an attestation
|
|
92
|
+
annotations = manifest_ref.get("annotations", {})
|
|
93
|
+
is_attestation = (
|
|
94
|
+
architecture == "unknown" and os_name == "unknown"
|
|
95
|
+
) or annotations.get("vnd.docker.reference.type") == "attestation-manifest"
|
|
96
|
+
|
|
97
|
+
all_images.append(
|
|
98
|
+
{
|
|
99
|
+
"digest": digest,
|
|
100
|
+
"type": "attestation" if is_attestation else "image",
|
|
101
|
+
"architecture": architecture,
|
|
102
|
+
"os": os_name,
|
|
103
|
+
"variant": platform_info.get("variant"),
|
|
104
|
+
"attestation_type": (
|
|
105
|
+
annotations.get("vnd.docker.reference.type")
|
|
106
|
+
if is_attestation
|
|
107
|
+
else None
|
|
108
|
+
),
|
|
109
|
+
"attests_digest": (
|
|
110
|
+
annotations.get("vnd.docker.reference.digest")
|
|
111
|
+
if is_attestation
|
|
112
|
+
else None
|
|
113
|
+
),
|
|
114
|
+
"media_type": manifest_ref.get("mediaType"),
|
|
115
|
+
"artifact_media_type": manifest_ref.get("artifactType"),
|
|
116
|
+
}
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
return all_images, all_referenced_digests
|
|
120
|
+
|
|
121
|
+
|
|
37
122
|
@timeit
|
|
38
123
|
@aws_handle_regions
|
|
39
124
|
def get_ecr_repository_images(
|
|
@@ -46,7 +131,11 @@ def get_ecr_repository_images(
|
|
|
46
131
|
)
|
|
47
132
|
client = boto3_session.client("ecr", region_name=region)
|
|
48
133
|
list_paginator = client.get_paginator("list_images")
|
|
49
|
-
|
|
134
|
+
|
|
135
|
+
# First pass: Collect all image details and track manifest list referenced digests
|
|
136
|
+
all_image_details: List[Dict] = []
|
|
137
|
+
manifest_list_referenced_digests: set[str] = set()
|
|
138
|
+
|
|
50
139
|
for page in list_paginator.paginate(repositoryName=repository_name):
|
|
51
140
|
image_ids = page["imageIds"]
|
|
52
141
|
if not image_ids:
|
|
@@ -58,14 +147,37 @@ def get_ecr_repository_images(
|
|
|
58
147
|
for response in describe_response:
|
|
59
148
|
image_details = response["imageDetails"]
|
|
60
149
|
for detail in image_details:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
150
|
+
# Check if this is a manifest list
|
|
151
|
+
media_type = detail.get("imageManifestMediaType")
|
|
152
|
+
if media_type in MANIFEST_LIST_MEDIA_TYPES:
|
|
153
|
+
# Fetch all images from manifest list (platform-specific + attestations)
|
|
154
|
+
manifest_list_digest = detail["imageDigest"]
|
|
155
|
+
manifest_images, all_digests = _get_platform_specific_digests(
|
|
156
|
+
client, repository_name, manifest_list_digest
|
|
157
|
+
)
|
|
158
|
+
detail["_manifest_images"] = manifest_images
|
|
159
|
+
|
|
160
|
+
# Track ALL digests so we don't create ECRRepositoryImages for them
|
|
161
|
+
manifest_list_referenced_digests.update(all_digests)
|
|
162
|
+
|
|
163
|
+
all_image_details.append(detail)
|
|
164
|
+
|
|
165
|
+
# Second pass: Only add images that should have ECRRepositoryImage nodes
|
|
166
|
+
ecr_repository_images: List[Dict] = []
|
|
167
|
+
for detail in all_image_details:
|
|
168
|
+
tags = detail.get("imageTags") or []
|
|
169
|
+
digest = detail.get("imageDigest")
|
|
170
|
+
|
|
171
|
+
if tags:
|
|
172
|
+
# Tagged images always get ECRRepositoryImage nodes (one per tag)
|
|
173
|
+
for tag in tags:
|
|
174
|
+
image_detail = {**detail, "imageTag": tag}
|
|
175
|
+
image_detail.pop("imageTags", None)
|
|
176
|
+
ecr_repository_images.append(image_detail)
|
|
177
|
+
elif digest not in manifest_list_referenced_digests:
|
|
178
|
+
# Untagged images only get nodes if they're NOT part of a manifest list
|
|
179
|
+
ecr_repository_images.append({**detail})
|
|
180
|
+
|
|
69
181
|
return ecr_repository_images
|
|
70
182
|
|
|
71
183
|
|
|
@@ -91,52 +203,115 @@ def load_ecr_repositories(
|
|
|
91
203
|
|
|
92
204
|
|
|
93
205
|
@timeit
|
|
94
|
-
def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
|
|
206
|
+
def transform_ecr_repository_images(repo_data: Dict) -> tuple[List[Dict], List[Dict]]:
|
|
95
207
|
"""
|
|
96
|
-
|
|
97
|
-
|
|
208
|
+
Transform ECR repository images into repo image list and ECR image list.
|
|
209
|
+
For manifest lists, creates ECR images for manifest list, platform-specific images, and attestations.
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
- repo_images_list: List of ECRRepositoryImage nodes with imageDigests field (one-to-many)
|
|
213
|
+
- ecr_images_list: List of ECRImage nodes with type, architecture, os, variant fields
|
|
98
214
|
"""
|
|
99
215
|
repo_images_list = []
|
|
216
|
+
ecr_images_dict: Dict[str, Dict] = {} # Deduplicate by digest
|
|
217
|
+
|
|
100
218
|
# Sort repository URIs to ensure consistent processing order
|
|
101
219
|
for repo_uri in sorted(repo_data.keys()):
|
|
102
220
|
repo_images = repo_data[repo_uri]
|
|
103
221
|
for img in repo_images:
|
|
104
222
|
digest = img.get("imageDigest")
|
|
105
|
-
if digest:
|
|
106
|
-
tag = img.get("imageTag")
|
|
107
|
-
uri = repo_uri + (f":{tag}" if tag else "")
|
|
108
|
-
img["repo_uri"] = repo_uri
|
|
109
|
-
img["uri"] = uri
|
|
110
|
-
img["id"] = uri
|
|
111
|
-
repo_images_list.append(img)
|
|
112
|
-
else:
|
|
223
|
+
if not digest:
|
|
113
224
|
logger.warning(
|
|
114
225
|
"Repo %s has an image that has no imageDigest. Its tag is %s. Continuing on.",
|
|
115
226
|
repo_uri,
|
|
116
227
|
img.get("imageTag"),
|
|
117
228
|
)
|
|
229
|
+
continue
|
|
230
|
+
|
|
231
|
+
tag = img.get("imageTag")
|
|
232
|
+
uri = repo_uri + (f":{tag}" if tag else "")
|
|
233
|
+
|
|
234
|
+
# Build ECRRepositoryImage node
|
|
235
|
+
repo_image = {
|
|
236
|
+
**img,
|
|
237
|
+
"repo_uri": repo_uri,
|
|
238
|
+
"uri": uri,
|
|
239
|
+
"id": uri,
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
# Check if this is a manifest list with images
|
|
243
|
+
manifest_images = img.get("_manifest_images")
|
|
244
|
+
if manifest_images:
|
|
245
|
+
# For manifest list: include manifest list digest + all referenced digests
|
|
246
|
+
all_digests = [digest] + [m["digest"] for m in manifest_images]
|
|
247
|
+
repo_image["imageDigests"] = all_digests
|
|
248
|
+
|
|
249
|
+
# Create ECRImage for the manifest list itself
|
|
250
|
+
if digest not in ecr_images_dict:
|
|
251
|
+
ecr_images_dict[digest] = {
|
|
252
|
+
"imageDigest": digest,
|
|
253
|
+
"type": "manifest_list",
|
|
254
|
+
"architecture": None,
|
|
255
|
+
"os": None,
|
|
256
|
+
"variant": None,
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
# Create ECRImage nodes for each image in the manifest list
|
|
260
|
+
for manifest_img in manifest_images:
|
|
261
|
+
manifest_digest = manifest_img["digest"]
|
|
262
|
+
if manifest_digest not in ecr_images_dict:
|
|
263
|
+
ecr_images_dict[manifest_digest] = {
|
|
264
|
+
"imageDigest": manifest_digest,
|
|
265
|
+
"type": manifest_img.get("type"),
|
|
266
|
+
"architecture": manifest_img.get("architecture"),
|
|
267
|
+
"os": manifest_img.get("os"),
|
|
268
|
+
"variant": manifest_img.get("variant"),
|
|
269
|
+
"attestation_type": manifest_img.get("attestation_type"),
|
|
270
|
+
"attests_digest": manifest_img.get("attests_digest"),
|
|
271
|
+
"media_type": manifest_img.get("media_type"),
|
|
272
|
+
"artifact_media_type": manifest_img.get(
|
|
273
|
+
"artifact_media_type"
|
|
274
|
+
),
|
|
275
|
+
}
|
|
276
|
+
else:
|
|
277
|
+
# Regular image: single digest
|
|
278
|
+
repo_image["imageDigests"] = [digest]
|
|
279
|
+
|
|
280
|
+
# Create ECRImage for regular image
|
|
281
|
+
if digest not in ecr_images_dict:
|
|
282
|
+
ecr_images_dict[digest] = {
|
|
283
|
+
"imageDigest": digest,
|
|
284
|
+
"type": "image",
|
|
285
|
+
"architecture": None,
|
|
286
|
+
"os": None,
|
|
287
|
+
"variant": None,
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
# Remove internal field before returning
|
|
291
|
+
repo_image.pop("_manifest_images", None)
|
|
292
|
+
repo_images_list.append(repo_image)
|
|
118
293
|
|
|
119
|
-
|
|
294
|
+
ecr_images_list = list(ecr_images_dict.values())
|
|
295
|
+
return repo_images_list, ecr_images_list
|
|
120
296
|
|
|
121
297
|
|
|
122
298
|
@timeit
|
|
123
299
|
def load_ecr_repository_images(
|
|
124
300
|
neo4j_session: neo4j.Session,
|
|
125
301
|
repo_images_list: List[Dict],
|
|
302
|
+
ecr_images_list: List[Dict],
|
|
126
303
|
region: str,
|
|
127
304
|
current_aws_account_id: str,
|
|
128
305
|
aws_update_tag: int,
|
|
129
306
|
) -> None:
|
|
130
307
|
logger.info(
|
|
131
|
-
f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
308
|
+
f"Loading {len(ecr_images_list)} ECR images and {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
132
309
|
)
|
|
133
|
-
image_digests = {img["imageDigest"] for img in repo_images_list}
|
|
134
|
-
ecr_images = [{"imageDigest": d} for d in image_digests]
|
|
135
310
|
|
|
136
311
|
load(
|
|
137
312
|
neo4j_session,
|
|
138
313
|
ECRImageSchema(),
|
|
139
|
-
|
|
314
|
+
ecr_images_list,
|
|
140
315
|
lastupdated=aws_update_tag,
|
|
141
316
|
Region=region,
|
|
142
317
|
AWS_ID=current_aws_account_id,
|
|
@@ -219,10 +394,11 @@ def sync(
|
|
|
219
394
|
current_aws_account_id,
|
|
220
395
|
update_tag,
|
|
221
396
|
)
|
|
222
|
-
repo_images_list = transform_ecr_repository_images(image_data)
|
|
397
|
+
repo_images_list, ecr_images_list = transform_ecr_repository_images(image_data)
|
|
223
398
|
load_ecr_repository_images(
|
|
224
399
|
neo4j_session,
|
|
225
400
|
repo_images_list,
|
|
401
|
+
ecr_images_list,
|
|
226
402
|
region,
|
|
227
403
|
current_aws_account_id,
|
|
228
404
|
update_tag,
|
|
cartography/intel/aws/ecr_image_layers.py
CHANGED
|
@@ -170,6 +170,111 @@ async def get_blob_json_via_presigned(
|
|
|
170
170
|
return response.json()
|
|
171
171
|
|
|
172
172
|
|
|
173
|
+
async def _extract_parent_image_from_attestation(
|
|
174
|
+
ecr_client: ECRClient,
|
|
175
|
+
repo_name: str,
|
|
176
|
+
attestation_manifest_digest: str,
|
|
177
|
+
http_client: httpx.AsyncClient,
|
|
178
|
+
) -> Optional[dict[str, str]]:
|
|
179
|
+
"""
|
|
180
|
+
Extract parent image information from an in-toto provenance attestation.
|
|
181
|
+
|
|
182
|
+
This function fetches an attestation manifest, downloads its in-toto layer,
|
|
183
|
+
and extracts the parent image reference from the SLSA provenance materials.
|
|
184
|
+
|
|
185
|
+
:param ecr_client: ECR client for fetching manifests and layers
|
|
186
|
+
:param repo_name: ECR repository name
|
|
187
|
+
:param attestation_manifest_digest: Digest of the attestation manifest
|
|
188
|
+
:param http_client: HTTP client for downloading blobs
|
|
189
|
+
:return: Dict with parent_image_uri and parent_image_digest, or None if no parent image found
|
|
190
|
+
"""
|
|
191
|
+
try:
|
|
192
|
+
attestation_manifest, _ = await batch_get_manifest(
|
|
193
|
+
ecr_client,
|
|
194
|
+
repo_name,
|
|
195
|
+
attestation_manifest_digest,
|
|
196
|
+
[ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
if not attestation_manifest:
|
|
200
|
+
logger.debug(
|
|
201
|
+
"No attestation manifest found for digest %s in repo %s",
|
|
202
|
+
attestation_manifest_digest,
|
|
203
|
+
repo_name,
|
|
204
|
+
)
|
|
205
|
+
return None
|
|
206
|
+
|
|
207
|
+
# Get the in-toto layer from the attestation manifest
|
|
208
|
+
layers = attestation_manifest.get("layers", [])
|
|
209
|
+
intoto_layer = next(
|
|
210
|
+
(
|
|
211
|
+
layer
|
|
212
|
+
for layer in layers
|
|
213
|
+
if "in-toto" in layer.get("mediaType", "").lower()
|
|
214
|
+
),
|
|
215
|
+
None,
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
if not intoto_layer:
|
|
219
|
+
logger.debug(
|
|
220
|
+
"No in-toto layer found in attestation manifest %s",
|
|
221
|
+
attestation_manifest_digest,
|
|
222
|
+
)
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
# Download the in-toto attestation blob
|
|
226
|
+
intoto_digest = intoto_layer.get("digest")
|
|
227
|
+
if not intoto_digest:
|
|
228
|
+
logger.debug("No digest found for in-toto layer")
|
|
229
|
+
return None
|
|
230
|
+
|
|
231
|
+
attestation_blob = await get_blob_json_via_presigned(
|
|
232
|
+
ecr_client,
|
|
233
|
+
repo_name,
|
|
234
|
+
intoto_digest,
|
|
235
|
+
http_client,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
if not attestation_blob:
|
|
239
|
+
logger.debug("Failed to download attestation blob")
|
|
240
|
+
return None
|
|
241
|
+
|
|
242
|
+
# Extract parent image from SLSA provenance materials
|
|
243
|
+
materials = attestation_blob.get("predicate", {}).get("materials", [])
|
|
244
|
+
for material in materials:
|
|
245
|
+
uri = material.get("uri", "")
|
|
246
|
+
uri_l = uri.lower()
|
|
247
|
+
# Look for container image URIs that are NOT the dockerfile itself
|
|
248
|
+
is_container_ref = (
|
|
249
|
+
uri_l.startswith("pkg:docker/")
|
|
250
|
+
or uri_l.startswith("pkg:oci/")
|
|
251
|
+
or uri_l.startswith("oci://")
|
|
252
|
+
)
|
|
253
|
+
if is_container_ref and "dockerfile" not in uri_l:
|
|
254
|
+
digest_obj = material.get("digest", {})
|
|
255
|
+
sha256_digest = digest_obj.get("sha256")
|
|
256
|
+
if sha256_digest:
|
|
257
|
+
return {
|
|
258
|
+
"parent_image_uri": uri,
|
|
259
|
+
"parent_image_digest": f"sha256:{sha256_digest}",
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
logger.debug(
|
|
263
|
+
"No parent image found in attestation materials for %s",
|
|
264
|
+
attestation_manifest_digest,
|
|
265
|
+
)
|
|
266
|
+
return None
|
|
267
|
+
|
|
268
|
+
except Exception as e:
|
|
269
|
+
logger.warning(
|
|
270
|
+
"Error extracting parent image from attestation %s in repo %s: %s",
|
|
271
|
+
attestation_manifest_digest,
|
|
272
|
+
repo_name,
|
|
273
|
+
e,
|
|
274
|
+
)
|
|
275
|
+
return None
|
|
276
|
+
|
|
277
|
+
|
|
173
278
|
async def _diff_ids_for_manifest(
|
|
174
279
|
ecr_client: ECRClient,
|
|
175
280
|
repo_name: str,
|
|
@@ -228,6 +333,7 @@ async def _diff_ids_for_manifest(
|
|
|
228
333
|
def transform_ecr_image_layers(
|
|
229
334
|
image_layers_data: dict[str, dict[str, list[str]]],
|
|
230
335
|
image_digest_map: dict[str, str],
|
|
336
|
+
image_attestation_map: Optional[dict[str, dict[str, str]]] = None,
|
|
231
337
|
) -> tuple[list[dict], list[dict]]:
|
|
232
338
|
"""
|
|
233
339
|
Transform image layer data into format suitable for Neo4j ingestion.
|
|
@@ -235,8 +341,11 @@ def transform_ecr_image_layers(
|
|
|
235
341
|
|
|
236
342
|
:param image_layers_data: Map of image URI to platform to diff_ids
|
|
237
343
|
:param image_digest_map: Map of image URI to image digest
|
|
344
|
+
:param image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
|
|
238
345
|
:return: List of layer objects ready for ingestion
|
|
239
346
|
"""
|
|
347
|
+
if image_attestation_map is None:
|
|
348
|
+
image_attestation_map = {}
|
|
240
349
|
layers_by_diff_id: dict[str, dict[str, Any]] = {}
|
|
241
350
|
memberships_by_digest: dict[str, dict[str, Any]] = {}
|
|
242
351
|
|
|
@@ -278,10 +387,20 @@ def transform_ecr_image_layers(
|
|
|
278
387
|
layer["tail_image_ids"].add(image_digest)
|
|
279
388
|
|
|
280
389
|
if ordered_layers_for_image:
|
|
281
|
-
|
|
390
|
+
membership: dict[str, Any] = {
|
|
282
391
|
"layer_diff_ids": ordered_layers_for_image,
|
|
283
392
|
}
|
|
284
393
|
|
|
394
|
+
# Add attestation data if available for this image
|
|
395
|
+
if image_uri in image_attestation_map:
|
|
396
|
+
attestation = image_attestation_map[image_uri]
|
|
397
|
+
membership["parent_image_uri"] = attestation["parent_image_uri"]
|
|
398
|
+
membership["parent_image_digest"] = attestation["parent_image_digest"]
|
|
399
|
+
membership["from_attestation"] = True
|
|
400
|
+
membership["confidence"] = "explicit"
|
|
401
|
+
|
|
402
|
+
memberships_by_digest[image_digest] = membership
|
|
403
|
+
|
|
285
404
|
# Convert sets back to lists for Neo4j ingestion
|
|
286
405
|
layers = []
|
|
287
406
|
for layer in layers_by_diff_id.values():
|
|
@@ -350,12 +469,18 @@ async def fetch_image_layers_async(
|
|
|
350
469
|
ecr_client: ECRClient,
|
|
351
470
|
repo_images_list: list[dict],
|
|
352
471
|
max_concurrent: int = 200,
|
|
353
|
-
) -> tuple[dict[str, dict[str, list[str]]], dict[str, str]]:
|
|
472
|
+
) -> tuple[dict[str, dict[str, list[str]]], dict[str, str], dict[str, dict[str, str]]]:
|
|
354
473
|
"""
|
|
355
474
|
Fetch image layers for ECR images in parallel with caching and non-blocking I/O.
|
|
475
|
+
|
|
476
|
+
Returns:
|
|
477
|
+
- image_layers_data: Map of image URI to platform to diff_ids
|
|
478
|
+
- image_digest_map: Map of image URI to image digest
|
|
479
|
+
- image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
|
|
356
480
|
"""
|
|
357
481
|
image_layers_data: dict[str, dict[str, list[str]]] = {}
|
|
358
482
|
image_digest_map: dict[str, str] = {}
|
|
483
|
+
image_attestation_map: dict[str, dict[str, str]] = {}
|
|
359
484
|
semaphore = asyncio.Semaphore(max_concurrent)
|
|
360
485
|
|
|
361
486
|
# Cache for manifest fetches keyed by (repo_name, imageDigest)
|
|
@@ -402,8 +527,8 @@ async def fetch_image_layers_async(
|
|
|
402
527
|
async def fetch_single_image_layers(
|
|
403
528
|
repo_image: dict,
|
|
404
529
|
http_client: httpx.AsyncClient,
|
|
405
|
-
) -> Optional[tuple[str, str, dict[str, list[str]]]]:
|
|
406
|
-
"""Fetch layers for a single image."""
|
|
530
|
+
) -> Optional[tuple[str, str, dict[str, list[str]], Optional[dict[str, str]]]]:
|
|
531
|
+
"""Fetch layers for a single image and extract attestation if present."""
|
|
407
532
|
async with semaphore:
|
|
408
533
|
# Caller guarantees these fields exist in every repo_image
|
|
409
534
|
uri = repo_image["uri"]
|
|
@@ -426,24 +551,37 @@ async def fetch_image_layers_async(
|
|
|
426
551
|
|
|
427
552
|
manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
|
|
428
553
|
platform_layers: dict[str, list[str]] = {}
|
|
554
|
+
attestation_data: Optional[dict[str, str]] = None
|
|
429
555
|
|
|
430
556
|
if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:
|
|
431
557
|
|
|
432
558
|
async def _process_child_manifest(
|
|
433
559
|
manifest_ref: dict,
|
|
434
|
-
) -> dict[str, list[str]]:
|
|
435
|
-
#
|
|
560
|
+
) -> tuple[dict[str, list[str]], Optional[dict[str, str]]]:
|
|
561
|
+
# Check if this is an attestation manifest
|
|
436
562
|
if (
|
|
437
563
|
manifest_ref.get("annotations", {}).get(
|
|
438
564
|
"vnd.docker.reference.type"
|
|
439
565
|
)
|
|
440
566
|
== "attestation-manifest"
|
|
441
567
|
):
|
|
442
|
-
|
|
568
|
+
# Extract base image from attestation
|
|
569
|
+
child_digest = manifest_ref.get("digest")
|
|
570
|
+
if child_digest:
|
|
571
|
+
attestation_info = (
|
|
572
|
+
await _extract_parent_image_from_attestation(
|
|
573
|
+
ecr_client,
|
|
574
|
+
repo_name,
|
|
575
|
+
child_digest,
|
|
576
|
+
http_client,
|
|
577
|
+
)
|
|
578
|
+
)
|
|
579
|
+
return {}, attestation_info
|
|
580
|
+
return {}, None
|
|
443
581
|
|
|
444
582
|
child_digest = manifest_ref.get("digest")
|
|
445
583
|
if not child_digest:
|
|
446
|
-
return {}
|
|
584
|
+
return {}, None
|
|
447
585
|
|
|
448
586
|
# Use optimized caching for child manifest
|
|
449
587
|
child_doc, _ = await _fetch_and_cache_manifest(
|
|
@@ -452,16 +590,17 @@ async def fetch_image_layers_async(
|
|
|
452
590
|
[ECR_OCI_MANIFEST_MT, ECR_DOCKER_MANIFEST_MT],
|
|
453
591
|
)
|
|
454
592
|
if not child_doc:
|
|
455
|
-
return {}
|
|
593
|
+
return {}, None
|
|
456
594
|
|
|
457
595
|
platform_hint = extract_platform_from_manifest(manifest_ref)
|
|
458
|
-
|
|
596
|
+
diff_map = await _diff_ids_for_manifest(
|
|
459
597
|
ecr_client,
|
|
460
598
|
repo_name,
|
|
461
599
|
child_doc,
|
|
462
600
|
http_client,
|
|
463
601
|
platform_hint,
|
|
464
602
|
)
|
|
603
|
+
return diff_map, None
|
|
465
604
|
|
|
466
605
|
# Process all child manifests in parallel
|
|
467
606
|
child_tasks = [
|
|
@@ -474,8 +613,13 @@ async def fetch_image_layers_async(
|
|
|
474
613
|
|
|
475
614
|
# Merge results from successful child manifest processing
|
|
476
615
|
for result in child_results:
|
|
477
|
-
if isinstance(result,
|
|
478
|
-
|
|
616
|
+
if isinstance(result, tuple) and len(result) == 2:
|
|
617
|
+
layer_data, attest_data = result
|
|
618
|
+
if layer_data:
|
|
619
|
+
platform_layers.update(layer_data)
|
|
620
|
+
if attest_data and not attestation_data:
|
|
621
|
+
# Use first attestation found
|
|
622
|
+
attestation_data = attest_data
|
|
479
623
|
else:
|
|
480
624
|
diff_map = await _diff_ids_for_manifest(
|
|
481
625
|
ecr_client,
|
|
@@ -487,7 +631,7 @@ async def fetch_image_layers_async(
|
|
|
487
631
|
platform_layers.update(diff_map)
|
|
488
632
|
|
|
489
633
|
if platform_layers:
|
|
490
|
-
return uri, digest, platform_layers
|
|
634
|
+
return uri, digest, platform_layers, attestation_data
|
|
491
635
|
|
|
492
636
|
return None
|
|
493
637
|
|
|
@@ -507,7 +651,7 @@ async def fetch_image_layers_async(
|
|
|
507
651
|
)
|
|
508
652
|
|
|
509
653
|
if not tasks:
|
|
510
|
-
return image_layers_data, image_digest_map
|
|
654
|
+
return image_layers_data, image_digest_map, image_attestation_map
|
|
511
655
|
|
|
512
656
|
progress_interval = max(1, min(100, total // 10 or 1))
|
|
513
657
|
completed = 0
|
|
@@ -526,16 +670,22 @@ async def fetch_image_layers_async(
|
|
|
526
670
|
)
|
|
527
671
|
|
|
528
672
|
if result:
|
|
529
|
-
uri, digest, layer_data = result
|
|
673
|
+
uri, digest, layer_data, attestation_data = result
|
|
530
674
|
if not digest:
|
|
531
675
|
raise ValueError(f"Empty digest returned for image {uri}")
|
|
532
676
|
image_layers_data[uri] = layer_data
|
|
533
677
|
image_digest_map[uri] = digest
|
|
678
|
+
if attestation_data:
|
|
679
|
+
image_attestation_map[uri] = attestation_data
|
|
534
680
|
|
|
535
681
|
logger.info(
|
|
536
682
|
f"Successfully fetched layers for {len(image_layers_data)}/{len(repo_images_list)} images"
|
|
537
683
|
)
|
|
538
|
-
|
|
684
|
+
if image_attestation_map:
|
|
685
|
+
logger.info(
|
|
686
|
+
f"Found attestations with base image info for {len(image_attestation_map)} images"
|
|
687
|
+
)
|
|
688
|
+
return image_layers_data, image_digest_map, image_attestation_map
|
|
539
689
|
|
|
540
690
|
|
|
541
691
|
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
|
|
@@ -613,9 +763,11 @@ def sync(
|
|
|
613
763
|
f"Starting to fetch layers for {len(repo_images_list)} images..."
|
|
614
764
|
)
|
|
615
765
|
|
|
616
|
-
async def _fetch_with_async_client() ->
|
|
617
|
-
|
|
618
|
-
|
|
766
|
+
async def _fetch_with_async_client() -> tuple[
|
|
767
|
+
dict[str, dict[str, list[str]]],
|
|
768
|
+
dict[str, str],
|
|
769
|
+
dict[str, dict[str, str]],
|
|
770
|
+
]:
|
|
619
771
|
# Use credentials from the existing boto3 session
|
|
620
772
|
credentials = boto3_session.get_credentials()
|
|
621
773
|
session = aioboto3.Session(
|
|
@@ -635,8 +787,8 @@ def sync(
|
|
|
635
787
|
loop = asyncio.new_event_loop()
|
|
636
788
|
asyncio.set_event_loop(loop)
|
|
637
789
|
|
|
638
|
-
image_layers_data, image_digest_map =
|
|
639
|
-
_fetch_with_async_client()
|
|
790
|
+
image_layers_data, image_digest_map, image_attestation_map = (
|
|
791
|
+
loop.run_until_complete(_fetch_with_async_client())
|
|
640
792
|
)
|
|
641
793
|
|
|
642
794
|
logger.info(
|
|
@@ -645,6 +797,7 @@ def sync(
|
|
|
645
797
|
layers, memberships = transform_ecr_image_layers(
|
|
646
798
|
image_layers_data,
|
|
647
799
|
image_digest_map,
|
|
800
|
+
image_attestation_map,
|
|
648
801
|
)
|
|
649
802
|
load_ecr_image_layers(
|
|
650
803
|
neo4j_session,
|
|
cartography/intel/aws/elasticsearch.py
CHANGED
|
@@ -8,6 +8,7 @@ import botocore.config
|
|
|
8
8
|
import neo4j
|
|
9
9
|
from policyuniverse.policy import Policy
|
|
10
10
|
|
|
11
|
+
from cartography.client.core.tx import run_write_query
|
|
11
12
|
from cartography.intel.dns import ingest_dns_record_by_fqdn
|
|
12
13
|
from cartography.util import aws_handle_regions
|
|
13
14
|
from cartography.util import run_cleanup_job
|
|
@@ -95,7 +96,8 @@ def _load_es_domains(
|
|
|
95
96
|
for d in domain_list:
|
|
96
97
|
del d["ServiceSoftwareOptions"]
|
|
97
98
|
|
|
98
|
-
|
|
99
|
+
run_write_query(
|
|
100
|
+
neo4j_session,
|
|
99
101
|
ingest_records,
|
|
100
102
|
Records=domain_list,
|
|
101
103
|
AWS_ACCOUNT_ID=aws_account_id,
|
|
@@ -179,7 +181,8 @@ def _link_es_domain_vpc(
|
|
|
179
181
|
groupList = vpc_data.get("SecurityGroupIds", [])
|
|
180
182
|
|
|
181
183
|
if len(subnetList) > 0:
|
|
182
|
-
|
|
184
|
+
run_write_query(
|
|
185
|
+
neo4j_session,
|
|
183
186
|
ingest_subnet,
|
|
184
187
|
DomainId=domain_id,
|
|
185
188
|
SubnetList=subnetList,
|
|
@@ -187,7 +190,8 @@ def _link_es_domain_vpc(
|
|
|
187
190
|
)
|
|
188
191
|
|
|
189
192
|
if len(groupList) > 0:
|
|
190
|
-
|
|
193
|
+
run_write_query(
|
|
194
|
+
neo4j_session,
|
|
191
195
|
ingest_sec_groups,
|
|
192
196
|
DomainId=domain_id,
|
|
193
197
|
SecGroupList=groupList,
|
|
@@ -220,7 +224,12 @@ def _process_access_policy(
|
|
|
220
224
|
if policy.is_internet_accessible():
|
|
221
225
|
exposed_internet = True
|
|
222
226
|
|
|
223
|
-
|
|
227
|
+
run_write_query(
|
|
228
|
+
neo4j_session,
|
|
229
|
+
tag_es,
|
|
230
|
+
DomainId=domain_id,
|
|
231
|
+
InternetExposed=exposed_internet,
|
|
232
|
+
)
|
|
224
233
|
|
|
225
234
|
|
|
226
235
|
@timeit
|