cartography 0.117.0__py3-none-any.whl → 0.119.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +31 -0
- cartography/client/core/tx.py +19 -3
- cartography/config.py +14 -0
- cartography/data/indexes.cypher +0 -6
- cartography/graph/job.py +13 -7
- cartography/graph/statement.py +4 -0
- cartography/intel/aws/__init__.py +22 -9
- cartography/intel/aws/apigateway.py +18 -5
- cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
- cartography/intel/aws/ec2/internet_gateways.py +4 -2
- cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
- cartography/intel/aws/ec2/network_interfaces.py +4 -0
- cartography/intel/aws/ec2/reserved_instances.py +3 -1
- cartography/intel/aws/ec2/tgw.py +11 -5
- cartography/intel/aws/ec2/volumes.py +1 -1
- cartography/intel/aws/ecr.py +209 -26
- cartography/intel/aws/ecr_image_layers.py +143 -42
- cartography/intel/aws/elasticsearch.py +13 -4
- cartography/intel/aws/identitycenter.py +93 -54
- cartography/intel/aws/inspector.py +90 -46
- cartography/intel/aws/permission_relationships.py +3 -3
- cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
- cartography/intel/aws/s3.py +26 -13
- cartography/intel/aws/ssm.py +3 -5
- cartography/intel/azure/compute.py +9 -4
- cartography/intel/azure/cosmosdb.py +31 -15
- cartography/intel/azure/sql.py +25 -12
- cartography/intel/azure/storage.py +19 -9
- cartography/intel/azure/subscription.py +3 -1
- cartography/intel/crowdstrike/spotlight.py +5 -2
- cartography/intel/entra/app_role_assignments.py +9 -2
- cartography/intel/gcp/__init__.py +26 -9
- cartography/intel/gcp/clients.py +8 -4
- cartography/intel/gcp/compute.py +42 -21
- cartography/intel/gcp/crm/folders.py +9 -3
- cartography/intel/gcp/crm/orgs.py +8 -3
- cartography/intel/gcp/crm/projects.py +14 -3
- cartography/intel/github/repos.py +23 -5
- cartography/intel/gsuite/__init__.py +12 -8
- cartography/intel/gsuite/groups.py +291 -0
- cartography/intel/gsuite/users.py +142 -0
- cartography/intel/jamf/computers.py +7 -1
- cartography/intel/oci/iam.py +23 -9
- cartography/intel/oci/organizations.py +3 -1
- cartography/intel/oci/utils.py +28 -5
- cartography/intel/okta/awssaml.py +9 -8
- cartography/intel/okta/users.py +1 -1
- cartography/intel/ontology/__init__.py +44 -0
- cartography/intel/ontology/devices.py +54 -0
- cartography/intel/ontology/users.py +54 -0
- cartography/intel/ontology/utils.py +121 -0
- cartography/intel/pagerduty/escalation_policies.py +13 -6
- cartography/intel/pagerduty/schedules.py +9 -4
- cartography/intel/pagerduty/services.py +7 -3
- cartography/intel/pagerduty/teams.py +5 -2
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/trivy/__init__.py +109 -58
- cartography/models/airbyte/user.py +4 -0
- cartography/models/anthropic/user.py +4 -0
- cartography/models/aws/ec2/networkinterfaces.py +2 -0
- cartography/models/aws/ecr/image.py +55 -0
- cartography/models/aws/ecr/repository_image.py +1 -1
- cartography/models/aws/iam/group_membership.py +3 -2
- cartography/models/aws/identitycenter/awsssouser.py +3 -1
- cartography/models/bigfix/bigfix_computer.py +1 -1
- cartography/models/cloudflare/member.py +4 -0
- cartography/models/crowdstrike/hosts.py +1 -1
- cartography/models/duo/endpoint.py +1 -1
- cartography/models/duo/phone.py +2 -2
- cartography/models/duo/user.py +4 -0
- cartography/models/entra/user.py +2 -1
- cartography/models/github/users.py +4 -0
- cartography/models/gsuite/__init__.py +0 -0
- cartography/models/gsuite/group.py +218 -0
- cartography/models/gsuite/tenant.py +29 -0
- cartography/models/gsuite/user.py +107 -0
- cartography/models/kandji/device.py +1 -2
- cartography/models/keycloak/user.py +4 -0
- cartography/models/lastpass/user.py +4 -0
- cartography/models/ontology/__init__.py +0 -0
- cartography/models/ontology/device.py +125 -0
- cartography/models/ontology/mapping/__init__.py +16 -0
- cartography/models/ontology/mapping/data/__init__.py +1 -0
- cartography/models/ontology/mapping/data/devices.py +160 -0
- cartography/models/ontology/mapping/data/users.py +239 -0
- cartography/models/ontology/mapping/specs.py +65 -0
- cartography/models/ontology/user.py +52 -0
- cartography/models/openai/user.py +4 -0
- cartography/models/scaleway/iam/user.py +4 -0
- cartography/models/snipeit/asset.py +1 -0
- cartography/models/snipeit/user.py +4 -0
- cartography/models/tailscale/device.py +1 -1
- cartography/models/tailscale/user.py +6 -1
- cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
- cartography/sync.py +4 -1
- cartography/util.py +49 -18
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/METADATA +3 -3
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/RECORD +104 -89
- cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
- cartography/intel/gsuite/api.py +0 -355
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
cartography/intel/aws/ecr.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
3
|
from typing import Any
|
|
3
4
|
from typing import Dict
|
|
@@ -18,6 +19,12 @@ from cartography.util import to_synchronous
|
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
22
|
+
# Media types that mark an ECR image as a multi-entry manifest rather than a
# single image manifest: the Docker "manifest list" and the OCI "image index".
# Used both to recognise such images (membership test) and as the
# acceptedMediaTypes argument when fetching the manifest itself.
MANIFEST_LIST_MEDIA_TYPES = {
    "application/vnd.docker.distribution.manifest.list.v2+json",
    "application/vnd.oci.image.index.v1+json",
}
|
|
27
|
+
|
|
21
28
|
|
|
22
29
|
@timeit
|
|
23
30
|
@aws_handle_regions
|
|
@@ -34,6 +41,84 @@ def get_ecr_repositories(
|
|
|
34
41
|
return ecr_repositories
|
|
35
42
|
|
|
36
43
|
|
|
44
|
+
def _get_platform_specific_digests(
    client: Any, repository_name: str, manifest_list_digest: str
) -> tuple[List[Dict[str, Any]], set[str]]:
    """
    Fetch a manifest list and describe every manifest it references.

    Calls ``client.batch_get_image`` for ``manifest_list_digest``, parses the
    returned ``imageManifest`` JSON and walks its ``manifests`` array. Despite
    the function name, attestation manifests are returned alongside the
    platform-specific images.

    Args:
        client: Object exposing ``batch_get_image`` (presumably a boto3 ECR
            client - confirm against the caller).
        repository_name: Name of the ECR repository holding the manifest list.
        manifest_list_digest: Digest of the manifest list / image index.

    Returns:
        - List with one dict per referenced manifest, carrying ``digest``,
          ``type`` ("image" or "attestation"), ``architecture``, ``os``,
          ``variant``, ``attestation_type``, ``attests_digest``,
          ``media_type`` and ``artifact_media_type``
        - Set of ALL digests referenced in the manifest list

    Raises:
        ValueError: If no image is returned for the digest, if the manifest
            list has no ``manifests`` entries, or if an entry has no digest.
    """
    response = client.batch_get_image(
        repositoryName=repository_name,
        imageIds=[{"imageDigest": manifest_list_digest}],
        acceptedMediaTypes=list(MANIFEST_LIST_MEDIA_TYPES),
    )

    if not response.get("images"):
        raise ValueError(
            f"No manifest list found for digest {manifest_list_digest} in repository {repository_name}"
        )

    # A single digest was requested, so only the first returned image is read.
    # Its manifests[] array lists all platform-specific images and attestations.
    manifest_json = json.loads(response["images"][0]["imageManifest"])
    manifests = manifest_json.get("manifests", [])

    if not manifests:
        raise ValueError(
            f"Manifest list {manifest_list_digest} has no manifests in repository {repository_name}"
        )

    all_images: List[Dict[str, Any]] = []
    all_referenced_digests: set[str] = set()

    for manifest_ref in manifests:
        digest = manifest_ref.get("digest")
        if not digest:
            raise ValueError(
                f"Manifest in list {manifest_list_digest} has no digest in repository {repository_name}"
            )

        all_referenced_digests.add(digest)

        # "platform" and "annotations" are optional; ``or {}`` also covers an
        # explicit JSON null, which ``.get(key, {})`` would pass through as
        # None and then fail on the following ``.get`` calls.
        platform_info = manifest_ref.get("platform") or {}
        annotations = manifest_ref.get("annotations") or {}

        architecture = platform_info.get("architecture")
        os_name = platform_info.get("os")
        reference_type = annotations.get("vnd.docker.reference.type")

        # An entry counts as an attestation when its platform is
        # unknown/unknown or when it is explicitly annotated as one.
        is_attestation = (
            architecture == "unknown" and os_name == "unknown"
        ) or reference_type == "attestation-manifest"

        all_images.append(
            {
                "digest": digest,
                "type": "attestation" if is_attestation else "image",
                "architecture": architecture,
                "os": os_name,
                "variant": platform_info.get("variant"),
                # Attestation-only fields stay None for regular images.
                "attestation_type": reference_type if is_attestation else None,
                "attests_digest": (
                    annotations.get("vnd.docker.reference.digest")
                    if is_attestation
                    else None
                ),
                "media_type": manifest_ref.get("mediaType"),
                "artifact_media_type": manifest_ref.get("artifactType"),
            }
        )

    return all_images, all_referenced_digests
|
|
120
|
+
|
|
121
|
+
|
|
37
122
|
@timeit
|
|
38
123
|
@aws_handle_regions
|
|
39
124
|
def get_ecr_repository_images(
|
|
@@ -46,7 +131,11 @@ def get_ecr_repository_images(
|
|
|
46
131
|
)
|
|
47
132
|
client = boto3_session.client("ecr", region_name=region)
|
|
48
133
|
list_paginator = client.get_paginator("list_images")
|
|
49
|
-
|
|
134
|
+
|
|
135
|
+
# First pass: Collect all image details and track manifest list referenced digests
|
|
136
|
+
all_image_details: List[Dict] = []
|
|
137
|
+
manifest_list_referenced_digests: set[str] = set()
|
|
138
|
+
|
|
50
139
|
for page in list_paginator.paginate(repositoryName=repository_name):
|
|
51
140
|
image_ids = page["imageIds"]
|
|
52
141
|
if not image_ids:
|
|
@@ -58,14 +147,37 @@ def get_ecr_repository_images(
|
|
|
58
147
|
for response in describe_response:
|
|
59
148
|
image_details = response["imageDetails"]
|
|
60
149
|
for detail in image_details:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
150
|
+
# Check if this is a manifest list
|
|
151
|
+
media_type = detail.get("imageManifestMediaType")
|
|
152
|
+
if media_type in MANIFEST_LIST_MEDIA_TYPES:
|
|
153
|
+
# Fetch all images from manifest list (platform-specific + attestations)
|
|
154
|
+
manifest_list_digest = detail["imageDigest"]
|
|
155
|
+
manifest_images, all_digests = _get_platform_specific_digests(
|
|
156
|
+
client, repository_name, manifest_list_digest
|
|
157
|
+
)
|
|
158
|
+
detail["_manifest_images"] = manifest_images
|
|
159
|
+
|
|
160
|
+
# Track ALL digests so we don't create ECRRepositoryImages for them
|
|
161
|
+
manifest_list_referenced_digests.update(all_digests)
|
|
162
|
+
|
|
163
|
+
all_image_details.append(detail)
|
|
164
|
+
|
|
165
|
+
# Second pass: Only add images that should have ECRRepositoryImage nodes
|
|
166
|
+
ecr_repository_images: List[Dict] = []
|
|
167
|
+
for detail in all_image_details:
|
|
168
|
+
tags = detail.get("imageTags") or []
|
|
169
|
+
digest = detail.get("imageDigest")
|
|
170
|
+
|
|
171
|
+
if tags:
|
|
172
|
+
# Tagged images always get ECRRepositoryImage nodes (one per tag)
|
|
173
|
+
for tag in tags:
|
|
174
|
+
image_detail = {**detail, "imageTag": tag}
|
|
175
|
+
image_detail.pop("imageTags", None)
|
|
176
|
+
ecr_repository_images.append(image_detail)
|
|
177
|
+
elif digest not in manifest_list_referenced_digests:
|
|
178
|
+
# Untagged images only get nodes if they're NOT part of a manifest list
|
|
179
|
+
ecr_repository_images.append({**detail})
|
|
180
|
+
|
|
69
181
|
return ecr_repository_images
|
|
70
182
|
|
|
71
183
|
|
|
@@ -91,52 +203,122 @@ def load_ecr_repositories(
|
|
|
91
203
|
|
|
92
204
|
|
|
93
205
|
@timeit
|
|
94
|
-
def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
|
|
206
|
+
def transform_ecr_repository_images(repo_data: Dict) -> tuple[List[Dict], List[Dict]]:
    """
    Transform ECR repository images into repo image list and ECR image list.

    For manifest lists (images carrying a non-empty ``_manifest_images``
    field), ECR images are created for the manifest list itself and for every
    manifest it references, attestations included.

    ECR images are de-duplicated by digest. Metadata taken from a manifest list
    (architecture, os, variant, attestation fields, media types) replaces a
    bare placeholder created earlier for the same digest, so the result does
    not depend on whether a tagged platform image is processed before or after
    the manifest list that references it. Among manifest-derived entries the
    first one seen is kept.

    Input dicts are not mutated; each repo image is a shallow copy.

    Args:
        repo_data: Map of repository URI to the list of image detail dicts for
            that repository. Each detail is read for ``imageDigest``,
            ``imageTag`` and the internal ``_manifest_images`` field.

    Returns:
        - repo_images_list: List of ECRRepositoryImage nodes with imageDigests field (one-to-many)
        - ecr_images_list: List of ECRImage nodes with type, architecture, os, variant fields
    """
    repo_images_list: List[Dict] = []
    ecr_images_dict: Dict[str, Dict] = {}  # Deduplicate by digest
    # Digests whose entry was built from manifest-list data. Entries outside
    # this set are placeholders that may still be upgraded.
    manifest_described: set[str] = set()

    # Sort repository URIs to ensure consistent processing order
    for repo_uri in sorted(repo_data):
        for img in repo_data[repo_uri]:
            digest = img.get("imageDigest")
            if not digest:
                logger.warning(
                    "Repo %s has an image that has no imageDigest. Its tag is %s. Continuing on.",
                    repo_uri,
                    img.get("imageTag"),
                )
                continue

            tag = img.get("imageTag")
            uri = repo_uri + (f":{tag}" if tag else "")

            # Build ECRRepositoryImage node; the internal helper field is
            # consumed here and must not reach the graph.
            repo_image = {
                **img,
                "repo_uri": repo_uri,
                "uri": uri,
                "id": uri,
            }
            manifest_images = repo_image.pop("_manifest_images", None)

            if manifest_images:
                # For manifest list: include manifest list digest + all referenced digests
                repo_image["imageDigests"] = [digest] + [
                    m["digest"] for m in manifest_images
                ]

                # ECRImage for the manifest list itself
                if digest not in manifest_described:
                    # Attestations are excluded from the child image digests.
                    child_digests = [
                        m["digest"]
                        for m in manifest_images
                        if m.get("type") != "attestation"
                    ]
                    ecr_images_dict[digest] = {
                        "imageDigest": digest,
                        "type": "manifest_list",
                        "architecture": None,
                        "os": None,
                        "variant": None,
                        "child_image_digests": child_digests if child_digests else None,
                    }
                    manifest_described.add(digest)

                # ECRImage nodes for each manifest referenced by the list
                for manifest_img in manifest_images:
                    manifest_digest = manifest_img["digest"]
                    if manifest_digest in manifest_described:
                        continue
                    # Assigning to an existing key keeps its position, so
                    # upgrading a placeholder does not reorder the output.
                    ecr_images_dict[manifest_digest] = {
                        "imageDigest": manifest_digest,
                        "type": manifest_img.get("type"),
                        "architecture": manifest_img.get("architecture"),
                        "os": manifest_img.get("os"),
                        "variant": manifest_img.get("variant"),
                        "attestation_type": manifest_img.get("attestation_type"),
                        "attests_digest": manifest_img.get("attests_digest"),
                        "media_type": manifest_img.get("media_type"),
                        "artifact_media_type": manifest_img.get(
                            "artifact_media_type"
                        ),
                    }
                    manifest_described.add(manifest_digest)
            else:
                # Regular image: single digest
                repo_image["imageDigests"] = [digest]

                # Placeholder ECRImage; never overrides an existing entry.
                if digest not in ecr_images_dict:
                    ecr_images_dict[digest] = {
                        "imageDigest": digest,
                        "type": "image",
                        "architecture": None,
                        "os": None,
                        "variant": None,
                    }

            repo_images_list.append(repo_image)

    ecr_images_list = list(ecr_images_dict.values())
    return repo_images_list, ecr_images_list
|
|
120
303
|
|
|
121
304
|
|
|
122
305
|
@timeit
|
|
123
306
|
def load_ecr_repository_images(
|
|
124
307
|
neo4j_session: neo4j.Session,
|
|
125
308
|
repo_images_list: List[Dict],
|
|
309
|
+
ecr_images_list: List[Dict],
|
|
126
310
|
region: str,
|
|
127
311
|
current_aws_account_id: str,
|
|
128
312
|
aws_update_tag: int,
|
|
129
313
|
) -> None:
|
|
130
314
|
logger.info(
|
|
131
|
-
f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
315
|
+
f"Loading {len(ecr_images_list)} ECR images and {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
132
316
|
)
|
|
133
|
-
image_digests = {img["imageDigest"] for img in repo_images_list}
|
|
134
|
-
ecr_images = [{"imageDigest": d} for d in image_digests]
|
|
135
317
|
|
|
136
318
|
load(
|
|
137
319
|
neo4j_session,
|
|
138
320
|
ECRImageSchema(),
|
|
139
|
-
|
|
321
|
+
ecr_images_list,
|
|
140
322
|
lastupdated=aws_update_tag,
|
|
141
323
|
Region=region,
|
|
142
324
|
AWS_ID=current_aws_account_id,
|
|
@@ -219,10 +401,11 @@ def sync(
|
|
|
219
401
|
current_aws_account_id,
|
|
220
402
|
update_tag,
|
|
221
403
|
)
|
|
222
|
-
repo_images_list = transform_ecr_repository_images(image_data)
|
|
404
|
+
repo_images_list, ecr_images_list = transform_ecr_repository_images(image_data)
|
|
223
405
|
load_ecr_repository_images(
|
|
224
406
|
neo4j_session,
|
|
225
407
|
repo_images_list,
|
|
408
|
+
ecr_images_list,
|
|
226
409
|
region,
|
|
227
410
|
current_aws_account_id,
|
|
228
411
|
update_tag,
|
|
cartography/intel/aws/ecr_image_layers.py
CHANGED
|
@@ -12,7 +12,6 @@ from typing import Any
|
|
|
12
12
|
from typing import Optional
|
|
13
13
|
|
|
14
14
|
import aioboto3
|
|
15
|
-
import boto3
|
|
16
15
|
import httpx
|
|
17
16
|
import neo4j
|
|
18
17
|
from botocore.exceptions import ClientError
|
|
@@ -334,6 +333,7 @@ def transform_ecr_image_layers(
|
|
|
334
333
|
image_layers_data: dict[str, dict[str, list[str]]],
|
|
335
334
|
image_digest_map: dict[str, str],
|
|
336
335
|
image_attestation_map: Optional[dict[str, dict[str, str]]] = None,
|
|
336
|
+
existing_properties_map: Optional[dict[str, dict[str, Any]]] = None,
|
|
337
337
|
) -> tuple[list[dict], list[dict]]:
|
|
338
338
|
"""
|
|
339
339
|
Transform image layer data into format suitable for Neo4j ingestion.
|
|
@@ -342,10 +342,13 @@ def transform_ecr_image_layers(
|
|
|
342
342
|
:param image_layers_data: Map of image URI to platform to diff_ids
|
|
343
343
|
:param image_digest_map: Map of image URI to image digest
|
|
344
344
|
:param image_attestation_map: Map of image URI to attestation data (parent_image_uri, parent_image_digest)
|
|
345
|
+
:param existing_properties_map: Map of image digest to existing ECRImage properties (type, architecture, etc.)
|
|
345
346
|
:return: List of layer objects ready for ingestion
|
|
346
347
|
"""
|
|
347
348
|
if image_attestation_map is None:
|
|
348
349
|
image_attestation_map = {}
|
|
350
|
+
if existing_properties_map is None:
|
|
351
|
+
existing_properties_map = {}
|
|
349
352
|
layers_by_diff_id: dict[str, dict[str, Any]] = {}
|
|
350
353
|
memberships_by_digest: dict[str, dict[str, Any]] = {}
|
|
351
354
|
|
|
@@ -353,6 +356,16 @@ def transform_ecr_image_layers(
|
|
|
353
356
|
# fetch_image_layers_async guarantees every uri in image_layers_data has a digest
|
|
354
357
|
image_digest = image_digest_map[image_uri]
|
|
355
358
|
|
|
359
|
+
# Check if this is a manifest list
|
|
360
|
+
is_manifest_list = False
|
|
361
|
+
if image_digest in existing_properties_map:
|
|
362
|
+
image_type = existing_properties_map[image_digest].get("type")
|
|
363
|
+
is_manifest_list = image_type == "manifest_list"
|
|
364
|
+
|
|
365
|
+
# Skip creating layer relationships for manifest lists
|
|
366
|
+
if is_manifest_list:
|
|
367
|
+
continue
|
|
368
|
+
|
|
356
369
|
ordered_layers_for_image: Optional[list[str]] = None
|
|
357
370
|
|
|
358
371
|
for _, diff_ids in platforms.items():
|
|
@@ -391,6 +404,10 @@ def transform_ecr_image_layers(
|
|
|
391
404
|
"layer_diff_ids": ordered_layers_for_image,
|
|
392
405
|
}
|
|
393
406
|
|
|
407
|
+
# Preserve existing ECRImage properties (type, architecture, os, variant, etc.)
|
|
408
|
+
if image_digest in existing_properties_map:
|
|
409
|
+
membership.update(existing_properties_map[image_digest])
|
|
410
|
+
|
|
394
411
|
# Add attestation data if available for this image
|
|
395
412
|
if image_uri in image_attestation_map:
|
|
396
413
|
attestation = image_attestation_map[image_uri]
|
|
@@ -433,7 +450,12 @@ def load_ecr_image_layers(
|
|
|
433
450
|
current_aws_account_id: str,
|
|
434
451
|
aws_update_tag: int,
|
|
435
452
|
) -> None:
|
|
436
|
-
"""
|
|
453
|
+
"""
|
|
454
|
+
Load image layers into Neo4j.
|
|
455
|
+
|
|
456
|
+
Uses a smaller batch size (1000) to avoid Neo4j transaction memory limits,
|
|
457
|
+
since layer objects can contain large arrays of relationships.
|
|
458
|
+
"""
|
|
437
459
|
logger.info(
|
|
438
460
|
f"Loading {len(image_layers)} image layers for region {region} into graph.",
|
|
439
461
|
)
|
|
@@ -442,6 +464,7 @@ def load_ecr_image_layers(
|
|
|
442
464
|
neo4j_session,
|
|
443
465
|
ECRImageLayerSchema(),
|
|
444
466
|
image_layers,
|
|
467
|
+
batch_size=1000,
|
|
445
468
|
lastupdated=aws_update_tag,
|
|
446
469
|
AWS_ID=current_aws_account_id,
|
|
447
470
|
)
|
|
@@ -455,10 +478,17 @@ def load_ecr_image_layer_memberships(
|
|
|
455
478
|
current_aws_account_id: str,
|
|
456
479
|
aws_update_tag: int,
|
|
457
480
|
) -> None:
|
|
481
|
+
"""
|
|
482
|
+
Load image layer memberships into Neo4j.
|
|
483
|
+
|
|
484
|
+
Uses a smaller batch size (1000) to avoid Neo4j transaction memory limits,
|
|
485
|
+
since membership objects can contain large arrays of layer diff_ids.
|
|
486
|
+
"""
|
|
458
487
|
load(
|
|
459
488
|
neo4j_session,
|
|
460
489
|
ECRImageSchema(),
|
|
461
490
|
memberships,
|
|
491
|
+
batch_size=1000,
|
|
462
492
|
lastupdated=aws_update_tag,
|
|
463
493
|
Region=region,
|
|
464
494
|
AWS_ID=current_aws_account_id,
|
|
@@ -527,8 +557,15 @@ async def fetch_image_layers_async(
|
|
|
527
557
|
async def fetch_single_image_layers(
|
|
528
558
|
repo_image: dict,
|
|
529
559
|
http_client: httpx.AsyncClient,
|
|
530
|
-
) -> Optional[
|
|
531
|
-
|
|
560
|
+
) -> Optional[
|
|
561
|
+
tuple[str, str, dict[str, list[str]], Optional[dict[str, dict[str, str]]]]
|
|
562
|
+
]:
|
|
563
|
+
"""
|
|
564
|
+
Fetch layers for a single image and extract attestation if present.
|
|
565
|
+
|
|
566
|
+
Returns tuple of (uri, digest, platform_layers, attestations_by_child_digest) where
|
|
567
|
+
attestations_by_child_digest maps child image digest to parent image info
|
|
568
|
+
"""
|
|
532
569
|
async with semaphore:
|
|
533
570
|
# Caller guarantees these fields exist in every repo_image
|
|
534
571
|
uri = repo_image["uri"]
|
|
@@ -551,13 +588,13 @@ async def fetch_image_layers_async(
|
|
|
551
588
|
|
|
552
589
|
manifest_media_type = (media_type or doc.get("mediaType", "")).lower()
|
|
553
590
|
platform_layers: dict[str, list[str]] = {}
|
|
554
|
-
attestation_data: Optional[dict[str, str]] = None
|
|
591
|
+
attestation_data: Optional[dict[str, dict[str, str]]] = None
|
|
555
592
|
|
|
556
593
|
if doc.get("manifests") and manifest_media_type in INDEX_MEDIA_TYPES_LOWER:
|
|
557
594
|
|
|
558
595
|
async def _process_child_manifest(
|
|
559
596
|
manifest_ref: dict,
|
|
560
|
-
) -> tuple[dict[str, list[str]], Optional[dict[str, str]]]:
|
|
597
|
+
) -> tuple[dict[str, list[str]], Optional[tuple[str, dict[str, str]]]]:
|
|
561
598
|
# Check if this is an attestation manifest
|
|
562
599
|
if (
|
|
563
600
|
manifest_ref.get("annotations", {}).get(
|
|
@@ -565,18 +602,27 @@ async def fetch_image_layers_async(
|
|
|
565
602
|
)
|
|
566
603
|
== "attestation-manifest"
|
|
567
604
|
):
|
|
605
|
+
# Extract which child image this attestation is for
|
|
606
|
+
attests_child_digest = manifest_ref.get("annotations", {}).get(
|
|
607
|
+
"vnd.docker.reference.digest"
|
|
608
|
+
)
|
|
609
|
+
if not attests_child_digest:
|
|
610
|
+
return {}, None
|
|
611
|
+
|
|
568
612
|
# Extract base image from attestation
|
|
569
|
-
|
|
570
|
-
if
|
|
613
|
+
attestation_digest = manifest_ref.get("digest")
|
|
614
|
+
if attestation_digest:
|
|
571
615
|
attestation_info = (
|
|
572
616
|
await _extract_parent_image_from_attestation(
|
|
573
617
|
ecr_client,
|
|
574
618
|
repo_name,
|
|
575
|
-
|
|
619
|
+
attestation_digest,
|
|
576
620
|
http_client,
|
|
577
621
|
)
|
|
578
622
|
)
|
|
579
|
-
|
|
623
|
+
if attestation_info:
|
|
624
|
+
# Return (attests_child_digest, parent_info) tuple
|
|
625
|
+
return {}, (attests_child_digest, attestation_info)
|
|
580
626
|
return {}, None
|
|
581
627
|
|
|
582
628
|
child_digest = manifest_ref.get("digest")
|
|
@@ -612,14 +658,22 @@ async def fetch_image_layers_async(
|
|
|
612
658
|
)
|
|
613
659
|
|
|
614
660
|
# Merge results from successful child manifest processing
|
|
661
|
+
# Track attestation data by child digest for proper mapping
|
|
662
|
+
attestations_by_child_digest: dict[str, dict[str, str]] = {}
|
|
663
|
+
|
|
615
664
|
for result in child_results:
|
|
616
665
|
if isinstance(result, tuple) and len(result) == 2:
|
|
617
666
|
layer_data, attest_data = result
|
|
618
667
|
if layer_data:
|
|
619
668
|
platform_layers.update(layer_data)
|
|
620
|
-
if attest_data
|
|
621
|
-
#
|
|
622
|
-
|
|
669
|
+
if attest_data:
|
|
670
|
+
# attest_data is (child_digest, parent_info) tuple
|
|
671
|
+
child_digest, parent_info = attest_data
|
|
672
|
+
attestations_by_child_digest[child_digest] = parent_info
|
|
673
|
+
|
|
674
|
+
# Build attestation_data with child digest mapping
|
|
675
|
+
if attestations_by_child_digest:
|
|
676
|
+
attestation_data = attestations_by_child_digest
|
|
623
677
|
else:
|
|
624
678
|
diff_map = await _diff_ids_for_manifest(
|
|
625
679
|
ecr_client,
|
|
@@ -630,7 +684,9 @@ async def fetch_image_layers_async(
|
|
|
630
684
|
)
|
|
631
685
|
platform_layers.update(diff_map)
|
|
632
686
|
|
|
633
|
-
if
|
|
687
|
+
# Return if we found layers or attestation data
|
|
688
|
+
# Manifest lists may have attestation_data without platform_layers
|
|
689
|
+
if platform_layers or attestation_data:
|
|
634
690
|
return uri, digest, platform_layers, attestation_data
|
|
635
691
|
|
|
636
692
|
return None
|
|
@@ -670,13 +726,22 @@ async def fetch_image_layers_async(
|
|
|
670
726
|
)
|
|
671
727
|
|
|
672
728
|
if result:
|
|
673
|
-
uri, digest, layer_data,
|
|
729
|
+
uri, digest, layer_data, attestations_by_child_digest = result
|
|
674
730
|
if not digest:
|
|
675
731
|
raise ValueError(f"Empty digest returned for image {uri}")
|
|
676
732
|
image_layers_data[uri] = layer_data
|
|
677
733
|
image_digest_map[uri] = digest
|
|
678
|
-
if
|
|
679
|
-
|
|
734
|
+
if attestations_by_child_digest:
|
|
735
|
+
# Map attestation data by child digest URIs
|
|
736
|
+
repo_uri = extract_repo_uri_from_image_uri(uri)
|
|
737
|
+
for (
|
|
738
|
+
child_digest,
|
|
739
|
+
parent_info,
|
|
740
|
+
) in attestations_by_child_digest.items():
|
|
741
|
+
child_uri = f"{repo_uri}@{child_digest}"
|
|
742
|
+
image_attestation_map[child_uri] = parent_info
|
|
743
|
+
# Also add to digest map so transform can look up the child digest
|
|
744
|
+
image_digest_map[child_uri] = child_digest
|
|
680
745
|
|
|
681
746
|
logger.info(
|
|
682
747
|
f"Successfully fetched layers for {len(image_layers_data)}/{len(repo_images_list)} images"
|
|
@@ -698,7 +763,7 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
|
|
|
698
763
|
@timeit
|
|
699
764
|
def sync(
|
|
700
765
|
neo4j_session: neo4j.Session,
|
|
701
|
-
|
|
766
|
+
aioboto3_session: aioboto3.Session,
|
|
702
767
|
regions: list[str],
|
|
703
768
|
current_aws_account_id: str,
|
|
704
769
|
update_tag: int,
|
|
@@ -721,30 +786,71 @@ def sync(
|
|
|
721
786
|
current_aws_account_id,
|
|
722
787
|
)
|
|
723
788
|
|
|
724
|
-
#
|
|
725
|
-
|
|
789
|
+
# Query for ECR images with all their existing properties to preserve during layer sync
|
|
790
|
+
query = """
|
|
791
|
+
MATCH (img:ECRImage)<-[:IMAGE]-(repo_img:ECRRepositoryImage)<-[:REPO_IMAGE]-(repo:ECRRepository)
|
|
792
|
+
MATCH (repo)<-[:RESOURCE]-(:AWSAccount {id: $AWS_ID})
|
|
793
|
+
WHERE repo.region = $Region
|
|
794
|
+
RETURN DISTINCT
|
|
795
|
+
img.digest AS digest,
|
|
796
|
+
repo_img.id AS uri,
|
|
797
|
+
repo.uri AS repo_uri,
|
|
798
|
+
img.type AS type,
|
|
799
|
+
img.architecture AS architecture,
|
|
800
|
+
img.os AS os,
|
|
801
|
+
img.variant AS variant,
|
|
802
|
+
img.attestation_type AS attestation_type,
|
|
803
|
+
img.attests_digest AS attests_digest,
|
|
804
|
+
img.media_type AS media_type,
|
|
805
|
+
img.artifact_media_type AS artifact_media_type,
|
|
806
|
+
img.child_image_digests AS child_image_digests
|
|
807
|
+
"""
|
|
808
|
+
from cartography.client.core.tx import read_list_of_dicts_tx
|
|
726
809
|
|
|
727
|
-
ecr_images =
|
|
810
|
+
ecr_images = neo4j_session.read_transaction(
|
|
811
|
+
read_list_of_dicts_tx, query, AWS_ID=current_aws_account_id, Region=region
|
|
812
|
+
)
|
|
728
813
|
|
|
729
|
-
#
|
|
814
|
+
# Build repo_images_list and existing_properties map
|
|
730
815
|
repo_images_list = []
|
|
816
|
+
existing_properties = {}
|
|
731
817
|
seen_digests = set()
|
|
732
818
|
|
|
733
|
-
for
|
|
734
|
-
|
|
819
|
+
for img_data in ecr_images:
|
|
820
|
+
digest = img_data["digest"]
|
|
821
|
+
image_type = img_data.get("type")
|
|
822
|
+
|
|
823
|
+
if digest not in seen_digests:
|
|
735
824
|
seen_digests.add(digest)
|
|
736
|
-
repo_uri = extract_repo_uri_from_image_uri(uri)
|
|
737
825
|
|
|
738
|
-
#
|
|
826
|
+
# Store existing properties for ALL images to preserve during updates
|
|
827
|
+
existing_properties[digest] = {
|
|
828
|
+
"type": image_type,
|
|
829
|
+
"architecture": img_data.get("architecture"),
|
|
830
|
+
"os": img_data.get("os"),
|
|
831
|
+
"variant": img_data.get("variant"),
|
|
832
|
+
"attestation_type": img_data.get("attestation_type"),
|
|
833
|
+
"attests_digest": img_data.get("attests_digest"),
|
|
834
|
+
"media_type": img_data.get("media_type"),
|
|
835
|
+
"artifact_media_type": img_data.get("artifact_media_type"),
|
|
836
|
+
"child_image_digests": img_data.get("child_image_digests"),
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
repo_uri = img_data["repo_uri"]
|
|
739
840
|
digest_uri = f"{repo_uri}@{digest}"
|
|
740
841
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
842
|
+
# Fetch manifests for:
|
|
843
|
+
# - Platform-specific images (type="image") - to get their layers
|
|
844
|
+
# - Manifest lists (type="manifest_list") - to extract attestation parent image data
|
|
845
|
+
# Skip only attestations since they don't have useful layer or parent data
|
|
846
|
+
if image_type != "attestation":
|
|
847
|
+
repo_images_list.append(
|
|
848
|
+
{
|
|
849
|
+
"imageDigest": digest,
|
|
850
|
+
"uri": digest_uri,
|
|
851
|
+
"repo_uri": repo_uri,
|
|
852
|
+
}
|
|
853
|
+
)
|
|
748
854
|
|
|
749
855
|
logger.info(
|
|
750
856
|
f"Found {len(repo_images_list)} distinct ECR image digests in graph for region {region}"
|
|
@@ -768,15 +874,9 @@ def sync(
|
|
|
768
874
|
dict[str, str],
|
|
769
875
|
dict[str, dict[str, str]],
|
|
770
876
|
]:
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
aws_access_key_id=credentials.access_key,
|
|
775
|
-
aws_secret_access_key=credentials.secret_key,
|
|
776
|
-
aws_session_token=credentials.token,
|
|
777
|
-
region_name=region,
|
|
778
|
-
)
|
|
779
|
-
async with session.client("ecr") as ecr_client:
|
|
877
|
+
async with aioboto3_session.client(
|
|
878
|
+
"ecr", region_name=region
|
|
879
|
+
) as ecr_client:
|
|
780
880
|
return await fetch_image_layers_async(ecr_client, repo_images_list)
|
|
781
881
|
|
|
782
882
|
# Use get_event_loop() + run_until_complete() to avoid tearing down loop
|
|
@@ -798,6 +898,7 @@ def sync(
|
|
|
798
898
|
image_layers_data,
|
|
799
899
|
image_digest_map,
|
|
800
900
|
image_attestation_map,
|
|
901
|
+
existing_properties,
|
|
801
902
|
)
|
|
802
903
|
load_ecr_image_layers(
|
|
803
904
|
neo4j_session,
|