cartography 0.117.0__py3-none-any.whl → 0.118.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +11 -0
- cartography/config.py +5 -0
- cartography/graph/job.py +6 -2
- cartography/graph/statement.py +4 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/apigateway.py +18 -5
- cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
- cartography/intel/aws/ec2/internet_gateways.py +4 -2
- cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
- cartography/intel/aws/ec2/network_interfaces.py +4 -0
- cartography/intel/aws/ec2/reserved_instances.py +3 -1
- cartography/intel/aws/ec2/tgw.py +11 -5
- cartography/intel/aws/ec2/volumes.py +1 -1
- cartography/intel/aws/ecr.py +202 -26
- cartography/intel/aws/elasticsearch.py +13 -4
- cartography/intel/aws/identitycenter.py +93 -54
- cartography/intel/aws/inspector.py +26 -14
- cartography/intel/aws/permission_relationships.py +3 -3
- cartography/intel/aws/s3.py +26 -13
- cartography/intel/aws/ssm.py +3 -5
- cartography/intel/azure/compute.py +9 -4
- cartography/intel/azure/cosmosdb.py +31 -15
- cartography/intel/azure/sql.py +25 -12
- cartography/intel/azure/storage.py +19 -9
- cartography/intel/azure/subscription.py +3 -1
- cartography/intel/crowdstrike/spotlight.py +5 -2
- cartography/intel/entra/app_role_assignments.py +9 -2
- cartography/intel/gcp/__init__.py +26 -9
- cartography/intel/gcp/clients.py +8 -4
- cartography/intel/gcp/compute.py +39 -18
- cartography/intel/gcp/crm/folders.py +9 -3
- cartography/intel/gcp/crm/orgs.py +8 -3
- cartography/intel/gcp/crm/projects.py +14 -3
- cartography/intel/jamf/computers.py +7 -1
- cartography/intel/oci/iam.py +23 -9
- cartography/intel/oci/organizations.py +3 -1
- cartography/intel/oci/utils.py +28 -5
- cartography/intel/okta/awssaml.py +8 -7
- cartography/intel/pagerduty/escalation_policies.py +13 -6
- cartography/intel/pagerduty/schedules.py +9 -4
- cartography/intel/pagerduty/services.py +7 -3
- cartography/intel/pagerduty/teams.py +5 -2
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/trivy/__init__.py +109 -58
- cartography/models/aws/ec2/networkinterfaces.py +2 -0
- cartography/models/aws/ecr/image.py +8 -0
- cartography/models/aws/ecr/repository_image.py +1 -1
- cartography/sync.py +1 -1
- cartography/util.py +5 -1
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/METADATA +3 -3
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/RECORD +57 -57
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/WHEEL +0 -0
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.117.0.dist-info → cartography-0.118.0.dist-info}/top_level.txt +0 -0
cartography/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.117.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 117, 0)
|
|
31
|
+
__version__ = version = '0.118.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 118, 0)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
cartography/cli.py
CHANGED
|
@@ -279,6 +279,17 @@ class CLI:
|
|
|
279
279
|
"Example: 'HIGH' will sync only HIGH and CRITICAL findings, filtering out LOW and MEDIUM severity findings."
|
|
280
280
|
),
|
|
281
281
|
)
|
|
282
|
+
parser.add_argument(
|
|
283
|
+
"--experimental-aws-inspector-batch",
|
|
284
|
+
type=int,
|
|
285
|
+
default=1000,
|
|
286
|
+
help=(
|
|
287
|
+
"EXPERIMENTAL: This feature is experimental and may be removed in the future. "
|
|
288
|
+
"Batch size for AWS Inspector findings sync. Controls how many findings are fetched, processed and cleaned up at a time. "
|
|
289
|
+
"Default is 1000. Increase this value if you have a large number of findings and want to reduce API calls, "
|
|
290
|
+
"or decrease it if you're experiencing memory issues."
|
|
291
|
+
),
|
|
292
|
+
)
|
|
282
293
|
parser.add_argument(
|
|
283
294
|
"--analysis-job-directory",
|
|
284
295
|
type=str,
|
cartography/config.py
CHANGED
|
@@ -58,6 +58,9 @@ class Config:
|
|
|
58
58
|
:type aws_guardduty_severity_threshold: str
|
|
59
59
|
:param aws_guardduty_severity_threshold: GuardDuty severity threshold filter. Only findings at or above this
|
|
60
60
|
severity level will be synced. Valid values: LOW, MEDIUM, HIGH, CRITICAL. Optional.
|
|
61
|
+
:type experimental_aws_inspector_batch: int
|
|
62
|
+
:param experimental_aws_inspector_batch: EXPERIMENTAL: Batch size for AWS Inspector findings sync. Controls how
|
|
63
|
+
many findings are fetched, processed and cleaned up at a time. Default is 1000. Optional.
|
|
61
64
|
:type analysis_job_directory: str
|
|
62
65
|
:param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
|
|
63
66
|
:type oci_sync_all_profiles: bool
|
|
@@ -195,6 +198,7 @@ class Config:
|
|
|
195
198
|
aws_regions=None,
|
|
196
199
|
aws_best_effort_mode=False,
|
|
197
200
|
aws_cloudtrail_management_events_lookback_hours=None,
|
|
201
|
+
experimental_aws_inspector_batch=1000,
|
|
198
202
|
azure_sync_all_subscriptions=False,
|
|
199
203
|
azure_sp_auth=None,
|
|
200
204
|
azure_tenant_id=None,
|
|
@@ -287,6 +291,7 @@ class Config:
|
|
|
287
291
|
self.aws_cloudtrail_management_events_lookback_hours = (
|
|
288
292
|
aws_cloudtrail_management_events_lookback_hours
|
|
289
293
|
)
|
|
294
|
+
self.experimental_aws_inspector_batch = experimental_aws_inspector_batch
|
|
290
295
|
self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
|
|
291
296
|
self.azure_sp_auth = azure_sp_auth
|
|
292
297
|
self.azure_tenant_id = azure_tenant_id
|
cartography/graph/job.py
CHANGED
|
@@ -139,11 +139,13 @@ class GraphJob:
|
|
|
139
139
|
cls,
|
|
140
140
|
node_schema: CartographyNodeSchema,
|
|
141
141
|
parameters: Dict[str, Any],
|
|
142
|
+
iterationsize: int = 100,
|
|
142
143
|
) -> "GraphJob":
|
|
143
144
|
"""
|
|
144
145
|
Create a cleanup job from a CartographyNodeSchema object.
|
|
145
146
|
For a given node, the fields used in the node_schema.sub_resource_relationship.target_node_node_matcher.keys()
|
|
146
147
|
must be provided as keys and values in the params dict.
|
|
148
|
+
:param iterationsize: The number of items to process in each iteration. Defaults to 100.
|
|
147
149
|
"""
|
|
148
150
|
queries: List[str] = build_cleanup_queries(node_schema)
|
|
149
151
|
|
|
@@ -165,7 +167,7 @@ class GraphJob:
|
|
|
165
167
|
query,
|
|
166
168
|
parameters=parameters,
|
|
167
169
|
iterative=True,
|
|
168
|
-
iterationsize=100,
|
|
170
|
+
iterationsize=iterationsize,
|
|
169
171
|
parent_job_name=node_schema.label,
|
|
170
172
|
parent_job_sequence_num=idx,
|
|
171
173
|
)
|
|
@@ -185,6 +187,7 @@ class GraphJob:
|
|
|
185
187
|
sub_resource_label: str,
|
|
186
188
|
sub_resource_id: str,
|
|
187
189
|
update_tag: int,
|
|
190
|
+
iterationsize: int = 100,
|
|
188
191
|
) -> "GraphJob":
|
|
189
192
|
"""
|
|
190
193
|
Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
|
|
@@ -194,6 +197,7 @@ class GraphJob:
|
|
|
194
197
|
- For a given rel_schema, the fields used in the rel_schema.properties._sub_resource_label.name and
|
|
195
198
|
rel_schema.properties._sub_resource_id.name must be provided as keys and values in the params dict.
|
|
196
199
|
- The rel_schema must have a source_node_matcher and target_node_matcher.
|
|
200
|
+
:param iterationsize: The number of items to process in each iteration. Defaults to 100.
|
|
197
201
|
"""
|
|
198
202
|
cleanup_link_query = build_cleanup_query_for_matchlink(rel_schema)
|
|
199
203
|
logger.debug(f"Cleanup query: {cleanup_link_query}")
|
|
@@ -208,7 +212,7 @@ class GraphJob:
|
|
|
208
212
|
cleanup_link_query,
|
|
209
213
|
parameters=parameters,
|
|
210
214
|
iterative=True,
|
|
211
|
-
iterationsize=100,
|
|
215
|
+
iterationsize=iterationsize,
|
|
212
216
|
parent_job_name=rel_schema.rel_label,
|
|
213
217
|
)
|
|
214
218
|
|
cartography/graph/statement.py
CHANGED
|
@@ -52,6 +52,10 @@ class GraphStatement:
|
|
|
52
52
|
self.parameters = parameters or {}
|
|
53
53
|
self.iterative = iterative
|
|
54
54
|
self.iterationsize = iterationsize
|
|
55
|
+
if iterationsize < 0:
|
|
56
|
+
raise ValueError(
|
|
57
|
+
f"iterationsize must be a positive integer, got {iterationsize}",
|
|
58
|
+
)
|
|
55
59
|
self.parameters["LIMIT_SIZE"] = self.iterationsize
|
|
56
60
|
|
|
57
61
|
self.parent_job_name = parent_job_name if parent_job_name else None
|
|
@@ -312,6 +312,7 @@ def start_aws_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
|
312
312
|
"permission_relationships_file": config.permission_relationships_file,
|
|
313
313
|
"aws_guardduty_severity_threshold": config.aws_guardduty_severity_threshold,
|
|
314
314
|
"aws_cloudtrail_management_events_lookback_hours": config.aws_cloudtrail_management_events_lookback_hours,
|
|
315
|
+
"experimental_aws_inspector_batch": config.experimental_aws_inspector_batch,
|
|
315
316
|
}
|
|
316
317
|
try:
|
|
317
318
|
boto3_session = boto3.Session()
|
|
@@ -178,11 +178,24 @@ def get_rest_api_resources_methods_integrations(
|
|
|
178
178
|
method["apiId"] = api["id"]
|
|
179
179
|
method["httpMethod"] = http_method
|
|
180
180
|
methods.append(method)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
181
|
+
try:
|
|
182
|
+
integration = client.get_integration(
|
|
183
|
+
restApiId=api["id"],
|
|
184
|
+
resourceId=resource_id,
|
|
185
|
+
httpMethod=http_method,
|
|
186
|
+
)
|
|
187
|
+
except ClientError as e:
|
|
188
|
+
error_code = e.response.get("Error", {}).get("Code")
|
|
189
|
+
if error_code == "NotFoundException":
|
|
190
|
+
logger.warning(
|
|
191
|
+
"No integration found for API %s resource %s method %s: %s",
|
|
192
|
+
api["id"],
|
|
193
|
+
resource_id,
|
|
194
|
+
http_method,
|
|
195
|
+
e,
|
|
196
|
+
)
|
|
197
|
+
continue
|
|
198
|
+
raise
|
|
186
199
|
integration["resourceId"] = resource_id
|
|
187
200
|
integration["apiId"] = api["id"]
|
|
188
201
|
integration["integrationHttpMethod"] = integration.get("httpMethod")
|
|
@@ -6,6 +6,7 @@ import boto3
|
|
|
6
6
|
import neo4j
|
|
7
7
|
from botocore.exceptions import ClientError
|
|
8
8
|
|
|
9
|
+
from cartography.client.core.tx import run_write_query
|
|
9
10
|
from cartography.util import aws_handle_regions
|
|
10
11
|
from cartography.util import run_cleanup_job
|
|
11
12
|
from cartography.util import timeit
|
|
@@ -83,7 +84,8 @@ def load_elastic_ip_addresses(
|
|
|
83
84
|
SET r.lastupdated = $update_tag
|
|
84
85
|
"""
|
|
85
86
|
|
|
86
|
-
|
|
87
|
+
run_write_query(
|
|
88
|
+
neo4j_session,
|
|
87
89
|
ingest_addresses,
|
|
88
90
|
elastic_ip_addresses=elastic_ip_addresses,
|
|
89
91
|
Region=region,
|
|
@@ -5,6 +5,7 @@ from typing import List
|
|
|
5
5
|
import boto3
|
|
6
6
|
import neo4j
|
|
7
7
|
|
|
8
|
+
from cartography.client.core.tx import run_write_query
|
|
8
9
|
from cartography.util import aws_handle_regions
|
|
9
10
|
from cartography.util import run_cleanup_job
|
|
10
11
|
from cartography.util import timeit
|
|
@@ -63,13 +64,14 @@ def load_internet_gateways(
|
|
|
63
64
|
SET r.lastupdated = $aws_update_tag
|
|
64
65
|
"""
|
|
65
66
|
|
|
66
|
-
|
|
67
|
+
run_write_query(
|
|
68
|
+
neo4j_session,
|
|
67
69
|
query,
|
|
68
70
|
internet_gateways=internet_gateways,
|
|
69
71
|
region=region,
|
|
70
72
|
aws_account_id=current_aws_account_id,
|
|
71
73
|
aws_update_tag=update_tag,
|
|
72
|
-
)
|
|
74
|
+
)
|
|
73
75
|
|
|
74
76
|
|
|
75
77
|
@timeit
|
|
@@ -6,6 +6,7 @@ import boto3
|
|
|
6
6
|
import botocore
|
|
7
7
|
import neo4j
|
|
8
8
|
|
|
9
|
+
from cartography.client.core.tx import run_write_query
|
|
9
10
|
from cartography.util import aws_handle_regions
|
|
10
11
|
from cartography.util import run_cleanup_job
|
|
11
12
|
from cartography.util import timeit
|
|
@@ -104,7 +105,8 @@ def load_load_balancer_v2s(
|
|
|
104
105
|
logger.warning("Skipping load balancer entry with missing DNSName: %r", lb)
|
|
105
106
|
continue
|
|
106
107
|
|
|
107
|
-
|
|
108
|
+
run_write_query(
|
|
109
|
+
neo4j_session,
|
|
108
110
|
ingest_load_balancer_v2,
|
|
109
111
|
ID=load_balancer_id,
|
|
110
112
|
CREATED_TIME=str(lb["CreatedTime"]),
|
|
@@ -138,7 +140,8 @@ def load_load_balancer_v2s(
|
|
|
138
140
|
SET r.lastupdated = $update_tag
|
|
139
141
|
"""
|
|
140
142
|
for group in lb["SecurityGroups"]:
|
|
141
|
-
|
|
143
|
+
run_write_query(
|
|
144
|
+
neo4j_session,
|
|
142
145
|
ingest_load_balancer_v2_security_group,
|
|
143
146
|
ID=load_balancer_id,
|
|
144
147
|
GROUP_ID=str(group),
|
|
@@ -182,7 +185,8 @@ def load_load_balancer_v2_subnets(
|
|
|
182
185
|
SET r.lastupdated = $update_tag
|
|
183
186
|
"""
|
|
184
187
|
for az in az_data:
|
|
185
|
-
|
|
188
|
+
run_write_query(
|
|
189
|
+
neo4j_session,
|
|
186
190
|
ingest_load_balancer_subnet,
|
|
187
191
|
ID=load_balancer_id,
|
|
188
192
|
SubnetId=az["SubnetId"],
|
|
@@ -219,7 +223,8 @@ def load_load_balancer_v2_target_groups(
|
|
|
219
223
|
continue
|
|
220
224
|
|
|
221
225
|
for instance in target_group["Targets"]:
|
|
222
|
-
|
|
226
|
+
run_write_query(
|
|
227
|
+
neo4j_session,
|
|
223
228
|
ingest_instances,
|
|
224
229
|
ID=load_balancer_id,
|
|
225
230
|
INSTANCE_ID=instance,
|
|
@@ -253,7 +258,8 @@ def load_load_balancer_v2_listeners(
|
|
|
253
258
|
ON CREATE SET r.firstseen = timestamp()
|
|
254
259
|
SET r.lastupdated = $update_tag
|
|
255
260
|
"""
|
|
256
|
-
|
|
261
|
+
run_write_query(
|
|
262
|
+
neo4j_session,
|
|
257
263
|
ingest_listener,
|
|
258
264
|
LoadBalancerId=load_balancer_id,
|
|
259
265
|
Listeners=listener_data,
|
|
@@ -98,6 +98,10 @@ def transform_network_interface_data(
|
|
|
98
98
|
"SourceDestCheck": network_interface["SourceDestCheck"],
|
|
99
99
|
"Status": network_interface["Status"],
|
|
100
100
|
"SubnetId": network_interface["SubnetId"],
|
|
101
|
+
"AttachTime": network_interface.get("Attachment", {}).get("AttachTime"),
|
|
102
|
+
"DeviceIndex": network_interface.get("Attachment", {}).get(
|
|
103
|
+
"DeviceIndex"
|
|
104
|
+
),
|
|
101
105
|
"ElbV1Id": elb_v1_id,
|
|
102
106
|
"ElbV2Id": elb_v2_id,
|
|
103
107
|
},
|
|
@@ -6,6 +6,7 @@ import boto3
|
|
|
6
6
|
import neo4j
|
|
7
7
|
from botocore.exceptions import ClientError
|
|
8
8
|
|
|
9
|
+
from cartography.client.core.tx import run_write_query
|
|
9
10
|
from cartography.util import aws_handle_regions
|
|
10
11
|
from cartography.util import run_cleanup_job
|
|
11
12
|
from cartography.util import timeit
|
|
@@ -64,7 +65,8 @@ def load_reserved_instances(
|
|
|
64
65
|
r_instance["Start"] = str(r_instance["Start"])
|
|
65
66
|
r_instance["End"] = str(r_instance["End"])
|
|
66
67
|
|
|
67
|
-
|
|
68
|
+
run_write_query(
|
|
69
|
+
neo4j_session,
|
|
68
70
|
ingest_reserved_instances,
|
|
69
71
|
reserved_instances_list=data,
|
|
70
72
|
AWS_ACCOUNT_ID=current_aws_account_id,
|
cartography/intel/aws/ec2/tgw.py
CHANGED
|
@@ -6,6 +6,7 @@ import boto3
|
|
|
6
6
|
import botocore.exceptions
|
|
7
7
|
import neo4j
|
|
8
8
|
|
|
9
|
+
from cartography.client.core.tx import run_write_query
|
|
9
10
|
from cartography.util import aws_handle_regions
|
|
10
11
|
from cartography.util import run_cleanup_job
|
|
11
12
|
from cartography.util import timeit
|
|
@@ -120,7 +121,8 @@ def load_transit_gateways(
|
|
|
120
121
|
for tgw in data:
|
|
121
122
|
tgw_id = tgw["TransitGatewayId"]
|
|
122
123
|
|
|
123
|
-
|
|
124
|
+
run_write_query(
|
|
125
|
+
neo4j_session,
|
|
124
126
|
ingest_transit_gateway,
|
|
125
127
|
TgwId=tgw_id,
|
|
126
128
|
ARN=tgw["TransitGatewayArn"],
|
|
@@ -161,7 +163,8 @@ def _attach_shared_transit_gateway(
|
|
|
161
163
|
"""
|
|
162
164
|
|
|
163
165
|
if tgw["OwnerId"] != current_aws_account_id:
|
|
164
|
-
|
|
166
|
+
run_write_query(
|
|
167
|
+
neo4j_session,
|
|
165
168
|
attach_tgw,
|
|
166
169
|
ARN=tgw["TransitGatewayArn"],
|
|
167
170
|
TransitGatewayId=tgw["TransitGatewayId"],
|
|
@@ -202,7 +205,8 @@ def load_tgw_attachments(
|
|
|
202
205
|
for tgwa in data:
|
|
203
206
|
tgwa_id = tgwa["TransitGatewayAttachmentId"]
|
|
204
207
|
|
|
205
|
-
|
|
208
|
+
run_write_query(
|
|
209
|
+
neo4j_session,
|
|
206
210
|
ingest_transit_gateway,
|
|
207
211
|
TgwAttachmentId=tgwa_id,
|
|
208
212
|
TransitGatewayId=tgwa["TransitGatewayId"],
|
|
@@ -261,7 +265,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
|
|
|
261
265
|
SET p.lastupdated = $update_tag
|
|
262
266
|
"""
|
|
263
267
|
|
|
264
|
-
|
|
268
|
+
run_write_query(
|
|
269
|
+
neo4j_session,
|
|
265
270
|
attach_vpc_tgw_attachment_to_vpc,
|
|
266
271
|
VpcId=tgw_vpc_attachment["VpcId"],
|
|
267
272
|
TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
|
|
@@ -269,7 +274,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
|
|
|
269
274
|
)
|
|
270
275
|
|
|
271
276
|
for subnet_id in tgw_vpc_attachment["SubnetIds"]:
|
|
272
|
-
|
|
277
|
+
run_write_query(
|
|
278
|
+
neo4j_session,
|
|
273
279
|
attach_vpc_tgw_attachment_to_subnet,
|
|
274
280
|
SubnetId=subnet_id,
|
|
275
281
|
TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
|
|
@@ -70,7 +70,7 @@ def transform_volumes(
|
|
|
70
70
|
|
|
71
71
|
for attachment in active_attachments:
|
|
72
72
|
vol_with_attachment = raw_vol.copy()
|
|
73
|
-
vol_with_attachment["InstanceId"] = attachment
|
|
73
|
+
vol_with_attachment["InstanceId"] = attachment.get("InstanceId")
|
|
74
74
|
result.append(vol_with_attachment)
|
|
75
75
|
|
|
76
76
|
return result
|
cartography/intel/aws/ecr.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import logging
|
|
2
3
|
from typing import Any
|
|
3
4
|
from typing import Dict
|
|
@@ -18,6 +19,12 @@ from cartography.util import to_synchronous
|
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
20
21
|
|
|
22
|
+
# Manifest list media types
|
|
23
|
+
MANIFEST_LIST_MEDIA_TYPES = {
|
|
24
|
+
"application/vnd.docker.distribution.manifest.list.v2+json",
|
|
25
|
+
"application/vnd.oci.image.index.v1+json",
|
|
26
|
+
}
|
|
27
|
+
|
|
21
28
|
|
|
22
29
|
@timeit
|
|
23
30
|
@aws_handle_regions
|
|
@@ -34,6 +41,84 @@ def get_ecr_repositories(
|
|
|
34
41
|
return ecr_repositories
|
|
35
42
|
|
|
36
43
|
|
|
44
|
+
def _get_platform_specific_digests(
|
|
45
|
+
client: Any, repository_name: str, manifest_list_digest: str
|
|
46
|
+
) -> tuple[List[Dict[str, Any]], set[str]]:
|
|
47
|
+
"""
|
|
48
|
+
Fetch manifest list and extract platform-specific image digests and attestations.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
- List of all images (platform-specific + attestations) with digest, type, architecture, os, variant
|
|
52
|
+
- Set of ALL digests referenced in the manifest list
|
|
53
|
+
"""
|
|
54
|
+
response = client.batch_get_image(
|
|
55
|
+
repositoryName=repository_name,
|
|
56
|
+
imageIds=[{"imageDigest": manifest_list_digest}],
|
|
57
|
+
acceptedMediaTypes=list(MANIFEST_LIST_MEDIA_TYPES),
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
if not response.get("images"):
|
|
61
|
+
raise ValueError(
|
|
62
|
+
f"No manifest list found for digest {manifest_list_digest} in repository {repository_name}"
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# batch_get_image returns a single manifest list (hence [0])
|
|
66
|
+
# The manifests[] array inside contains all platform-specific images and attestations
|
|
67
|
+
manifest_json = json.loads(response["images"][0]["imageManifest"])
|
|
68
|
+
manifests = manifest_json.get("manifests", [])
|
|
69
|
+
|
|
70
|
+
if not manifests:
|
|
71
|
+
raise ValueError(
|
|
72
|
+
f"Manifest list {manifest_list_digest} has no manifests in repository {repository_name}"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
all_images = []
|
|
76
|
+
all_referenced_digests = set()
|
|
77
|
+
|
|
78
|
+
for manifest_ref in manifests:
|
|
79
|
+
digest = manifest_ref.get("digest")
|
|
80
|
+
if not digest:
|
|
81
|
+
raise ValueError(
|
|
82
|
+
f"Manifest in list {manifest_list_digest} has no digest in repository {repository_name}"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
all_referenced_digests.add(digest)
|
|
86
|
+
|
|
87
|
+
platform_info = manifest_ref.get("platform", {})
|
|
88
|
+
architecture = platform_info.get("architecture")
|
|
89
|
+
os_name = platform_info.get("os")
|
|
90
|
+
|
|
91
|
+
# Determine if this is an attestation
|
|
92
|
+
annotations = manifest_ref.get("annotations", {})
|
|
93
|
+
is_attestation = (
|
|
94
|
+
architecture == "unknown" and os_name == "unknown"
|
|
95
|
+
) or annotations.get("vnd.docker.reference.type") == "attestation-manifest"
|
|
96
|
+
|
|
97
|
+
all_images.append(
|
|
98
|
+
{
|
|
99
|
+
"digest": digest,
|
|
100
|
+
"type": "attestation" if is_attestation else "image",
|
|
101
|
+
"architecture": architecture,
|
|
102
|
+
"os": os_name,
|
|
103
|
+
"variant": platform_info.get("variant"),
|
|
104
|
+
"attestation_type": (
|
|
105
|
+
annotations.get("vnd.docker.reference.type")
|
|
106
|
+
if is_attestation
|
|
107
|
+
else None
|
|
108
|
+
),
|
|
109
|
+
"attests_digest": (
|
|
110
|
+
annotations.get("vnd.docker.reference.digest")
|
|
111
|
+
if is_attestation
|
|
112
|
+
else None
|
|
113
|
+
),
|
|
114
|
+
"media_type": manifest_ref.get("mediaType"),
|
|
115
|
+
"artifact_media_type": manifest_ref.get("artifactType"),
|
|
116
|
+
}
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
return all_images, all_referenced_digests
|
|
120
|
+
|
|
121
|
+
|
|
37
122
|
@timeit
|
|
38
123
|
@aws_handle_regions
|
|
39
124
|
def get_ecr_repository_images(
|
|
@@ -46,7 +131,11 @@ def get_ecr_repository_images(
|
|
|
46
131
|
)
|
|
47
132
|
client = boto3_session.client("ecr", region_name=region)
|
|
48
133
|
list_paginator = client.get_paginator("list_images")
|
|
49
|
-
|
|
134
|
+
|
|
135
|
+
# First pass: Collect all image details and track manifest list referenced digests
|
|
136
|
+
all_image_details: List[Dict] = []
|
|
137
|
+
manifest_list_referenced_digests: set[str] = set()
|
|
138
|
+
|
|
50
139
|
for page in list_paginator.paginate(repositoryName=repository_name):
|
|
51
140
|
image_ids = page["imageIds"]
|
|
52
141
|
if not image_ids:
|
|
@@ -58,14 +147,37 @@ def get_ecr_repository_images(
|
|
|
58
147
|
for response in describe_response:
|
|
59
148
|
image_details = response["imageDetails"]
|
|
60
149
|
for detail in image_details:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
150
|
+
# Check if this is a manifest list
|
|
151
|
+
media_type = detail.get("imageManifestMediaType")
|
|
152
|
+
if media_type in MANIFEST_LIST_MEDIA_TYPES:
|
|
153
|
+
# Fetch all images from manifest list (platform-specific + attestations)
|
|
154
|
+
manifest_list_digest = detail["imageDigest"]
|
|
155
|
+
manifest_images, all_digests = _get_platform_specific_digests(
|
|
156
|
+
client, repository_name, manifest_list_digest
|
|
157
|
+
)
|
|
158
|
+
detail["_manifest_images"] = manifest_images
|
|
159
|
+
|
|
160
|
+
# Track ALL digests so we don't create ECRRepositoryImages for them
|
|
161
|
+
manifest_list_referenced_digests.update(all_digests)
|
|
162
|
+
|
|
163
|
+
all_image_details.append(detail)
|
|
164
|
+
|
|
165
|
+
# Second pass: Only add images that should have ECRRepositoryImage nodes
|
|
166
|
+
ecr_repository_images: List[Dict] = []
|
|
167
|
+
for detail in all_image_details:
|
|
168
|
+
tags = detail.get("imageTags") or []
|
|
169
|
+
digest = detail.get("imageDigest")
|
|
170
|
+
|
|
171
|
+
if tags:
|
|
172
|
+
# Tagged images always get ECRRepositoryImage nodes (one per tag)
|
|
173
|
+
for tag in tags:
|
|
174
|
+
image_detail = {**detail, "imageTag": tag}
|
|
175
|
+
image_detail.pop("imageTags", None)
|
|
176
|
+
ecr_repository_images.append(image_detail)
|
|
177
|
+
elif digest not in manifest_list_referenced_digests:
|
|
178
|
+
# Untagged images only get nodes if they're NOT part of a manifest list
|
|
179
|
+
ecr_repository_images.append({**detail})
|
|
180
|
+
|
|
69
181
|
return ecr_repository_images
|
|
70
182
|
|
|
71
183
|
|
|
@@ -91,52 +203,115 @@ def load_ecr_repositories(
|
|
|
91
203
|
|
|
92
204
|
|
|
93
205
|
@timeit
|
|
94
|
-
def transform_ecr_repository_images(repo_data: Dict) -> List[Dict]:
|
|
206
|
+
def transform_ecr_repository_images(repo_data: Dict) -> tuple[List[Dict], List[Dict]]:
|
|
95
207
|
"""
|
|
96
|
-
|
|
97
|
-
|
|
208
|
+
Transform ECR repository images into repo image list and ECR image list.
|
|
209
|
+
For manifest lists, creates ECR images for manifest list, platform-specific images, and attestations.
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
- repo_images_list: List of ECRRepositoryImage nodes with imageDigests field (one-to-many)
|
|
213
|
+
- ecr_images_list: List of ECRImage nodes with type, architecture, os, variant fields
|
|
98
214
|
"""
|
|
99
215
|
repo_images_list = []
|
|
216
|
+
ecr_images_dict: Dict[str, Dict] = {} # Deduplicate by digest
|
|
217
|
+
|
|
100
218
|
# Sort repository URIs to ensure consistent processing order
|
|
101
219
|
for repo_uri in sorted(repo_data.keys()):
|
|
102
220
|
repo_images = repo_data[repo_uri]
|
|
103
221
|
for img in repo_images:
|
|
104
222
|
digest = img.get("imageDigest")
|
|
105
|
-
if digest:
|
|
106
|
-
tag = img.get("imageTag")
|
|
107
|
-
uri = repo_uri + (f":{tag}" if tag else "")
|
|
108
|
-
img["repo_uri"] = repo_uri
|
|
109
|
-
img["uri"] = uri
|
|
110
|
-
img["id"] = uri
|
|
111
|
-
repo_images_list.append(img)
|
|
112
|
-
else:
|
|
223
|
+
if not digest:
|
|
113
224
|
logger.warning(
|
|
114
225
|
"Repo %s has an image that has no imageDigest. Its tag is %s. Continuing on.",
|
|
115
226
|
repo_uri,
|
|
116
227
|
img.get("imageTag"),
|
|
117
228
|
)
|
|
229
|
+
continue
|
|
230
|
+
|
|
231
|
+
tag = img.get("imageTag")
|
|
232
|
+
uri = repo_uri + (f":{tag}" if tag else "")
|
|
233
|
+
|
|
234
|
+
# Build ECRRepositoryImage node
|
|
235
|
+
repo_image = {
|
|
236
|
+
**img,
|
|
237
|
+
"repo_uri": repo_uri,
|
|
238
|
+
"uri": uri,
|
|
239
|
+
"id": uri,
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
# Check if this is a manifest list with images
|
|
243
|
+
manifest_images = img.get("_manifest_images")
|
|
244
|
+
if manifest_images:
|
|
245
|
+
# For manifest list: include manifest list digest + all referenced digests
|
|
246
|
+
all_digests = [digest] + [m["digest"] for m in manifest_images]
|
|
247
|
+
repo_image["imageDigests"] = all_digests
|
|
248
|
+
|
|
249
|
+
# Create ECRImage for the manifest list itself
|
|
250
|
+
if digest not in ecr_images_dict:
|
|
251
|
+
ecr_images_dict[digest] = {
|
|
252
|
+
"imageDigest": digest,
|
|
253
|
+
"type": "manifest_list",
|
|
254
|
+
"architecture": None,
|
|
255
|
+
"os": None,
|
|
256
|
+
"variant": None,
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
# Create ECRImage nodes for each image in the manifest list
|
|
260
|
+
for manifest_img in manifest_images:
|
|
261
|
+
manifest_digest = manifest_img["digest"]
|
|
262
|
+
if manifest_digest not in ecr_images_dict:
|
|
263
|
+
ecr_images_dict[manifest_digest] = {
|
|
264
|
+
"imageDigest": manifest_digest,
|
|
265
|
+
"type": manifest_img.get("type"),
|
|
266
|
+
"architecture": manifest_img.get("architecture"),
|
|
267
|
+
"os": manifest_img.get("os"),
|
|
268
|
+
"variant": manifest_img.get("variant"),
|
|
269
|
+
"attestation_type": manifest_img.get("attestation_type"),
|
|
270
|
+
"attests_digest": manifest_img.get("attests_digest"),
|
|
271
|
+
"media_type": manifest_img.get("media_type"),
|
|
272
|
+
"artifact_media_type": manifest_img.get(
|
|
273
|
+
"artifact_media_type"
|
|
274
|
+
),
|
|
275
|
+
}
|
|
276
|
+
else:
|
|
277
|
+
# Regular image: single digest
|
|
278
|
+
repo_image["imageDigests"] = [digest]
|
|
279
|
+
|
|
280
|
+
# Create ECRImage for regular image
|
|
281
|
+
if digest not in ecr_images_dict:
|
|
282
|
+
ecr_images_dict[digest] = {
|
|
283
|
+
"imageDigest": digest,
|
|
284
|
+
"type": "image",
|
|
285
|
+
"architecture": None,
|
|
286
|
+
"os": None,
|
|
287
|
+
"variant": None,
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
# Remove internal field before returning
|
|
291
|
+
repo_image.pop("_manifest_images", None)
|
|
292
|
+
repo_images_list.append(repo_image)
|
|
118
293
|
|
|
119
|
-
|
|
294
|
+
ecr_images_list = list(ecr_images_dict.values())
|
|
295
|
+
return repo_images_list, ecr_images_list
|
|
120
296
|
|
|
121
297
|
|
|
122
298
|
@timeit
|
|
123
299
|
def load_ecr_repository_images(
|
|
124
300
|
neo4j_session: neo4j.Session,
|
|
125
301
|
repo_images_list: List[Dict],
|
|
302
|
+
ecr_images_list: List[Dict],
|
|
126
303
|
region: str,
|
|
127
304
|
current_aws_account_id: str,
|
|
128
305
|
aws_update_tag: int,
|
|
129
306
|
) -> None:
|
|
130
307
|
logger.info(
|
|
131
|
-
f"Loading {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
308
|
+
f"Loading {len(ecr_images_list)} ECR images and {len(repo_images_list)} ECR repository images in {region} into graph.",
|
|
132
309
|
)
|
|
133
|
-
image_digests = {img["imageDigest"] for img in repo_images_list}
|
|
134
|
-
ecr_images = [{"imageDigest": d} for d in image_digests]
|
|
135
310
|
|
|
136
311
|
load(
|
|
137
312
|
neo4j_session,
|
|
138
313
|
ECRImageSchema(),
|
|
139
|
-
|
|
314
|
+
ecr_images_list,
|
|
140
315
|
lastupdated=aws_update_tag,
|
|
141
316
|
Region=region,
|
|
142
317
|
AWS_ID=current_aws_account_id,
|
|
@@ -219,10 +394,11 @@ def sync(
|
|
|
219
394
|
current_aws_account_id,
|
|
220
395
|
update_tag,
|
|
221
396
|
)
|
|
222
|
-
repo_images_list = transform_ecr_repository_images(image_data)
|
|
397
|
+
repo_images_list, ecr_images_list = transform_ecr_repository_images(image_data)
|
|
223
398
|
load_ecr_repository_images(
|
|
224
399
|
neo4j_session,
|
|
225
400
|
repo_images_list,
|
|
401
|
+
ecr_images_list,
|
|
226
402
|
region,
|
|
227
403
|
current_aws_account_id,
|
|
228
404
|
update_tag,
|