anyscale 0.26.44__py3-none-any.whl → 0.26.46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/_private/anyscale_client/anyscale_client.py +4 -2
- anyscale/_private/anyscale_client/common.py +2 -0
- anyscale/_private/anyscale_client/fake_anyscale_client.py +4 -1
- anyscale/client/README.md +7 -0
- anyscale/client/openapi_client/__init__.py +4 -0
- anyscale/client/openapi_client/api/default_api.py +353 -5
- anyscale/client/openapi_client/models/__init__.py +4 -0
- anyscale/client/openapi_client/models/clouddeployment_response.py +121 -0
- anyscale/client/openapi_client/models/collaborator_type.py +101 -0
- anyscale/client/openapi_client/models/describe_machine_pool_requests_filters.py +150 -0
- anyscale/client/openapi_client/models/describe_machine_pool_requests_request.py +19 -19
- anyscale/client/openapi_client/models/plan_status.py +123 -0
- anyscale/client/openapi_client/models/task_table_row.py +29 -3
- anyscale/cloud_resource.py +120 -150
- anyscale/commands/cloud_commands.py +46 -3
- anyscale/commands/command_examples.py +8 -0
- anyscale/controllers/cloud_controller.py +361 -76
- anyscale/gcp_verification.py +51 -38
- anyscale/telemetry.py +36 -44
- anyscale/utils/s3.py +2 -2
- anyscale/utils/user_utils.py +2 -1
- anyscale/version.py +1 -1
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info}/METADATA +11 -2
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info}/RECORD +29 -25
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info}/WHEEL +1 -1
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info/licenses}/LICENSE +0 -0
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info/licenses}/NOTICE +0 -0
- {anyscale-0.26.44.dist-info → anyscale-0.26.46.dist-info}/top_level.txt +0 -0
anyscale/cloud_resource.py
CHANGED
@@ -15,12 +15,12 @@ from anyscale.aws_iam_policies import (
     get_anyscale_iam_permissions_ec2_restricted,
 )
 from anyscale.cli_logger import CloudSetupLogger
+from anyscale.client.openapi_client.models.aws_memory_db_cluster_config import (
+    AWSMemoryDBClusterConfig,
+)
 from anyscale.client.openapi_client.models.cloud_analytics_event_cloud_resource import (
     CloudAnalyticsEventCloudResource,
 )
-from anyscale.client.openapi_client.models.create_cloud_resource import (
-    CreateCloudResource,
-)
 from anyscale.client.openapi_client.models.subnet_id_with_availability_zone_aws import (
     SubnetIdWithAvailabilityZoneAWS,
 )
@@ -94,14 +94,14 @@ def log_resource_not_found_error(
 
 
 def verify_aws_vpc(
-
+    aws_vpc_id: Optional[str],
     boto3_session: boto3.Session,
     logger: CloudSetupLogger,
     ignore_capacity_errors: bool = False,  # TODO: Probably don't do this forever. Its kinda hacky
     strict: bool = False,  # strict is currently unused  # noqa: ARG001
 ) -> bool:
     logger.info("Verifying VPC ...")
-    if not
+    if not aws_vpc_id:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_VPC,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
@@ -110,14 +110,14 @@ def verify_aws_vpc(
         return False
 
     ec2 = boto3_session.resource("ec2")
-    vpc = ec2.Vpc(
+    vpc = ec2.Vpc(aws_vpc_id)
 
     # Verify the VPC exists
     try:
         vpc.load()
     except ClientError as e:
         if e.response["Error"]["Code"] == "InvalidVpcID.NotFound":
-            log_resource_not_found_error("VPC",
+            log_resource_not_found_error("VPC", aws_vpc_id, logger)
             return False
         else:
             logger.log_resource_exception(CloudAnalyticsEventCloudResource.AWS_VPC, e)
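The recurring pattern in this release is visible here: verifiers take explicit resource IDs instead of a `CreateCloudResource` object. As a minimal sketch of the new call shape, assuming `CloudSetupLogger` is default-constructible and using a placeholder VPC ID (neither appears in this diff):

```python
import boto3

from anyscale.cli_logger import CloudSetupLogger
from anyscale.cloud_resource import verify_aws_vpc

# Placeholder values for illustration; the real caller (cloud_controller.py)
# extracts these fields from the cloud's stored resource record.
session = boto3.Session(region_name="us-west-2")
ok = verify_aws_vpc(
    aws_vpc_id="vpc-0123456789abcdef0",  # hypothetical ID
    boto3_session=session,
    logger=CloudSetupLogger(),  # assumed default-constructible
)
```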
@@ -142,7 +142,8 @@ def _get_subnets_from_subnet_ids(
 
 
 def verify_aws_subnets(  # noqa: PLR0911, PLR0912
-
+    aws_vpc_id: Optional[str],
+    aws_subnet_ids: List[str],
     region: str,
     is_private_network: bool,
     logger: CloudSetupLogger,
@@ -153,20 +154,11 @@ def verify_aws_subnets(  # noqa: PLR0911, PLR0912
 
     logger.info("Verifying subnets ...")
 
-    if not
+    if not aws_vpc_id:
         logger.error("Missing VPC ID.")
         return False
 
-
-    if (
-        cloud_resource.aws_subnet_ids_with_availability_zones
-        and len(cloud_resource.aws_subnet_ids_with_availability_zones) > 0
-    ):
-        subnet_ids = [
-            subnet_id_with_az.subnet_id
-            for subnet_id_with_az in cloud_resource.aws_subnet_ids_with_availability_zones
-        ]
-    else:
+    if not aws_subnet_ids:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_SUBNET,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
@@ -175,7 +167,7 @@ def verify_aws_subnets(  # noqa: PLR0911, PLR0912
         return False
 
     # We must have at least 2 subnets since services requires 2 different subnets to setup ALB.
-    if len(
+    if len(aws_subnet_ids) < 2:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_SUBNET, CloudSetupError.ONLY_ONE_SUBNET
         )
@@ -185,11 +177,11 @@ def verify_aws_subnets(  # noqa: PLR0911, PLR0912
         return False
 
     subnets = _get_subnets_from_subnet_ids(
-        subnet_ids=
+        subnet_ids=aws_subnet_ids, region=region, logger=logger
     )
     subnet_azs = set()
 
-    for subnet, subnet_id in zip(subnets,
+    for subnet, subnet_id in zip(subnets, aws_subnet_ids):
         # Verify subnet exists
         if not subnet:
             log_resource_not_found_error("Subnet", subnet_id, logger)
@@ -205,13 +197,13 @@ def verify_aws_subnets(  # noqa: PLR0911, PLR0912
             return False
 
         # Verify that the subnet is in the provided VPC all of these are in the same VPC.
-        if subnet.vpc_id !=
+        if subnet.vpc_id != aws_vpc_id:
             logger.log_resource_error(
                 CloudAnalyticsEventCloudResource.AWS_SUBNET,
                 CloudSetupError.SUBNET_NOT_IN_VPC,
             )
             logger.error(
-                f"The subnet {subnet_id} is not in a vpc of this cloud. The vpc of this subnet is {subnet.vpc_id} and the vpc of this cloud is {
+                f"The subnet {subnet_id} is not in a vpc of this cloud. The vpc of this subnet is {subnet.vpc_id} and the vpc of this cloud is {aws_vpc_id}."
             )
             return False
@@ -243,9 +235,7 @@ def verify_aws_subnets(  # noqa: PLR0911, PLR0912
         )
         return False
 
-    logger.info(
-        f"Subnets {cloud_resource.aws_subnet_ids_with_availability_zones} verification succeeded."
-    )
+    logger.info(f"Subnets {aws_subnet_ids} verification succeeded.")
     return True
 
 
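Taken together, the subnet hunks show the verifier receiving a plain list of subnet IDs rather than deriving one from `cloud_resource.aws_subnet_ids_with_availability_zones`. A standalone sketch of the early-exit checks these hunks enforce, using a hypothetical helper name and example IDs:

```python
from typing import List, Optional

def subnet_preconditions_ok(aws_vpc_id: Optional[str], aws_subnet_ids: List[str]) -> bool:
    """Illustrative mirror of the early-exit checks in verify_aws_subnets."""
    if not aws_vpc_id:
        return False  # "Missing VPC ID."
    if not aws_subnet_ids:
        return False  # missing cloud resource ID
    # Services requires 2 different subnets to set up the ALB.
    return len(aws_subnet_ids) >= 2

assert not subnet_preconditions_ok("vpc-123", ["subnet-a"])  # only one subnet
```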
@@ -270,34 +260,28 @@ def associate_aws_subnets_with_azs(
     return subnet_ids_with_availability_zones
 
 
-def
-
-
-    logger: CloudSetupLogger,
-) -> Optional[List[Any]]:
+def _get_role_from_arn(
+    role_arn: str, boto3_session: boto3.Session, logger: CloudSetupLogger,
+) -> Any:
     iam = boto3_session.resource("iam")
-
-
-        for role_arn in cloud_resource.aws_iam_role_arns
-    ]
-    # Validate the roles exist.
+
+    # Validate the role exists.
     # `.load()` will throw an exception if the Role does not exist.
-
-
-
-
-
-
-
-
-
-
-        raise e
-    return roles
+    try:
+        role = iam.Role(AwsRoleArn.from_string(role_arn).to_role_name())
+        role.load()
+        return role
+    except ClientError as e:
+        logger.log_resource_exception(CloudAnalyticsEventCloudResource.AWS_IAM_ROLE, e)
+        if e.response["Error"]["Code"] == "NoSuchEntity":
+            logger.error(f"Could not find role: {role.name if role else 'unknown'}")
+            return None
+        raise e
 
 
 def verify_aws_iam_roles(  # noqa: PLR0911, PLR0912
-
+    control_plane_role: Optional[str],
+    data_plane_role: Optional[str],
     boto3_session: boto3.Session,
     anyscale_aws_account: str,
     logger: CloudSetupLogger,
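The new `_get_role_from_arn` loads one role at a time by converting its ARN to a role name via `AwsRoleArn.from_string(...).to_role_name()`. A minimal sketch of that conversion, under the assumption (the helper itself is not shown in this diff) that the role name is the final `/` segment of the ARN:

```python
def role_name_from_arn(role_arn: str) -> str:
    # e.g. "arn:aws:iam::123456789012:role/my-role" -> "my-role"
    return role_arn.split("/")[-1]

assert role_name_from_arn("arn:aws:iam::123456789012:role/my-role") == "my-role"
```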
@@ -307,33 +291,38 @@ def verify_aws_iam_roles(  # noqa: PLR0911, PLR0912
 ) -> bool:
 
     logger.info("Verifying IAM roles ...")
-    if not
+    if not control_plane_role:
+        logger.log_resource_error(
+            CloudAnalyticsEventCloudResource.AWS_IAM_ROLE,
+            CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
+        )
+        logger.error("Missing Anyscale IAM role.")
+        return False
+    if not data_plane_role:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_IAM_ROLE,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
         )
-        logger.error("Missing IAM role
+        logger.error("Missing Cluster Node IAM role.")
         return False
-    accounts =
+    accounts = {
         AwsRoleArn.from_string(role).account_id
-        for role in
-
-    if len(
+        for role in [control_plane_role, data_plane_role]
+    }
+    if len(accounts) != 1:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_IAM_ROLE,
             CloudSetupError.IAM_ROLE_ACCOUNT_MISMATCH,
         )
         logger.error(
-            f"All IAM roles must be in the same AWS account: {
+            f"All IAM roles must be in the same AWS account: {control_plane_role}, {data_plane_role}"
         )
         return False
 
-    roles = _get_roles_from_cloud_resource(cloud_resource, boto3_session, logger)
-    if roles is None:
-        return False
-
     # verifying control plane role: anyscale iam role
-    anyscale_iam_role =
+    anyscale_iam_role = _get_role_from_arn(control_plane_role, boto3_session, logger)
+    if not anyscale_iam_role:
+        return False
     assume_role_policy_document = anyscale_iam_role.assume_role_policy_document
     if not contains_control_plane_role(
         assume_role_policy_document=assume_role_policy_document,
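The account-mismatch check now builds a set from exactly the two role ARNs. A standalone sketch with example ARNs, assuming the account ID is the fifth `:`-separated field of an IAM role ARN (which is what `AwsRoleArn.from_string(role).account_id` is presumed to extract):

```python
control_plane_role = "arn:aws:iam::123456789012:role/anyscale-control-plane"  # example
data_plane_role = "arn:aws:iam::123456789012:role/anyscale-data-plane"  # example

# Equivalent of the set comprehension in the diff, without the AwsRoleArn helper.
accounts = {arn.split(":")[4] for arn in [control_plane_role, data_plane_role]}
assert len(accounts) == 1  # a mismatch would fail verification
```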
@@ -391,7 +380,9 @@ def verify_aws_iam_roles(  # noqa: PLR0911, PLR0912
         return False
 
     # verifying data plane role: ray autoscaler role
-    cluster_node_role =
+    cluster_node_role = _get_role_from_arn(data_plane_role, boto3_session, logger)
+    if not cluster_node_role:
+        return False
     assume_role_policy_document = cluster_node_role.assume_role_policy_document
     if not verify_data_plane_role_assume_role_policy(
         assume_role_policy_document=assume_role_policy_document,
@@ -431,7 +422,9 @@ def verify_aws_iam_roles(  # noqa: PLR0911, PLR0912
         )
         return False
 
-    logger.info(
+    logger.info(
+        f"IAM roles {control_plane_role}, {data_plane_role} verification succeeded."
+    )
     return True
 
 
@@ -453,13 +446,13 @@ def is_internal_communication_allowed(
 
 
 def verify_aws_security_groups(  # noqa: PLR0912, PLR0911
-
+    aws_security_group_ids: Optional[List[str]],
     boto3_session: boto3.Session,
     logger: CloudSetupLogger,
     strict: bool = False,
 ) -> bool:
     logger.info("Verifying security groups ...")
-    if not
+    if not aws_security_group_ids:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_SECURITY_GROUP,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
@@ -469,9 +462,7 @@ def verify_aws_security_groups(  # noqa: PLR0912, PLR0911
 
     ec2 = boto3_session.resource("ec2")
 
-    aws_security_group_ids = cloud_resource.aws_security_groups
     anyscale_security_groups = []
-
     for anyscale_security_group_id in aws_security_group_ids:
         anyscale_security_group = ec2.SecurityGroup(anyscale_security_group_id)
         try:
@@ -558,14 +549,16 @@ def verify_aws_security_groups(  # noqa: PLR0912, PLR0911
 
 
 def verify_aws_s3(  # noqa: PLR0911, PLR0912
-
+    aws_s3_id: Optional[str],
+    control_plane_role: Optional[str],
+    data_plane_role: Optional[str],
     boto3_session: boto3.Session,
     region: str,
     logger: CloudSetupLogger,
     strict: bool = False,
 ) -> bool:
     logger.info("Verifying S3 ...")
-    if not
+    if not aws_s3_id:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_S3_BUCKET,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
@@ -574,13 +567,13 @@ def verify_aws_s3(  # noqa: PLR0911, PLR0912
         return False
 
     s3 = boto3_session.resource("s3")
-    bucket_name =
+    bucket_name = aws_s3_id.split(":")[-1]
     s3_bucket = s3.Bucket(bucket_name)
 
     # Check for the existence of `creation_date` because this incurs a `list_bucket` call.
     # Calling `.load()` WILL NOT ERROR in cases where the caller does not have access to the bucket.
     if s3_bucket.creation_date is None:
-        log_resource_not_found_error("S3 bucket",
+        log_resource_not_found_error("S3 bucket", aws_s3_id, logger)
         return False
 
     has_correct_cors_rule = False
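The bucket name is now derived from the last `:` segment of the stored S3 identifier, and existence is probed through `creation_date` rather than `.load()`, which (per the diff's own comment) can succeed even when the caller lacks access. A short sketch with a hypothetical bucket ARN:

```python
import boto3

aws_s3_id = "arn:aws:s3:::my-anyscale-bucket"  # hypothetical ARN
bucket_name = aws_s3_id.split(":")[-1]

s3_bucket = boto3.Session().resource("s3").Bucket(bucket_name)
# `creation_date` incurs a real bucket-listing call, so None means the
# bucket is missing or inaccessible to these credentials.
exists = s3_bucket.creation_date is not None
```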
@@ -653,24 +646,25 @@ def verify_aws_s3(  # noqa: PLR0911, PLR0912
     if strict:
         return False
 
-
-    if roles is None:
+    if not control_plane_role or not data_plane_role:
         return False
 
-
+    role = _get_role_from_arn(control_plane_role, boto3_session, logger)
+    if not verify_s3_access(boto3_session, s3_bucket, role, logger):
         logger.warning(
-            f"S3 Bucket {bucket_name} does not appear to have correct permissions for the Anyscale Control Plane role {
+            f"S3 Bucket {bucket_name} does not appear to have correct permissions for the Anyscale Control Plane role {role.name}"
         )
         if strict:
             return False
 
-
+    role = _get_role_from_arn(data_plane_role, boto3_session, logger)
+    if not verify_s3_access(boto3_session, s3_bucket, role, logger):
         logger.warning(
-            f"S3 Bucket {bucket_name} does not appear to have correct permissions for the Data Plane role {
+            f"S3 Bucket {bucket_name} does not appear to have correct permissions for the Data Plane role {role.name}"
         )
         if strict:
             return False
-    logger.info(f"S3 {
+    logger.info(f"S3 {aws_s3_id} verification succeeded.")
     return True
 
 
@@ -694,81 +688,69 @@ def _get_network_interfaces_from_mount_targets(
 
 
 def verify_aws_efs(  # noqa: PLR0911, PLR0912, C901
-
+    aws_efs_id: Optional[str],
+    aws_efs_mount_target_ips: Optional[List[str]],
+    aws_subnet_ids: List[str],
+    aws_security_groups: Optional[List[str]],
     boto3_session: boto3.Session,
     logger: CloudSetupLogger,
     strict: bool = False,
 ) -> bool:
     logger.info("Verifying EFS ...")
-    if not
+    if not aws_efs_id:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_EFS,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
         )
         logger.error("Missing EFS ID.")
         return False
-
-    if (
-        cloud_resource.aws_subnet_ids_with_availability_zones
-        and len(cloud_resource.aws_subnet_ids_with_availability_zones) > 0
-    ):
-        subnet_ids = [
-            subnet_id_with_az.subnet_id
-            for subnet_id_with_az in cloud_resource.aws_subnet_ids_with_availability_zones
-        ]
-    else:
+    if not aws_subnet_ids:
         logger.error("Missing subnet IDs.")
         return False
-    if not
+    if not aws_security_groups:
         logger.error("Missing security group IDs.")
         return False
 
     client = boto3_session.client("efs")
     try:
-        file_systems_response = client.describe_file_systems(
-            FileSystemId=cloud_resource.aws_efs_id
-        )
+        file_systems_response = client.describe_file_systems(FileSystemId=aws_efs_id)
     except ClientError as e:
         if e.response["Error"]["Code"] == "FileSystemNotFound":
-            log_resource_not_found_error("EFS",
+            log_resource_not_found_error("EFS", aws_efs_id, logger)
             return False
         else:
            logger.log_resource_exception(CloudAnalyticsEventCloudResource.AWS_EFS, e)
            raise e
 
     if len(file_systems_response.get("FileSystems", [])) == 0:
-        log_resource_not_found_error("EFS",
+        log_resource_not_found_error("EFS", aws_efs_id, logger)
         return False
 
     # verify that there is a mount target for each subnet and security group
-    mount_targets_response = client.describe_mount_targets(
-        FileSystemId=cloud_resource.aws_efs_id
-    )
+    mount_targets_response = client.describe_mount_targets(FileSystemId=aws_efs_id)
     mount_targets = mount_targets_response.get("MountTargets")
     if not mount_targets:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_EFS,
             CloudSetupError.MOUNT_TARGET_NOT_FOUND,
         )
-        logger.error(
-            f"EFS with id {cloud_resource.aws_efs_id} does not contain mount targets."
-        )
+        logger.error(f"EFS with id {aws_efs_id} does not contain mount targets.")
         return False
 
     # verify the mount target ID stored in our database is still valid
     mount_target_ips = [mount_target["IpAddress"] for mount_target in mount_targets]
-    if
-
+    if aws_efs_mount_target_ips and any(
+        ip not in mount_target_ips for ip in aws_efs_mount_target_ips
     ):
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_EFS,
             CloudSetupError.INVALID_MOUNT_TARGET,
         )
         logger.error(
-            f"Mount target registered with the cloud no longer exists. EFS ID: {
+            f"Mount target registered with the cloud no longer exists. EFS ID: {aws_efs_id} IP address: {aws_efs_mount_target_ips}. Please make sure you have the correct AWS credentials set. If the EFS mount target has been deleted, please recreate the cloud or contact Anyscale for support."
         )
         logger.error(
-            f"Valid mount target IPs for EFS ID {
+            f"Valid mount target IPs for EFS ID {aws_efs_id} are {mount_target_ips}. "
             "If this happens during cloud edit, ensure that: "
             "1) If only editing aws_efs_mount_target_ip, it belongs to the existing EFS ID. "
             "2) If editing both efs_id and efs_mount_target_ip, the new IP is a valid target for the new efs_id."
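The stale-IP check is now a single `any(...)` over the IPs stored with the cloud: every stored IP must still belong to some current mount target. A standalone sketch with example addresses:

```python
# Example values only; the real lists come from describe_mount_targets and
# from the cloud's stored aws_efs_mount_target_ips.
mount_target_ips = ["10.0.1.5", "10.0.2.7"]          # current mount targets
aws_efs_mount_target_ips = ["10.0.1.5", "10.0.9.9"]  # registered with the cloud

stale = aws_efs_mount_target_ips and any(
    ip not in mount_target_ips for ip in aws_efs_mount_target_ips
)
assert stale  # "10.0.9.9" no longer exists, so verification fails
```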
@@ -779,7 +761,7 @@ def verify_aws_efs(  # noqa: PLR0911, PLR0912, C901
         mount_targets_response, boto3_session, logger
     )
 
-    expected_security_group_id =
+    expected_security_group_id = aws_security_groups[0]
 
     # Condition 1: No matching network interface in EFS mount targets for a subnet.
     # - 1.1: EFS has mount targets in other subnets.
@@ -792,7 +774,7 @@ def verify_aws_efs(  # noqa: PLR0911, PLR0912, C901
     # - 2.2: Network interface lacks a registered security group but has another security group.
     #        If configured correctly, subnet can still communicate with EFS. (warning)
     # --------------------------------------------------------------------------------------------------------------
-    for subnet_id in
+    for subnet_id in aws_subnet_ids:
         contains_subnet_id = False
         contains_registered_security_group = False
         for network_interface in network_interfaces:
@@ -809,45 +791,39 @@ def verify_aws_efs(  # noqa: PLR0911, PLR0912, C901
         if not contains_subnet_id:
             # condition 1.1.
             logger.warning(
-                f"EFS with id {
+                f"EFS with id {aws_efs_id} does not contain a mount target with the subnet {subnet_id}, which might introduce cross AZ networking cost."
             )
             if strict:
                 return False
         elif not contains_registered_security_group:
             # condition 2.2.
             logger.warning(
-                f"EFS with id {
+                f"EFS with id {aws_efs_id} does not contain a mount target with the subnet {subnet_id} and security group id {aws_security_groups[0]}. This misconfiguration might pose security risks and incur connection issues, preventing the EFS from working as expected."
             )
             if strict:
                 return False
     try:
-        backup_policy_response = client.describe_backup_policy(
-            FileSystemId=cloud_resource.aws_efs_id
-        )
+        backup_policy_response = client.describe_backup_policy(FileSystemId=aws_efs_id)
         backup_policy_status = backup_policy_response.get("BackupPolicy", {}).get(
             "Status", ""
         )
         if backup_policy_status != "ENABLED":
-            logger.warning(
-                f"EFS {cloud_resource.aws_efs_id} backup policy is not enabled."
-            )
+            logger.warning(f"EFS {aws_efs_id} backup policy is not enabled.")
             if strict:
                 return False
     except ClientError as e:
         if e.response["Error"]["Code"] == "PolicyNotFound":
-            logger.warning(f"EFS {
+            logger.warning(f"EFS {aws_efs_id} backup policy not found.")
             if strict:
                 return False
         else:
             raise e
 
     # Verify efs policy
-    if not _verify_aws_efs_policy(
-        boto3_session, cloud_resource.aws_efs_id, logger, strict
-    ):
+    if not _verify_aws_efs_policy(boto3_session, aws_efs_id, logger, strict):
         return False
 
-    logger.info(f"EFS {
+    logger.info(f"EFS {aws_efs_id} verification succeeded.")
     return True
 
 
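The backup-policy probe treats a `PolicyNotFound` error as a warning-level condition (failing only in strict mode). A sketch of that call shape, with a hypothetical file system ID:

```python
import boto3
from botocore.exceptions import ClientError

client = boto3.Session().client("efs")
try:
    resp = client.describe_backup_policy(FileSystemId="fs-0123456789abcdef0")  # hypothetical ID
    backup_enabled = resp.get("BackupPolicy", {}).get("Status", "") == "ENABLED"
except ClientError as e:
    if e.response["Error"]["Code"] != "PolicyNotFound":
        raise
    backup_enabled = False  # missing policy: warn, fail only when strict
```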
@@ -892,13 +868,13 @@ def _verify_aws_efs_policy(
 
 
 def verify_aws_cloudformation_stack(
-
+    aws_cloudformation_stack_id: Optional[str],
     boto3_session: boto3.Session,
     logger: CloudSetupLogger,
     strict: bool = False,  # strict is currently unused  # noqa: ARG001
 ) -> bool:
     logger.info("Verifying CloudFormation stack ...")
-    if not
+    if not aws_cloudformation_stack_id:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_CLOUDFORMATION,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
@@ -907,15 +883,13 @@ def verify_aws_cloudformation_stack(
         return False
 
     cloudformation = boto3_session.resource("cloudformation")
-    stack = cloudformation.Stack(
+    stack = cloudformation.Stack(aws_cloudformation_stack_id)
     try:
         stack.load()
     except ClientError as e:
         if e.response["Error"]["Code"] == "ValidationError":
             log_resource_not_found_error(
-                "CloudFormation stack",
-                cloud_resource.aws_cloudformation_stack_id,
-                logger,
+                "CloudFormation stack", aws_cloudformation_stack_id, logger,
             )
             return False
         else:
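Stack existence is still probed with `Stack(...).load()`, which raises a `ClientError` with code `ValidationError` when the stack does not exist; only the arguments changed. A minimal sketch with a hypothetical stack name:

```python
import boto3
from botocore.exceptions import ClientError

stack = boto3.Session().resource("cloudformation").Stack("anyscale-example-stack")  # hypothetical
try:
    stack.load()
    stack_exists = True
except ClientError as e:
    if e.response["Error"]["Code"] != "ValidationError":
        raise
    stack_exists = False
```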
@@ -925,20 +899,23 @@ def verify_aws_cloudformation_stack(
             raise e
 
     logger.info(
-        f"CloudFormation stack {
+        f"CloudFormation stack {aws_cloudformation_stack_id} verification succeeded."
     )
     return True
 
 
 def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
-
+    memorydb_cluster_config: Optional[AWSMemoryDBClusterConfig],
+    aws_security_groups: List[str],
+    aws_vpc_id: str,
+    aws_subnet_ids: List[str],
     boto3_session: boto3.Session,
     logger: CloudSetupLogger,
     strict: bool = False,  # strict is currently unused  # noqa: ARG001
 ) -> bool:
     """Verify that the MemoryDB cluster exists and is in the available state."""
     logger.info("Verifying MemoryDB ...")
-    if not
+    if not memorydb_cluster_config:
         logger.log_resource_error(
             CloudAnalyticsEventCloudResource.AWS_MEMORYDB,
             CloudSetupError.MISSING_CLOUD_RESOURCE_ID,
|
|
|
949
926
|
client = boto3_session.client("memorydb")
|
|
950
927
|
try:
|
|
951
928
|
response = client.describe_clusters(
|
|
952
|
-
ClusterName=
|
|
929
|
+
ClusterName=memorydb_cluster_config.id.split("/")[-1],
|
|
953
930
|
ShowShardDetails=True,
|
|
954
931
|
)
|
|
955
932
|
if not response.get("Clusters"):
|
|
956
933
|
log_resource_not_found_error(
|
|
957
|
-
"MemoryDB cluster",
|
|
934
|
+
"MemoryDB cluster", memorydb_cluster_config.id, logger
|
|
958
935
|
)
|
|
959
936
|
return False
|
|
960
937
|
|
|
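`describe_clusters` takes a cluster *name*, so the diff strips it from the ARN-style identifier stored in the config. A one-line sketch with an illustrative ARN, assuming (based only on the `.split("/")[-1]` above) that `memorydb_cluster_config.id` has this shape:

```python
memorydb_id = "arn:aws:memorydb:us-west-2:123456789012:cluster/my-cluster"  # illustrative
cluster_name = memorydb_id.split("/")[-1]
assert cluster_name == "my-cluster"
```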
@@ -962,9 +939,9 @@ def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
         security_group_id = response["Clusters"][0].get(
             "SecurityGroups", [{"SecurityGroupId": "NOT_SPECIFIED"}]
         )[0]["SecurityGroupId"]
-        if security_group_id !=
+        if security_group_id != aws_security_groups[0]:
             logger.warning(
-                f"MemoryDB cluster {
+                f"MemoryDB cluster {memorydb_cluster_config.id} has security group {security_group_id} that is not the same as the cloud's security group {aws_security_groups[0]}."
             )
             if strict:
                 return False
@@ -973,25 +950,18 @@ def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
         subnet_group_response = client.describe_subnet_groups(
             SubnetGroupName=response["Clusters"][0]["SubnetGroupName"]
         )
-        if
-            subnet_group_response["SubnetGroups"][0]["VpcId"]
-            != cloud_resource.aws_vpc_id
-        ):
+        if subnet_group_response["SubnetGroups"][0]["VpcId"] != aws_vpc_id:
             logger.warning(
-                f"MemoryDB cluster {
+                f"MemoryDB cluster {memorydb_cluster_config.id} is not in the same VPC as the cloud."
             )
             if strict:
                 return False
 
         # verify that the subnet group has the subset of subnets that the cloud has
-        subnet_ids = [
-            subnet.subnet_id
-            for subnet in cloud_resource.aws_subnet_ids_with_availability_zones
-        ]
         for subnet in subnet_group_response["SubnetGroups"][0]["Subnets"]:
-            if subnet["Identifier"] not in
+            if subnet["Identifier"] not in aws_subnet_ids:
                 logger.warning(
-                    f"MemoryDB cluster {
+                    f"MemoryDB cluster {memorydb_cluster_config.id} has subnet {subnet['Identifier']} that is not one of the subnets in the cloud."
                 )
                 if strict:
                     return False
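The subnet-group check walks the MemoryDB subnet group and flags any subnet that is not one of the cloud's subnets. A standalone sketch with example identifiers:

```python
# Example identifiers; the real values come from the cloud record and from
# describe_subnet_groups.
aws_subnet_ids = ["subnet-aaa", "subnet-bbb"]
subnet_group_subnets = [{"Identifier": "subnet-aaa"}, {"Identifier": "subnet-ccc"}]

mismatched = [
    s["Identifier"]
    for s in subnet_group_subnets
    if s["Identifier"] not in aws_subnet_ids
]
assert mismatched == ["subnet-ccc"]  # triggers a warning (failure when strict)
```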
@@ -1003,7 +973,7 @@ def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
         for param in parameter_response["Parameters"]:
             if param["Name"] == "maxmemory-policy" and param["Value"] != "allkeys-lru":
                 logger.warning(
-                    f"MemoryDB cluster {
+                    f"MemoryDB cluster {memorydb_cluster_config.id} should have parameter group with maxmemory-policy set to allkeys-lru instead of {param['Value']}."
                 )
                 if strict:
                     return False
@@ -1011,7 +981,7 @@ def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
         # verify TLS is enabled
         if not response["Clusters"][0]["TLSEnabled"]:
             logger.error(
-                f"MemoryDB cluster {
+                f"MemoryDB cluster {memorydb_cluster_config.id} has TLS disabled. Please create a memorydb cluster with TLS enabled."
             )
             return False
 
@@ -1019,14 +989,14 @@ def verify_aws_memorydb_cluster(  # noqa: PLR0911, PLR0912
         for shard in response["Clusters"][0]["Shards"]:
             if len(shard["Nodes"]) < 2:
                 logger.error(
-                    f"MemoryDB cluster {
+                    f"MemoryDB cluster {memorydb_cluster_config.id} has shard {shard['Name']} with less than 2 nodes. This is not enough for high availability. Please make sure each shard has at least 2 nodes."
                 )
                 return False
 
     except ClientError as e:
         logger.log_resource_exception(CloudAnalyticsEventCloudResource.AWS_MEMORYDB, e)
         raise ClickException(
-            f"Failed to verify MemoryDB cluster {
+            f"Failed to verify MemoryDB cluster {memorydb_cluster_config.id}.\nError: {e}"
         )
 
     return True