anyscale 0.26.69__py3-none-any.whl → 0.26.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. anyscale/_private/anyscale_client/anyscale_client.py +126 -3
  2. anyscale/_private/anyscale_client/common.py +51 -2
  3. anyscale/_private/anyscale_client/fake_anyscale_client.py +103 -11
  4. anyscale/client/README.md +43 -4
  5. anyscale/client/openapi_client/__init__.py +30 -4
  6. anyscale/client/openapi_client/api/default_api.py +1769 -27
  7. anyscale/client/openapi_client/models/__init__.py +30 -4
  8. anyscale/client/openapi_client/models/api_key_info.py +29 -3
  9. anyscale/client/openapi_client/models/apply_autoscaling_config_update_model.py +350 -0
  10. anyscale/client/openapi_client/models/apply_multi_version_update_weights_update_model.py +152 -0
  11. anyscale/client/openapi_client/models/apply_production_service_multi_version_v2_model.py +207 -0
  12. anyscale/client/openapi_client/models/apply_production_service_v2_model.py +31 -3
  13. anyscale/client/openapi_client/models/apply_version_weight_update_model.py +181 -0
  14. anyscale/client/openapi_client/models/backend_server_api_product_models_catalog_client_models_table_metadata.py +546 -0
  15. anyscale/client/openapi_client/models/backend_server_api_product_models_data_catalogs_table_metadata.py +178 -0
  16. anyscale/client/openapi_client/models/baseimagesenum.py +139 -1
  17. anyscale/client/openapi_client/models/catalog_metadata.py +150 -0
  18. anyscale/client/openapi_client/models/cloud_data_bucket_file_type.py +2 -1
  19. anyscale/client/openapi_client/models/{oauthconnectionresponse_response.py → clouddeployment_response.py} +11 -11
  20. anyscale/client/openapi_client/models/column_info.py +265 -0
  21. anyscale/client/openapi_client/models/compute_node_type.py +29 -1
  22. anyscale/client/openapi_client/models/connection_metadata.py +206 -0
  23. anyscale/client/openapi_client/models/create_experimental_workspace.py +29 -1
  24. anyscale/client/openapi_client/models/create_workspace_from_template.py +29 -1
  25. anyscale/client/openapi_client/models/create_workspace_template_version.py +59 -3
  26. anyscale/client/openapi_client/models/data_catalog.py +45 -31
  27. anyscale/client/openapi_client/models/data_catalog_connection.py +74 -58
  28. anyscale/client/openapi_client/models/{ha_job_event_level.py → data_catalog_object_type.py} +7 -8
  29. anyscale/client/openapi_client/models/data_catalog_schema.py +324 -0
  30. anyscale/client/openapi_client/models/data_catalog_table.py +437 -0
  31. anyscale/client/openapi_client/models/data_catalog_volume.py +437 -0
  32. anyscale/client/openapi_client/models/datacatalogschema_list_response.py +147 -0
  33. anyscale/client/openapi_client/models/datacatalogtable_list_response.py +147 -0
  34. anyscale/client/openapi_client/models/datacatalogvolume_list_response.py +147 -0
  35. anyscale/client/openapi_client/models/decorated_list_service_api_model.py +58 -1
  36. anyscale/client/openapi_client/models/decorated_production_service_v2_api_model.py +60 -3
  37. anyscale/client/openapi_client/models/decorated_serve_deployment.py +27 -1
  38. anyscale/client/openapi_client/models/decorated_service_event_api_model.py +3 -3
  39. anyscale/client/openapi_client/models/decoratedproductionservicev2_versionapimodel_response.py +121 -0
  40. anyscale/client/openapi_client/models/describe_machine_pool_machines_filters.py +33 -5
  41. anyscale/client/openapi_client/models/describe_machine_pool_requests_filters.py +33 -5
  42. anyscale/client/openapi_client/models/describe_machine_pool_workloads_filters.py +33 -5
  43. anyscale/client/openapi_client/models/{service_event_level.py → entity_type.py} +9 -9
  44. anyscale/client/openapi_client/models/event_level.py +2 -1
  45. anyscale/client/openapi_client/models/job_event_fields.py +206 -0
  46. anyscale/client/openapi_client/models/machine_type_partition_filter.py +152 -0
  47. anyscale/client/openapi_client/models/partition_info.py +30 -1
  48. anyscale/client/openapi_client/models/physical_resources.py +178 -0
  49. anyscale/client/openapi_client/models/production_job_event.py +3 -3
  50. anyscale/client/openapi_client/models/rollout_strategy.py +2 -1
  51. anyscale/client/openapi_client/models/schema_metadata.py +150 -0
  52. anyscale/client/openapi_client/models/service_event_fields.py +318 -0
  53. anyscale/client/openapi_client/models/sso_config.py +18 -18
  54. anyscale/client/openapi_client/models/supportedbaseimagesenum.py +139 -1
  55. anyscale/client/openapi_client/models/table_data_preview.py +209 -0
  56. anyscale/client/openapi_client/models/task_summary_config.py +29 -3
  57. anyscale/client/openapi_client/models/task_table_config.py +29 -3
  58. anyscale/client/openapi_client/models/unified_event.py +377 -0
  59. anyscale/client/openapi_client/models/unified_origin_filter.py +113 -0
  60. anyscale/client/openapi_client/models/unifiedevent_list_response.py +147 -0
  61. anyscale/client/openapi_client/models/volume_metadata.py +150 -0
  62. anyscale/client/openapi_client/models/worker_node_type.py +29 -1
  63. anyscale/client/openapi_client/models/workspace_event_fields.py +122 -0
  64. anyscale/client/openapi_client/models/workspace_template_version.py +58 -1
  65. anyscale/client/openapi_client/models/workspace_template_version_data_object.py +58 -1
  66. anyscale/cloud/models.py +2 -2
  67. anyscale/commands/cloud_commands.py +133 -2
  68. anyscale/commands/job_commands.py +121 -1
  69. anyscale/commands/job_queue_commands.py +99 -2
  70. anyscale/commands/service_commands.py +267 -67
  71. anyscale/commands/setup_k8s.py +546 -31
  72. anyscale/commands/util.py +104 -1
  73. anyscale/commands/workspace_commands.py +123 -5
  74. anyscale/commands/workspace_commands_v2.py +17 -1
  75. anyscale/compute_config/_private/compute_config_sdk.py +25 -12
  76. anyscale/compute_config/models.py +15 -0
  77. anyscale/controllers/cloud_controller.py +15 -2
  78. anyscale/controllers/job_controller.py +12 -0
  79. anyscale/controllers/kubernetes_verifier.py +80 -66
  80. anyscale/controllers/workspace_controller.py +67 -5
  81. anyscale/job/_private/job_sdk.py +50 -2
  82. anyscale/job/commands.py +3 -0
  83. anyscale/job/models.py +16 -0
  84. anyscale/job_queue/__init__.py +37 -1
  85. anyscale/job_queue/_private/job_queue_sdk.py +28 -1
  86. anyscale/job_queue/commands.py +61 -1
  87. anyscale/sdk/anyscale_client/__init__.py +1 -0
  88. anyscale/sdk/anyscale_client/api/default_api.py +12 -2
  89. anyscale/sdk/anyscale_client/models/__init__.py +1 -0
  90. anyscale/sdk/anyscale_client/models/apply_production_service_v2_model.py +31 -3
  91. anyscale/sdk/anyscale_client/models/apply_service_model.py +31 -3
  92. anyscale/sdk/anyscale_client/models/baseimagesenum.py +139 -1
  93. anyscale/sdk/anyscale_client/models/compute_node_type.py +29 -1
  94. anyscale/sdk/anyscale_client/models/physical_resources.py +178 -0
  95. anyscale/sdk/anyscale_client/models/rollout_strategy.py +2 -1
  96. anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +139 -1
  97. anyscale/sdk/anyscale_client/models/worker_node_type.py +29 -1
  98. anyscale/service/__init__.py +51 -3
  99. anyscale/service/_private/service_sdk.py +481 -58
  100. anyscale/service/commands.py +90 -4
  101. anyscale/service/models.py +56 -0
  102. anyscale/shared_anyscale_utils/latest_ray_version.py +1 -1
  103. anyscale/version.py +1 -1
  104. anyscale/workspace/_private/workspace_sdk.py +1 -0
  105. anyscale/workspace/models.py +19 -0
  106. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/METADATA +1 -1
  107. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/RECORD +112 -85
  108. anyscale/client/openapi_client/models/o_auth_connection_response.py +0 -229
  109. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/WHEEL +0 -0
  110. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/entry_points.txt +0 -0
  111. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/licenses/LICENSE +0 -0
  112. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/licenses/NOTICE +0 -0
  113. {anyscale-0.26.69.dist-info → anyscale-0.26.71.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@ Handles verification of Kubernetes-based cloud deployments including:
12
12
 
13
13
  from contextlib import contextmanager, suppress
14
14
  from dataclasses import dataclass
15
+ from enum import Enum
15
16
  import json
16
17
  import os
17
18
  import shutil
@@ -90,8 +91,19 @@ KUBECTL_COMMON_PATHS = [
90
91
  # Status and result strings
91
92
  PASSED_STATUS = "PASSED"
92
93
  FAILED_STATUS = "FAILED"
94
+ SKIPPED_STATUS = "SKIPPED"
93
95
  RUNNING_STATUS = "Running"
94
96
 
97
+
98
+ # Verification status enum
99
+ class VerificationStatus(Enum):
100
+ """Status of a verification check."""
101
+
102
+ PASSED = "PASSED"
103
+ FAILED = "FAILED"
104
+ SKIPPED = "SKIPPED"
105
+
106
+
95
107
  # Verification component names (for consistent reporting)
96
108
  class VerificationComponents:
97
109
  OPERATOR_POD_INSTALLED = "Operator Pod Installed"
@@ -206,15 +218,15 @@ class ResourceNotFoundError(KubernetesVerificationError):
206
218
  class VerificationResults:
207
219
  """Tracks the results of all verification steps."""
208
220
 
209
- operator_pod_installed: bool = False
210
- operator_health: bool = False
211
- operator_identity: bool = False
212
- file_storage: bool = False
213
- gateway_support: bool = False
214
- nginx_ingress: bool = False
221
+ operator_pod_installed: VerificationStatus = VerificationStatus.FAILED
222
+ operator_health: VerificationStatus = VerificationStatus.FAILED
223
+ operator_identity: VerificationStatus = VerificationStatus.FAILED
224
+ file_storage: VerificationStatus = VerificationStatus.FAILED
225
+ gateway_support: VerificationStatus = VerificationStatus.FAILED
226
+ nginx_ingress: VerificationStatus = VerificationStatus.FAILED
215
227
 
216
- def to_dict(self) -> Dict[str, bool]:
217
- """Convert to dictionary format matching original implementation."""
228
+ def to_dict(self) -> Dict[str, VerificationStatus]:
229
+ """Convert to dictionary format for reporting."""
218
230
  return {
219
231
  VerificationComponents.OPERATOR_POD_INSTALLED: self.operator_pod_installed,
220
232
  VerificationComponents.OPERATOR_HEALTH: self.operator_health,
@@ -226,9 +238,10 @@ class VerificationResults:
226
238
 
227
239
  @property
228
240
  def overall_success(self) -> bool:
229
- """Return True if all verification steps passed."""
241
+ """Return True if all verification steps passed or were skipped."""
230
242
  return all(
231
- [
243
+ status in (VerificationStatus.PASSED, VerificationStatus.SKIPPED)
244
+ for status in [
232
245
  self.operator_pod_installed,
233
246
  self.operator_health,
234
247
  self.operator_identity,
@@ -654,24 +667,24 @@ class OperatorVerifier:
654
667
  port=OPERATOR_HEALTH_PORT,
655
668
  )
656
669
 
657
- def verify_operator_health(self, operator_data: OperatorData) -> bool:
670
+ def verify_operator_health(self, operator_data: OperatorData) -> VerificationStatus:
658
671
  """Verify operator health using pre-fetched data."""
659
672
  if operator_data.health.is_healthy:
660
- return True
673
+ return VerificationStatus.PASSED
661
674
  else:
662
675
  self.log.error(
663
676
  f"Health check failed - HTTP {operator_data.health.status_code}"
664
677
  )
665
678
  if operator_data.health.response_text:
666
679
  self.log.error(f"Response: {operator_data.health.response_text}")
667
- return False
680
+ return VerificationStatus.FAILED
668
681
 
669
682
  def verify_operator_identity(
670
683
  self,
671
684
  operator_data: OperatorData,
672
685
  kubernetes_config: OpenAPIKubernetesConfig,
673
686
  cloud_provider: Optional[CloudProviders],
674
- ) -> bool:
687
+ ) -> VerificationStatus:
675
688
  """Verify operator identity using pre-fetched config data."""
676
689
  # Validate kubernetes_config contents
677
690
  expected_identity = kubernetes_config.anyscale_operator_iam_identity
@@ -679,7 +692,7 @@ class OperatorVerifier:
679
692
  self.log.error(
680
693
  "Missing 'anyscale_operator_iam_identity' in kubernetes config"
681
694
  )
682
- return False
695
+ return VerificationStatus.FAILED
683
696
 
684
697
  # Validate config response
685
698
  if not operator_data.config.is_valid:
@@ -688,32 +701,34 @@ class OperatorVerifier:
688
701
  )
689
702
  if operator_data.config.response_text:
690
703
  self.log.error(f"Response: {operator_data.config.response_text}")
691
- return False
704
+ return VerificationStatus.FAILED
692
705
 
693
706
  # Extract actual identity from config
694
707
  if operator_data.config.config_data is None:
695
708
  self.log.error("Operator config data is None")
696
- return False
709
+ return VerificationStatus.FAILED
697
710
 
698
711
  actual_identity = operator_data.config.config_data.get("iamIdentity")
699
712
  if not actual_identity:
700
713
  self.log.error("Operator config missing 'iamIdentity' field")
701
- return False
714
+ return VerificationStatus.FAILED
702
715
 
703
716
  # Perform identity comparison
704
717
  if self._evaluate_identity_match(
705
718
  expected_identity, actual_identity, cloud_provider
706
719
  ):
720
+ # Get cloud provider string for display
721
+ provider_str = str(cloud_provider) if cloud_provider else "AWS"
707
722
  self.log.info(
708
- f"AWS identity match: Role matches (Expected: {expected_identity})"
723
+ f"{provider_str} identity match: Expected identity matches (Expected: {expected_identity})"
709
724
  )
710
- self.log.info("Expected IAM role matches actual assumed role")
711
- return True
725
+ self.log.info("Expected identity matches actual identity")
726
+ return VerificationStatus.PASSED
712
727
  else:
713
728
  self.log.error("Operator identity mismatch")
714
729
  self.log.error(f"Expected: {expected_identity}")
715
730
  self.log.error(f"Actual: {actual_identity}")
716
- return False
731
+ return VerificationStatus.FAILED
717
732
 
718
733
  @contextmanager
719
734
  def _port_forward_to_operator(self, pod_name: str):
@@ -980,8 +995,12 @@ class StorageVerifier:
980
995
 
981
996
  def verify_file_storage(
982
997
  self, file_storage: FileStorage, cloud_deployment: CloudDeployment
983
- ) -> bool:
984
- """Verify file storage configuration (non-functional checks only)."""
998
+ ) -> VerificationStatus:
999
+ """Verify file storage configuration (non-functional checks only).
1000
+
1001
+ Returns:
1002
+ VerificationStatus enum value
1003
+ """
985
1004
  self.log.info("Verifying file storage configuration...")
986
1005
  verification_results = []
987
1006
 
@@ -1014,12 +1033,15 @@ class StorageVerifier:
1014
1033
  f"Cloud provider API error while verifying file storage: {e}"
1015
1034
  ) from e
1016
1035
 
1017
- # Return overall success
1036
+ # Return overall status
1018
1037
  if verification_results:
1019
- return all(result for _, result in verification_results)
1038
+ if all(result for _, result in verification_results):
1039
+ return VerificationStatus.PASSED
1040
+ else:
1041
+ return VerificationStatus.FAILED
1020
1042
  else:
1021
- self.log.info("INFO: No file storage components found to verify")
1022
- return True
1043
+ self.log.info("No file storage components found to verify")
1044
+ return VerificationStatus.SKIPPED
1023
1045
 
1024
1046
  def _verify_csi_driver(self, driver_name: str) -> bool:
1025
1047
  """Check if CSI driver exists on cluster."""
@@ -1135,13 +1157,17 @@ class GatewayVerifier:
1135
1157
  self.config = k8s_config
1136
1158
  self.log = logger
1137
1159
 
1138
- def verify_gateway_support(self, operator_data: OperatorData) -> bool:
1139
- """Verify gateway support using pre-fetched config data."""
1160
+ def verify_gateway_support(self, operator_data: OperatorData) -> VerificationStatus:
1161
+ """Verify gateway support using pre-fetched config data.
1162
+
1163
+ Returns:
1164
+ VerificationStatus enum value
1165
+ """
1140
1166
  if not operator_data.config.is_valid:
1141
- self.log.warning(
1167
+ self.log.info(
1142
1168
  "Could not retrieve operator configuration - skipping gateway verification"
1143
1169
  )
1144
- return True
1170
+ return VerificationStatus.SKIPPED
1145
1171
 
1146
1172
  # Extract gateway configuration from operator data
1147
1173
  gateway_config = GatewayConfig.from_operator_config(
@@ -1152,21 +1178,24 @@ class GatewayVerifier:
1152
1178
  self.log.info(
1153
1179
  "Gateway support is not enabled - skipping gateway verification"
1154
1180
  )
1155
- return True
1181
+ return VerificationStatus.SKIPPED
1156
1182
 
1157
1183
  if not gateway_config.requires_verification:
1158
1184
  self.log.error(
1159
1185
  "Gateway is enabled but no gateway name found in operator configuration"
1160
1186
  )
1161
- return False
1187
+ return VerificationStatus.FAILED
1162
1188
 
1163
1189
  # Verify gateway exists in cluster
1164
1190
  assert (
1165
1191
  gateway_config.name is not None
1166
1192
  ) # guaranteed by requires_verification check
1167
- return self._verify_gateway_exists(gateway_config.name)
1193
+ if self._verify_gateway_exists(gateway_config.name):
1194
+ return VerificationStatus.PASSED
1195
+ else:
1196
+ return VerificationStatus.FAILED
1168
1197
 
1169
- def verify_nginx_ingress(self) -> bool:
1198
+ def verify_nginx_ingress(self) -> VerificationStatus:
1170
1199
  """Check for NGINX ingress controller (warning only)."""
1171
1200
  try:
1172
1201
  self.log.info("Checking for NGINX ingress controller...")
@@ -1182,7 +1211,7 @@ class GatewayVerifier:
1182
1211
  f"PASSED: Found running NGINX ingress controller: {nginx_pod} "
1183
1212
  f"(namespace: {config_dict['namespace']})"
1184
1213
  )
1185
- return True
1214
+ return VerificationStatus.PASSED
1186
1215
  else:
1187
1216
  pod_status = self.kubectl.get_pod_status(
1188
1217
  nginx_pod, config_dict["namespace"]
@@ -1194,14 +1223,14 @@ class GatewayVerifier:
1194
1223
 
1195
1224
  # Try fallback search by name patterns
1196
1225
  if self._find_nginx_by_name_pattern():
1197
- return True
1226
+ return VerificationStatus.PASSED
1198
1227
 
1199
1228
  # No NGINX ingress controller found
1200
1229
  self.log.warning("No NGINX ingress controller found")
1201
1230
  self.log.warning("This may impact ingress routing capabilities")
1202
1231
  self.log.warning("Available ingress controllers:")
1203
1232
  self._list_available_ingress_controllers()
1204
- return False
1233
+ return VerificationStatus.FAILED
1205
1234
 
1206
1235
  except (KubectlError, ResourceNotFoundError) as e:
1207
1236
  self.log.warning(f"WARNING: Could not verify NGINX ingress controller: {e}")
@@ -1437,10 +1466,6 @@ class KubernetesCloudDeploymentVerifier:
1437
1466
  self.log.error(f"Data parsing error during verification: {e}")
1438
1467
  return False
1439
1468
 
1440
- def _passed_or_failed_str_from_bool(self, is_passing: bool) -> str:
1441
- """Return PASSED or FAILED string for verification results, matching VM verification format."""
1442
- return PASSED_STATUS if is_passing else FAILED_STATUS
1443
-
1444
1469
  @contextmanager
1445
1470
  def _verification_step(self, step_name: str):
1446
1471
  """Context manager for verification steps that indents detailed output."""
@@ -1467,7 +1492,7 @@ class KubernetesCloudDeploymentVerifier:
1467
1492
  with self._verification_step("Finding operator pod"):
1468
1493
  try:
1469
1494
  operator_pod = operator_verifier.find_operator_pod()
1470
- self.results.operator_pod_installed = True
1495
+ self.results.operator_pod_installed = VerificationStatus.PASSED
1471
1496
  except OperatorPodNotFoundError as e:
1472
1497
  self.log.error(
1473
1498
  "Failed to find operator pod, please make sure the operator is running"
@@ -1490,56 +1515,47 @@ class KubernetesCloudDeploymentVerifier:
1490
1515
  self.results.operator_health = operator_verifier.verify_operator_health(
1491
1516
  operator_data
1492
1517
  )
1493
- self.log.info(
1494
- f"Operator Health: {self._passed_or_failed_str_from_bool(self.results.operator_health)}"
1495
- )
1518
+ self.log.info(f"Operator Health: {self.results.operator_health.value}")
1496
1519
 
1497
1520
  self.log.info("Verifying operator identity...")
1498
1521
  if cloud_deployment.kubernetes_config is None:
1499
1522
  self.log.error(
1500
1523
  "Kubernetes configuration is missing from cloud deployment"
1501
1524
  )
1502
- self.results.operator_identity = False
1525
+ self.results.operator_identity = VerificationStatus.FAILED
1503
1526
  else:
1504
1527
  self.results.operator_identity = operator_verifier.verify_operator_identity(
1505
1528
  operator_data,
1506
1529
  cloud_deployment.kubernetes_config,
1507
1530
  cloud_deployment.provider,
1508
1531
  )
1509
- self.log.info(
1510
- f"Operator Identity: {self._passed_or_failed_str_from_bool(self.results.operator_identity)}"
1511
- )
1532
+ self.log.info(f"Operator Identity: {self.results.operator_identity.value}")
1512
1533
 
1513
1534
  # Step 4: Check file storage
1514
1535
  with self._verification_step("Checking file storage"):
1515
1536
  if cloud_deployment.file_storage is None:
1516
1537
  self.log.info(
1517
- "INFO: No file storage configured - skipping file storage verification"
1538
+ "No file storage configured - skipping file storage verification"
1518
1539
  )
1519
- self.results.file_storage = True
1540
+ self.results.file_storage = VerificationStatus.SKIPPED
1520
1541
  else:
1521
1542
  self.results.file_storage = storage_verifier.verify_file_storage(
1522
1543
  cloud_deployment.file_storage, cloud_deployment
1523
1544
  )
1524
- self.log.info(
1525
- f"File Storage: {self._passed_or_failed_str_from_bool(self.results.file_storage)}"
1526
- )
1545
+
1546
+ self.log.info(f"File Storage: {self.results.file_storage.value}")
1527
1547
 
1528
1548
  # Step 5: Verify gateway support
1529
- with self._verification_step("Verifying gateway support"):
1549
+ with self._verification_step("Checking gateway support"):
1530
1550
  self.results.gateway_support = gateway_verifier.verify_gateway_support(
1531
1551
  operator_data
1532
1552
  )
1533
- self.log.info(
1534
- f"Gateway Support: {self._passed_or_failed_str_from_bool(self.results.gateway_support)}"
1535
- )
1553
+ self.log.info(f"Gateway Support: {self.results.gateway_support.value}")
1536
1554
 
1537
1555
  # Step 6: Check NGINX ingress (warning only)
1538
1556
  with self._verification_step("Checking NGINX ingress controller"):
1539
1557
  self.results.nginx_ingress = gateway_verifier.verify_nginx_ingress()
1540
- self.log.info(
1541
- f"NGINX Ingress: {self._passed_or_failed_str_from_bool(self.results.nginx_ingress)}"
1542
- )
1558
+ self.log.info(f"NGINX Ingress: {self.results.nginx_ingress.value}")
1543
1559
 
1544
1560
  self._show_verification_summary()
1545
1561
 
@@ -1557,9 +1573,7 @@ class KubernetesCloudDeploymentVerifier:
1557
1573
  verification_result_summary = ["Verification result:"]
1558
1574
 
1559
1575
  for component, result in self.results.to_dict().items():
1560
- verification_result_summary.append(
1561
- f"{component}: {self._passed_or_failed_str_from_bool(result)}"
1562
- )
1576
+ verification_result_summary.append(f"{component}: {result.value}")
1563
1577
 
1564
1578
  self.log.info("\n".join(verification_result_summary))
1565
1579
 
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import shlex
3
3
  import subprocess
4
- from typing import Any, List, Optional
4
+ from typing import Any, Dict, List, Optional
5
5
 
6
6
  import click
7
7
  import tabulate
@@ -10,9 +10,19 @@ from anyscale.cli_logger import BlockLogger
10
10
  from anyscale.client.openapi_client.models.create_experimental_workspace import (
11
11
  CreateExperimentalWorkspace,
12
12
  )
13
+ from anyscale.client.openapi_client.models.delete_resource_tags_request import (
14
+ DeleteResourceTagsRequest,
15
+ )
13
16
  from anyscale.client.openapi_client.models.experimental_workspace import (
14
17
  ExperimentalWorkspace,
15
18
  )
19
+ from anyscale.client.openapi_client.models.resource_tag_resource_type import (
20
+ ResourceTagResourceType,
21
+ )
22
+ from anyscale.client.openapi_client.models.upsert_resource_tags_request import (
23
+ UpsertResourceTagsRequest,
24
+ )
25
+ from anyscale.commands.util import flatten_tag_dict_to_api_list
16
26
  from anyscale.controllers.base_controller import BaseController
17
27
  from anyscale.feature_flags import FLAG_DEFAULT_WORKING_DIR_FOR_PROJ
18
28
  from anyscale.project_utils import get_default_project
@@ -84,13 +94,21 @@ class WorkspaceController(BaseController):
84
94
  )
85
95
  )
86
96
 
87
- def list(self) -> None:
97
+ def list(self, tags_filter: Optional[Dict[str, List[str]]] = None) -> None:
88
98
  """
89
99
  prints a non-exhaustive tabular list of information about non-deleted workspaces.
90
100
  """
91
- workspaces_data: List[
92
- ExperimentalWorkspace
93
- ] = self.api_client.list_workspaces_api_v2_experimental_workspaces_get().results
101
+ params = {}
102
+ if tags_filter:
103
+ api_tag_filter = flatten_tag_dict_to_api_list(tags_filter)
104
+ if api_tag_filter:
105
+ params["tag_filter"] = api_tag_filter
106
+
107
+ workspaces_data: List[ExperimentalWorkspace] = (
108
+ self.api_client.list_workspaces_api_v2_experimental_workspaces_get(
109
+ **params
110
+ ).results
111
+ )
94
112
 
95
113
  workspaces_table: List[List[Any]] = [
96
114
  [
@@ -109,6 +127,50 @@ class WorkspaceController(BaseController):
109
127
 
110
128
  print(f"Workspaces:\n{table}")
111
129
 
130
+ def _resolve_workspace_id(
131
+ self, *, workspace_id: Optional[str], name: Optional[str]
132
+ ) -> str:
133
+ if workspace_id:
134
+ return workspace_id
135
+ if not name:
136
+ raise click.ClickException("Provide either --id or --name for workspace.")
137
+ results = self.api_client.list_workspaces_api_v2_experimental_workspaces_get(
138
+ name=name
139
+ ).results
140
+ if len(results) == 0:
141
+ raise click.ClickException(f"No workspace with name '{name}' found.")
142
+ if len(results) > 1:
143
+ raise click.ClickException(
144
+ f"Multiple workspaces with name '{name}' found. Please use --id."
145
+ )
146
+ return results[0].id
147
+
148
+ def add_tags(
149
+ self,
150
+ *,
151
+ workspace_id: Optional[str] = None,
152
+ name: Optional[str] = None,
153
+ tags: Dict[str, str],
154
+ ) -> None:
155
+ wid = self._resolve_workspace_id(workspace_id=workspace_id, name=name)
156
+ req = UpsertResourceTagsRequest(
157
+ resource_type=ResourceTagResourceType.WORKSPACE, resource_id=wid, tags=tags,
158
+ )
159
+ self.api_client.upsert_resource_tags_api_v2_tags_resource_put(req)
160
+
161
+ def remove_tags(
162
+ self,
163
+ *,
164
+ workspace_id: Optional[str] = None,
165
+ name: Optional[str] = None,
166
+ keys: List[str],
167
+ ) -> None:
168
+ wid = self._resolve_workspace_id(workspace_id=workspace_id, name=name)
169
+ req = DeleteResourceTagsRequest(
170
+ resource_type=ResourceTagResourceType.WORKSPACE, resource_id=wid, keys=keys,
171
+ )
172
+ self.api_client.delete_resource_tags_api_v2_tags_resource_delete(req)
173
+
112
174
  def clone(self, workspace: ExperimentalWorkspace) -> None:
113
175
  dir_name = workspace.name
114
176
  os.makedirs(dir_name)
@@ -46,7 +46,8 @@ HA_JOB_STATE_TO_JOB_STATE = {
46
46
  HaJobStates.PENDING: JobState.STARTING,
47
47
  HaJobStates.AWAITING_CLUSTER_START: JobState.STARTING,
48
48
  HaJobStates.SUCCESS: JobState.SUCCEEDED,
49
- HaJobStates.ERRORED: JobState.FAILED,
49
+ # ERRORED is a transient state that can transition to RESTARTING when retries remain.
50
+ HaJobStates.ERRORED: JobState.RUNNING,
50
51
  HaJobStates.TERMINATED: JobState.FAILED,
51
52
  HaJobStates.BROKEN: JobState.FAILED,
52
53
  HaJobStates.OUT_OF_RETRIES: JobState.FAILED,
@@ -280,6 +281,7 @@ class PrivateJobSDK(WorkloadSDK):
280
281
  workspace_id=self.client.get_current_workspace_id(),
281
282
  config=prod_job_config,
282
283
  job_queue_config=job_queue_config,
284
+ tags=getattr(config, "tags", None),
283
285
  )
284
286
  )
285
287
 
@@ -438,7 +440,35 @@ class PrivateJobSDK(WorkloadSDK):
438
440
  self.logger.info(f"Job {job_model.id} is successfully archived.")
439
441
  return job_model.id
440
442
 
441
- def wait(
443
+ def _stream_logs_for_job_run(
444
+ self, job_run_id: str, next_page_token: Optional[str] = None,
445
+ ) -> Optional[str]:
446
+ """Stream logs for a job run and return updated pagination state.
447
+
448
+ Args:
449
+ job_run_id: The ID of the job run to stream logs for
450
+ next_page_token: Token for fetching next page of logs
451
+
452
+ Returns:
453
+ next_page_token for the next iteration
454
+ """
455
+ try:
456
+ logs, next_page_token = self.client.stream_logs_for_job_run(
457
+ job_run_id=job_run_id, next_page_token=next_page_token,
458
+ )
459
+
460
+ # Print logs line by line
461
+ for line in logs.splitlines():
462
+ if line: # Skip empty lines
463
+ print(line)
464
+
465
+ except Exception as e: # noqa: BLE001
466
+ # Don't fail if log streaming fails
467
+ self.logger.warning(f"Error streaming logs: {e}")
468
+
469
+ return next_page_token
470
+
471
+ def wait( # noqa: PLR0912
442
472
  self,
443
473
  *,
444
474
  name: Optional[str] = None,
@@ -448,6 +478,7 @@ class PrivateJobSDK(WorkloadSDK):
448
478
  state: Union[str, JobState] = JobState.SUCCEEDED,
449
479
  timeout_s: float = 1800,
450
480
  interval_s: float = _POLLING_INTERVAL_SECONDS,
481
+ follow: bool = False,
451
482
  ):
452
483
  if not isinstance(timeout_s, (int, float)):
453
484
  raise TypeError("timeout_s must be a float")
@@ -471,6 +502,11 @@ class PrivateJobSDK(WorkloadSDK):
471
502
  self.logger.info(
472
503
  f"Waiting for job '{job_id_or_name}' to reach target state {state}, currently in state: {curr_state}"
473
504
  )
505
+
506
+ next_page_token = None
507
+ job_run_id = None
508
+ logs_started = False
509
+
474
510
  for _ in self.timer.poll(timeout_s=timeout_s, interval_s=interval_s):
475
511
  job_model = self._resolve_to_job_model(
476
512
  name=name, job_id=job_id, cloud=cloud, project=project
@@ -483,6 +519,18 @@ class PrivateJobSDK(WorkloadSDK):
483
519
  )
484
520
  curr_state = new_state
485
521
 
522
+ # Stream logs if enabled and job has a job run
523
+ if follow and job_model.last_job_run_id:
524
+ if not logs_started:
525
+ job_run_id = job_model.last_job_run_id
526
+ self.logger.info(f"Starting log stream for job run {job_run_id}")
527
+ logs_started = True
528
+
529
+ if job_run_id:
530
+ next_page_token = self._stream_logs_for_job_run(
531
+ job_run_id=job_run_id, next_page_token=next_page_token,
532
+ )
533
+
486
534
  if curr_state == state:
487
535
  self.logger.info(
488
536
  f"Job '{job_id_or_name}' reached target state, exiting"
anyscale/job/commands.py CHANGED
@@ -187,6 +187,7 @@ _WAIT_ARG_DOCSTRINGS = {
187
187
  "project": "Named project to use for the job. If not provided, the default project for the cloud will be used (or, if running in a workspace, the project of the workspace).",
188
188
  "state": "Target state of the job",
189
189
  "timeout_s": "Number of seconds to wait before timing out, this timeout will not affect job execution",
190
+ "follow": "Whether to follow the logs of the job. If True, the logs will be streamed to the console.",
190
191
  }
191
192
 
192
193
 
@@ -204,6 +205,7 @@ def wait(
204
205
  project: Optional[str] = None,
205
206
  state: Union[JobState, str] = JobState.SUCCEEDED,
206
207
  timeout_s: float = 1800,
208
+ follow: bool = False,
207
209
  _private_sdk: Optional[PrivateJobSDK] = None,
208
210
  **_kwargs: Dict[str, Any],
209
211
  ):
@@ -216,6 +218,7 @@ def wait(
216
218
  project=project,
217
219
  state=state,
218
220
  timeout_s=timeout_s,
221
+ follow=follow,
219
222
  )
220
223
 
221
224
 
anyscale/job/models.py CHANGED
@@ -300,6 +300,9 @@ py_modules: # (Optional) A list of local directories or remote URIs that will be
300
300
  cloud: anyscale-prod # (Optional) The name of the Anyscale Cloud.
301
301
  project: my-project # (Optional) The name of the Anyscale Project.
302
302
  max_retries: 3 # (Optional) Maximum number of times the job will be retried before being marked failed. Defaults to `1`.
303
+ tags:
304
+ team: mlops
305
+ purpose: training
303
306
 
304
307
  """
305
308
 
@@ -377,6 +380,19 @@ max_retries: 3 # (Optional) Maximum number of times the job will be retried befo
377
380
  if timeout_s < 0:
378
381
  raise ValueError("'timeout_s' must be >= 0.")
379
382
 
383
+ tags: Optional[Dict[str, str]] = field(
384
+ default=None, metadata={"docstring": "Tags to associate with the job."},
385
+ )
386
+
387
+ def _validate_tags(self, tags: Optional[Dict[str, str]]):
388
+ if tags is None:
389
+ return
390
+ if not isinstance(tags, dict):
391
+ raise TypeError("'tags' must be a Dict[str, str].")
392
+ for k, v in tags.items():
393
+ if not isinstance(k, str) or not isinstance(v, str):
394
+ raise TypeError("'tags' must be a Dict[str, str].")
395
+
380
396
 
381
397
  class JobRunState(ModelEnum):
382
398
  """Current state of an individual job run."""
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import Dict, List, Optional
2
2
 
3
3
  from anyscale._private.anyscale_client import AnyscaleClient
4
4
  from anyscale._private.models.model_base import ResultIterator
@@ -16,9 +16,15 @@ from anyscale.job_queue.commands import (
16
16
  _LIST_EXAMPLE,
17
17
  _STATUS_ARG_DOCSTRINGS,
18
18
  _STATUS_EXAMPLE,
19
+ _TAGS_ADD_ARG_DOCSTRINGS,
20
+ _TAGS_ADD_EXAMPLE,
21
+ _TAGS_REMOVE_ARG_DOCSTRINGS,
22
+ _TAGS_REMOVE_EXAMPLE,
19
23
  _UPDATE_ARG_DOCSTRINGS,
20
24
  _UPDATE_EXAMPLE,
25
+ add_tags,
21
26
  list,
27
+ remove_tags,
22
28
  status,
23
29
  update,
24
30
  )
@@ -49,6 +55,7 @@ class JobQueueSDK:
49
55
  cloud: Optional[str] = None,
50
56
  project: Optional[str] = None,
51
57
  cluster_status: Optional[SessionState] = None,
58
+ tags_filter: Optional[dict[str, List[str]]] = None,
52
59
  page_size: Optional[int] = None,
53
60
  max_items: Optional[int] = None,
54
61
  sorting_directives: Optional[List[JobQueueSortDirective]] = None,
@@ -61,6 +68,7 @@ class JobQueueSDK:
61
68
  cloud=cloud,
62
69
  project=project,
63
70
  cluster_status=cluster_status,
71
+ tags_filter=tags_filter,
64
72
  page_size=page_size,
65
73
  max_items=max_items,
66
74
  sorting_directives=sorting_directives,
@@ -87,3 +95,31 @@ class JobQueueSDK:
87
95
  max_concurrency=max_concurrency,
88
96
  idle_timeout_s=idle_timeout_s,
89
97
  )
98
+
99
+ @sdk_docs(doc_py_example=_TAGS_ADD_EXAMPLE, arg_docstrings=_TAGS_ADD_ARG_DOCSTRINGS)
100
+ def add_tags( # noqa: F811
101
+ self,
102
+ *,
103
+ job_queue_id: Optional[str] = None,
104
+ name: Optional[str] = None,
105
+ tags: Dict[str, str],
106
+ ) -> None:
107
+ """Upsert (add/update) tag key/value pairs for a job queue."""
108
+ return self._private_sdk.add_tags(
109
+ job_queue_id=job_queue_id, name=name, tags=tags
110
+ )
111
+
112
+ @sdk_docs(
113
+ doc_py_example=_TAGS_REMOVE_EXAMPLE, arg_docstrings=_TAGS_REMOVE_ARG_DOCSTRINGS
114
+ )
115
+ def remove_tags( # noqa: F811
116
+ self,
117
+ *,
118
+ job_queue_id: Optional[str] = None,
119
+ name: Optional[str] = None,
120
+ keys: List[str],
121
+ ) -> None:
122
+ """Remove tags by key from a job queue."""
123
+ return self._private_sdk.remove_tags(
124
+ job_queue_id=job_queue_id, name=name, keys=keys
125
+ )