skypilot-nightly 1.0.0.dev20250521__py3-none-any.whl → 1.0.0.dev20250523__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +46 -16
  3. sky/backends/cloud_vm_ray_backend.py +16 -4
  4. sky/check.py +109 -44
  5. sky/cli.py +261 -90
  6. sky/client/cli.py +261 -90
  7. sky/client/sdk.py +122 -3
  8. sky/clouds/__init__.py +5 -0
  9. sky/clouds/aws.py +4 -2
  10. sky/clouds/azure.py +4 -2
  11. sky/clouds/cloud.py +30 -6
  12. sky/clouds/cudo.py +2 -1
  13. sky/clouds/do.py +2 -1
  14. sky/clouds/fluidstack.py +2 -1
  15. sky/clouds/gcp.py +160 -23
  16. sky/clouds/ibm.py +4 -2
  17. sky/clouds/kubernetes.py +66 -22
  18. sky/clouds/lambda_cloud.py +2 -1
  19. sky/clouds/nebius.py +18 -2
  20. sky/clouds/oci.py +4 -2
  21. sky/clouds/paperspace.py +2 -1
  22. sky/clouds/runpod.py +2 -1
  23. sky/clouds/scp.py +2 -1
  24. sky/clouds/service_catalog/__init__.py +3 -0
  25. sky/clouds/service_catalog/common.py +9 -2
  26. sky/clouds/service_catalog/constants.py +2 -1
  27. sky/clouds/service_catalog/ssh_catalog.py +167 -0
  28. sky/clouds/ssh.py +203 -0
  29. sky/clouds/vast.py +2 -1
  30. sky/clouds/vsphere.py +2 -1
  31. sky/core.py +59 -17
  32. sky/dashboard/out/404.html +1 -1
  33. sky/dashboard/out/_next/static/{hvWzC5E6Q4CcKzXcWbgig → ECKwDNS9v9y3_IKFZ2lpp}/_buildManifest.js +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/infra-abf08c4384190a39.js +1 -0
  35. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  36. sky/dashboard/out/clusters/[cluster].html +1 -1
  37. sky/dashboard/out/clusters.html +1 -1
  38. sky/dashboard/out/index.html +1 -1
  39. sky/dashboard/out/infra.html +1 -1
  40. sky/dashboard/out/jobs/[job].html +1 -1
  41. sky/dashboard/out/jobs.html +1 -1
  42. sky/data/storage.py +1 -0
  43. sky/execution.py +56 -7
  44. sky/jobs/server/core.py +4 -2
  45. sky/optimizer.py +29 -15
  46. sky/provision/__init__.py +1 -0
  47. sky/provision/aws/instance.py +17 -1
  48. sky/provision/gcp/constants.py +147 -4
  49. sky/provision/gcp/instance_utils.py +10 -0
  50. sky/provision/gcp/volume_utils.py +247 -0
  51. sky/provision/kubernetes/instance.py +16 -5
  52. sky/provision/kubernetes/utils.py +37 -19
  53. sky/provision/nebius/instance.py +3 -1
  54. sky/provision/nebius/utils.py +14 -2
  55. sky/provision/ssh/__init__.py +18 -0
  56. sky/resources.py +177 -4
  57. sky/serve/server/core.py +2 -4
  58. sky/server/common.py +46 -9
  59. sky/server/constants.py +2 -0
  60. sky/server/html/token_page.html +154 -0
  61. sky/server/requests/executor.py +3 -6
  62. sky/server/requests/payloads.py +7 -0
  63. sky/server/server.py +80 -8
  64. sky/setup_files/dependencies.py +1 -0
  65. sky/skypilot_config.py +117 -31
  66. sky/task.py +24 -1
  67. sky/templates/gcp-ray.yml.j2 +44 -1
  68. sky/templates/nebius-ray.yml.j2 +12 -2
  69. sky/utils/admin_policy_utils.py +26 -22
  70. sky/utils/context.py +36 -6
  71. sky/utils/context_utils.py +15 -0
  72. sky/utils/infra_utils.py +21 -1
  73. sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
  74. sky/utils/kubernetes/create_cluster.sh +1 -0
  75. sky/utils/kubernetes/deploy_remote_cluster.py +1437 -0
  76. sky/utils/kubernetes/kubernetes_deploy_utils.py +117 -10
  77. sky/utils/kubernetes/ssh-tunnel.sh +387 -0
  78. sky/utils/log_utils.py +214 -1
  79. sky/utils/resources_utils.py +14 -0
  80. sky/utils/schemas.py +67 -0
  81. sky/utils/ux_utils.py +2 -1
  82. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/METADATA +6 -1
  83. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/RECORD +88 -81
  84. sky/dashboard/out/_next/static/chunks/pages/infra-9180cd91cee64b96.js +0 -1
  85. sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
  86. /sky/dashboard/out/_next/static/{hvWzC5E6Q4CcKzXcWbgig → ECKwDNS9v9y3_IKFZ2lpp}/_ssgManifest.js +0 -0
  87. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/WHEEL +0 -0
  88. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/entry_points.txt +0 -0
  89. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/licenses/LICENSE +0 -0
  90. {skypilot_nightly-1.0.0.dev20250521.dist-info → skypilot_nightly-1.0.0.dev20250523.dist-info}/top_level.txt +0 -0
sky/utils/log_utils.py CHANGED
@@ -190,7 +190,7 @@ class SkyLocalUpLineProcessor(LineProcessor):
190
190
 
191
191
 
192
192
  class SkyRemoteUpLineProcessor(LineProcessor):
193
- """A processor for deploy_remote_cluster.sh log lines."""
193
+ """A processor for deploy_remote_cluster.py log lines."""
194
194
 
195
195
  def __init__(self, log_path: str, is_local: bool):
196
196
  self.log_path = log_path
@@ -291,6 +291,219 @@ class SkyRemoteUpLineProcessor(LineProcessor):
291
291
  self.status_display.stop()
292
292
 
293
293
 
294
+ class SkySSHUpLineProcessor(LineProcessor):
295
+ """A processor for deploy_remote_cluster.py log lines for SSH clusters"""
296
+
297
+ def __init__(self, log_path: str, is_local: bool):
298
+ self.log_path = log_path
299
+ self.is_local = is_local
300
+ self.current_cluster: Optional[str] = None
301
+ self.is_cleanup_mode = False
302
+
303
+ def __enter__(self) -> None:
304
+ status = rich_utils.safe_status(
305
+ ux_utils.spinner_message('Preparing to set up SSH Node Pools',
306
+ log_path=self.log_path,
307
+ is_local=self.is_local))
308
+ self.status_display = status
309
+ self.status_display.start()
310
+
311
+ def process_line(self, log_line: str) -> None:
312
+ # Detect cleanup mode
313
+ if 'SKYPILOT_CLEANUP_MODE:' in log_line:
314
+ self.is_cleanup_mode = True
315
+ if self.current_cluster:
316
+ self.status_display.update(
317
+ ux_utils.spinner_message(
318
+ f'Cleaning up Node Pool: \\[{self.current_cluster}]',
319
+ log_path=self.log_path,
320
+ is_local=self.is_local))
321
+
322
+ # Cluster detection message
323
+ if 'SKYPILOT_CLUSTER_INFO:' in log_line:
324
+ clusters_part = log_line.split('SKYPILOT_CLUSTER_INFO:',
325
+ 1)[1].strip()
326
+ if clusters_part.startswith('Found'):
327
+ logger.info(f'{colorama.Style.RESET_ALL}'
328
+ f'{colorama.Fore.CYAN}{clusters_part}'
329
+ f'{colorama.Style.RESET_ALL}')
330
+
331
+ # Current cluster being operated on
332
+ if 'SKYPILOT_CURRENT_CLUSTER:' in log_line:
333
+ self.current_cluster = log_line.split('SKYPILOT_CURRENT_CLUSTER:',
334
+ 1)[1].strip()
335
+
336
+ if self.is_cleanup_mode:
337
+ self.status_display.update(
338
+ ux_utils.spinner_message(
339
+ f'Cleaning up Node Pool: {self.current_cluster}',
340
+ log_path=self.log_path,
341
+ is_local=self.is_local))
342
+ logger.info(f'{colorama.Fore.CYAN}\nCleaning up Node Pool: '
343
+ f'{self.current_cluster}{colorama.Style.RESET_ALL}')
344
+ else:
345
+ self.status_display.update(
346
+ ux_utils.spinner_message(
347
+ f'Deploying SkyPilot \\[{self.current_cluster}]',
348
+ log_path=self.log_path,
349
+ is_local=self.is_local))
350
+ logger.info(f'{colorama.Style.RESET_ALL}'
351
+ f'{colorama.Fore.CYAN}\nSetting up Node Pool: '
352
+ f'{self.current_cluster}{colorama.Style.RESET_ALL}')
353
+
354
+ # Handle cluster completion marker
355
+ if 'SKYPILOT_CLUSTER_COMPLETED:' in log_line:
356
+ if self.is_cleanup_mode:
357
+ logger.info(
358
+ f'{ux_utils.INDENT_LAST_SYMBOL}{colorama.Fore.GREEN}'
359
+ f'✔ Node Pool {self.current_cluster} cleaned up '
360
+ f'successfully.{colorama.Style.RESET_ALL}')
361
+ else:
362
+ logger.info(
363
+ f'{ux_utils.INDENT_LAST_SYMBOL}{colorama.Fore.GREEN}'
364
+ f'✔ Node Pool {self.current_cluster} deployed successfully.'
365
+ f'{colorama.Style.RESET_ALL}')
366
+
367
+ # Pre-flight checks
368
+ if 'Checking SSH connection to head node' in log_line:
369
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Style.DIM}'
370
+ 'Checking SSH connection to head node...'
371
+ f'{colorama.Style.RESET_ALL}')
372
+
373
+ if log_line.startswith('SSH connection successful'):
374
+ node_name = log_line.split('(')[-1].split(')')[0]
375
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
376
+ '✔ SSH connection established to head node '
377
+ f'{node_name}.{colorama.Style.RESET_ALL}')
378
+
379
+ # Kubernetes installation steps
380
+ if 'Deploying Kubernetes on head node' in log_line:
381
+ current_cluster_str = f' \\[{self.current_cluster}]' if (
382
+ self.current_cluster) else ''
383
+ self.status_display.update(
384
+ ux_utils.spinner_message(
385
+ 'Deploying SkyPilot runtime on head node'
386
+ f'{current_cluster_str}',
387
+ log_path=self.log_path,
388
+ is_local=self.is_local))
389
+
390
+ if 'K3s deployed on head node' in log_line:
391
+ node_name = log_line.split('(')[-1].split(')')[0]
392
+ logger.info(
393
+ f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
394
+ f'✔ SkyPilot runtime successfully deployed on head node '
395
+ f'{node_name}.{colorama.Style.RESET_ALL}')
396
+
397
+ # Worker nodes
398
+ if 'Deploying Kubernetes on worker node' in log_line:
399
+ self.status_display.update(
400
+ ux_utils.spinner_message(
401
+ 'Deploying SkyPilot runtime on worker nodes' +
402
+ (f' \\[{self.current_cluster}]'
403
+ if self.current_cluster else ''),
404
+ log_path=self.log_path,
405
+ is_local=self.is_local))
406
+
407
+ if 'Kubernetes deployed on worker node' in log_line:
408
+ node_name = log_line.split('(')[-1].split(')')[0]
409
+ logger.info(
410
+ f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
411
+ '✔ SkyPilot runtime successfully deployed on worker node '
412
+ f'{node_name}.{colorama.Style.RESET_ALL}')
413
+
414
+ if 'Failed to deploy K3s on worker node' in log_line:
415
+ node_name = log_line.split('(')[-1].split(')')[0]
416
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.RED}'
417
+ '✗ Failed to deploy K3s on worker node '
418
+ f'{node_name}.{colorama.Style.RESET_ALL}')
419
+
420
+ # Cluster configuration
421
+ if 'Configuring local kubectl to connect to the cluster...' in log_line:
422
+ self.status_display.update(
423
+ ux_utils.spinner_message('Setting up SkyPilot configuration' +
424
+ (f' \\[{self.current_cluster}]'
425
+ if self.current_cluster else ''),
426
+ log_path=self.log_path,
427
+ is_local=self.is_local))
428
+
429
+ if 'kubectl configured to connect to the cluster.' in log_line:
430
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
431
+ '✔ SkyPilot configuration complete.'
432
+ f'{colorama.Style.RESET_ALL}')
433
+
434
+ # GPU operator installation
435
+ if 'Installing Nvidia GPU Operator...' in log_line:
436
+ self.status_display.update(
437
+ ux_utils.spinner_message('Configuring Nvidia GPUs' +
438
+ (f' \\[{self.current_cluster}]'
439
+ if self.current_cluster else ''),
440
+ log_path=self.log_path,
441
+ is_local=self.is_local))
442
+
443
+ if 'GPU Operator installed.' in log_line:
444
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
445
+ '✔ Nvidia GPUs configured successfully.'
446
+ f'{colorama.Style.RESET_ALL}')
447
+
448
+ # Cleanup steps
449
+ if 'Cleaning up head node' in log_line:
450
+ self.status_display.update(
451
+ ux_utils.spinner_message('Cleaning up head node' +
452
+ (f' \\[{self.current_cluster}]'
453
+ if self.current_cluster else ''),
454
+ log_path=self.log_path,
455
+ is_local=self.is_local))
456
+
457
+ if 'Cleaning up worker node' in log_line:
458
+ self.status_display.update(
459
+ ux_utils.spinner_message('Cleaning up worker nodes' +
460
+ (f' \\[{self.current_cluster}]'
461
+ if self.current_cluster else ''),
462
+ log_path=self.log_path,
463
+ is_local=self.is_local))
464
+
465
+ # Handle node cleanup success messages
466
+ if 'Node' in log_line and 'cleaned up successfully' in log_line:
467
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
468
+ f'{log_line.strip()}{colorama.Style.RESET_ALL}')
469
+
470
+ if 'Node' in log_line and 'Failed to clean up' in log_line:
471
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.RED}'
472
+ f'{log_line.strip()}{colorama.Style.RESET_ALL}')
473
+
474
+ if 'Failed to clean up worker node' in log_line:
475
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.RED}'
476
+ f'{log_line.strip()}{colorama.Style.RESET_ALL}')
477
+
478
+ # Final status for the cluster deployment
479
+ if 'Cluster deployment completed.' in log_line:
480
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.GREEN}'
481
+ '✔ SkyPilot runtime is up.'
482
+ f'{colorama.Style.RESET_ALL}')
483
+
484
+ if 'Failed to deploy Kubernetes on the following nodes:' in log_line:
485
+ logger.info(log_line.strip())
486
+
487
+ if 'already exists in history. ' in log_line:
488
+ node_name = log_line.split('(')[-1].split(')')[0]
489
+ logger.info(f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.YELLOW}'
490
+ '✔ SkyPilot runtime already deployed on worker node '
491
+ f'{node_name}. Skipping.{colorama.Style.RESET_ALL}')
492
+
493
+ if 'Failed to setup TCP forwarding on head node' in log_line:
494
+ node_name = log_line.split('(')[-1].split(')')[0]
495
+ logger.info(
496
+ f'{ux_utils.INDENT_SYMBOL}{colorama.Fore.RED}'
497
+ f'✗ Failed to setup TCP forwarding on head node {node_name}.'
498
+ f'{colorama.Style.RESET_ALL}')
499
+
500
+ def __exit__(self, except_type: Optional[Type[BaseException]],
501
+ except_value: Optional[BaseException],
502
+ traceback: Optional[types.TracebackType]) -> None:
503
+ del except_type, except_value, traceback # unused
504
+ self.status_display.stop()
505
+
506
+
294
507
  def create_table(field_names: List[str], **kwargs) -> prettytable.PrettyTable:
295
508
  """Creates table with default style."""
296
509
  border = kwargs.pop('border', False)
@@ -50,6 +50,20 @@ class DiskTier(enum.Enum):
50
50
  return types.index(self) <= types.index(other)
51
51
 
52
52
 
53
+ class StorageType(enum.Enum):
54
+ """Storage type."""
55
+ # Durable network storage, e.g. GCP persistent disks
56
+ NETWORK = 'network'
57
+ # Local instance storage, e.g. GCP local SSDs
58
+ INSTANCE = 'instance'
59
+
60
+
61
+ class DiskAttachMode(enum.Enum):
62
+ """Disk attach mode."""
63
+ READ_ONLY = 'read_only'
64
+ READ_WRITE = 'read_write'
65
+
66
+
53
67
  @dataclasses.dataclass
54
68
  class ClusterName:
55
69
  display_name: str
sky/utils/schemas.py CHANGED
@@ -188,6 +188,35 @@ def _get_single_resources_schema():
188
188
  }
189
189
  }],
190
190
  },
191
+ 'volumes': {
192
+ 'type': 'array',
193
+ 'items': {
194
+ 'type': 'object',
195
+ 'properties': {
196
+ 'disk_size': {
197
+ 'type': 'integer',
198
+ },
199
+ 'disk_tier': {
200
+ 'type': 'string',
201
+ },
202
+ 'path': {
203
+ 'type': 'string',
204
+ },
205
+ 'auto_delete': {
206
+ 'type': 'boolean',
207
+ },
208
+ 'storage_type': {
209
+ 'type': 'string',
210
+ },
211
+ 'name': {
212
+ 'type': 'string',
213
+ },
214
+ 'attach_mode': {
215
+ 'type': 'string',
216
+ },
217
+ },
218
+ },
219
+ },
191
220
  'disk_size': {
192
221
  'type': 'integer',
193
222
  },
@@ -372,6 +401,23 @@ def get_storage_schema():
372
401
  mode.value for mode in storage.StorageMode
373
402
  ]
374
403
  },
404
+ 'config': {
405
+ 'type': 'object',
406
+ 'properties': {
407
+ 'disk_size': {
408
+ 'type': 'integer',
409
+ },
410
+ 'disk_tier': {
411
+ 'type': 'string',
412
+ },
413
+ 'storage_type': {
414
+ 'type': 'string',
415
+ },
416
+ 'attach_mode': {
417
+ 'type': 'string',
418
+ },
419
+ },
420
+ },
375
421
  '_is_sky_managed': {
376
422
  'type': 'boolean',
377
423
  },
@@ -1043,6 +1089,27 @@ def get_config_schema():
1043
1089
  'fabric': {
1044
1090
  'type': 'string',
1045
1091
  },
1092
+ 'filesystems': {
1093
+ 'type': 'array',
1094
+ 'items': {
1095
+ 'type': 'object',
1096
+ 'additionalProperties': False,
1097
+ 'properties': {
1098
+ 'filesystem_id': {
1099
+ 'type': 'string',
1100
+ },
1101
+ 'attach_mode': {
1102
+ 'type': 'string',
1103
+ 'case_sensitive_enum': [
1104
+ 'READ_WRITE', 'READ_ONLY'
1105
+ ]
1106
+ },
1107
+ 'mount_path': {
1108
+ 'type': 'string',
1109
+ }
1110
+ }
1111
+ }
1112
+ },
1046
1113
  }
1047
1114
  },
1048
1115
  }
sky/utils/ux_utils.py CHANGED
@@ -161,7 +161,8 @@ def finishing_message(message: str,
161
161
  follow_up_message = follow_up_message if (follow_up_message
162
162
  is not None) else ''
163
163
  success_prefix = (f'{colorama.Style.RESET_ALL}{colorama.Fore.GREEN}✓ '
164
- f'{message}{colorama.Style.RESET_ALL}{follow_up_message}')
164
+ f'{message}{colorama.Style.RESET_ALL}{follow_up_message}'
165
+ f'{colorama.Style.RESET_ALL}')
165
166
  if log_path is None:
166
167
  return success_prefix
167
168
  path_hint = log_path_hint(log_path, is_local)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250521
3
+ Version: 1.0.0.dev20250523
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0
@@ -87,6 +87,9 @@ Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "oci"
87
87
  Provides-Extra: kubernetes
88
88
  Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "kubernetes"
89
89
  Requires-Dist: websockets; extra == "kubernetes"
90
+ Provides-Extra: ssh
91
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "ssh"
92
+ Requires-Dist: websockets; extra == "ssh"
90
93
  Provides-Extra: remote
91
94
  Requires-Dist: grpcio!=1.48.0,>=1.32.0; python_version < "3.10" and extra == "remote"
92
95
  Requires-Dist: grpcio!=1.48.0,>=1.42.0; python_version >= "3.10" and extra == "remote"
@@ -142,6 +145,8 @@ Requires-Dist: oci; extra == "all"
142
145
  Requires-Dist: ray[default]!=2.6.0,>=2.2.0; extra == "all"
143
146
  Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
144
147
  Requires-Dist: websockets; extra == "all"
148
+ Requires-Dist: kubernetes!=32.0.0,>=20.0.0; extra == "all"
149
+ Requires-Dist: websockets; extra == "all"
145
150
  Requires-Dist: grpcio!=1.48.0,>=1.32.0; python_version < "3.10" and extra == "all"
146
151
  Requires-Dist: grpcio!=1.48.0,>=1.42.0; python_version >= "3.10" and extra == "all"
147
152
  Requires-Dist: protobuf!=3.19.5,>=3.15.3; extra == "all"