skypilot-nightly 1.0.0.dev20250919__py3-none-any.whl → 1.0.0.dev20250925__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective registries. It is provided for informational purposes only.

Potentially problematic release: this version of skypilot-nightly might be problematic.

Files changed (113)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend.py +10 -0
  3. sky/backends/backend_utils.py +200 -78
  4. sky/backends/cloud_vm_ray_backend.py +37 -13
  5. sky/backends/local_docker_backend.py +9 -0
  6. sky/client/cli/command.py +104 -53
  7. sky/client/sdk.py +13 -5
  8. sky/client/sdk_async.py +4 -2
  9. sky/clouds/kubernetes.py +2 -1
  10. sky/clouds/runpod.py +20 -7
  11. sky/core.py +7 -53
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/{VvaUqYDvHOcHZRnvMBmax → bn-NHt5qTzeTN2PefXuDA}/_buildManifest.js +1 -1
  14. sky/dashboard/out/_next/static/chunks/1121-b911fc0a0b4742f0.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/8969-d8bc3a2b9cf839a9.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-2cb9b15e09cda628.js +16 -0
  18. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-9525660179df3605.js → [cluster]-e052384df65ef200.js} +1 -1
  19. sky/dashboard/out/_next/static/chunks/{webpack-b2a3938c22b6647b.js → webpack-16ba1d7187d2e3b1.js} +1 -1
  20. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  21. sky/dashboard/out/clusters/[cluster].html +1 -1
  22. sky/dashboard/out/clusters.html +1 -1
  23. sky/dashboard/out/config.html +1 -1
  24. sky/dashboard/out/index.html +1 -1
  25. sky/dashboard/out/infra/[context].html +1 -1
  26. sky/dashboard/out/infra.html +1 -1
  27. sky/dashboard/out/jobs/[job].html +1 -1
  28. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  29. sky/dashboard/out/jobs.html +1 -1
  30. sky/dashboard/out/users.html +1 -1
  31. sky/dashboard/out/volumes.html +1 -1
  32. sky/dashboard/out/workspace/new.html +1 -1
  33. sky/dashboard/out/workspaces/[name].html +1 -1
  34. sky/dashboard/out/workspaces.html +1 -1
  35. sky/data/mounting_utils.py +19 -10
  36. sky/execution.py +4 -2
  37. sky/global_user_state.py +224 -38
  38. sky/jobs/client/sdk.py +10 -1
  39. sky/jobs/controller.py +7 -7
  40. sky/jobs/server/core.py +3 -3
  41. sky/jobs/server/server.py +15 -11
  42. sky/jobs/utils.py +1 -1
  43. sky/logs/agent.py +30 -3
  44. sky/logs/aws.py +9 -19
  45. sky/provision/__init__.py +2 -1
  46. sky/provision/aws/instance.py +2 -1
  47. sky/provision/azure/instance.py +2 -1
  48. sky/provision/cudo/instance.py +2 -2
  49. sky/provision/do/instance.py +2 -2
  50. sky/provision/docker_utils.py +41 -19
  51. sky/provision/fluidstack/instance.py +2 -2
  52. sky/provision/gcp/instance.py +2 -1
  53. sky/provision/hyperbolic/instance.py +2 -1
  54. sky/provision/instance_setup.py +1 -1
  55. sky/provision/kubernetes/instance.py +134 -8
  56. sky/provision/lambda_cloud/instance.py +2 -1
  57. sky/provision/nebius/instance.py +2 -1
  58. sky/provision/oci/instance.py +2 -1
  59. sky/provision/paperspace/instance.py +2 -2
  60. sky/provision/primeintellect/instance.py +2 -2
  61. sky/provision/provisioner.py +1 -0
  62. sky/provision/runpod/instance.py +2 -2
  63. sky/provision/scp/instance.py +2 -2
  64. sky/provision/seeweb/instance.py +2 -1
  65. sky/provision/vast/instance.py +2 -1
  66. sky/provision/vsphere/instance.py +6 -5
  67. sky/schemas/api/responses.py +2 -1
  68. sky/serve/autoscalers.py +2 -0
  69. sky/serve/client/impl.py +45 -19
  70. sky/serve/replica_managers.py +12 -5
  71. sky/serve/serve_utils.py +5 -11
  72. sky/serve/server/core.py +9 -6
  73. sky/serve/server/impl.py +78 -25
  74. sky/serve/server/server.py +4 -5
  75. sky/serve/service_spec.py +33 -0
  76. sky/server/auth/oauth2_proxy.py +2 -2
  77. sky/server/constants.py +1 -1
  78. sky/server/daemons.py +2 -3
  79. sky/server/requests/executor.py +56 -6
  80. sky/server/requests/payloads.py +31 -8
  81. sky/server/requests/preconditions.py +2 -3
  82. sky/server/rest.py +2 -0
  83. sky/server/server.py +28 -19
  84. sky/server/stream_utils.py +34 -12
  85. sky/setup_files/dependencies.py +12 -2
  86. sky/setup_files/setup.py +44 -44
  87. sky/skylet/constants.py +2 -3
  88. sky/templates/kubernetes-ray.yml.j2 +16 -15
  89. sky/usage/usage_lib.py +3 -0
  90. sky/utils/cli_utils/status_utils.py +4 -5
  91. sky/utils/context.py +104 -29
  92. sky/utils/controller_utils.py +7 -6
  93. sky/utils/kubernetes/create_cluster.sh +13 -28
  94. sky/utils/kubernetes/delete_cluster.sh +10 -7
  95. sky/utils/kubernetes/generate_kind_config.py +6 -66
  96. sky/utils/kubernetes/kubernetes_deploy_utils.py +170 -37
  97. sky/utils/kubernetes_enums.py +5 -0
  98. sky/utils/ux_utils.py +35 -1
  99. sky/utils/yaml_utils.py +9 -0
  100. sky/volumes/client/sdk.py +44 -8
  101. sky/volumes/server/server.py +33 -7
  102. sky/volumes/volume.py +22 -14
  103. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/METADATA +38 -33
  104. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/RECORD +109 -109
  105. sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/6856-9a2538f38c004652.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +0 -1
  108. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +0 -16
  109. /sky/dashboard/out/_next/static/{VvaUqYDvHOcHZRnvMBmax → bn-NHt5qTzeTN2PefXuDA}/_ssgManifest.js +0 -0
  110. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/WHEEL +0 -0
  111. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/entry_points.txt +0 -0
  112. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/licenses/LICENSE +0 -0
  113. {skypilot_nightly-1.0.0.dev20250919.dist-info → skypilot_nightly-1.0.0.dev20250925.dist-info}/top_level.txt +0 -0
sky/provision/vsphere/instance.py CHANGED
@@ -30,9 +30,10 @@ HEAD_NODE_VALUE = '1'
 WORKER_NODE_VALUE = '0'
 
 
-def run_instances(region: str, cluster_name: str,
+def run_instances(region: str, cluster_name: str, cluster_name_on_cloud: str,
                   config: common.ProvisionConfig) -> common.ProvisionRecord:
     """See sky/provision/__init__.py"""
+    del cluster_name  # unused
     logger.info('New provision of Vsphere: run_instances().')
 
     resumed_instance_ids: List[str] = []
@@ -40,7 +41,7 @@ def run_instances(region: str, cluster_name: str,
     vc_object = _get_vc_object(region)
     vc_object.connect()
 
-    exist_instances = _get_filtered_instance(vc_object, cluster_name,
+    exist_instances = _get_filtered_instance(vc_object, cluster_name_on_cloud,
                                              config.provider_config)
     head_instance_id = _get_head_instance_id(exist_instances)
 
@@ -89,8 +90,8 @@ def run_instances(region: str, cluster_name: str,
                                                   config, region, vc_object)
     # TODO: update logic for multi-node creation
     for _ in range(to_start_num):
-        created_instance_uuid = _create_instances(cluster_name, config,
-                                                  region, vc_object,
+        created_instance_uuid = _create_instances(cluster_name_on_cloud,
+                                                  config, region, vc_object,
                                                   vsphere_cluster_name)
         created_instance_ids.append(created_instance_uuid)
         if head_instance_id is None:
@@ -104,7 +105,7 @@ def run_instances(region: str, cluster_name: str,
         provider_name='vsphere',
         region=region,
         zone=vsphere_cluster_name,
-        cluster_name=cluster_name,
+        cluster_name=cluster_name_on_cloud,
         head_instance_id=head_instance_id,
         resumed_instance_ids=resumed_instance_ids,
         created_instance_ids=created_instance_ids,
sky/schemas/api/responses.py CHANGED
@@ -86,7 +86,7 @@ class StatusResponse(ResponseBaseModel):
     # backends.ResourceHandle, so we use Any here.
     # This is an internally facing field anyway, so it's less
     # of a problem that it's not typed.
-    handle: Any
+    handle: Optional[Any] = None
     last_use: str
     status: status_lib.ClusterStatus
     autostop: int
@@ -118,6 +118,7 @@ class StatusResponse(ResponseBaseModel):
     cpus: Optional[str] = None
     memory: Optional[str] = None
     accelerators: Optional[str] = None
+    cluster_name_on_cloud: Optional[str] = None
 
 
 class UploadStatus(enum.Enum):
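
Relaxing handle to Optional[Any] = None lets status endpoints return lightweight summary records that omit the internally typed handle entirely. A toy pydantic sketch of the idea (field set abbreviated; not the full StatusResponse):

    from typing import Any, Optional

    import pydantic

    class StatusResponse(pydantic.BaseModel):
        # Toy subset of the real model: with a default of None, a summary
        # response can simply leave the heavyweight handle out.
        name: str
        status: str
        handle: Optional[Any] = None
        cluster_name_on_cloud: Optional[str] = None

    summary = StatusResponse(name='my-cluster', status='UP')
    assert summary.handle is None  # no handle needed for summary views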
sky/serve/autoscalers.py CHANGED
@@ -411,6 +411,8 @@ class _AutoscalerWithHysteresis(Autoscaler):
         # `_set_target_num_replicas_with_hysteresis` to have the replicas
         # quickly scale after each update.
         self.target_num_replicas = self._calculate_target_num_replicas()
+        logger.debug(f'Target number of replicas: {self.target_num_replicas}'
+                     'after update_version.')
         # Cleanup hysteresis counters.
         self.upscale_counter = 0
         self.downscale_counter = 0
sky/serve/client/impl.py CHANGED
@@ -105,7 +105,8 @@ def update(
 
 
 def apply(
-    task: Union['sky.Task', 'sky.Dag'],
+    task: Optional[Union['sky.Task', 'sky.Dag']],
+    workers: Optional[int],
     service_name: str,
     mode: 'serve_utils.UpdateMode',
     pool: bool = False,
@@ -117,35 +118,60 @@ def apply(
     # Avoid circular import.
     from sky.client import sdk  # pylint: disable=import-outside-toplevel
 
-    dag = dag_utils.convert_entrypoint_to_dag(task)
-    with admin_policy_utils.apply_and_use_config_in_current_request(
-            dag, at_client_side=True) as dag:
-        sdk.validate(dag)
-        request_id = sdk.optimize(dag)
-        sdk.stream_and_get(request_id)
-        if _need_confirmation:
-            noun = 'pool' if pool else 'service'
-            prompt = f'Applying config to {noun} {service_name!r}. Proceed?'
-            if prompt is not None:
-                click.confirm(prompt,
-                              default=True,
-                              abort=True,
-                              show_default=True)
-
-        dag = client_common.upload_mounts_to_api_server(dag)
-        dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
+    noun = 'pool' if pool else 'service'
+    # There are two cases here. If task is None, we should be trying to
+    # update the number of workers in the pool. If task is not None, we should
+    # be trying to apply a new config to the pool. The two code paths
+    # are slightly different with us needing to craft the dag and validate
+    # it if we have a task. In the future we could move this logic to the
+    # server side and simplify this code, for the time being we keep it here.
+    if task is None:
+        if workers is None:
+            raise ValueError(f'Cannot create a new {noun} without specifying '
                             f'task or workers. Please provide either a task '
                             f'or specify the number of workers.')
 
         body = payloads.JobsPoolApplyBody(
-            task=dag_str,
+            workers=workers,
             pool_name=service_name,
             mode=mode,
         )
+
         response = server_common.make_authenticated_request(
             'POST',
             '/jobs/pool_apply',
             json=json.loads(body.model_dump_json()),
             timeout=(5, None))
         return server_common.get_request_id(response)
+    else:
+        dag = dag_utils.convert_entrypoint_to_dag(task)
+        with admin_policy_utils.apply_and_use_config_in_current_request(
+                dag, at_client_side=True) as dag:
+            sdk.validate(dag)
+            request_id = sdk.optimize(dag)
+            sdk.stream_and_get(request_id)
+            if _need_confirmation:
+                prompt = f'Applying config to {noun} {service_name!r}. Proceed?'
+                if prompt is not None:
+                    click.confirm(prompt,
+                                  default=True,
+                                  abort=True,
+                                  show_default=True)
+
+            dag = client_common.upload_mounts_to_api_server(dag)
+            dag_str = dag_utils.dump_chain_dag_to_yaml_str(dag)
+
+            body = payloads.JobsPoolApplyBody(
+                task=dag_str,
+                pool_name=service_name,
+                mode=mode,
+            )
+            response = server_common.make_authenticated_request(
+                'POST',
+                '/jobs/pool_apply',
+                json=json.loads(body.model_dump_json()),
+                timeout=(5, None))
+            return server_common.get_request_id(response)
 
 
 def down(
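
The client apply now dispatches on whether a task was given: a workers-only call posts just the new worker count, while a task posts a validated, uploaded DAG. A condensed sketch of that dispatch; post_pool_apply is a hypothetical helper standing in for the authenticated /jobs/pool_apply request:

    from typing import Any, Dict, Optional

    def post_pool_apply(payload: Dict[str, Any]) -> str:
        # Hypothetical stand-in for make_authenticated_request('POST',
        # '/jobs/pool_apply', ...) followed by get_request_id().
        return f'request-id-for-{payload["pool_name"]}'

    def apply(task: Optional[object], workers: Optional[int],
              service_name: str, mode: str, pool: bool = False) -> str:
        noun = 'pool' if pool else 'service'
        if task is None:
            # Workers-only update: nothing to build, validate, or upload.
            if workers is None:
                raise ValueError(f'Cannot create a new {noun} without a '
                                 'task or a worker count.')
            return post_pool_apply({'workers': workers,
                                    'pool_name': service_name,
                                    'mode': mode})
        # Task path: the real client converts the entrypoint to a DAG,
        # validates and optimizes it, uploads file mounts, then posts the
        # dumped DAG YAML instead of a worker count.
        dag_str = f'dag-yaml-for-{service_name}'  # placeholder
        return post_pool_apply({'task': dag_str,
                                'pool_name': service_name,
                                'mode': mode})

    print(apply(None, workers=3, service_name='my-pool', mode='rolling',
                pool=True))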
sky/serve/replica_managers.py CHANGED
@@ -422,11 +422,12 @@ class ReplicaInfo:
         based on the cluster name.
         """
         if cluster_record is None:
-            cluster_record = global_user_state.get_cluster_from_name(
+            handle = global_user_state.get_handle_from_cluster_name(
                 self.cluster_name)
-        if cluster_record is None:
+        else:
+            handle = cluster_record['handle']
+        if handle is None:
             return None
-        handle = cluster_record['handle']
         assert isinstance(handle, backends.CloudVmRayResourceHandle)
         return handle
 
@@ -443,6 +444,12 @@ class ReplicaInfo:
         handle = self.handle()
         if handle is None:
             return None
+        if self.replica_port == '-':
+            # This is a pool replica so there is no endpoint and it's filled
+            # with this dummy value. We return None here so that we can
+            # get the active ready replicas and perform autoscaling. Otherwise,
+            # would error out when trying to get the endpoint.
+            return None
         replica_port_int = int(self.replica_port)
         try:
             endpoint_dict = backend_utils.get_endpoints(handle.cluster_name,
@@ -470,7 +477,7 @@ class ReplicaInfo:
                 with_handle: bool,
                 with_url: bool = True) -> Dict[str, Any]:
         cluster_record = global_user_state.get_cluster_from_name(
-            self.cluster_name)
+            self.cluster_name, include_user_info=False, summary_response=True)
         info_dict = {
             'replica_id': self.replica_id,
             'name': self.cluster_name,
@@ -956,7 +963,7 @@ class SkyPilotReplicaManager(ReplicaManager):
         # provision) or the cluster is preempted and cleaned up by the status
         # refresh. In this case, we skip spawning a new down process to save
         # controller resources.
-        if global_user_state.get_cluster_from_name(info.cluster_name) is None:
+        if not global_user_state.cluster_with_name_exists(info.cluster_name):
            self._handle_sky_down_finish(info, exitcode=0)
            return
 
sky/serve/serve_utils.py CHANGED
@@ -262,7 +262,7 @@ def _validate_consolidation_mode_config(current_is_consolidation_mode: bool,
     controller = controller_utils.get_controller_for_pool(pool).value
     if current_is_consolidation_mode:
         controller_cn = controller.cluster_name
-        if global_user_state.get_cluster_from_name(controller_cn) is not None:
+        if global_user_state.cluster_with_name_exists(controller_cn):
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.InconsistentConsolidationModeError(
                     f'{colorama.Fore.RED}Consolidation mode for '
@@ -896,8 +896,8 @@ def _terminate_failed_services(
     # replicas, so we don't need to try again here.
     for replica_info in serve_state.get_replica_infos(service_name):
         # TODO(tian): Refresh latest status of the cluster.
-        if global_user_state.get_cluster_from_name(
-                replica_info.cluster_name) is not None:
+        if global_user_state.cluster_with_name_exists(
+                replica_info.cluster_name):
             remaining_replica_clusters.append(f'{replica_info.cluster_name!r}')
         serve_state.remove_replica(service_name, replica_info.replica_id)
 
@@ -1133,10 +1133,8 @@ def _process_line(line: str,
     # `✓ Cluster launched: new-http. View logs at: *.log`
     # We should tail the detailed logs for user.
     def cluster_is_up() -> bool:
-        cluster_record = global_user_state.get_cluster_from_name(cluster_name)
-        if cluster_record is None:
-            return False
-        return cluster_record['status'] == status_lib.ClusterStatus.UP
+        status = global_user_state.get_status_from_cluster_name(cluster_name)
+        return status == status_lib.ClusterStatus.UP
 
     provision_api_log_prompt = re.match(_SKYPILOT_PROVISION_API_LOG_PATTERN,
                                         line)
@@ -1329,10 +1327,6 @@ def stream_replica_logs(service_name: str, replica_id: int, follow: bool,
             print(line, end='', flush=True)
         return ''
 
-    # For pools, we don't stream the job logs as the run section is ignored.
-    if pool:
-        return ''
-
     backend = backends.CloudVmRayBackend()
     handle = global_user_state.get_handle_from_cluster_name(
         replica_cluster_name)
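
These serve-side call sites all migrate from get_cluster_from_name to narrower global_user_state helpers (cluster_with_name_exists, get_status_from_cluster_name, get_handle_from_cluster_name) that fetch only the field actually needed. A rough sketch of the access pattern, assuming a toy in-memory store in place of the real state database:

    from typing import Any, Dict, Optional

    # Toy in-memory stand-in for the real global_user_state database.
    _CLUSTERS: Dict[str, Dict[str, Any]] = {
        'new-http': {'status': 'UP', 'handle': object()},
    }

    def cluster_with_name_exists(name: str) -> bool:
        # Existence check without materializing the whole record.
        return name in _CLUSTERS

    def get_status_from_cluster_name(name: str) -> Optional[str]:
        record = _CLUSTERS.get(name)
        return None if record is None else record['status']

    def cluster_is_up(name: str) -> bool:
        # Mirrors the rewritten helper in _process_line: a missing cluster
        # yields a non-UP status, so the separate None branch disappears.
        return get_status_from_cluster_name(name) == 'UP'

    assert cluster_is_up('new-http')
    assert not cluster_is_up('long-gone')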
sky/serve/server/core.py CHANGED
@@ -46,20 +46,23 @@ def up(
 
 
 @usage_lib.entrypoint
-def update(
-    task: 'sky.Task',
-    service_name: str,
-    mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE) -> None:
+def update(task: Optional['sky.Task'],
+           service_name: str,
+           mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
+           workers: Optional[int] = None) -> None:
     """Updates an existing service.
 
     Please refer to the sky.cli.serve_update for the document.
 
     Args:
-        task: sky.Task to update.
+        task: sky.Task to update, or None if updating
+            the number of workers/replicas.
         service_name: Name of the service.
         mode: Update mode.
+        workers: Number of workers/replicas to set for the service when
+            task is None.
     """
-    return impl.update(task, service_name, mode, pool=False)
+    return impl.update(task, service_name, mode, pool=False, workers=workers)
 
 
 @usage_lib.entrypoint
sky/serve/server/impl.py CHANGED
@@ -411,6 +411,9 @@ def up(
             f'\n{ux_utils.INDENT_LAST_SYMBOL}To terminate the pool:\t'
             f'{ux_utils.BOLD}sky jobs pool down {service_name}'
             f'{ux_utils.RESET_BOLD}'
+            f'\n{ux_utils.INDENT_SYMBOL}To update the number of workers:\t'
+            f'{ux_utils.BOLD}sky jobs pool apply --pool {service_name} '
+            f'--workers 5{ux_utils.RESET_BOLD}'
             '\n\n' + ux_utils.finishing_message('Successfully created pool '
                                                 f'{service_name!r}.'))
     else:
@@ -448,37 +451,15 @@ def up(
 
 
 def update(
-    task: 'task_lib.Task',
+    task: Optional['task_lib.Task'],
     service_name: str,
     mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
     pool: bool = False,
+    workers: Optional[int] = None,
 ) -> None:
     """Updates an existing service or pool."""
     noun = 'pool' if pool else 'service'
     capnoun = noun.capitalize()
-    task.validate()
-    serve_utils.validate_service_task(task, pool=pool)
-
-    # Always apply the policy again here, even though it might have been applied
-    # in the CLI. This is to ensure that we apply the policy to the final DAG
-    # and get the mutated config.
-    # TODO(cblmemo,zhwu): If a user sets a new skypilot_config, the update
-    # will not apply the config.
-    dag, _ = admin_policy_utils.apply(task)
-    task = dag.tasks[0]
-    if pool:
-        if task.run is not None:
-            logger.warning(f'{colorama.Fore.YELLOW}The `run` section will be '
-                           f'ignored for pool.{colorama.Style.RESET_ALL}')
-        # Use dummy run script for cluster pool.
-        task.run = serve_constants.POOL_DUMMY_RUN_COMMAND
-
-    assert task.service is not None
-    if not pool and task.service.tls_credential is not None:
-        logger.warning('Updating TLS keyfile and certfile is not supported. '
-                       'Any updates to the keyfile and certfile will not take '
-                       'effect. To update TLS keyfile and certfile, please '
-                       'tear down the service and spin up a new one.')
 
     controller_type = controller_utils.get_controller_for_pool(pool)
     handle = backend_utils.is_controller_accessible(
@@ -505,6 +486,77 @@ def update(
             f'To spin up a {noun}, use {ux_utils.BOLD}'
             f'{cmd}{ux_utils.RESET_BOLD}')
 
+    # If task is None and workers is specified, load existing configuration
+    # and update replica count.
+    if task is None:
+        if workers is None:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    f'Cannot update {noun} without specifying '
+                    f'task or workers. Please provide either a task '
+                    f'or specify the number of workers.')
+
+        if not pool:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    'Non-pool service, trying to update replicas to '
+                    f'{workers} is not supported. Ignoring the update.')
+
+        # Load the existing task configuration from the service's YAML file
+        latest_yaml_path = serve_utils.generate_task_yaml_file_name(
+            service_name, service_record['version'], expand_user=False)
+
+        logger.debug('Loading existing task configuration from '
+                     f'{latest_yaml_path} to create a new modified task.')
+
+        # Get the path locally.
+        with tempfile.NamedTemporaryFile(
+                prefix=f'service-task-{service_name}-',
+                mode='w',
+        ) as service_file:
+            try:
+                backend.download_file(handle, latest_yaml_path,
+                                      service_file.name)
+            except exceptions.CommandError as e:
+                raise RuntimeError(
+                    f'Failed to download the old task configuration from '
+                    f'{latest_yaml_path}: {e.error_msg}') from e
+
+            # Load the existing task configuration
+            existing_config = yaml_utils.read_yaml(service_file.name)
+            task = task_lib.Task.from_yaml_config(existing_config)
+
+        if task.service is None:
+            with ux_utils.print_exception_no_traceback():
+                raise RuntimeError('No service configuration found in '
+                                   f'existing {noun} {service_name!r}')
+        task.set_service(task.service.copy(min_replicas=workers))
+
+    task.validate()
+    serve_utils.validate_service_task(task, pool=pool)
+
+    # Now apply the policy and handle task-specific logic
+    # Always apply the policy again here, even though it might have been applied
+    # in the CLI. This is to ensure that we apply the policy to the final DAG
+    # and get the mutated config.
+    # TODO(cblmemo,zhwu): If a user sets a new skypilot_config, the update
+    # will not apply the config.
+    dag, _ = admin_policy_utils.apply(task)
+    task = dag.tasks[0]
+    if pool:
+        if task.run is not None:
+            logger.warning(f'{colorama.Fore.YELLOW}The `run` section will be '
+                           f'ignored for pool.{colorama.Style.RESET_ALL}')
+        # Use dummy run script for cluster pool.
+        task.run = serve_constants.POOL_DUMMY_RUN_COMMAND
+
+    assert task.service is not None
+    if not pool and task.service.tls_credential is not None:
+        logger.warning('Updating TLS keyfile and certfile is not supported. '
+                       'Any updates to the keyfile and certfile will not take '
+                       'effect. To update TLS keyfile and certfile, please '
+                       'tear down the service and spin up a new one.')
+
     prompt = None
     if (service_record['status'] == serve_state.ServiceStatus.CONTROLLER_FAILED
        ):
@@ -625,6 +677,7 @@ def update(
 
 def apply(
     task: 'task_lib.Task',
+    workers: Optional[int],
     service_name: str,
     mode: serve_utils.UpdateMode = serve_utils.DEFAULT_UPDATE_MODE,
     pool: bool = False,
@@ -640,7 +693,7 @@ def apply(
         service_record = _get_service_record(service_name, pool, handle,
                                              backend)
         if service_record is not None:
-            return update(task, service_name, mode, pool)
+            return update(task, service_name, mode, pool, workers)
     except exceptions.ClusterNotUpError:
         pass
     up(task, service_name, pool)
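
The workers-only branch in update reconstructs the task from the service's last uploaded YAML and only rewrites the replica count before revalidating. A stripped-down sketch of that resize flow; load_latest_task_config is a hypothetical helper standing in for the controller download and YAML parse:

    from typing import Any, Dict

    def load_latest_task_config(service_name: str) -> Dict[str, Any]:
        # Hypothetical helper: the real code downloads the versioned task
        # YAML from the controller via backend.download_file() and parses
        # it with yaml_utils.read_yaml().
        return {'service': {'replicas': 1}, 'run': 'echo serving'}

    def resize_pool(service_name: str, workers: int) -> Dict[str, Any]:
        config = load_latest_task_config(service_name)
        if 'service' not in config:
            raise RuntimeError('No service configuration found in existing '
                               f'pool {service_name!r}')
        # The real path rebuilds a Task and calls
        # task.set_service(task.service.copy(min_replicas=workers)).
        config['service']['replicas'] = workers
        return config

    print(resize_pool('my-pool', workers=5))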
sky/serve/server/server.py CHANGED
@@ -98,7 +98,7 @@ async def tail_logs(
     request: fastapi.Request, log_body: payloads.ServeLogsBody,
     background_tasks: fastapi.BackgroundTasks
 ) -> fastapi.responses.StreamingResponse:
-    executor.schedule_request(
+    request_task = executor.prepare_request(
         request_id=request.state.request_id,
         request_name='serve.logs',
         request_body=log_body,
@@ -106,10 +106,9 @@ async def tail_logs(
         schedule_type=api_requests.ScheduleType.SHORT,
         request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
     )
-
-    request_task = await api_requests.get_request_async(request.state.request_id
-                                                       )
-
+    task = executor.execute_request_in_coroutine(request_task)
+    # Cancel the coroutine after the request is done or client disconnects
+    background_tasks.add_task(task.cancel)
     return stream_utils.stream_response(
         request_id=request_task.request_id,
         logs_path=request_task.log_path,
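
Instead of enqueueing serve.logs on the executor, the endpoint now prepares the request, runs it as a coroutine on the server's own event loop, and registers cancellation so a finished response or client disconnect tears the coroutine down. A minimal plain-asyncio sketch of that lifecycle, without the FastAPI wiring:

    import asyncio

    class CoroutineTask:
        """Handle that cancels a background coroutine and awaits its exit."""

        def __init__(self, task: asyncio.Task):
            self.task = task

        async def cancel(self):
            try:
                self.task.cancel()
                await self.task
            except asyncio.CancelledError:
                pass

    async def stream_logs():
        # Stand-in for the request body that the executor would run.
        while True:
            await asyncio.sleep(0.1)

    async def handler():
        # prepare -> run in-loop -> cancel on teardown, as in tail_logs().
        task = CoroutineTask(asyncio.create_task(stream_logs()))
        await asyncio.sleep(0.3)  # response streaming happens here
        await task.cancel()       # background_tasks.add_task(task.cancel)

    asyncio.run(handler())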
sky/serve/service_spec.py CHANGED
@@ -506,3 +506,36 @@ class SkyServiceSpec:
         if not hasattr(self, '_pool'):
             return False
         return bool(self._pool)
+
+    def copy(self, **override) -> 'SkyServiceSpec':
+        return SkyServiceSpec(
+            readiness_path=override.pop('readiness_path', self._readiness_path),
+            initial_delay_seconds=override.pop('initial_delay_seconds',
+                                               self._initial_delay_seconds),
+            readiness_timeout_seconds=override.pop(
+                'readiness_timeout_seconds', self._readiness_timeout_seconds),
+            min_replicas=override.pop('min_replicas', self._min_replicas),
+            max_replicas=override.pop('max_replicas', self._max_replicas),
+            num_overprovision=override.pop('num_overprovision',
+                                           self._num_overprovision),
+            ports=override.pop('ports', self._ports),
+            target_qps_per_replica=override.pop('target_qps_per_replica',
+                                                self._target_qps_per_replica),
+            post_data=override.pop('post_data', self._post_data),
+            tls_credential=override.pop('tls_credential', self._tls_credential),
+            readiness_headers=override.pop('readiness_headers',
+                                           self._readiness_headers),
+            dynamic_ondemand_fallback=override.pop(
+                'dynamic_ondemand_fallback', self._dynamic_ondemand_fallback),
+            base_ondemand_fallback_replicas=override.pop(
+                'base_ondemand_fallback_replicas',
+                self._base_ondemand_fallback_replicas),
+            spot_placer=override.pop('spot_placer', self._spot_placer),
+            upscale_delay_seconds=override.pop('upscale_delay_seconds',
+                                               self._upscale_delay_seconds),
+            downscale_delay_seconds=override.pop('downscale_delay_seconds',
+                                                 self._downscale_delay_seconds),
+            load_balancing_policy=override.pop('load_balancing_policy',
+                                               self._load_balancing_policy),
+            pool=override.pop('pool', self._pool),
+        )
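
The new copy is a copy-with-overrides constructor: each keyword replaces the stored value and everything else carries over, which is what the workers-only update relies on (task.service.copy(min_replicas=workers)). A hedged sketch of the same semantics using dataclasses.replace on a two-field stand-in:

    from dataclasses import dataclass, replace

    @dataclass(frozen=True)
    class Spec:
        # Two of SkyServiceSpec's many fields, for illustration only.
        min_replicas: int
        ports: str = '8080'

        def copy(self, **override) -> 'Spec':
            # dataclasses.replace gives the same copy-with-overrides
            # semantics that SkyServiceSpec.copy spells out field by field.
            return replace(self, **override)

    spec = Spec(min_replicas=1)
    resized = spec.copy(min_replicas=5)
    assert resized == Spec(min_replicas=5, ports='8080')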
sky/server/auth/oauth2_proxy.py CHANGED
@@ -37,8 +37,8 @@ OAUTH2_PROXY_ENABLED_ENV_VAR = 'SKYPILOT_AUTH_OAUTH2_PROXY_ENABLED'
 class OAuth2ProxyMiddleware(starlette.middleware.base.BaseHTTPMiddleware):
     """Middleware to handle authentication by delegating to OAuth2 Proxy."""
 
-    def __init__(self, application: fastapi.FastAPI):
-        super().__init__(application)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
         self.enabled: bool = (os.getenv(OAUTH2_PROXY_ENABLED_ENV_VAR,
                                         'false') == 'true')
         self.proxy_base: str = ''
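
Taking *args and **kwargs keeps the middleware's constructor agnostic to how Starlette instantiates it; BaseHTTPMiddleware accepts the ASGI app plus an optional dispatch callable, and add_middleware forwards whatever options it was given. A minimal sketch of the same pattern (the env var name here is made up):

    import os

    from starlette.applications import Starlette
    from starlette.middleware.base import BaseHTTPMiddleware

    class FlagGatedMiddleware(BaseHTTPMiddleware):
        """Toy middleware using the same constructor pattern."""

        def __init__(self, *args, **kwargs):
            # Forward whatever Starlette passes (app, dispatch=..., any
            # add_middleware options) instead of pinning one signature.
            super().__init__(*args, **kwargs)
            self.enabled = os.getenv('TOY_AUTH_ENABLED', 'false') == 'true'

        async def dispatch(self, request, call_next):
            # Auth delegation would happen here when enabled.
            return await call_next(request)

    app = Starlette()
    app.add_middleware(FlagGatedMiddleware)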
sky/server/constants.py CHANGED
@@ -10,7 +10,7 @@ from sky.skylet import constants
 # based on version info is needed.
 # For more details and code guidelines, refer to:
 # https://docs.skypilot.co/en/latest/developers/CONTRIBUTING.html#backward-compatibility-guidelines
-API_VERSION = 18
+API_VERSION = 20
 
 # The minimum peer API version that the code should still work with.
 # Notes (dev):
sky/server/daemons.py CHANGED
@@ -8,7 +8,6 @@ from sky import sky_logging
 from sky import skypilot_config
 from sky.server import constants as server_constants
 from sky.utils import annotations
-from sky.utils import common
 from sky.utils import common_utils
 from sky.utils import env_options
 from sky.utils import subprocess_utils
@@ -94,13 +93,13 @@ class InternalRequestDaemon:
     def refresh_cluster_status_event():
         """Periodically refresh the cluster status."""
         # pylint: disable=import-outside-toplevel
-        from sky import core
+        from sky.backends import backend_utils
 
         logger.info('=== Refreshing cluster status ===')
         # This periodically refresh will hold the lock for the cluster being
         # refreshed, but it is OK because other operations will just wait for
         # the lock and get the just refreshed status without refreshing again.
-        core.status(refresh=common.StatusRefreshMode.FORCE, all_users=True)
+        backend_utils.refresh_cluster_records()
         logger.info('Status refreshed. Sleeping '
                     f'{server_constants.CLUSTER_REFRESH_DAEMON_INTERVAL_SECONDS}'
                     ' seconds for the next refresh...\n')
sky/server/requests/executor.py CHANGED
@@ -502,7 +502,35 @@ def _record_memory_metrics(request_name: str, proc: psutil.Process,
         name=request_name).observe(max(peak_rss - rss_begin, 0))
 
 
-async def execute_request_coroutine(request: api_requests.Request):
+class CoroutineTask:
+    """Wrapper of a background task runs in coroutine"""
+
+    def __init__(self, task: asyncio.Task):
+        self.task = task
+
+    async def cancel(self):
+        try:
+            self.task.cancel()
+            await self.task
+        except asyncio.CancelledError:
+            pass
+
+
+def execute_request_in_coroutine(
+        request: api_requests.Request) -> CoroutineTask:
+    """Execute a request in current event loop.
+
+    Args:
+        request: The request to execute.
+
+    Returns:
+        A CoroutineTask handle to operate the background task.
+    """
+    task = asyncio.create_task(_execute_request_coroutine(request))
+    return CoroutineTask(task)
+
+
+async def _execute_request_coroutine(request: api_requests.Request):
     """Execute a request in current event loop.
 
     Similar to _request_execution_wrapper, but executed as coroutine in current
@@ -640,13 +668,35 @@ def schedule_request(request_id: str,
         The precondition is waited asynchronously and does not block the
         caller.
     """
-    prepare_request(request_id, request_name, request_body, func,
-                    request_cluster_name, schedule_type, is_skypilot_system)
+    request_task = prepare_request(request_id, request_name, request_body, func,
+                                   request_cluster_name, schedule_type,
+                                   is_skypilot_system)
+    schedule_prepared_request(request_task, ignore_return_value, precondition,
+                              retryable)
+
+
+def schedule_prepared_request(request_task: api_requests.Request,
+                              ignore_return_value: bool = False,
+                              precondition: Optional[
+                                  preconditions.Precondition] = None,
+                              retryable: bool = False) -> None:
+    """Enqueue a request to the request queue
+
+    Args:
+        request_task: The prepared request task to schedule.
+        ignore_return_value: If True, the return value of the function will be
+            ignored.
+        precondition: If a precondition is provided, the request will only be
+            scheduled for execution when the precondition is met (returns True).
+            The precondition is waited asynchronously and does not block the
+            caller.
+        retryable: Whether the request should be retried if it fails.
+    """
 
     def enqueue():
-        input_tuple = (request_id, ignore_return_value, retryable)
-        logger.info(f'Queuing request: {request_id}')
-        _get_queue(schedule_type).put(input_tuple)
+        input_tuple = (request_task.request_id, ignore_return_value, retryable)
+        logger.info(f'Queuing request: {request_task.request_id}')
+        _get_queue(request_task.schedule_type).put(input_tuple)
 
     if precondition is not None:
         # Wait async to avoid blocking caller.
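
schedule_request is now a thin composition of prepare_request (build and persist the request record) and the new schedule_prepared_request (enqueue it), which is exactly the seam tail_logs uses to prepare a request and run it in-process instead of queueing it. A rough sketch of the split with the persistence and queues stubbed out:

    import queue
    from dataclasses import dataclass
    from typing import Dict, Tuple

    _QUEUES: Dict[str, 'queue.Queue[Tuple[str, bool, bool]]'] = {
        'SHORT': queue.Queue(),
    }

    @dataclass
    class Request:
        # Toy stand-in for api_requests.Request.
        request_id: str
        schedule_type: str = 'SHORT'

    def prepare_request(request_id: str,
                        schedule_type: str = 'SHORT') -> Request:
        # The real version also persists the request row and its body.
        return Request(request_id=request_id, schedule_type=schedule_type)

    def schedule_prepared_request(request_task: Request,
                                  ignore_return_value: bool = False,
                                  retryable: bool = False) -> None:
        # Enqueue by id; executor workers drain the per-type queue.
        _QUEUES[request_task.schedule_type].put(
            (request_task.request_id, ignore_return_value, retryable))

    def schedule_request(request_id: str) -> None:
        # The old entry point is now just prepare + schedule.
        schedule_prepared_request(prepare_request(request_id))

    schedule_request('req-1')
    assert _QUEUES['SHORT'].get()[0] == 'req-1'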