skypilot-nightly 1.0.0.dev20250807__py3-none-any.whl → 1.0.0.dev20250812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (91)
  1. sky/__init__.py +2 -2
  2. sky/adaptors/kubernetes.py +5 -2
  3. sky/backends/backend_utils.py +57 -7
  4. sky/backends/cloud_vm_ray_backend.py +50 -8
  5. sky/client/cli/command.py +60 -26
  6. sky/client/sdk.py +132 -65
  7. sky/client/sdk_async.py +1 -1
  8. sky/core.py +10 -2
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_buildManifest.js +1 -1
  11. sky/dashboard/out/_next/static/chunks/{6601-3e21152fe16da09c.js → 6601-06114c982db410b6.js} +1 -1
  12. sky/dashboard/out/_next/static/chunks/8056-5bdeda81199c0def.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/{8969-318c3dca725e8e5d.js → 8969-c9686994ddafcf01.js} +1 -1
  14. sky/dashboard/out/_next/static/chunks/pages/{_app-1e6de35d15a8d432.js → _app-491a4d699d95e808.js} +1 -1
  15. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-078751bad714c017.js +11 -0
  16. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-da9cc0901349c2e9.js +1 -0
  17. sky/dashboard/out/_next/static/chunks/webpack-7fd0cf9dbecff10f.js +1 -0
  18. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  19. sky/dashboard/out/clusters/[cluster].html +1 -1
  20. sky/dashboard/out/clusters.html +1 -1
  21. sky/dashboard/out/config.html +1 -1
  22. sky/dashboard/out/index.html +1 -1
  23. sky/dashboard/out/infra/[context].html +1 -1
  24. sky/dashboard/out/infra.html +1 -1
  25. sky/dashboard/out/jobs/[job].html +1 -1
  26. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  27. sky/dashboard/out/jobs.html +1 -1
  28. sky/dashboard/out/users.html +1 -1
  29. sky/dashboard/out/volumes.html +1 -1
  30. sky/dashboard/out/workspace/new.html +1 -1
  31. sky/dashboard/out/workspaces/[name].html +1 -1
  32. sky/dashboard/out/workspaces.html +1 -1
  33. sky/execution.py +21 -4
  34. sky/global_user_state.py +110 -1
  35. sky/jobs/client/sdk.py +27 -20
  36. sky/jobs/controller.py +2 -1
  37. sky/jobs/recovery_strategy.py +3 -0
  38. sky/jobs/server/core.py +4 -0
  39. sky/jobs/utils.py +9 -2
  40. sky/provision/__init__.py +3 -2
  41. sky/provision/aws/instance.py +5 -4
  42. sky/provision/azure/instance.py +5 -4
  43. sky/provision/cudo/instance.py +5 -4
  44. sky/provision/do/instance.py +5 -4
  45. sky/provision/fluidstack/instance.py +5 -4
  46. sky/provision/gcp/instance.py +5 -4
  47. sky/provision/hyperbolic/instance.py +5 -4
  48. sky/provision/kubernetes/instance.py +36 -6
  49. sky/provision/lambda_cloud/instance.py +5 -4
  50. sky/provision/nebius/instance.py +5 -4
  51. sky/provision/oci/instance.py +5 -4
  52. sky/provision/paperspace/instance.py +5 -4
  53. sky/provision/provisioner.py +6 -0
  54. sky/provision/runpod/instance.py +5 -4
  55. sky/provision/scp/instance.py +5 -5
  56. sky/provision/vast/instance.py +5 -5
  57. sky/provision/vsphere/instance.py +5 -4
  58. sky/schemas/db/global_user_state/001_initial_schema.py +1 -1
  59. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  60. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  61. sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
  62. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  63. sky/schemas/db/spot_jobs/001_initial_schema.py +1 -1
  64. sky/serve/client/impl.py +11 -8
  65. sky/serve/client/sdk.py +7 -7
  66. sky/serve/serve_state.py +437 -340
  67. sky/serve/serve_utils.py +37 -3
  68. sky/serve/server/impl.py +2 -2
  69. sky/server/common.py +12 -8
  70. sky/server/constants.py +1 -1
  71. sky/setup_files/alembic.ini +4 -0
  72. sky/skypilot_config.py +4 -4
  73. sky/users/permission.py +1 -1
  74. sky/utils/cli_utils/status_utils.py +10 -1
  75. sky/utils/db/db_utils.py +53 -1
  76. sky/utils/db/migration_utils.py +5 -1
  77. sky/utils/kubernetes/deploy_remote_cluster.py +3 -1
  78. sky/utils/resource_checker.py +162 -21
  79. sky/volumes/client/sdk.py +4 -4
  80. sky/workspaces/core.py +210 -6
  81. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/METADATA +2 -2
  82. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/RECORD +87 -83
  83. sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +0 -1
  84. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +0 -11
  85. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +0 -1
  86. sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +0 -1
  87. sky/dashboard/out/_next/static/{YAirOGsV1z6B2RJ0VIUmD → Fuy7OzApYTUMz2QgoP7dP}/_ssgManifest.js +0 -0
  88. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/WHEEL +0 -0
  89. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/entry_points.txt +0 -0
  90. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/licenses/LICENSE +0 -0
  91. {skypilot_nightly-1.0.0.dev20250807.dist-info → skypilot_nightly-1.0.0.dev20250812.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,61 @@
1
+ """fix initial revision
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '003'
18
+ down_revision: Union[str, Sequence[str], None] = '002'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade() -> None:
24
+ """Upgrade schema."""
25
+ with op.get_context().autocommit_block():
26
+ # add missing columns to clusters table
27
+ db_utils.add_column_to_table_alembic('clusters',
28
+ 'storage_mounts_metadata',
29
+ sa.LargeBinary(),
30
+ server_default=None)
31
+ # Set the value to replace existing entries to 1 so that all the
32
+ # existing clusters before #2977 are considered as ever up, i.e:
33
+ # existing cluster's default (null) -> 1;
34
+ # new cluster's default -> 0;
35
+ # This is conservative for the existing clusters: even if some INIT
36
+ # clusters were never really UP, setting it to 1 means they won't be
37
+ # auto-deleted during any failover.
38
+ db_utils.add_column_to_table_alembic(
39
+ 'clusters',
40
+ 'cluster_ever_up',
41
+ sa.Integer(),
42
+ server_default='0',
43
+ value_to_replace_existing_entries=1)
44
+ db_utils.add_column_to_table_alembic('clusters',
45
+ 'status_updated_at',
46
+ sa.Integer(),
47
+ server_default=None)
48
+
49
+ # remove mistakenly added columns
50
+ db_utils.drop_column_from_table_alembic('clusters', 'launched_nodes')
51
+ db_utils.drop_column_from_table_alembic('clusters', 'disk_tier')
52
+ db_utils.drop_column_from_table_alembic('clusters',
53
+ 'config_hash_locked')
54
+ db_utils.drop_column_from_table_alembic('clusters', 'handle_locked')
55
+ db_utils.drop_column_from_table_alembic('clusters', 'num_failures')
56
+ db_utils.drop_column_from_table_alembic('clusters', 'configs')
57
+
58
+
59
+ def downgrade() -> None:
60
+ """Downgrade schema."""
61
+ pass
@@ -0,0 +1,34 @@
1
+ """Columns for whether the cluster is managed.
2
+
3
+ Revision ID: 004
4
+ Revises: 003
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '004'
18
+ down_revision: Union[str, Sequence[str], None] = '003'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade():
24
+ """Add columns for whether the cluster is managed."""
25
+ with op.get_context().autocommit_block():
26
+ db_utils.add_column_to_table_alembic('clusters',
27
+ 'is_managed',
28
+ sa.Integer(),
29
+ server_default='0')
30
+
31
+
32
+ def downgrade():
33
+ """Remove columns for whether the cluster is managed."""
34
+ pass
@@ -0,0 +1,32 @@
1
+ """Add new table for cluster events.
2
+
3
+ Revision ID: 005
4
+ Revises: 004
5
+ Create Date: 2025-08-08
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+
13
+ from sky.global_user_state import Base
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '005'
18
+ down_revision: Union[str, Sequence[str], None] = '004'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade():
24
+ """Add new table for cluster events."""
25
+ with op.get_context().autocommit_block():
26
+ # Add new table for cluster events.
27
+ db_utils.add_table_to_db_sqlalchemy(Base.metadata, op.get_bind(),
28
+ 'cluster_events')
29
+
30
+
31
+ def downgrade():
32
+ pass
@@ -0,0 +1,67 @@
1
+ """Initial schema for sky serve state database with backwards compatibility
2
+
3
+ Revision ID: 001
4
+ Revises:
5
+ Create Date: 2024-01-01 12:00:00.000000
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ import json
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.serve import constants
15
+ from sky.serve.serve_state import Base
16
+ from sky.utils.db import db_utils
17
+
18
+ # revision identifiers, used by Alembic.
19
+ revision = '001'
20
+ down_revision = None
21
+ branch_labels = None
22
+ depends_on = None
23
+
24
+
25
+ def upgrade():
26
+ """Create initial schema and add all backwards compatibility columns"""
27
+ with op.get_context().autocommit_block():
28
+ # Create all tables with their current schema
29
+ db_utils.add_all_tables_to_db_sqlalchemy(Base.metadata, op.get_bind())
30
+
31
+ # Add backwards compatibility columns using helper function that matches
32
+ # original add_column_to_table_sqlalchemy behavior exactly
33
+ db_utils.add_column_to_table_alembic('services',
34
+ 'requested_resources_str',
35
+ sa.Text())
36
+ db_utils.add_column_to_table_alembic(
37
+ 'services',
38
+ 'current_version',
39
+ sa.Integer(),
40
+ server_default=f'{constants.INITIAL_VERSION}')
41
+ db_utils.add_column_to_table_alembic('services',
42
+ 'active_versions',
43
+ sa.Text(),
44
+ server_default=json.dumps([]))
45
+ db_utils.add_column_to_table_alembic('services',
46
+ 'load_balancing_policy', sa.Text())
47
+ db_utils.add_column_to_table_alembic('services',
48
+ 'tls_encrypted',
49
+ sa.Integer(),
50
+ server_default='0')
51
+ db_utils.add_column_to_table_alembic('services',
52
+ 'pool',
53
+ sa.Integer(),
54
+ server_default='0')
55
+ db_utils.add_column_to_table_alembic(
56
+ 'services',
57
+ 'controller_pid',
58
+ sa.Integer(),
59
+ value_to_replace_existing_entries=-1)
60
+ db_utils.add_column_to_table_alembic('services', 'hash', sa.Text())
61
+ db_utils.add_column_to_table_alembic('services', 'entrypoint',
62
+ sa.Text())
63
+
64
+
65
+ def downgrade():
66
+ """Drop all tables"""
67
+ Base.metadata.drop_all(bind=op.get_bind())
@@ -26,7 +26,7 @@ def upgrade():
26
26
  """Create initial schema and add all backwards compatibility columns"""
27
27
  with op.get_context().autocommit_block():
28
28
  # Create all tables with their current schema
29
- db_utils.add_tables_to_db_sqlalchemy(Base.metadata, op.get_bind())
29
+ db_utils.add_all_tables_to_db_sqlalchemy(Base.metadata, op.get_bind())
30
30
 
31
31
  # Add backwards compatibility columns using helper function that matches
32
32
  # original add_column_to_table_sqlalchemy behavior exactly
sky/serve/client/impl.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Implementation of SDK for SkyServe."""
2
2
  import json
3
3
  import typing
4
- from typing import List, Optional, Sequence, Union
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
6
  import click
7
7
 
@@ -25,7 +25,7 @@ def up(
25
25
  # Internal only:
26
26
  # pylint: disable=invalid-name
27
27
  _need_confirmation: bool = False
28
- ) -> server_common.RequestId:
28
+ ) -> server_common.RequestId[Tuple[str, str]]:
29
29
  assert not pool, 'Command `up` is not supported for pool.'
30
30
  # Avoid circular import.
31
31
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -69,7 +69,7 @@ def update(
69
69
  # Internal only:
70
70
  # pylint: disable=invalid-name
71
71
  _need_confirmation: bool = False
72
- ) -> server_common.RequestId:
72
+ ) -> server_common.RequestId[None]:
73
73
  assert not pool, 'Command `update` is not supported for pool.'
74
74
  # Avoid circular import.
75
75
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -112,7 +112,7 @@ def apply(
112
112
  # Internal only:
113
113
  # pylint: disable=invalid-name
114
114
  _need_confirmation: bool = False
115
- ) -> server_common.RequestId:
115
+ ) -> server_common.RequestId[None]:
116
116
  assert pool, 'Command `apply` is only supported for pool.'
117
117
  # Avoid circular import.
118
118
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -153,7 +153,7 @@ def down(
153
153
  all: bool = False, # pylint: disable=redefined-builtin
154
154
  purge: bool = False,
155
155
  pool: bool = False,
156
- ) -> server_common.RequestId:
156
+ ) -> server_common.RequestId[None]:
157
157
  if pool:
158
158
  body = payloads.JobsPoolDownBody(
159
159
  pool_names=service_names,
@@ -177,7 +177,7 @@ def down(
177
177
  def status(
178
178
  service_names: Optional[Union[str, List[str]]],
179
179
  pool: bool = False,
180
- ) -> server_common.RequestId:
180
+ ) -> server_common.RequestId[List[Dict[str, Any]]]:
181
181
  if pool:
182
182
  body = payloads.JobsPoolStatusBody(pool_names=service_names)
183
183
  else:
@@ -222,7 +222,8 @@ def tail_logs(service_name: str,
222
222
  json=json.loads(body.model_dump_json()),
223
223
  timeout=(5, None),
224
224
  stream=True)
225
- request_id = server_common.get_request_id(response)
225
+ request_id: server_common.RequestId[None] = server_common.get_request_id(
226
+ response)
226
227
  return sdk.stream_response(request_id=request_id,
227
228
  response=response,
228
229
  output_stream=output_stream,
@@ -265,7 +266,9 @@ def sync_down_logs(service_name: str,
265
266
  '/jobs/pool_sync-down-logs' if pool else '/serve/sync-down-logs',
266
267
  json=json.loads(body.model_dump_json()),
267
268
  timeout=(5, None))
268
- remote_dir = sdk.stream_and_get(server_common.get_request_id(response))
269
+ request_id: server_common.RequestId[str] = server_common.get_request_id(
270
+ response)
271
+ remote_dir = sdk.stream_and_get(request_id)
269
272
 
270
273
  # Download from API server paths to the client's local_dir
271
274
  client_common.download_logs_from_api_server([remote_dir], remote_dir,
sky/serve/client/sdk.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """SDK for SkyServe."""
2
2
  import json
3
3
  import typing
4
- from typing import List, Optional, Sequence, Union
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
6
  from sky.serve.client import impl
7
7
  from sky.server import common as server_common
@@ -26,7 +26,7 @@ def up(
26
26
  # Internal only:
27
27
  # pylint: disable=invalid-name
28
28
  _need_confirmation: bool = False
29
- ) -> server_common.RequestId:
29
+ ) -> server_common.RequestId[Tuple[str, str]]:
30
30
  """Spins up a service.
31
31
 
32
32
  Please refer to the sky.cli.serve_up for the document.
@@ -61,7 +61,7 @@ def update(
61
61
  # Internal only:
62
62
  # pylint: disable=invalid-name
63
63
  _need_confirmation: bool = False
64
- ) -> server_common.RequestId:
64
+ ) -> server_common.RequestId[None]:
65
65
  """Updates an existing service.
66
66
 
67
67
  Please refer to the sky.cli.serve_update for the document.
@@ -94,7 +94,7 @@ def down(
94
94
  service_names: Optional[Union[str, List[str]]],
95
95
  all: bool = False, # pylint: disable=redefined-builtin
96
96
  purge: bool = False
97
- ) -> server_common.RequestId:
97
+ ) -> server_common.RequestId[None]:
98
98
  """Tears down a service.
99
99
 
100
100
  Please refer to the sky.cli.serve_down for the docs.
@@ -122,7 +122,7 @@ def down(
122
122
  @usage_lib.entrypoint
123
123
  @server_common.check_server_healthy_or_start
124
124
  def terminate_replica(service_name: str, replica_id: int,
125
- purge: bool) -> server_common.RequestId:
125
+ purge: bool) -> server_common.RequestId[None]:
126
126
  """Tears down a specific replica for the given service.
127
127
 
128
128
  Args:
@@ -156,8 +156,8 @@ def terminate_replica(service_name: str, replica_id: int,
156
156
  @usage_lib.entrypoint
157
157
  @server_common.check_server_healthy_or_start
158
158
  def status(
159
- service_names: Optional[Union[str,
160
- List[str]]]) -> server_common.RequestId:
159
+ service_names: Optional[Union[str, List[str]]]
160
+ ) -> server_common.RequestId[List[Dict[str, Any]]]:
161
161
  """Gets service statuses.
162
162
 
163
163
  If service_names is given, return those services. Otherwise, return all