skypilot-nightly 1.0.0.dev20250806__py3-none-any.whl → 1.0.0.dev20250808__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (137)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +20 -1
  3. sky/backends/cloud_vm_ray_backend.py +42 -6
  4. sky/check.py +11 -1
  5. sky/client/cli/command.py +248 -119
  6. sky/client/sdk.py +146 -66
  7. sky/client/sdk_async.py +5 -1
  8. sky/core.py +5 -2
  9. sky/dashboard/out/404.html +1 -1
  10. sky/dashboard/out/_next/static/-DXZksWqf2waNHeU9YTQe/_buildManifest.js +1 -0
  11. sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +11 -0
  12. sky/dashboard/out/_next/static/chunks/1871-980a395e92633a5c.js +6 -0
  13. sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +1 -0
  14. sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/{4937.d6bf67771e353356.js → 4937.a2baa2df5572a276.js} +1 -1
  17. sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/{691.6d99cbfba347cebf.js → 691.5eeedf82cc243343.js} +1 -1
  20. sky/dashboard/out/_next/static/chunks/6989-6129c1cfbcf51063.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/8056-34d27f51e6d1c631.js +1 -0
  23. sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +16 -0
  24. sky/dashboard/out/_next/static/chunks/8969-c9686994ddafcf01.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/9025.a1bef12d672bb66d.js +6 -0
  26. sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +31 -0
  28. sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/{9847.4c46c5e229c78704.js → 9847.757720f3b40c0aa5.js} +1 -1
  30. sky/dashboard/out/_next/static/chunks/pages/{_app-2a43ea3241bbdacd.js → _app-491a4d699d95e808.js} +1 -1
  31. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ae17cec0fc6483d9.js +11 -0
  32. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/pages/{clusters-47f1ddae13a2f8e4.js → clusters-b30460f683e6ba96.js} +1 -1
  34. sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-2a44e70b500b6b70.js → [context]-13d53fffc03ccb52.js} +1 -1
  36. sky/dashboard/out/_next/static/chunks/pages/{infra-22faac9325016d83.js → infra-fc9222e26c8e2f0d.js} +1 -1
  37. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +11 -0
  38. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-f5ccf5d39d87aebe.js +21 -0
  39. sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/pages/{users-b90c865a690bfe84.js → users-7ed36e44e779d5c7.js} +1 -1
  41. sky/dashboard/out/_next/static/chunks/pages/{volumes-7af733f5d7b6ed1c.js → volumes-c9695d657f78b5dc.js} +1 -1
  42. sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-f72f73bcef9541dc.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/webpack-339efec49c0cc7d0.js +1 -0
  46. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +3 -0
  47. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  48. sky/dashboard/out/clusters/[cluster].html +1 -1
  49. sky/dashboard/out/clusters.html +1 -1
  50. sky/dashboard/out/config.html +1 -1
  51. sky/dashboard/out/index.html +1 -1
  52. sky/dashboard/out/infra/[context].html +1 -1
  53. sky/dashboard/out/infra.html +1 -1
  54. sky/dashboard/out/jobs/[job].html +1 -1
  55. sky/dashboard/out/jobs/pools/[pool].html +1 -0
  56. sky/dashboard/out/jobs.html +1 -1
  57. sky/dashboard/out/users.html +1 -1
  58. sky/dashboard/out/volumes.html +1 -1
  59. sky/dashboard/out/workspace/new.html +1 -1
  60. sky/dashboard/out/workspaces/[name].html +1 -1
  61. sky/dashboard/out/workspaces.html +1 -1
  62. sky/execution.py +6 -4
  63. sky/global_user_state.py +22 -3
  64. sky/jobs/__init__.py +2 -0
  65. sky/jobs/client/sdk.py +67 -19
  66. sky/jobs/controller.py +2 -1
  67. sky/jobs/server/core.py +48 -1
  68. sky/jobs/server/server.py +52 -3
  69. sky/jobs/state.py +5 -1
  70. sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
  71. sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
  72. sky/schemas/db/global_user_state/004_is_managed.py +34 -0
  73. sky/schemas/db/serve_state/001_initial_schema.py +67 -0
  74. sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
  75. sky/serve/client/impl.py +93 -6
  76. sky/serve/client/sdk.py +22 -53
  77. sky/serve/constants.py +2 -1
  78. sky/serve/controller.py +4 -2
  79. sky/serve/serve_state.py +444 -324
  80. sky/serve/serve_utils.py +77 -46
  81. sky/serve/server/core.py +13 -197
  82. sky/serve/server/impl.py +239 -2
  83. sky/serve/service.py +8 -3
  84. sky/server/common.py +18 -7
  85. sky/server/constants.py +1 -1
  86. sky/server/requests/executor.py +5 -3
  87. sky/server/requests/payloads.py +19 -0
  88. sky/setup_files/alembic.ini +4 -0
  89. sky/task.py +18 -11
  90. sky/templates/kubernetes-ray.yml.j2 +5 -0
  91. sky/templates/sky-serve-controller.yaml.j2 +1 -0
  92. sky/usage/usage_lib.py +8 -6
  93. sky/utils/annotations.py +8 -3
  94. sky/utils/cli_utils/status_utils.py +1 -1
  95. sky/utils/common_utils.py +11 -1
  96. sky/utils/db/db_utils.py +31 -0
  97. sky/utils/db/migration_utils.py +6 -2
  98. sky/utils/kubernetes/deploy_remote_cluster.py +3 -1
  99. sky/utils/resource_checker.py +162 -21
  100. sky/volumes/client/sdk.py +4 -4
  101. sky/workspaces/core.py +210 -6
  102. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/METADATA +19 -14
  103. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/RECORD +109 -103
  104. sky/client/sdk.pyi +0 -301
  105. sky/dashboard/out/_next/static/Gelsd19kVxXcX7aQQGsGu/_buildManifest.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/1043-75af48ca5d5aaf57.js +0 -1
  107. sky/dashboard/out/_next/static/chunks/1141-8678a9102cc5f67e.js +0 -11
  108. sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +0 -1
  109. sky/dashboard/out/_next/static/chunks/1871-ced1c14230cad6e1.js +0 -6
  110. sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
  112. sky/dashboard/out/_next/static/chunks/2622-951867535095b0eb.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/3785.0a173cd4393f0fef.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +0 -16
  116. sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/6601-2109d22e7861861c.js +0 -1
  118. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/9025.99f29acb7617963e.js +0 -6
  121. sky/dashboard/out/_next/static/chunks/938-bda2685db5eae6cf.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-7cb24da04ca00956.js +0 -11
  123. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-1e95993124dbfc57.js +0 -1
  124. sky/dashboard/out/_next/static/chunks/pages/config-d56e64f30db7b42e.js +0 -1
  125. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-90693cb88b5599a7.js +0 -11
  126. sky/dashboard/out/_next/static/chunks/pages/jobs-ab318e52eb4424a7.js +0 -1
  127. sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +0 -1
  128. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-35e0de5bca55e594.js +0 -1
  129. sky/dashboard/out/_next/static/chunks/pages/workspaces-062525fb5462acb6.js +0 -1
  130. sky/dashboard/out/_next/static/chunks/webpack-387626669badf82e.js +0 -1
  131. sky/dashboard/out/_next/static/css/b3227360726f12eb.css +0 -3
  132. /sky/dashboard/out/_next/static/{Gelsd19kVxXcX7aQQGsGu → -DXZksWqf2waNHeU9YTQe}/_ssgManifest.js +0 -0
  133. /sky/dashboard/out/_next/static/chunks/{6135-2d7ed3350659d073.js → 6135-85426374db04811e.js} +0 -0
  134. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/WHEEL +0 -0
  135. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/entry_points.txt +0 -0
  136. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/licenses/LICENSE +0 -0
  137. {skypilot_nightly-1.0.0.dev20250806.dist-info → skypilot_nightly-1.0.0.dev20250808.dist-info}/top_level.txt +0 -0
sky/jobs/server/server.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """REST API for managed jobs."""
2
2
 
3
+ import pathlib
4
+
3
5
  import fastapi
4
6
 
5
7
  from sky import sky_logging
@@ -117,7 +119,7 @@ async def pool_apply(request: fastapi.Request,
117
119
  request_body=jobs_pool_apply_body,
118
120
  func=core.pool_apply,
119
121
  schedule_type=api_requests.ScheduleType.LONG,
120
- request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
122
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
121
123
  )
122
124
 
123
125
 
@@ -130,7 +132,7 @@ async def pool_down(request: fastapi.Request,
130
132
  request_body=jobs_pool_down_body,
131
133
  func=core.pool_down,
132
134
  schedule_type=api_requests.ScheduleType.SHORT,
133
- request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
135
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
134
136
  )
135
137
 
136
138
 
@@ -144,5 +146,52 @@ async def pool_status(
144
146
  request_body=jobs_pool_status_body,
145
147
  func=core.pool_status,
146
148
  schedule_type=api_requests.ScheduleType.SHORT,
147
- request_cluster_name=common.SKY_SERVE_CONTROLLER_NAME,
149
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
150
+ )
151
+
152
+
153
+ @router.post('/pool_logs')
154
+ async def pool_tail_logs(
155
+ request: fastapi.Request, log_body: payloads.JobsPoolLogsBody,
156
+ background_tasks: fastapi.BackgroundTasks
157
+ ) -> fastapi.responses.StreamingResponse:
158
+ executor.schedule_request(
159
+ request_id=request.state.request_id,
160
+ request_name='jobs.pool_logs',
161
+ request_body=log_body,
162
+ func=core.pool_tail_logs,
163
+ schedule_type=api_requests.ScheduleType.SHORT,
164
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
165
+ )
166
+
167
+ request_task = api_requests.get_request(request.state.request_id)
168
+
169
+ return stream_utils.stream_response(
170
+ request_id=request_task.request_id,
171
+ logs_path=request_task.log_path,
172
+ background_tasks=background_tasks,
173
+ )
174
+
175
+
176
+ @router.post('/pool_sync-down-logs')
177
+ async def pool_download_logs(
178
+ request: fastapi.Request,
179
+ download_logs_body: payloads.JobsPoolDownloadLogsBody,
180
+ ) -> None:
181
+ user_hash = download_logs_body.env_vars[constants.USER_ID_ENV_VAR]
182
+ timestamp = sky_logging.get_run_timestamp()
183
+ logs_dir_on_api_server = (
184
+ pathlib.Path(server_common.api_server_user_logs_dir_prefix(user_hash)) /
185
+ 'pool' / f'{download_logs_body.pool_name}_{timestamp}')
186
+ logs_dir_on_api_server.mkdir(parents=True, exist_ok=True)
187
+ # We should reuse the original request body, so that the env vars, such as
188
+ # user hash, are kept the same.
189
+ download_logs_body.local_dir = str(logs_dir_on_api_server)
190
+ executor.schedule_request(
191
+ request_id=request.state.request_id,
192
+ request_name='jobs.pool_sync_down_logs',
193
+ request_body=download_logs_body,
194
+ func=core.pool_sync_down_logs,
195
+ schedule_type=api_requests.ScheduleType.SHORT,
196
+ request_cluster_name=common.JOB_CONTROLLER_NAME,
148
197
  )
sky/jobs/state.py CHANGED
@@ -107,6 +107,7 @@ job_info_table = sqlalchemy.Table(
107
107
  sqlalchemy.Column('job_id_on_pool_cluster',
108
108
  sqlalchemy.Integer,
109
109
  server_default=None),
110
+ sqlalchemy.Column('pool_hash', sqlalchemy.Text, server_default=None),
110
111
  )
111
112
 
112
113
  ha_recovery_script_table = sqlalchemy.Table(
@@ -225,6 +226,7 @@ def _get_jobs_dict(r: 'row.RowMapping') -> Dict[str, Any]:
225
226
  'pool': r['pool'],
226
227
  'current_cluster_name': r['current_cluster_name'],
227
228
  'job_id_on_pool_cluster': r['job_id_on_pool_cluster'],
229
+ 'pool_hash': r['pool_hash'],
228
230
  }
229
231
 
230
232
 
@@ -462,7 +464,8 @@ def set_job_info(job_id: int, name: str, workspace: str, entrypoint: str):
462
464
 
463
465
  @_init_db
464
466
  def set_job_info_without_job_id(name: str, workspace: str, entrypoint: str,
465
- pool: Optional[str]) -> int:
467
+ pool: Optional[str],
468
+ pool_hash: Optional[str]) -> int:
466
469
  assert _SQLALCHEMY_ENGINE is not None
467
470
  with orm.Session(_SQLALCHEMY_ENGINE) as session:
468
471
  if (_SQLALCHEMY_ENGINE.dialect.name ==
@@ -480,6 +483,7 @@ def set_job_info_without_job_id(name: str, workspace: str, entrypoint: str,
480
483
  workspace=workspace,
481
484
  entrypoint=entrypoint,
482
485
  pool=pool,
486
+ pool_hash=pool_hash,
483
487
  )
484
488
 
485
489
  if (_SQLALCHEMY_ENGINE.dialect.name ==
@@ -0,0 +1,35 @@
1
+ """add workspace column to cluster_history table
2
+
3
+ Revision ID: 002
4
+ Revises: 001
5
+ Create Date: 2025-08-06
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '002'
18
+ down_revision: Union[str, Sequence[str], None] = '001'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade() -> None:
24
+ """Upgrade schema."""
25
+ with op.get_context().autocommit_block():
26
+ db_utils.add_column_to_table_alembic('cluster_history',
27
+ 'workspace',
28
+ sa.Text(),
29
+ server_default=None)
30
+ pass
31
+
32
+
33
+ def downgrade() -> None:
34
+ """Downgrade schema."""
35
+ pass
@@ -0,0 +1,61 @@
1
+ """fix initial revision
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '003'
18
+ down_revision: Union[str, Sequence[str], None] = '002'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade() -> None:
24
+ """Upgrade schema."""
25
+ with op.get_context().autocommit_block():
26
+ # add missing columns to clusters table
27
+ db_utils.add_column_to_table_alembic('clusters',
28
+ 'storage_mounts_metadata',
29
+ sa.LargeBinary(),
30
+ server_default=None)
31
+ # Set the value to replace existing entries to 1 so that all the
32
+ # existing clusters before #2977 are considered as ever up, i.e:
33
+ # existing cluster's default (null) -> 1;
34
+ # new cluster's default -> 0;
35
+ # This is conservative for the existing clusters: even if some INIT
36
+ # clusters were never really UP, setting it to 1 means they won't be
37
+ # auto-deleted during any failover.
38
+ db_utils.add_column_to_table_alembic(
39
+ 'clusters',
40
+ 'cluster_ever_up',
41
+ sa.Integer(),
42
+ server_default='0',
43
+ value_to_replace_existing_entries=1)
44
+ db_utils.add_column_to_table_alembic('clusters',
45
+ 'status_updated_at',
46
+ sa.Integer(),
47
+ server_default=None)
48
+
49
+ # remove mistakenly added columns
50
+ db_utils.drop_column_from_table_alembic('clusters', 'launched_nodes')
51
+ db_utils.drop_column_from_table_alembic('clusters', 'disk_tier')
52
+ db_utils.drop_column_from_table_alembic('clusters',
53
+ 'config_hash_locked')
54
+ db_utils.drop_column_from_table_alembic('clusters', 'handle_locked')
55
+ db_utils.drop_column_from_table_alembic('clusters', 'num_failures')
56
+ db_utils.drop_column_from_table_alembic('clusters', 'configs')
57
+
58
+
59
+ def downgrade() -> None:
60
+ """Downgrade schema."""
61
+ pass
@@ -0,0 +1,34 @@
1
+ """Columns for whether the cluster is managed.
2
+
3
+ Revision ID: 004
4
+ Revises: 003
5
+ Create Date: 2025-08-07
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '004'
18
+ down_revision: Union[str, Sequence[str], None] = '003'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade():
24
+ """Add columns for whether the cluster is managed."""
25
+ with op.get_context().autocommit_block():
26
+ db_utils.add_column_to_table_alembic('clusters',
27
+ 'is_managed',
28
+ sa.Integer(),
29
+ server_default='0')
30
+
31
+
32
+ def downgrade():
33
+ """Remove columns for whether the cluster is managed."""
34
+ pass
@@ -0,0 +1,67 @@
1
+ """Initial schema for sky serve state database with backwards compatibility
2
+
3
+ Revision ID: 001
4
+ Revises:
5
+ Create Date: 2024-01-01 12:00:00.000000
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ import json
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.serve import constants
15
+ from sky.serve.serve_state import Base
16
+ from sky.utils.db import db_utils
17
+
18
+ # revision identifiers, used by Alembic.
19
+ revision = '001'
20
+ down_revision = None
21
+ branch_labels = None
22
+ depends_on = None
23
+
24
+
25
+ def upgrade():
26
+ """Create initial schema and add all backwards compatibility columns"""
27
+ with op.get_context().autocommit_block():
28
+ # Create all tables with their current schema
29
+ db_utils.add_tables_to_db_sqlalchemy(Base.metadata, op.get_bind())
30
+
31
+ # Add backwards compatibility columns using helper function that matches
32
+ # original add_column_to_table_sqlalchemy behavior exactly
33
+ db_utils.add_column_to_table_alembic('services',
34
+ 'requested_resources_str',
35
+ sa.Text())
36
+ db_utils.add_column_to_table_alembic(
37
+ 'services',
38
+ 'current_version',
39
+ sa.Integer(),
40
+ server_default=f'{constants.INITIAL_VERSION}')
41
+ db_utils.add_column_to_table_alembic('services',
42
+ 'active_versions',
43
+ sa.Text(),
44
+ server_default=json.dumps([]))
45
+ db_utils.add_column_to_table_alembic('services',
46
+ 'load_balancing_policy', sa.Text())
47
+ db_utils.add_column_to_table_alembic('services',
48
+ 'tls_encrypted',
49
+ sa.Integer(),
50
+ server_default='0')
51
+ db_utils.add_column_to_table_alembic('services',
52
+ 'pool',
53
+ sa.Integer(),
54
+ server_default='0')
55
+ db_utils.add_column_to_table_alembic(
56
+ 'services',
57
+ 'controller_pid',
58
+ sa.Integer(),
59
+ value_to_replace_existing_entries=-1)
60
+ db_utils.add_column_to_table_alembic('services', 'hash', sa.Text())
61
+ db_utils.add_column_to_table_alembic('services', 'entrypoint',
62
+ sa.Text())
63
+
64
+
65
+ def downgrade():
66
+ """Drop all tables"""
67
+ Base.metadata.drop_all(bind=op.get_bind())
@@ -0,0 +1,34 @@
1
+ """Adding a hash column for pool.
2
+
3
+ Revision ID: 003
4
+ Revises: 002
5
+ Create Date: 2025-07-18
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '003'
18
+ down_revision: Union[str, Sequence[str], None] = '002'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade():
24
+ """Add columns for pool hash."""
25
+ with op.get_context().autocommit_block():
26
+ db_utils.add_column_to_table_alembic('job_info',
27
+ 'pool_hash',
28
+ sa.Text(),
29
+ server_default=None)
30
+
31
+
32
+ def downgrade():
33
+ """Remove columns for pool hash."""
34
+ pass
sky/serve/client/impl.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """Implementation of SDK for SkyServe."""
2
2
  import json
3
3
  import typing
4
- from typing import List, Optional, Union
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
6
  import click
7
7
 
@@ -12,6 +12,8 @@ from sky.utils import admin_policy_utils
12
12
  from sky.utils import dag_utils
13
13
 
14
14
  if typing.TYPE_CHECKING:
15
+ import io
16
+
15
17
  import sky
16
18
  from sky.serve import serve_utils
17
19
 
@@ -23,7 +25,7 @@ def up(
23
25
  # Internal only:
24
26
  # pylint: disable=invalid-name
25
27
  _need_confirmation: bool = False
26
- ) -> server_common.RequestId:
28
+ ) -> server_common.RequestId[Tuple[str, str]]:
27
29
  assert not pool, 'Command `up` is not supported for pool.'
28
30
  # Avoid circular import.
29
31
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -67,7 +69,7 @@ def update(
67
69
  # Internal only:
68
70
  # pylint: disable=invalid-name
69
71
  _need_confirmation: bool = False
70
- ) -> server_common.RequestId:
72
+ ) -> server_common.RequestId[None]:
71
73
  assert not pool, 'Command `update` is not supported for pool.'
72
74
  # Avoid circular import.
73
75
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -110,7 +112,7 @@ def apply(
110
112
  # Internal only:
111
113
  # pylint: disable=invalid-name
112
114
  _need_confirmation: bool = False
113
- ) -> server_common.RequestId:
115
+ ) -> server_common.RequestId[None]:
114
116
  assert pool, 'Command `apply` is only supported for pool.'
115
117
  # Avoid circular import.
116
118
  from sky.client import sdk # pylint: disable=import-outside-toplevel
@@ -151,7 +153,7 @@ def down(
151
153
  all: bool = False, # pylint: disable=redefined-builtin
152
154
  purge: bool = False,
153
155
  pool: bool = False,
154
- ) -> server_common.RequestId:
156
+ ) -> server_common.RequestId[None]:
155
157
  if pool:
156
158
  body = payloads.JobsPoolDownBody(
157
159
  pool_names=service_names,
@@ -175,7 +177,7 @@ def down(
175
177
  def status(
176
178
  service_names: Optional[Union[str, List[str]]],
177
179
  pool: bool = False,
178
- ) -> server_common.RequestId:
180
+ ) -> server_common.RequestId[List[Dict[str, Any]]]:
179
181
  if pool:
180
182
  body = payloads.JobsPoolStatusBody(pool_names=service_names)
181
183
  else:
@@ -186,3 +188,88 @@ def status(
186
188
  json=json.loads(body.model_dump_json()),
187
189
  timeout=(5, None))
188
190
  return server_common.get_request_id(response)
191
+
192
+
193
+ def tail_logs(service_name: str,
194
+ target: Union[str, 'serve_utils.ServiceComponent'],
195
+ replica_id: Optional[int] = None,
196
+ follow: bool = True,
197
+ output_stream: Optional['io.TextIOBase'] = None,
198
+ tail: Optional[int] = None,
199
+ pool: bool = False) -> None:
200
+ # Avoid circular import.
201
+ from sky.client import sdk # pylint: disable=import-outside-toplevel
202
+
203
+ if pool:
204
+ body = payloads.JobsPoolLogsBody(
205
+ pool_name=service_name,
206
+ target=target,
207
+ worker_id=replica_id,
208
+ follow=follow,
209
+ tail=tail,
210
+ )
211
+ else:
212
+ body = payloads.ServeLogsBody(
213
+ service_name=service_name,
214
+ target=target,
215
+ replica_id=replica_id,
216
+ follow=follow,
217
+ tail=tail,
218
+ )
219
+ response = server_common.make_authenticated_request(
220
+ 'POST',
221
+ '/jobs/pool_logs' if pool else '/serve/logs',
222
+ json=json.loads(body.model_dump_json()),
223
+ timeout=(5, None),
224
+ stream=True)
225
+ request_id: server_common.RequestId[None] = server_common.get_request_id(
226
+ response)
227
+ return sdk.stream_response(request_id=request_id,
228
+ response=response,
229
+ output_stream=output_stream,
230
+ resumable=True)
231
+
232
+
233
+ def sync_down_logs(service_name: str,
234
+ local_dir: str,
235
+ *,
236
+ targets: Optional[Union[
237
+ str, 'serve_utils.ServiceComponent',
238
+ Sequence[Union[str,
239
+ 'serve_utils.ServiceComponent']]]] = None,
240
+ replica_ids: Optional[List[int]] = None,
241
+ tail: Optional[int] = None,
242
+ pool: bool = False) -> None:
243
+ # Avoid circular import.
244
+ from sky.client import sdk # pylint: disable=import-outside-toplevel
245
+
246
+ if pool:
247
+ body = payloads.JobsPoolDownloadLogsBody(
248
+ pool_name=service_name,
249
+ local_dir=local_dir,
250
+ targets=targets,
251
+ worker_ids=replica_ids,
252
+ tail=tail,
253
+ )
254
+ else:
255
+ body = payloads.ServeDownloadLogsBody(
256
+ service_name=service_name,
257
+ # No need to set here, since the server will override it
258
+ # to a directory on the API server.
259
+ local_dir=local_dir,
260
+ targets=targets,
261
+ replica_ids=replica_ids,
262
+ tail=tail,
263
+ )
264
+ response = server_common.make_authenticated_request(
265
+ 'POST',
266
+ '/jobs/pool_sync-down-logs' if pool else '/serve/sync-down-logs',
267
+ json=json.loads(body.model_dump_json()),
268
+ timeout=(5, None))
269
+ request_id: server_common.RequestId[str] = server_common.get_request_id(
270
+ response)
271
+ remote_dir = sdk.stream_and_get(request_id)
272
+
273
+ # Download from API server paths to the client's local_dir
274
+ client_common.download_logs_from_api_server([remote_dir], remote_dir,
275
+ local_dir)
sky/serve/client/sdk.py CHANGED
@@ -1,9 +1,8 @@
1
1
  """SDK for SkyServe."""
2
2
  import json
3
3
  import typing
4
- from typing import List, Optional, Union
4
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
5
5
 
6
- from sky.client import common as client_common
7
6
  from sky.serve.client import impl
8
7
  from sky.server import common as server_common
9
8
  from sky.server import rest
@@ -27,7 +26,7 @@ def up(
27
26
  # Internal only:
28
27
  # pylint: disable=invalid-name
29
28
  _need_confirmation: bool = False
30
- ) -> server_common.RequestId:
29
+ ) -> server_common.RequestId[Tuple[str, str]]:
31
30
  """Spins up a service.
32
31
 
33
32
  Please refer to the sky.cli.serve_up for the document.
@@ -62,7 +61,7 @@ def update(
62
61
  # Internal only:
63
62
  # pylint: disable=invalid-name
64
63
  _need_confirmation: bool = False
65
- ) -> server_common.RequestId:
64
+ ) -> server_common.RequestId[None]:
66
65
  """Updates an existing service.
67
66
 
68
67
  Please refer to the sky.cli.serve_update for the document.
@@ -95,7 +94,7 @@ def down(
95
94
  service_names: Optional[Union[str, List[str]]],
96
95
  all: bool = False, # pylint: disable=redefined-builtin
97
96
  purge: bool = False
98
- ) -> server_common.RequestId:
97
+ ) -> server_common.RequestId[None]:
99
98
  """Tears down a service.
100
99
 
101
100
  Please refer to the sky.cli.serve_down for the docs.
@@ -123,7 +122,7 @@ def down(
123
122
  @usage_lib.entrypoint
124
123
  @server_common.check_server_healthy_or_start
125
124
  def terminate_replica(service_name: str, replica_id: int,
126
- purge: bool) -> server_common.RequestId:
125
+ purge: bool) -> server_common.RequestId[None]:
127
126
  """Tears down a specific replica for the given service.
128
127
 
129
128
  Args:
@@ -157,8 +156,8 @@ def terminate_replica(service_name: str, replica_id: int,
157
156
  @usage_lib.entrypoint
158
157
  @server_common.check_server_healthy_or_start
159
158
  def status(
160
- service_names: Optional[Union[str,
161
- List[str]]]) -> server_common.RequestId:
159
+ service_names: Optional[Union[str, List[str]]]
160
+ ) -> server_common.RequestId[List[Dict[str, Any]]]:
162
161
  """Gets service statuses.
163
162
 
164
163
  If service_names is given, return those services. Otherwise, return all
@@ -290,27 +289,13 @@ def tail_logs(service_name: str,
290
289
  sky.exceptions.ClusterNotUpError: the sky serve controller is not up.
291
290
  ValueError: arguments not valid, or failed to tail the logs.
292
291
  """
293
- # Avoid circular import.
294
- from sky.client import sdk # pylint: disable=import-outside-toplevel
295
-
296
- body = payloads.ServeLogsBody(
297
- service_name=service_name,
298
- target=target,
299
- replica_id=replica_id,
300
- follow=follow,
301
- tail=tail,
302
- )
303
- response = server_common.make_authenticated_request(
304
- 'POST',
305
- '/serve/logs',
306
- json=json.loads(body.model_dump_json()),
307
- timeout=(5, None),
308
- stream=True)
309
- request_id = server_common.get_request_id(response)
310
- return sdk.stream_response(request_id=request_id,
311
- response=response,
312
- output_stream=output_stream,
313
- resumable=True)
292
+ return impl.tail_logs(service_name,
293
+ target,
294
+ replica_id,
295
+ follow,
296
+ output_stream,
297
+ tail,
298
+ pool=False)
314
299
 
315
300
 
316
301
  @usage_lib.entrypoint
@@ -320,8 +305,8 @@ def sync_down_logs(service_name: str,
320
305
  *,
321
306
  targets: Optional[Union[
322
307
  str, 'serve_utils.ServiceComponent',
323
- List[Union[str,
324
- 'serve_utils.ServiceComponent']]]] = None,
308
+ Sequence[Union[str,
309
+ 'serve_utils.ServiceComponent']]]] = None,
325
310
  replica_ids: Optional[List[int]] = None,
326
311
  tail: Optional[int] = None) -> None:
327
312
  """Sync down logs from the service components to a local directory.
@@ -352,25 +337,9 @@ def sync_down_logs(service_name: str,
352
337
  sky.exceptions.ClusterNotUpError: If the controller is not up.
353
338
  ValueError: Arguments not valid.
354
339
  """
355
- # Avoid circular import.
356
- from sky.client import sdk # pylint: disable=import-outside-toplevel
357
-
358
- body = payloads.ServeDownloadLogsBody(
359
- service_name=service_name,
360
- # No need to set here, since the server will override it
361
- # to a directory on the API server.
362
- local_dir=local_dir,
363
- targets=targets,
364
- replica_ids=replica_ids,
365
- tail=tail,
366
- )
367
- response = server_common.make_authenticated_request(
368
- 'POST',
369
- '/serve/sync-down-logs',
370
- json=json.loads(body.model_dump_json()),
371
- timeout=(5, None))
372
- remote_dir = sdk.stream_and_get(server_common.get_request_id(response))
373
-
374
- # Download from API server paths to the client's local_dir
375
- client_common.download_logs_from_api_server([remote_dir], remote_dir,
376
- local_dir)
340
+ return impl.sync_down_logs(service_name,
341
+ local_dir,
342
+ targets=targets,
343
+ replica_ids=replica_ids,
344
+ tail=tail,
345
+ pool=False)
sky/serve/constants.py CHANGED
@@ -106,7 +106,8 @@ REPLICA_ID_ENV_VAR = 'SKYPILOT_SERVE_REPLICA_ID'
106
106
  # v2.0 - Added template-replica feature.
107
107
  # v3.0 - Added cluster pool.
108
108
  # v4.0 - Added pool argument to wait_service_registration.
109
- SERVE_VERSION = 4
109
+ # v5.0 - Added pool argument to stream_serve_process_logs & stream_replica_logs.
110
+ SERVE_VERSION = 5
110
111
 
111
112
  TERMINATE_REPLICA_VERSION_MISMATCH_ERROR = (
112
113
  'The version of service is outdated and does not support manually '
sky/serve/controller.py CHANGED
@@ -27,11 +27,12 @@ from sky.utils import ux_utils
27
27
  logger = sky_logging.init_logger(__name__)
28
28
 
29
29
 
30
- class SuppressSuccessGetAccessLogsFilter(logging.Filter):
30
+ class AutoscalerInfoFilter(logging.Filter):
31
31
 
32
32
  def filter(self, record: logging.LogRecord) -> bool:
33
33
  message = record.getMessage()
34
- return not ('GET' in message and '200' in message)
34
+ return not ('GET' in message and '200' in message and
35
+ '/autoscaler/info' in message)
35
36
 
36
37
 
37
38
  class SkyServeController:
@@ -61,6 +62,7 @@ class SkyServeController:
61
62
  uvicorn_access_logger = logging.getLogger('uvicorn.access')
62
63
  for handler in uvicorn_access_logger.handlers:
63
64
  handler.setFormatter(sky_logging.FORMATTER)
65
+ handler.addFilter(AutoscalerInfoFilter())
64
66
  yield
65
67
 
66
68
  def _run_autoscaler(self):