skypilot-nightly 1.0.0.dev20250926__py3-none-any.whl → 1.0.0.dev20251001__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of skypilot-nightly has been flagged as potentially problematic.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +43 -14
- sky/backends/cloud_vm_ray_backend.py +153 -38
- sky/check.py +0 -29
- sky/client/cli/command.py +48 -26
- sky/client/cli/table_utils.py +91 -0
- sky/client/sdk.py +14 -23
- sky/client/sdk_async.py +5 -5
- sky/core.py +18 -20
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{3294.03e02ae73455f48e.js → 3294.93d9336bdc032b3a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-07349868f7905d37.js → [pool]-509b2977a6373bf6.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-8e64d11e58eab5cb.js → webpack-4f0c389a4ce5fd9c.js} +1 -1
- sky/dashboard/out/_next/static/{VXU6_xE28M55BOdwmUUJS → m3YT2i5s6v4SsIdYc8WZa}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +11 -0
- sky/data/storage_utils.py +1 -45
- sky/execution.py +0 -1
- sky/global_user_state.py +3 -3
- sky/jobs/client/sdk.py +3 -2
- sky/jobs/controller.py +15 -0
- sky/jobs/server/core.py +120 -28
- sky/jobs/server/server.py +1 -1
- sky/jobs/server/utils.py +65 -32
- sky/jobs/state.py +145 -3
- sky/jobs/utils.py +87 -8
- sky/provision/kubernetes/instance.py +1 -1
- sky/schemas/api/responses.py +73 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +70 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +262 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/serve/serve_utils.py +16 -0
- sky/serve/server/core.py +1 -1
- sky/serve/server/impl.py +6 -6
- sky/server/common.py +2 -1
- sky/server/requests/serializers/decoders.py +10 -6
- sky/server/requests/serializers/encoders.py +13 -8
- sky/skylet/constants.py +1 -1
- sky/skylet/job_lib.py +2 -32
- sky/skylet/log_lib.py +211 -0
- sky/skylet/log_lib.pyi +30 -1
- sky/skylet/services.py +208 -2
- sky/skylet/skylet.py +3 -0
- sky/task.py +4 -0
- sky/utils/cluster_utils.py +23 -5
- sky/utils/command_runner.py +21 -5
- sky/utils/command_runner.pyi +11 -0
- sky/utils/volume.py +5 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/METADATA +35 -35
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/RECORD +70 -66
- sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +0 -1
- /sky/dashboard/out/_next/static/{VXU6_xE28M55BOdwmUUJS → m3YT2i5s6v4SsIdYc8WZa}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/top_level.txt +0 -0
sky/schemas/generated/managed_jobsv1_pb2.pyi
ADDED
@@ -0,0 +1,262 @@
+from google.protobuf.internal import containers as _containers
+from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
+from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
+
+DESCRIPTOR: _descriptor.FileDescriptor
+
+class ManagedJobStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = ()
+    MANAGED_JOB_STATUS_UNSPECIFIED: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_PENDING: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_SUBMITTED: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_STARTING: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_RUNNING: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_RECOVERING: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_CANCELLING: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_SUCCEEDED: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_CANCELLED: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_FAILED: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_FAILED_SETUP: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_FAILED_PRECHECKS: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_FAILED_NO_RESOURCE: _ClassVar[ManagedJobStatus]
+    MANAGED_JOB_STATUS_FAILED_CONTROLLER: _ClassVar[ManagedJobStatus]
+
+class ManagedJobScheduleState(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
+    __slots__ = ()
+    MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_INVALID: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_INACTIVE: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_WAITING: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_LAUNCHING: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_ALIVE: _ClassVar[ManagedJobScheduleState]
+    MANAGED_JOB_SCHEDULE_STATE_DONE: _ClassVar[ManagedJobScheduleState]
+MANAGED_JOB_STATUS_UNSPECIFIED: ManagedJobStatus
+MANAGED_JOB_STATUS_PENDING: ManagedJobStatus
+MANAGED_JOB_STATUS_SUBMITTED: ManagedJobStatus
+MANAGED_JOB_STATUS_STARTING: ManagedJobStatus
+MANAGED_JOB_STATUS_RUNNING: ManagedJobStatus
+MANAGED_JOB_STATUS_RECOVERING: ManagedJobStatus
+MANAGED_JOB_STATUS_CANCELLING: ManagedJobStatus
+MANAGED_JOB_STATUS_SUCCEEDED: ManagedJobStatus
+MANAGED_JOB_STATUS_CANCELLED: ManagedJobStatus
+MANAGED_JOB_STATUS_FAILED: ManagedJobStatus
+MANAGED_JOB_STATUS_FAILED_SETUP: ManagedJobStatus
+MANAGED_JOB_STATUS_FAILED_PRECHECKS: ManagedJobStatus
+MANAGED_JOB_STATUS_FAILED_NO_RESOURCE: ManagedJobStatus
+MANAGED_JOB_STATUS_FAILED_CONTROLLER: ManagedJobStatus
+MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_INVALID: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_INACTIVE: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_WAITING: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_LAUNCHING: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_ALIVE: ManagedJobScheduleState
+MANAGED_JOB_SCHEDULE_STATE_DONE: ManagedJobScheduleState
+
+class JobIds(_message.Message):
+    __slots__ = ("ids",)
+    IDS_FIELD_NUMBER: _ClassVar[int]
+    ids: _containers.RepeatedScalarFieldContainer[int]
+    def __init__(self, ids: _Optional[_Iterable[int]] = ...) -> None: ...
+
+class UserHashes(_message.Message):
+    __slots__ = ("hashes",)
+    HASHES_FIELD_NUMBER: _ClassVar[int]
+    hashes: _containers.RepeatedScalarFieldContainer[str]
+    def __init__(self, hashes: _Optional[_Iterable[str]] = ...) -> None: ...
+
+class Statuses(_message.Message):
+    __slots__ = ("statuses",)
+    STATUSES_FIELD_NUMBER: _ClassVar[int]
+    statuses: _containers.RepeatedScalarFieldContainer[str]
+    def __init__(self, statuses: _Optional[_Iterable[str]] = ...) -> None: ...
+
+class GetVersionRequest(_message.Message):
+    __slots__ = ()
+    def __init__(self) -> None: ...
+
+class GetVersionResponse(_message.Message):
+    __slots__ = ("controller_version",)
+    CONTROLLER_VERSION_FIELD_NUMBER: _ClassVar[int]
+    controller_version: str
+    def __init__(self, controller_version: _Optional[str] = ...) -> None: ...
+
+class GetJobTableRequest(_message.Message):
+    __slots__ = ("skip_finished", "accessible_workspaces", "job_ids", "workspace_match", "name_match", "pool_match", "page", "limit", "user_hashes", "statuses", "show_jobs_without_user_hash")
+    SKIP_FINISHED_FIELD_NUMBER: _ClassVar[int]
+    ACCESSIBLE_WORKSPACES_FIELD_NUMBER: _ClassVar[int]
+    JOB_IDS_FIELD_NUMBER: _ClassVar[int]
+    WORKSPACE_MATCH_FIELD_NUMBER: _ClassVar[int]
+    NAME_MATCH_FIELD_NUMBER: _ClassVar[int]
+    POOL_MATCH_FIELD_NUMBER: _ClassVar[int]
+    PAGE_FIELD_NUMBER: _ClassVar[int]
+    LIMIT_FIELD_NUMBER: _ClassVar[int]
+    USER_HASHES_FIELD_NUMBER: _ClassVar[int]
+    STATUSES_FIELD_NUMBER: _ClassVar[int]
+    SHOW_JOBS_WITHOUT_USER_HASH_FIELD_NUMBER: _ClassVar[int]
+    skip_finished: bool
+    accessible_workspaces: _containers.RepeatedScalarFieldContainer[str]
+    job_ids: JobIds
+    workspace_match: str
+    name_match: str
+    pool_match: str
+    page: int
+    limit: int
+    user_hashes: UserHashes
+    statuses: Statuses
+    show_jobs_without_user_hash: bool
+    def __init__(self, skip_finished: bool = ..., accessible_workspaces: _Optional[_Iterable[str]] = ..., job_ids: _Optional[_Union[JobIds, _Mapping]] = ..., workspace_match: _Optional[str] = ..., name_match: _Optional[str] = ..., pool_match: _Optional[str] = ..., page: _Optional[int] = ..., limit: _Optional[int] = ..., user_hashes: _Optional[_Union[UserHashes, _Mapping]] = ..., statuses: _Optional[_Union[Statuses, _Mapping]] = ..., show_jobs_without_user_hash: bool = ...) -> None: ...
+
+class ManagedJobInfo(_message.Message):
+    __slots__ = ("job_id", "task_id", "job_name", "task_name", "job_duration", "workspace", "status", "schedule_state", "resources", "cluster_resources", "cluster_resources_full", "cloud", "region", "infra", "accelerators", "recovery_count", "details", "failure_reason", "user_name", "user_hash", "submitted_at", "start_at", "end_at", "user_yaml", "entrypoint", "metadata", "pool", "pool_hash")
+    class AcceleratorsEntry(_message.Message):
+        __slots__ = ("key", "value")
+        KEY_FIELD_NUMBER: _ClassVar[int]
+        VALUE_FIELD_NUMBER: _ClassVar[int]
+        key: str
+        value: float
+        def __init__(self, key: _Optional[str] = ..., value: _Optional[float] = ...) -> None: ...
+    class MetadataEntry(_message.Message):
+        __slots__ = ("key", "value")
+        KEY_FIELD_NUMBER: _ClassVar[int]
+        VALUE_FIELD_NUMBER: _ClassVar[int]
+        key: str
+        value: str
+        def __init__(self, key: _Optional[str] = ..., value: _Optional[str] = ...) -> None: ...
+    JOB_ID_FIELD_NUMBER: _ClassVar[int]
+    TASK_ID_FIELD_NUMBER: _ClassVar[int]
+    JOB_NAME_FIELD_NUMBER: _ClassVar[int]
+    TASK_NAME_FIELD_NUMBER: _ClassVar[int]
+    JOB_DURATION_FIELD_NUMBER: _ClassVar[int]
+    WORKSPACE_FIELD_NUMBER: _ClassVar[int]
+    STATUS_FIELD_NUMBER: _ClassVar[int]
+    SCHEDULE_STATE_FIELD_NUMBER: _ClassVar[int]
+    RESOURCES_FIELD_NUMBER: _ClassVar[int]
+    CLUSTER_RESOURCES_FIELD_NUMBER: _ClassVar[int]
+    CLUSTER_RESOURCES_FULL_FIELD_NUMBER: _ClassVar[int]
+    CLOUD_FIELD_NUMBER: _ClassVar[int]
+    REGION_FIELD_NUMBER: _ClassVar[int]
+    INFRA_FIELD_NUMBER: _ClassVar[int]
+    ACCELERATORS_FIELD_NUMBER: _ClassVar[int]
+    RECOVERY_COUNT_FIELD_NUMBER: _ClassVar[int]
+    DETAILS_FIELD_NUMBER: _ClassVar[int]
+    FAILURE_REASON_FIELD_NUMBER: _ClassVar[int]
+    USER_NAME_FIELD_NUMBER: _ClassVar[int]
+    USER_HASH_FIELD_NUMBER: _ClassVar[int]
+    SUBMITTED_AT_FIELD_NUMBER: _ClassVar[int]
+    START_AT_FIELD_NUMBER: _ClassVar[int]
+    END_AT_FIELD_NUMBER: _ClassVar[int]
+    USER_YAML_FIELD_NUMBER: _ClassVar[int]
+    ENTRYPOINT_FIELD_NUMBER: _ClassVar[int]
+    METADATA_FIELD_NUMBER: _ClassVar[int]
+    POOL_FIELD_NUMBER: _ClassVar[int]
+    POOL_HASH_FIELD_NUMBER: _ClassVar[int]
+    job_id: int
+    task_id: int
+    job_name: str
+    task_name: str
+    job_duration: float
+    workspace: str
+    status: ManagedJobStatus
+    schedule_state: ManagedJobScheduleState
+    resources: str
+    cluster_resources: str
+    cluster_resources_full: str
+    cloud: str
+    region: str
+    infra: str
+    accelerators: _containers.ScalarMap[str, float]
+    recovery_count: int
+    details: str
+    failure_reason: str
+    user_name: str
+    user_hash: str
+    submitted_at: float
+    start_at: float
+    end_at: float
+    user_yaml: str
+    entrypoint: str
+    metadata: _containers.ScalarMap[str, str]
+    pool: str
+    pool_hash: str
+    def __init__(self, job_id: _Optional[int] = ..., task_id: _Optional[int] = ..., job_name: _Optional[str] = ..., task_name: _Optional[str] = ..., job_duration: _Optional[float] = ..., workspace: _Optional[str] = ..., status: _Optional[_Union[ManagedJobStatus, str]] = ..., schedule_state: _Optional[_Union[ManagedJobScheduleState, str]] = ..., resources: _Optional[str] = ..., cluster_resources: _Optional[str] = ..., cluster_resources_full: _Optional[str] = ..., cloud: _Optional[str] = ..., region: _Optional[str] = ..., infra: _Optional[str] = ..., accelerators: _Optional[_Mapping[str, float]] = ..., recovery_count: _Optional[int] = ..., details: _Optional[str] = ..., failure_reason: _Optional[str] = ..., user_name: _Optional[str] = ..., user_hash: _Optional[str] = ..., submitted_at: _Optional[float] = ..., start_at: _Optional[float] = ..., end_at: _Optional[float] = ..., user_yaml: _Optional[str] = ..., entrypoint: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., pool: _Optional[str] = ..., pool_hash: _Optional[str] = ...) -> None: ...
+
+class GetJobTableResponse(_message.Message):
+    __slots__ = ("jobs", "total", "total_no_filter", "status_counts")
+    class StatusCountsEntry(_message.Message):
+        __slots__ = ("key", "value")
+        KEY_FIELD_NUMBER: _ClassVar[int]
+        VALUE_FIELD_NUMBER: _ClassVar[int]
+        key: str
+        value: int
+        def __init__(self, key: _Optional[str] = ..., value: _Optional[int] = ...) -> None: ...
+    JOBS_FIELD_NUMBER: _ClassVar[int]
+    TOTAL_FIELD_NUMBER: _ClassVar[int]
+    TOTAL_NO_FILTER_FIELD_NUMBER: _ClassVar[int]
+    STATUS_COUNTS_FIELD_NUMBER: _ClassVar[int]
+    jobs: _containers.RepeatedCompositeFieldContainer[ManagedJobInfo]
+    total: int
+    total_no_filter: int
+    status_counts: _containers.ScalarMap[str, int]
+    def __init__(self, jobs: _Optional[_Iterable[_Union[ManagedJobInfo, _Mapping]]] = ..., total: _Optional[int] = ..., total_no_filter: _Optional[int] = ..., status_counts: _Optional[_Mapping[str, int]] = ...) -> None: ...
+
+class GetAllJobIdsByNameRequest(_message.Message):
+    __slots__ = ("job_name",)
+    JOB_NAME_FIELD_NUMBER: _ClassVar[int]
+    job_name: str
+    def __init__(self, job_name: _Optional[str] = ...) -> None: ...
+
+class GetAllJobIdsByNameResponse(_message.Message):
+    __slots__ = ("job_ids",)
+    JOB_IDS_FIELD_NUMBER: _ClassVar[int]
+    job_ids: _containers.RepeatedScalarFieldContainer[int]
+    def __init__(self, job_ids: _Optional[_Iterable[int]] = ...) -> None: ...
+
+class CancelJobsRequest(_message.Message):
+    __slots__ = ("current_workspace", "user_hash", "all_users", "job_ids", "job_name", "pool_name")
+    CURRENT_WORKSPACE_FIELD_NUMBER: _ClassVar[int]
+    USER_HASH_FIELD_NUMBER: _ClassVar[int]
+    ALL_USERS_FIELD_NUMBER: _ClassVar[int]
+    JOB_IDS_FIELD_NUMBER: _ClassVar[int]
+    JOB_NAME_FIELD_NUMBER: _ClassVar[int]
+    POOL_NAME_FIELD_NUMBER: _ClassVar[int]
+    current_workspace: str
+    user_hash: str
+    all_users: bool
+    job_ids: JobIds
+    job_name: str
+    pool_name: str
+    def __init__(self, current_workspace: _Optional[str] = ..., user_hash: _Optional[str] = ..., all_users: bool = ..., job_ids: _Optional[_Union[JobIds, _Mapping]] = ..., job_name: _Optional[str] = ..., pool_name: _Optional[str] = ...) -> None: ...
+
+class CancelJobsResponse(_message.Message):
+    __slots__ = ("message",)
+    MESSAGE_FIELD_NUMBER: _ClassVar[int]
+    message: str
+    def __init__(self, message: _Optional[str] = ...) -> None: ...
+
+class StreamLogsRequest(_message.Message):
+    __slots__ = ("job_name", "job_id", "follow", "controller", "tail")
+    JOB_NAME_FIELD_NUMBER: _ClassVar[int]
+    JOB_ID_FIELD_NUMBER: _ClassVar[int]
+    FOLLOW_FIELD_NUMBER: _ClassVar[int]
+    CONTROLLER_FIELD_NUMBER: _ClassVar[int]
+    TAIL_FIELD_NUMBER: _ClassVar[int]
+    job_name: str
+    job_id: int
+    follow: bool
+    controller: bool
+    tail: int
+    def __init__(self, job_name: _Optional[str] = ..., job_id: _Optional[int] = ..., follow: bool = ..., controller: bool = ..., tail: _Optional[int] = ...) -> None: ...
+
+class StreamLogsResponse(_message.Message):
+    __slots__ = ("log_line", "exit_code")
+    LOG_LINE_FIELD_NUMBER: _ClassVar[int]
+    EXIT_CODE_FIELD_NUMBER: _ClassVar[int]
+    log_line: str
+    exit_code: int
+    def __init__(self, log_line: _Optional[str] = ..., exit_code: _Optional[int] = ...) -> None: ...
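These stubs give managed-jobs clients a typed surface over the new service. As a rough sketch of how the generated messages compose (not SkyPilot's actual call site; the channel target below is an assumed placeholder):

import grpc

from sky.schemas.generated import managed_jobsv1_pb2 as pb2
from sky.schemas.generated import managed_jobsv1_pb2_grpc as pb2_grpc

# Assumed address; SkyPilot wires the real channel to the jobs controller.
with grpc.insecure_channel('localhost:50051') as channel:
    stub = pb2_grpc.ManagedJobsServiceStub(channel)
    request = pb2.GetJobTableRequest(
        skip_finished=True,
        accessible_workspaces=['default'],  # example workspace name
        page=1,
        limit=50,
    )
    response = stub.GetJobTable(request)
    for job in response.jobs:
        print(job.job_id, job.job_name, pb2.ManagedJobStatus.Name(job.status))

The page, limit, and statuses fields correspond one-to-one with the filters declared on GetJobTableRequest above.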
sky/schemas/generated/managed_jobsv1_pb2_grpc.py
ADDED
@@ -0,0 +1,278 @@
+# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
+"""Client and server classes corresponding to protobuf-defined services."""
+import grpc
+import warnings
+
+from sky.schemas.generated import managed_jobsv1_pb2 as sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2
+
+GRPC_GENERATED_VERSION = '1.63.0'
+GRPC_VERSION = grpc.__version__
+EXPECTED_ERROR_RELEASE = '1.65.0'
+SCHEDULED_RELEASE_DATE = 'June 25, 2024'
+_version_not_supported = False
+
+try:
+    from grpc._utilities import first_version_is_lower
+    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
+except ImportError:
+    _version_not_supported = True
+
+if _version_not_supported:
+    warnings.warn(
+        f'The grpc package installed is at version {GRPC_VERSION},'
+        + f' but the generated code in sky/schemas/generated/managed_jobsv1_pb2_grpc.py depends on'
+        + f' grpcio>={GRPC_GENERATED_VERSION}.'
+        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
+        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+        + f' This warning will become an error in {EXPECTED_ERROR_RELEASE},'
+        + f' scheduled for release on {SCHEDULED_RELEASE_DATE}.',
+        RuntimeWarning
+    )
+
+
+class ManagedJobsServiceStub(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def __init__(self, channel):
+        """Constructor.
+
+        Args:
+            channel: A grpc.Channel.
+        """
+        self.GetVersion = channel.unary_unary(
+                '/managed_jobs.v1.ManagedJobsService/GetVersion',
+                request_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionRequest.SerializeToString,
+                response_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionResponse.FromString,
+                _registered_method=True)
+        self.GetJobTable = channel.unary_unary(
+                '/managed_jobs.v1.ManagedJobsService/GetJobTable',
+                request_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableRequest.SerializeToString,
+                response_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableResponse.FromString,
+                _registered_method=True)
+        self.GetAllJobIdsByName = channel.unary_unary(
+                '/managed_jobs.v1.ManagedJobsService/GetAllJobIdsByName',
+                request_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameRequest.SerializeToString,
+                response_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameResponse.FromString,
+                _registered_method=True)
+        self.CancelJobs = channel.unary_unary(
+                '/managed_jobs.v1.ManagedJobsService/CancelJobs',
+                request_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsRequest.SerializeToString,
+                response_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsResponse.FromString,
+                _registered_method=True)
+        self.StreamLogs = channel.unary_stream(
+                '/managed_jobs.v1.ManagedJobsService/StreamLogs',
+                request_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsRequest.SerializeToString,
+                response_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsResponse.FromString,
+                _registered_method=True)
+
+
+class ManagedJobsServiceServicer(object):
+    """Missing associated documentation comment in .proto file."""
+
+    def GetVersion(self, request, context):
+        """Get controller version.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def GetJobTable(self, request, context):
+        """Get the managed job queue with advanced filtering.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def GetAllJobIdsByName(self, request, context):
+        """Get all job IDs by name.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def CancelJobs(self, request, context):
+        """Cancel managed jobs.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+    def StreamLogs(self, request, context):
+        """Stream managed job logs.
+        """
+        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
+        context.set_details('Method not implemented!')
+        raise NotImplementedError('Method not implemented!')
+
+
+def add_ManagedJobsServiceServicer_to_server(servicer, server):
+    rpc_method_handlers = {
+            'GetVersion': grpc.unary_unary_rpc_method_handler(
+                    servicer.GetVersion,
+                    request_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionRequest.FromString,
+                    response_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionResponse.SerializeToString,
+            ),
+            'GetJobTable': grpc.unary_unary_rpc_method_handler(
+                    servicer.GetJobTable,
+                    request_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableRequest.FromString,
+                    response_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableResponse.SerializeToString,
+            ),
+            'GetAllJobIdsByName': grpc.unary_unary_rpc_method_handler(
+                    servicer.GetAllJobIdsByName,
+                    request_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameRequest.FromString,
+                    response_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameResponse.SerializeToString,
+            ),
+            'CancelJobs': grpc.unary_unary_rpc_method_handler(
+                    servicer.CancelJobs,
+                    request_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsRequest.FromString,
+                    response_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsResponse.SerializeToString,
+            ),
+            'StreamLogs': grpc.unary_stream_rpc_method_handler(
+                    servicer.StreamLogs,
+                    request_deserializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsRequest.FromString,
+                    response_serializer=sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsResponse.SerializeToString,
+            ),
+    }
+    generic_handler = grpc.method_handlers_generic_handler(
+            'managed_jobs.v1.ManagedJobsService', rpc_method_handlers)
+    server.add_generic_rpc_handlers((generic_handler,))
+
+
+# This class is part of an EXPERIMENTAL API.
+class ManagedJobsService(object):
+    """Missing associated documentation comment in .proto file."""
+
+    @staticmethod
+    def GetVersion(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/managed_jobs.v1.ManagedJobsService/GetVersion',
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionRequest.SerializeToString,
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetVersionResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
+
+    @staticmethod
+    def GetJobTable(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/managed_jobs.v1.ManagedJobsService/GetJobTable',
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableRequest.SerializeToString,
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetJobTableResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
+
+    @staticmethod
+    def GetAllJobIdsByName(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/managed_jobs.v1.ManagedJobsService/GetAllJobIdsByName',
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameRequest.SerializeToString,
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.GetAllJobIdsByNameResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
+
+    @staticmethod
+    def CancelJobs(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_unary(
+            request,
+            target,
+            '/managed_jobs.v1.ManagedJobsService/CancelJobs',
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsRequest.SerializeToString,
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.CancelJobsResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
+
+    @staticmethod
+    def StreamLogs(request,
+            target,
+            options=(),
+            channel_credentials=None,
+            call_credentials=None,
+            insecure=False,
+            compression=None,
+            wait_for_ready=None,
+            timeout=None,
+            metadata=None):
+        return grpc.experimental.unary_stream(
+            request,
+            target,
+            '/managed_jobs.v1.ManagedJobsService/StreamLogs',
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsRequest.SerializeToString,
+            sky_dot_schemas_dot_generated_dot_managed__jobsv1__pb2.StreamLogsResponse.FromString,
+            options,
+            channel_credentials,
+            insecure,
+            call_credentials,
+            compression,
+            wait_for_ready,
+            timeout,
+            metadata,
+            _registered_method=True)
sky/serve/serve_utils.py
CHANGED
@@ -408,6 +408,22 @@ def validate_service_task(task: 'sky.Task', pool: bool) -> None:
                 f'{sys_name} will replenish preempted spot '
                 f'with {policy_description} instances.')
 
+    if pool:
+        accelerators = set()
+        for resource in task.resources:
+            if resource.accelerators is not None:
+                if isinstance(resource.accelerators, str):
+                    accelerators.add(resource.accelerators)
+                elif isinstance(resource.accelerators, dict):
+                    accelerators.update(resource.accelerators.keys())
+                elif isinstance(resource.accelerators, list):
+                    accelerators.update(resource.accelerators)
+        if len(accelerators) > 1:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError('Heterogeneous clusters are not supported for '
+                                 'cluster pools; please specify one accelerator '
+                                 'for all workers.')
+
     # Try to create a spot placer from the task yaml. Check if the task yaml
     # is valid for spot placer.
     spot_placer.SpotPlacer.from_task(task.service, task)
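The new pool branch collects every accelerator name across the task's candidate resources and rejects the task if more than one distinct type appears. A sketch of what the check accepts and rejects, using the public sky API (accelerator names are arbitrary examples):

import sky

# Rejected for pools: two candidate resources with different accelerators.
heterogeneous = sky.Task(run='echo hi')
heterogeneous.set_resources({
    sky.Resources(accelerators='A100:1'),
    sky.Resources(accelerators='V100:1'),
})

# Accepted: a single accelerator type shared by all workers.
homogeneous = sky.Task(run='echo hi')
homogeneous.set_resources(sky.Resources(accelerators='A100:1'))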
sky/serve/server/core.py
CHANGED
@@ -117,7 +117,7 @@ def terminate_replica(service_name: str, replica_id: int, purge: bool) -> None:
     assert isinstance(handle, backends.CloudVmRayResourceHandle)
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             stdout = serve_rpc_utils.RpcRunner.terminate_replica(
                 handle, service_name, replica_id, purge)
sky/serve/server/impl.py
CHANGED
@@ -89,7 +89,7 @@ def _get_service_record(
     assert isinstance(handle, backends.CloudVmRayResourceHandle)
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             service_statuses = serve_rpc_utils.RpcRunner.get_service_status(
                 handle, [service_name], pool)
@@ -589,7 +589,7 @@ def update(
 
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             current_version = serve_rpc_utils.RpcRunner.add_version(
                 handle, service_name)
@@ -636,7 +636,7 @@ def update(
 
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             serve_rpc_utils.RpcRunner.update_service(
                 handle, service_name, current_version, mode, pool)
@@ -730,7 +730,7 @@ def down(
     assert isinstance(handle, backends.CloudVmRayResourceHandle)
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             stdout = serve_rpc_utils.RpcRunner.terminate_services(
                 handle, service_names, purge, pool)
@@ -792,7 +792,7 @@ def status(
     assert isinstance(handle, backends.CloudVmRayResourceHandle)
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             service_records = serve_rpc_utils.RpcRunner.get_service_status(
                 handle, service_names, pool)
@@ -928,7 +928,7 @@ def _get_all_replica_targets(
     assert isinstance(handle, backends.CloudVmRayResourceHandle)
     use_legacy = not handle.is_grpc_enabled_with_flag
 
-    if
+    if not use_legacy:
         try:
             service_records = serve_rpc_utils.RpcRunner.get_service_status(
                 handle, [service_name], pool)
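Each hunk above applies the same fix: the gRPC path should run only when the handle advertises gRPC support, with the legacy path as the default. The intended shape, sketched with hypothetical stand-ins (the helper names and the fall-back-on-failure policy are assumptions, since the except blocks are elided here):

import grpc


def rpc_get_service_status(handle, service_names, pool):
    raise grpc.RpcError()  # hypothetical gRPC transport


def legacy_get_service_status(handle, service_names, pool):
    return []  # hypothetical legacy (SSH/exec) transport


def get_service_status(handle, service_names, pool):
    use_legacy = not handle.is_grpc_enabled_with_flag
    if not use_legacy:
        try:
            return rpc_get_service_status(handle, service_names, pool)
        except grpc.RpcError:
            use_legacy = True  # assumed policy: fall back when the RPC fails
    return legacy_get_service_status(handle, service_names, pool)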
sky/server/common.py
CHANGED
@@ -780,6 +780,7 @@ def check_server_healthy_or_start_fn(deploy: bool = False,
             os.path.expanduser(constants.API_SERVER_CREATION_LOCK_PATH)):
         # Check again if server is already running. Other processes may
         # have started the server while we were waiting for the lock.
+        get_api_server_status.cache_clear()  # type: ignore[attr-defined]
         api_server_info = get_api_server_status(endpoint)
         if api_server_info.status == ApiServerStatus.UNHEALTHY:
             _start_api_server(deploy, host, foreground, metrics,
@@ -841,7 +842,7 @@ def process_mounts_in_task_on_api_server(task: str, env_vars: Dict[str, str],
     for task_config in task_configs:
         if task_config is None:
             continue
-        file_mounts_mapping = task_config.
+        file_mounts_mapping = task_config.pop('file_mounts_mapping', {})
         if not file_mounts_mapping:
             # We did not mount any files to new paths on the remote server
             # so no need to resolve filepaths.