skypilot-nightly 1.0.0.dev20250804__py3-none-any.whl → 1.0.0.dev20250807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skypilot-nightly might be problematic.
- sky/__init__.py +2 -2
- sky/backends/cloud_vm_ray_backend.py +33 -4
- sky/catalog/kubernetes_catalog.py +8 -0
- sky/catalog/nebius_catalog.py +0 -1
- sky/check.py +11 -1
- sky/client/cli/command.py +234 -100
- sky/client/sdk.py +30 -9
- sky/client/sdk_async.py +815 -0
- sky/clouds/kubernetes.py +6 -1
- sky/clouds/nebius.py +1 -4
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/YAirOGsV1z6B2RJ0VIUmD/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-a8a8f1adba34c892.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-980a395e92633a5c.js +6 -0
- sky/dashboard/out/_next/static/chunks/3785.6003d293cb83eab4.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3698-7874720877646365.js → 3850-ff4a9a69d978632b.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.29550342bd53afd8.js +1 -0
- sky/dashboard/out/_next/static/chunks/{4937.d6bf67771e353356.js → 4937.a2baa2df5572a276.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/6601-3e21152fe16da09c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{691.6d99cbfba347cebf.js → 691.5eeedf82cc243343.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6989-6129c1cfbcf51063.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-0f886f16e0d55ff8.js +1 -0
- sky/dashboard/out/_next/static/chunks/8056-019615038d6ce427.js +1 -0
- sky/dashboard/out/_next/static/chunks/8252.62b0d23aed618bb2.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-318c3dca725e8e5d.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9025.7937c16bc8623516.js → 9025.a1bef12d672bb66d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/9159-11421c0f2909236f.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.85b0b1b4054574dd.js +31 -0
- sky/dashboard/out/_next/static/chunks/9666.cd4273f2a5c5802c.js +1 -0
- sky/dashboard/out/_next/static/chunks/{9847.4c46c5e229c78704.js → 9847.757720f3b40c0aa5.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{9984.78ee6d2c6fa4b0e8.js → 9984.c5564679e467d245.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{_app-a67ae198457b9886.js → _app-1e6de35d15a8d432.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-6fd1d2d8441aa54b.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-155d477a6c3e04e2.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-b30460f683e6ba96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-8620d099cbef8608.js → config-dfb9bf07b13045f4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-13d53fffc03ccb52.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-fc9222e26c8e2f0d.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-154f55cf8af55be5.js +11 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-f5ccf5d39d87aebe.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-cdc60fb5d371e16a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-7ed36e44e779d5c7.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-c9695d657f78b5dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-f72f73bcef9541dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-8f67be60165724cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-76efbdad99742559.js +1 -0
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/global_user_state.py +14 -2
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +43 -2
- sky/jobs/client/sdk_async.py +135 -0
- sky/jobs/server/core.py +48 -1
- sky/jobs/server/server.py +52 -3
- sky/jobs/state.py +5 -1
- sky/jobs/utils.py +3 -1
- sky/provision/kubernetes/utils.py +30 -4
- sky/provision/nebius/instance.py +1 -0
- sky/provision/nebius/utils.py +9 -1
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/serve/client/impl.py +85 -1
- sky/serve/client/sdk.py +16 -47
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +3 -1
- sky/serve/controller.py +6 -3
- sky/serve/load_balancer.py +3 -1
- sky/serve/serve_state.py +93 -5
- sky/serve/serve_utils.py +200 -67
- sky/serve/server/core.py +13 -197
- sky/serve/server/impl.py +261 -23
- sky/serve/service.py +15 -3
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +46 -0
- sky/server/auth/oauth2_proxy.py +185 -0
- sky/server/common.py +119 -21
- sky/server/constants.py +1 -1
- sky/server/daemons.py +60 -11
- sky/server/requests/executor.py +5 -3
- sky/server/requests/payloads.py +19 -0
- sky/server/rest.py +114 -0
- sky/server/server.py +44 -40
- sky/setup_files/dependencies.py +2 -0
- sky/skylet/constants.py +1 -1
- sky/skylet/events.py +5 -1
- sky/skylet/skylet.py +3 -1
- sky/task.py +61 -21
- sky/templates/kubernetes-ray.yml.j2 +9 -0
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/templates/sky-serve-controller.yaml.j2 +1 -0
- sky/usage/usage_lib.py +8 -6
- sky/utils/annotations.py +8 -3
- sky/utils/common_utils.py +11 -1
- sky/utils/controller_utils.py +7 -0
- sky/utils/db/migration_utils.py +2 -2
- sky/utils/rich_utils.py +120 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/METADATA +22 -13
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/RECORD +120 -112
- sky/client/sdk.pyi +0 -300
- sky/dashboard/out/_next/static/KiGGm4fK0CpmN6BT17jkh/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +0 -11
- sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +0 -1
- sky/dashboard/out/_next/static/chunks/1871-7e17c195296e2ea9.js +0 -6
- sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +0 -1
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +0 -1
- sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +0 -1
- sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +0 -16
- sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +0 -1
- sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +0 -1
- sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-9e7df5fc761c95a7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-6c5af4c86e6ab3d3.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-13145516b19858fb.js +0 -1
- sky/dashboard/out/_next/static/css/b3227360726f12eb.css +0 -3
- /sky/dashboard/out/_next/static/{KiGGm4fK0CpmN6BT17jkh → YAirOGsV1z6B2RJ0VIUmD}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/{6135-d0e285ac5f3f2485.js → 6135-85426374db04811e.js} +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250804.dist-info → skypilot_nightly-1.0.0.dev20250807.dist-info}/top_level.txt +0 -0
sky/serve/server/core.py
CHANGED
@@ -1,9 +1,6 @@
 """SkyServe core APIs."""
-import pathlib
-import signal
-import threading
 import typing
-from typing import Any, Dict, List, Optional,
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from sky import backends
 from sky import exceptions
@@ -12,11 +9,8 @@ from sky.backends import backend_utils
 from sky.serve import serve_utils
 from sky.serve.server import impl
 from sky.usage import usage_lib
-from sky.utils import command_runner
 from sky.utils import controller_utils
-from sky.utils import rich_utils
 from sky.utils import subprocess_utils
-from sky.utils import ux_utils
 
 if typing.TYPE_CHECKING:
     import sky
@@ -24,42 +18,6 @@ if typing.TYPE_CHECKING:
 logger = sky_logging.init_logger(__name__)
 
 
-def _get_all_replica_targets(
-        service_name: str, backend: backends.CloudVmRayBackend,
-        handle: backends.CloudVmRayResourceHandle
-) -> Set[serve_utils.ServiceComponentTarget]:
-    """Helper function to get targets for all live replicas."""
-    code = serve_utils.ServeCodeGen.get_service_status([service_name],
-                                                       pool=False)
-    returncode, serve_status_payload, stderr = backend.run_on_head(
-        handle,
-        code,
-        require_outputs=True,
-        stream_logs=False,
-        separate_stderr=True)
-
-    try:
-        subprocess_utils.handle_returncode(returncode,
-                                           code,
-                                           'Failed to fetch services',
-                                           stderr,
-                                           stream_logs=True)
-    except exceptions.CommandError as e:
-        raise RuntimeError(e.error_msg) from e
-
-    service_records = serve_utils.load_service_status(serve_status_payload)
-    if not service_records:
-        raise ValueError(f'Service {service_name!r} not found.')
-    assert len(service_records) == 1
-    service_record = service_records[0]
-
-    return {
-        serve_utils.ServiceComponentTarget(serve_utils.ServiceComponent.REPLICA,
-                                           replica_info['replica_id'])
-        for replica_info in service_record['replica_info']
-    }
-
-
 @usage_lib.entrypoint
 def up(
     task: 'sky.Task',
@@ -277,59 +235,12 @@ def tail_logs(
         sky.exceptions.ClusterNotUpError: the sky serve controller is not up.
         ValueError: arguments not valid, or failed to tail the logs.
     """
-
-
-
-
-
-
-
-    if target == serve_utils.ServiceComponent.REPLICA:
-        if replica_id is None:
-            with ux_utils.print_exception_no_traceback():
-                raise ValueError(
-                    '`replica_id` must be specified when using target=REPLICA.')
-    else:
-        if replica_id is not None:
-            with ux_utils.print_exception_no_traceback():
-                raise ValueError('`replica_id` must be None when using '
-                                 'target=CONTROLLER/LOAD_BALANCER.')
-
-    controller_type = controller_utils.Controllers.SKY_SERVE_CONTROLLER
-    handle = backend_utils.is_controller_accessible(
-        controller=controller_type,
-        stopped_message=controller_type.value.default_hint_if_non_existent)
-
-    backend = backend_utils.get_backend_from_handle(handle)
-    assert isinstance(backend, backends.CloudVmRayBackend), backend
-
-    if target != serve_utils.ServiceComponent.REPLICA:
-        code = serve_utils.ServeCodeGen.stream_serve_process_logs(
-            service_name,
-            stream_controller=(
-                target == serve_utils.ServiceComponent.CONTROLLER),
-            follow=follow,
-            tail=tail)
-    else:
-        assert replica_id is not None, service_name
-        code = serve_utils.ServeCodeGen.stream_replica_logs(service_name,
-                                                            replica_id,
-                                                            follow,
-                                                            tail=tail)
-
-    # With the stdin=subprocess.DEVNULL, the ctrl-c will not directly
-    # kill the process, so we need to handle it manually here.
-    if threading.current_thread() is threading.main_thread():
-        signal.signal(signal.SIGINT, backend_utils.interrupt_handler)
-        signal.signal(signal.SIGTSTP, backend_utils.stop_handler)
-
-    # Refer to the notes in
-    # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
-    backend.run_on_head(handle,
-                        code,
-                        stream_logs=True,
-                        process_stream=False,
-                        ssh_mode=command_runner.SshMode.INTERACTIVE)
+    return impl.tail_logs(service_name,
+                          target=target,
+                          replica_id=replica_id,
+                          follow=follow,
+                          tail=tail,
+                          pool=False)
 
 
 @usage_lib.entrypoint
@@ -374,104 +285,9 @@ def sync_down_logs(
         sky.exceptions.ClusterNotUpError: If the controller is not up.
         ValueError: Arguments not valid.
     """
-
-
-
-
-
-
-        stopped_message=controller_type.value.default_hint_if_non_existent)
-    backend: backends.CloudVmRayBackend = (
-        backend_utils.get_backend_from_handle(handle))
-
-    requested_components: Set[serve_utils.ServiceComponent] = set()
-    if not targets:
-        # No targets specified -> request all components
-        requested_components = {
-            serve_utils.ServiceComponent.CONTROLLER,
-            serve_utils.ServiceComponent.LOAD_BALANCER,
-            serve_utils.ServiceComponent.REPLICA
-        }
-    else:
-        # Parse provided targets
-        if isinstance(targets, (str, serve_utils.ServiceComponent)):
-            requested_components = {serve_utils.ServiceComponent(targets)}
-        else:  # list
-            requested_components = {
-                serve_utils.ServiceComponent(t) for t in targets
-            }
-
-    normalized_targets: Set[serve_utils.ServiceComponentTarget] = set()
-    if serve_utils.ServiceComponent.CONTROLLER in requested_components:
-        normalized_targets.add(
-            serve_utils.ServiceComponentTarget(
-                serve_utils.ServiceComponent.CONTROLLER))
-    if serve_utils.ServiceComponent.LOAD_BALANCER in requested_components:
-        normalized_targets.add(
-            serve_utils.ServiceComponentTarget(
-                serve_utils.ServiceComponent.LOAD_BALANCER))
-    if serve_utils.ServiceComponent.REPLICA in requested_components:
-        with rich_utils.safe_status(
-                ux_utils.spinner_message('Getting live replica infos...')):
-            replica_targets = _get_all_replica_targets(service_name, backend,
-                                                       handle)
-        if not replica_ids:
-            # Replica target requested but no specific IDs
-            # -> Get all replica logs
-            normalized_targets.update(replica_targets)
-        else:
-            # Replica target requested with specific IDs
-            requested_replica_targets = [
-                serve_utils.ServiceComponentTarget(
-                    serve_utils.ServiceComponent.REPLICA, rid)
-                for rid in replica_ids
-            ]
-            for target in requested_replica_targets:
-                if target not in replica_targets:
-                    logger.warning(f'Replica ID {target.replica_id} not found '
-                                   f'for {service_name}. Skipping...')
-                else:
-                    normalized_targets.add(target)
-
-    def sync_down_logs_by_target(target: serve_utils.ServiceComponentTarget):
-        component = target.component
-        # We need to set one side of the pipe to a logs stream, and the other
-        # side to a file.
-        log_path = str(pathlib.Path(local_dir) / f'{target}.log')
-        stream_logs_code: str
-
-        if component == serve_utils.ServiceComponent.CONTROLLER:
-            stream_logs_code = (
-                serve_utils.ServeCodeGen.stream_serve_process_logs(
-                    service_name,
-                    stream_controller=True,
-                    follow=False,
-                    tail=tail))
-        elif component == serve_utils.ServiceComponent.LOAD_BALANCER:
-            stream_logs_code = (
-                serve_utils.ServeCodeGen.stream_serve_process_logs(
-                    service_name,
-                    stream_controller=False,
-                    follow=False,
-                    tail=tail))
-        elif component == serve_utils.ServiceComponent.REPLICA:
-            replica_id = target.replica_id
-            assert replica_id is not None, service_name
-            stream_logs_code = serve_utils.ServeCodeGen.stream_replica_logs(
-                service_name, replica_id, follow=False, tail=tail)
-        else:
-            assert False, component
-
-        # Refer to the notes in
-        # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
-        backend.run_on_head(handle,
-                            stream_logs_code,
-                            stream_logs=False,
-                            process_stream=False,
-                            ssh_mode=command_runner.SshMode.INTERACTIVE,
-                            log_path=log_path)
-
-    subprocess_utils.run_in_parallel(sync_down_logs_by_target,
-                                     list(normalized_targets))
-
-    return local_dir
+    return impl.sync_down_logs(service_name,
+                               local_dir=local_dir,
+                               targets=targets,
+                               replica_ids=replica_ids,
+                               tail=tail,
+                               pool=False)
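The net effect of the core.py change is that tail_logs() and sync_down_logs() become thin wrappers that forward to the shared impl module with pool=False, while the pool-aware logic moves into sky/serve/server/impl.py below. A minimal sketch of the forwarded call, assuming the internal modules are importable and that a service named 'my-service' exists (both are illustrative assumptions, not part of this diff):

# Hedged sketch: tail controller logs through the shared implementation.
# core.tail_logs(...) now reduces to a call of this shape.
from sky.serve import serve_utils
from sky.serve.server import impl

impl.tail_logs(
    'my-service',                                    # hypothetical service name
    target=serve_utils.ServiceComponent.CONTROLLER,  # or REPLICA plus replica_id=...
    follow=False,
    tail=100,
    pool=False)                                      # core.py always passes pool=False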
sky/serve/server/impl.py
CHANGED
@@ -1,7 +1,12 @@
 """Implementation of the SkyServe core APIs."""
+import pathlib
 import re
+import shlex
+import signal
 import tempfile
-
+import threading
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
+import uuid
 
 import colorama
 import filelock
@@ -21,6 +26,7 @@ from sky.serve import serve_state
 from sky.serve import serve_utils
 from sky.skylet import constants
 from sky.utils import admin_policy_utils
+from sky.utils import command_runner
 from sky.utils import common
 from sky.utils import common_utils
 from sky.utils import controller_utils
@@ -102,10 +108,10 @@ def up(
     pool: bool = False,
 ) -> Tuple[str, str]:
     """Spins up a service or a pool."""
-    if pool and not serve_utils.is_consolidation_mode():
+    if pool and not serve_utils.is_consolidation_mode(pool):
         raise ValueError(
             'Pool is only supported in consolidation mode. To fix, set '
-            '`
+            '`jobs.controller.consolidation_mode: true` in SkyPilot config.')
     task.validate()
     serve_utils.validate_service_task(task, pool=pool)
     assert task.service is not None
@@ -174,7 +180,8 @@ def up(
             prefix=f'controller-task-{service_name}-',
             mode='w',
     ) as controller_file:
-
+        controller = controller_utils.get_controller_for_pool(pool)
+        controller_name = controller.value.cluster_name
         task_config = task.to_yaml_config()
         common_utils.dump_yaml(service_file.name, task_config)
         remote_tmp_task_yaml_path = (
@@ -187,8 +194,13 @@ def up(
                 controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
                 task_resources=task.resources)
         controller_job_id = None
-        if serve_utils.is_consolidation_mode():
-
+        if serve_utils.is_consolidation_mode(pool):
+            # We need a unique integer per sky.serve.up call to avoid name
+            # conflict. Originally in non-consolidation mode, this is the ray
+            # job id; now we use the request id hash instead. Here we also
+            # make sure it is a 63-bit integer to avoid overflow on sqlalchemy.
+            rid = common_utils.get_current_request_id()
+            controller_job_id = hash(uuid.UUID(rid).int) & 0x7FFFFFFFFFFFFFFF
 
         vars_to_fill = {
             'remote_task_yaml_path': remote_tmp_task_yaml_path,
@@ -200,6 +212,7 @@ def up(
             'modified_catalogs':
                 service_catalog_common.get_modified_catalog_file_mounts(),
             'consolidation_mode_job_id': controller_job_id,
+            'entrypoint': shlex.quote(common_utils.get_current_command()),
             **tls_template_vars,
             **controller_utils.shared_controller_vars_to_fill(
                 controller=controller_utils.Controllers.SKY_SERVE_CONTROLLER,
@@ -238,7 +251,7 @@ def up(
     # for the first time; otherwise it is a name conflict.
     # Since the controller may be shared among multiple users, launch the
     # controller with the API server's user hash.
-    if not serve_utils.is_consolidation_mode():
+    if not serve_utils.is_consolidation_mode(pool):
         print(f'{colorama.Fore.YELLOW}Launching controller for '
               f'{service_name!r}...{colorama.Style.RESET_ALL}')
         with common.with_server_user():
@@ -251,9 +264,9 @@ def up(
                 _disable_controller_check=True,
             )
     else:
+        controller_type = controller_utils.get_controller_for_pool(pool)
         controller_handle = backend_utils.is_controller_accessible(
-            controller=
-            stopped_message='')
+            controller=controller_type, stopped_message='')
         backend = backend_utils.get_backend_from_handle(controller_handle)
         assert isinstance(backend, backends.CloudVmRayBackend)
         backend.sync_file_mounts(
@@ -270,10 +283,8 @@ def up(
         ]
         run_script = '\n'.join(env_cmds + [run_script])
         # Dump script for high availability recovery.
-
-
-        # managed_job_state.set_ha_recovery_script(
-        #     consolidation_mode_job_id, run_script)
+        if controller_utils.high_availability_specified(controller_name):
+            serve_state.set_ha_recovery_script(service_name, run_script)
         backend.run_on_head(controller_handle, run_script)
 
     style = colorama.Style
@@ -289,7 +300,7 @@ def up(
     # and return the endpoint if the job id matches. Otherwise it will
    # return None.
     code = serve_utils.ServeCodeGen.wait_service_registration(
-        service_name, controller_job_id)
+        service_name, controller_job_id, pool)
     backend = backend_utils.get_backend_from_handle(controller_handle)
     assert isinstance(backend, backends.CloudVmRayBackend)
     assert isinstance(controller_handle,
@@ -304,7 +315,7 @@ def up(
             returncode, code, f'Failed to wait for {noun} initialization',
             lb_port_payload)
     except exceptions.CommandError:
-        if serve_utils.is_consolidation_mode():
+        if serve_utils.is_consolidation_mode(pool):
             with ux_utils.print_exception_no_traceback():
                 raise RuntimeError(
                     f'Failed to wait for {noun} initialization. '
@@ -339,7 +350,7 @@ def up(
     else:
         lb_port = serve_utils.load_service_initialization_result(
             lb_port_payload)
-        if not serve_utils.is_consolidation_mode():
+        if not serve_utils.is_consolidation_mode(pool):
             socket_endpoint = backend_utils.get_endpoints(
                 controller_handle.cluster_name,
                 lb_port,
@@ -442,8 +453,9 @@ def update(
                 'effect. To update TLS keyfile and certfile, please '
                 'tear down the service and spin up a new one.')
 
+    controller_type = controller_utils.get_controller_for_pool(pool)
     handle = backend_utils.is_controller_accessible(
-        controller=
+        controller=controller_type,
         stopped_message=
         'Service controller is stopped. There is no service to update. '
        f'To spin up a new service, use {ux_utils.BOLD}'
@@ -572,9 +584,9 @@ def apply(
     """Applies the config to the service or pool."""
     with filelock.FileLock(serve_utils.get_service_filelock_path(service_name)):
         try:
+            controller_type = controller_utils.get_controller_for_pool(pool)
             handle = backend_utils.is_controller_accessible(
-                controller=
-                stopped_message='')
+                controller=controller_type, stopped_message='')
             backend = backend_utils.get_backend_from_handle(handle)
             assert isinstance(backend, backends.CloudVmRayBackend)
             service_record = _get_service_record(service_name, pool, handle,
@@ -598,8 +610,9 @@ def down(
         service_names = []
     if isinstance(service_names, str):
         service_names = [service_names]
+    controller_type = controller_utils.get_controller_for_pool(pool)
     handle = backend_utils.is_controller_accessible(
-        controller=
+        controller=controller_type,
         stopped_message=f'All {noun}s should have terminated.')
 
     service_names_str = ','.join(service_names)
@@ -624,7 +637,7 @@ def down(
     except exceptions.FetchClusterInfoError as e:
         raise RuntimeError(
             'Failed to fetch controller IP. Please refresh controller status '
-            f'by `sky status -r {
+            f'by `sky status -r {controller_type.value.cluster_name}` '
             'and try again.') from e
 
     try:
@@ -654,7 +667,7 @@ def status(
         raise RuntimeError(f'Failed to refresh {noun}s status '
                            'due to network error.') from e
 
-    controller_type = controller_utils.
+    controller_type = controller_utils.get_controller_for_pool(pool)
     handle = backend_utils.is_controller_accessible(
         controller=controller_type,
         stopped_message=controller_type.value.default_hint_if_non_existent.
@@ -690,7 +703,7 @@ def status(
         if service_record['load_balancer_port'] is not None:
             try:
                 lb_port = service_record['load_balancer_port']
-                if not serve_utils.is_consolidation_mode():
+                if not serve_utils.is_consolidation_mode(pool):
                     endpoint = backend_utils.get_endpoints(
                         cluster=common.SKY_SERVE_CONTROLLER_NAME,
                         port=lb_port).get(lb_port, None)
@@ -707,3 +720,228 @@ def status(
             service_record['endpoint'] = f'{protocol}://{endpoint}'
 
     return service_records
+
+
+ServiceComponentOrStr = Union[str, serve_utils.ServiceComponent]
+
+
+def tail_logs(
+    service_name: str,
+    *,
+    target: ServiceComponentOrStr,
+    replica_id: Optional[int] = None,
+    follow: bool = True,
+    tail: Optional[int] = None,
+    pool: bool = False,
+) -> None:
+    """Tail logs of a service or pool."""
+    if isinstance(target, str):
+        target = serve_utils.ServiceComponent(target)
+
+    if pool and target == serve_utils.ServiceComponent.LOAD_BALANCER:
+        raise ValueError(f'Target {target} is not supported for pool.')
+
+    if target == serve_utils.ServiceComponent.REPLICA:
+        if replica_id is None:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    '`replica_id` must be specified when using target=REPLICA.')
+    else:
+        if replica_id is not None:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError('`replica_id` must be None when using '
+                                 'target=CONTROLLER/LOAD_BALANCER.')
+
+    controller_type = controller_utils.get_controller_for_pool(pool)
+    handle = backend_utils.is_controller_accessible(
+        controller=controller_type,
+        stopped_message=controller_type.value.default_hint_if_non_existent)
+
+    backend = backend_utils.get_backend_from_handle(handle)
+    assert isinstance(backend, backends.CloudVmRayBackend), backend
+
+    if target != serve_utils.ServiceComponent.REPLICA:
+        code = serve_utils.ServeCodeGen.stream_serve_process_logs(
+            service_name,
+            stream_controller=(
+                target == serve_utils.ServiceComponent.CONTROLLER),
+            follow=follow,
+            tail=tail,
+            pool=pool)
+    else:
+        assert replica_id is not None, service_name
+        code = serve_utils.ServeCodeGen.stream_replica_logs(service_name,
+                                                            replica_id,
+                                                            follow,
+                                                            tail=tail,
+                                                            pool=pool)
+
+    # With the stdin=subprocess.DEVNULL, the ctrl-c will not directly
+    # kill the process, so we need to handle it manually here.
+    if threading.current_thread() is threading.main_thread():
+        signal.signal(signal.SIGINT, backend_utils.interrupt_handler)
+        signal.signal(signal.SIGTSTP, backend_utils.stop_handler)
+
+    # Refer to the notes in
+    # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
+    backend.run_on_head(handle,
+                        code,
+                        stream_logs=True,
+                        process_stream=False,
+                        ssh_mode=command_runner.SshMode.INTERACTIVE)
+
+
+def _get_all_replica_targets(
+        service_name: str, backend: backends.CloudVmRayBackend,
+        handle: backends.CloudVmRayResourceHandle,
+        pool: bool) -> Set[serve_utils.ServiceComponentTarget]:
+    """Helper function to get targets for all live replicas."""
+    code = serve_utils.ServeCodeGen.get_service_status([service_name],
+                                                       pool=pool)
+    returncode, serve_status_payload, stderr = backend.run_on_head(
+        handle,
+        code,
+        require_outputs=True,
+        stream_logs=False,
+        separate_stderr=True)
+
+    try:
+        subprocess_utils.handle_returncode(returncode,
+                                           code,
+                                           'Failed to fetch services',
+                                           stderr,
+                                           stream_logs=True)
+    except exceptions.CommandError as e:
+        raise RuntimeError(e.error_msg) from e
+
+    service_records = serve_utils.load_service_status(serve_status_payload)
+    if not service_records:
+        raise ValueError(f'Service {service_name!r} not found.')
+    assert len(service_records) == 1
+    service_record = service_records[0]
+
+    return {
+        serve_utils.ServiceComponentTarget(serve_utils.ServiceComponent.REPLICA,
+                                           replica_info['replica_id'])
+        for replica_info in service_record['replica_info']
+    }
+
+
+def sync_down_logs(
+    service_name: str,
+    *,
+    local_dir: str,
+    targets: Union[ServiceComponentOrStr, List[ServiceComponentOrStr],
+                   None] = None,
+    replica_ids: Optional[List[int]] = None,
+    tail: Optional[int] = None,
+    pool: bool = False,
+) -> str:
+    """Sync down logs of a service or pool."""
+    noun = 'pool' if pool else 'service'
+    repnoun = 'worker' if pool else 'replica'
+    caprepnoun = repnoun.capitalize()
+
+    # Step 0) get the controller handle
+    with rich_utils.safe_status(
+            ux_utils.spinner_message(f'Checking {noun} status...')):
+        controller_type = controller_utils.get_controller_for_pool(pool)
+        handle = backend_utils.is_controller_accessible(
+            controller=controller_type,
+            stopped_message=controller_type.value.default_hint_if_non_existent)
+        backend: backends.CloudVmRayBackend = (
+            backend_utils.get_backend_from_handle(handle))
+
+    requested_components: Set[serve_utils.ServiceComponent] = set()
+    if not targets:
+        # No targets specified -> request all components
+        requested_components = {
+            serve_utils.ServiceComponent.CONTROLLER,
+            serve_utils.ServiceComponent.LOAD_BALANCER,
+            serve_utils.ServiceComponent.REPLICA
+        }
+    else:
+        # Parse provided targets
+        if isinstance(targets, (str, serve_utils.ServiceComponent)):
+            requested_components = {serve_utils.ServiceComponent(targets)}
+        else:  # list
+            requested_components = {
+                serve_utils.ServiceComponent(t) for t in targets
+            }
+
+    normalized_targets: Set[serve_utils.ServiceComponentTarget] = set()
+    if serve_utils.ServiceComponent.CONTROLLER in requested_components:
+        normalized_targets.add(
+            serve_utils.ServiceComponentTarget(
+                serve_utils.ServiceComponent.CONTROLLER))
+    if serve_utils.ServiceComponent.LOAD_BALANCER in requested_components:
+        normalized_targets.add(
+            serve_utils.ServiceComponentTarget(
+                serve_utils.ServiceComponent.LOAD_BALANCER))
+    if serve_utils.ServiceComponent.REPLICA in requested_components:
+        with rich_utils.safe_status(
+                ux_utils.spinner_message(f'Getting live {repnoun} infos...')):
+            replica_targets = _get_all_replica_targets(service_name, backend,
+                                                       handle, pool)
+        if not replica_ids:
+            # Replica target requested but no specific IDs
+            # -> Get all replica logs
+            normalized_targets.update(replica_targets)
+        else:
+            # Replica target requested with specific IDs
+            requested_replica_targets = [
+                serve_utils.ServiceComponentTarget(
+                    serve_utils.ServiceComponent.REPLICA, rid)
+                for rid in replica_ids
+            ]
+            for target in requested_replica_targets:
+                if target not in replica_targets:
+                    logger.warning(f'{caprepnoun} ID {target.replica_id} not '
+                                   f'found for {service_name}. Skipping...')
+                else:
+                    normalized_targets.add(target)
+
+    def sync_down_logs_by_target(target: serve_utils.ServiceComponentTarget):
+        component = target.component
+        # We need to set one side of the pipe to a logs stream, and the other
+        # side to a file.
+        log_path = str(pathlib.Path(local_dir) / f'{target}.log')
+        stream_logs_code: str
+
+        if component == serve_utils.ServiceComponent.CONTROLLER:
+            stream_logs_code = (
+                serve_utils.ServeCodeGen.stream_serve_process_logs(
+                    service_name,
+                    stream_controller=True,
+                    follow=False,
+                    tail=tail,
+                    pool=pool))
+        elif component == serve_utils.ServiceComponent.LOAD_BALANCER:
+            stream_logs_code = (
+                serve_utils.ServeCodeGen.stream_serve_process_logs(
+                    service_name,
+                    stream_controller=False,
+                    follow=False,
+                    tail=tail,
+                    pool=pool))
+        elif component == serve_utils.ServiceComponent.REPLICA:
+            replica_id = target.replica_id
+            assert replica_id is not None, service_name
+            stream_logs_code = serve_utils.ServeCodeGen.stream_replica_logs(
+                service_name, replica_id, follow=False, tail=tail, pool=pool)
+        else:
+            assert False, component
+
+        # Refer to the notes in
+        # sky/backends/cloud_vm_ray_backend.py::CloudVmRayBackend::tail_logs.
+        backend.run_on_head(handle,
+                            stream_logs_code,
+                            stream_logs=False,
+                            process_stream=False,
+                            ssh_mode=command_runner.SshMode.INTERACTIVE,
+                            log_path=log_path)
+
+    subprocess_utils.run_in_parallel(sync_down_logs_by_target,
+                                     list(normalized_targets))
+
+    return local_dir
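The consolidation-mode branch added to up() notes that it needs a unique integer per sky.serve.up call and now derives it from the request id rather than a Ray job id, masked down to 63 bits so it cannot overflow a signed integer column. A self-contained sketch of that derivation, using a freshly generated UUID as a stand-in for common_utils.get_current_request_id() (an assumption made for illustration only):

# Hedged sketch: reduce a request UUID to a non-negative 63-bit integer so it
# fits in a signed 64-bit database column, mirroring the mask used in the diff.
import uuid

rid = str(uuid.uuid4())  # stand-in for the API server's request id
controller_job_id = hash(uuid.UUID(rid).int) & 0x7FFFFFFFFFFFFFFF
assert 0 <= controller_job_id < 2**63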