skypilot-nightly 1.0.0.dev20250926__py3-none-any.whl → 1.0.0.dev20251001__py3-none-any.whl
This diff shows the changes between these two publicly released package versions, as they appear in the public registry.
- sky/__init__.py +2 -2
- sky/backends/backend_utils.py +43 -14
- sky/backends/cloud_vm_ray_backend.py +153 -38
- sky/check.py +0 -29
- sky/client/cli/command.py +48 -26
- sky/client/cli/table_utils.py +91 -0
- sky/client/sdk.py +14 -23
- sky/client/sdk_async.py +5 -5
- sky/core.py +18 -20
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{3294.03e02ae73455f48e.js → 3294.93d9336bdc032b3a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-07349868f7905d37.js → [pool]-509b2977a6373bf6.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-8e64d11e58eab5cb.js → webpack-4f0c389a4ce5fd9c.js} +1 -1
- sky/dashboard/out/_next/static/{VXU6_xE28M55BOdwmUUJS → m3YT2i5s6v4SsIdYc8WZa}/_buildManifest.js +1 -1
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/storage.py +11 -0
- sky/data/storage_utils.py +1 -45
- sky/execution.py +0 -1
- sky/global_user_state.py +3 -3
- sky/jobs/client/sdk.py +3 -2
- sky/jobs/controller.py +15 -0
- sky/jobs/server/core.py +120 -28
- sky/jobs/server/server.py +1 -1
- sky/jobs/server/utils.py +65 -32
- sky/jobs/state.py +145 -3
- sky/jobs/utils.py +87 -8
- sky/provision/kubernetes/instance.py +1 -1
- sky/schemas/api/responses.py +73 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +70 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +262 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/serve/serve_utils.py +16 -0
- sky/serve/server/core.py +1 -1
- sky/serve/server/impl.py +6 -6
- sky/server/common.py +2 -1
- sky/server/requests/serializers/decoders.py +10 -6
- sky/server/requests/serializers/encoders.py +13 -8
- sky/skylet/constants.py +1 -1
- sky/skylet/job_lib.py +2 -32
- sky/skylet/log_lib.py +211 -0
- sky/skylet/log_lib.pyi +30 -1
- sky/skylet/services.py +208 -2
- sky/skylet/skylet.py +3 -0
- sky/task.py +4 -0
- sky/utils/cluster_utils.py +23 -5
- sky/utils/command_runner.py +21 -5
- sky/utils/command_runner.pyi +11 -0
- sky/utils/volume.py +5 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/METADATA +35 -35
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/RECORD +70 -66
- sky/dashboard/out/_next/static/chunks/6856-2b3600ff2854d066.js +0 -1
- /sky/dashboard/out/_next/static/{VXU6_xE28M55BOdwmUUJS → m3YT2i5s6v4SsIdYc8WZa}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250926.dist-info → skypilot_nightly-1.0.0.dev20251001.dist-info}/top_level.txt +0 -0
sky/jobs/server/utils.py
CHANGED
@@ -1,13 +1,24 @@
 """Utility functions for managed jobs."""
+import typing
+
 from sky import backends
+from sky import exceptions
 from sky import sky_logging
+from sky.adaptors import common as adaptors_common
 from sky.backends import backend_utils
+from sky.backends import cloud_vm_ray_backend
 from sky.jobs import utils as managed_job_utils
 from sky.skylet import constants as skylet_constants
 from sky.utils import controller_utils
 
 logger = sky_logging.init_logger(__name__)
 
+if typing.TYPE_CHECKING:
+    from sky.schemas.generated import managed_jobsv1_pb2
+else:
+    managed_jobsv1_pb2 = adaptors_common.LazyImport(
+        'sky.schemas.generated.managed_jobsv1_pb2')
+
 
 def check_version_mismatch_and_non_terminal_jobs() -> None:
     """Check if controller has version mismatch and non-terminal jobs exist.
@@ -28,42 +39,64 @@ def check_version_mismatch_and_non_terminal_jobs() -> None:
     backend = backend_utils.get_backend_from_handle(handle)
     assert isinstance(backend, backends.CloudVmRayBackend)
 
-… (28 removed lines; content not shown in the source view)
+    use_legacy = not handle.is_grpc_enabled_with_flag
+
+    if not use_legacy:
+        try:
+            version_request = managed_jobsv1_pb2.GetVersionRequest()
+            version_response = backend_utils.invoke_skylet_with_retries(
+                lambda: cloud_vm_ray_backend.SkyletClient(
+                    handle.get_grpc_channel(
+                    )).get_managed_job_controller_version(version_request))
+            controller_version = version_response.controller_version
+
+            job_table_request = managed_jobsv1_pb2.GetJobTableRequest()
+            job_table_response = backend_utils.invoke_skylet_with_retries(
+                lambda: cloud_vm_ray_backend.SkyletClient(
+                    handle.get_grpc_channel()).get_managed_job_table(
+                        job_table_request))
+            jobs = managed_job_utils.decode_managed_job_protos(
+                job_table_response.jobs)
+        except exceptions.SkyletMethodNotImplementedError:
+            use_legacy = True
+
+    if use_legacy:
+        # Get controller version and raw job table
+        code = managed_job_utils.ManagedJobCodeGen.get_version_and_job_table()
+
+        returncode, output, stderr = backend.run_on_head(handle,
+                                                         code,
+                                                         require_outputs=True,
+                                                         stream_logs=False,
+                                                         separate_stderr=True)
+
+        if returncode != 0:
+            logger.error(output + stderr)
+            raise ValueError('Failed to check controller version and jobs with '
+                             f'returncode: {returncode}.\n{output + stderr}')
+
+        # Parse the output to extract controller version (split only on first
+        # newline)
+        output_parts = output.strip().split('\n', 1)
+
+        # Extract controller version from first line
+        if len(output_parts) < 2 or not output_parts[0].startswith(
+                'controller_version:'):
+            raise ValueError(
+                f'Expected controller version in first line, got: {output}')
+
+        controller_version = output_parts[0].split(':', 1)[1]
+
+        # Rest is job table payload (preserving any newlines within it)
+        job_table_payload = output_parts[1]
+
+        # Load and filter jobs locally using existing method
+        jobs, _, _, _, _ = managed_job_utils.load_managed_job_queue(
+            job_table_payload)
 
     # Process locally: check version match and filter non-terminal jobs
     version_matches = (controller_version == local_version or
                        int(controller_version) > 17)
-
-    # Load and filter jobs locally using existing method
-    jobs, _, _, _, _ = managed_job_utils.load_managed_job_queue(
-        job_table_payload)
     non_terminal_jobs = [job for job in jobs if not job['status'].is_terminal()]
     has_non_terminal_jobs = len(non_terminal_jobs) > 0
 
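Note: the hunk above prefers the Skylet gRPC service and only falls back to the legacy codegen-over-SSH path when the controller predates the new RPC (signalled by SkyletMethodNotImplementedError). The legacy branch also documents the stdout wire format: the first line is 'controller_version:<v>' and the remainder is the encoded job-table payload. A minimal sketch of the fallback pattern, with hypothetical names (MethodNotImplemented, fetch_jobs, and legacy_fetch are illustrative, not SkyPilot APIs):

class MethodNotImplemented(Exception):
    """Stand-in for exceptions.SkyletMethodNotImplementedError."""


def fetch_jobs(client, legacy_fetch):
    # Try the new RPC first; an old server that lacks the method raises.
    try:
        return client.get_managed_job_table()
    except MethodNotImplemented:
        # Fall back to running generated Python on the head node and
        # parsing its stdout, as the legacy branch above does.
        return legacy_fetch()
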
sky/jobs/state.py
CHANGED
@@ -25,6 +25,7 @@ from sqlalchemy.ext import declarative
 from sky import exceptions
 from sky import sky_logging
 from sky import skypilot_config
+from sky.adaptors import common as adaptors_common
 from sky.skylet import constants
 from sky.utils import common_utils
 from sky.utils import context_utils
@@ -34,6 +35,11 @@ from sky.utils.db import migration_utils
 if typing.TYPE_CHECKING:
     from sqlalchemy.engine import row
 
+    from sky.schemas.generated import managed_jobsv1_pb2
+else:
+    managed_jobsv1_pb2 = adaptors_common.LazyImport(
+        'sky.schemas.generated.managed_jobsv1_pb2')
+
 # Separate callback types for sync and async contexts
 SyncCallbackType = Callable[[str], None]
 AsyncCallbackType = Callable[[str], Awaitable[Any]]
@@ -448,6 +454,75 @@ class ManagedJobStatus(enum.Enum):
             cls.RECOVERING,
         ]
 
+    @classmethod
+    def from_protobuf(
+        cls, protobuf_value: 'managed_jobsv1_pb2.ManagedJobStatus'
+    ) -> Optional['ManagedJobStatus']:
+        """Convert protobuf ManagedJobStatus enum to Python enum value."""
+        protobuf_to_enum = {
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_UNSPECIFIED: None,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_PENDING: cls.PENDING,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_SUBMITTED:
+                cls.DEPRECATED_SUBMITTED,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_STARTING: cls.STARTING,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_RUNNING: cls.RUNNING,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_SUCCEEDED: cls.SUCCEEDED,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED: cls.FAILED,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_CONTROLLER:
+                cls.FAILED_CONTROLLER,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_SETUP:
+                cls.FAILED_SETUP,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_CANCELLED: cls.CANCELLED,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_RECOVERING: cls.RECOVERING,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_CANCELLING: cls.CANCELLING,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_PRECHECKS:
+                cls.FAILED_PRECHECKS,
+            managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_NO_RESOURCE:
+                cls.FAILED_NO_RESOURCE,
+        }
+
+        if protobuf_value not in protobuf_to_enum:
+            raise ValueError(
+                f'Unknown protobuf ManagedJobStatus value: {protobuf_value}')
+
+        return protobuf_to_enum[protobuf_value]
+
+    def to_protobuf(self) -> 'managed_jobsv1_pb2.ManagedJobStatus':
+        """Convert this Python enum value to protobuf enum value."""
+        enum_to_protobuf = {
+            ManagedJobStatus.PENDING:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_PENDING,
+            ManagedJobStatus.DEPRECATED_SUBMITTED:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_SUBMITTED,
+            ManagedJobStatus.STARTING:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_STARTING,
+            ManagedJobStatus.RUNNING:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_RUNNING,
+            ManagedJobStatus.SUCCEEDED:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_SUCCEEDED,
+            ManagedJobStatus.FAILED:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED,
+            ManagedJobStatus.FAILED_CONTROLLER:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_CONTROLLER,
+            ManagedJobStatus.FAILED_SETUP:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_SETUP,
+            ManagedJobStatus.CANCELLED:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_CANCELLED,
+            ManagedJobStatus.RECOVERING:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_RECOVERING,
+            ManagedJobStatus.CANCELLING:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_CANCELLING,
+            ManagedJobStatus.FAILED_PRECHECKS:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_PRECHECKS,
+            ManagedJobStatus.FAILED_NO_RESOURCE:
+                managed_jobsv1_pb2.MANAGED_JOB_STATUS_FAILED_NO_RESOURCE,
+        }
+
+        if self not in enum_to_protobuf:
+            raise ValueError(f'Unknown ManagedJobStatus value: {self}')
+
+        return enum_to_protobuf[self]
+
 
 _SPOT_STATUS_TO_COLOR = {
     ManagedJobStatus.PENDING: colorama.Fore.BLUE,
@@ -537,6 +612,60 @@ class ManagedJobScheduleState(enum.Enum):
     # The job is in a terminal state. (Not necessarily SUCCEEDED.)
     DONE = 'DONE'
 
+    @classmethod
+    def from_protobuf(
+        cls, protobuf_value: 'managed_jobsv1_pb2.ManagedJobScheduleState'
+    ) -> Optional['ManagedJobScheduleState']:
+        """Convert protobuf ManagedJobScheduleState enum to Python enum value.
+        """
+        protobuf_to_enum = {
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED: None,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_INVALID: cls.INVALID,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_INACTIVE:
+                cls.INACTIVE,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_WAITING: cls.WAITING,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING:
+                cls.ALIVE_WAITING,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_LAUNCHING:
+                cls.LAUNCHING,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF:
+                cls.ALIVE_BACKOFF,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE: cls.ALIVE,
+            managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_DONE: cls.DONE,
+        }
+
+        if protobuf_value not in protobuf_to_enum:
+            raise ValueError('Unknown protobuf ManagedJobScheduleState value: '
+                             f'{protobuf_value}')
+
+        return protobuf_to_enum[protobuf_value]
+
+    def to_protobuf(self) -> 'managed_jobsv1_pb2.ManagedJobScheduleState':
+        """Convert this Python enum value to protobuf enum value."""
+        enum_to_protobuf = {
+            ManagedJobScheduleState.INVALID:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_INVALID,
+            ManagedJobScheduleState.INACTIVE:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_INACTIVE,
+            ManagedJobScheduleState.WAITING:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_WAITING,
+            ManagedJobScheduleState.ALIVE_WAITING:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING,
+            ManagedJobScheduleState.LAUNCHING:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_LAUNCHING,
+            ManagedJobScheduleState.ALIVE_BACKOFF:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF,
+            ManagedJobScheduleState.ALIVE:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_ALIVE,
+            ManagedJobScheduleState.DONE:
+                managed_jobsv1_pb2.MANAGED_JOB_SCHEDULE_STATE_DONE,
+        }
+
+        if self not in enum_to_protobuf:
+            raise ValueError(f'Unknown ManagedJobScheduleState value: {self}')
+
+        return enum_to_protobuf[self]
+
 
 # === Status transition functions ===
 @_init_db
@@ -792,8 +921,14 @@ def set_local_log_file(job_id: int, task_id: Optional[int],
 # ======== utility functions ========
 @_init_db
 def get_nonterminal_job_ids_by_name(name: Optional[str],
+                                    user_hash: Optional[str] = None,
                                     all_users: bool = False) -> List[int]:
-    """Get non-terminal job ids by name."""
+    """Get non-terminal job ids by name.
+
+    If name is None:
+        1. if all_users is False, get for the given user_hash
+        2. otherwise, get for all users
+    """
     assert _SQLALCHEMY_ENGINE is not None
 
     with orm.Session(_SQLALCHEMY_ENGINE) as session:
@@ -810,8 +945,15 @@ def get_nonterminal_job_ids_by_name(name: Optional[str],
         ])
     ]
     if name is None and not all_users:
-… (2 removed lines; content not shown in the source view)
+        if user_hash is None:
+            # For backwards compatibility. With codegen, USER_ID_ENV_VAR
+            # was set to the correct value by the jobs controller, as
+            # part of ManagedJobCodeGen._build(). This is no longer the
+            # case for the Skylet gRPC server, which is why we need to
+            # pass it explicitly through the request body.
+            logger.debug('user_hash is None, using current user hash')
+            user_hash = common_utils.get_user_hash()
+        where_conditions.append(job_info_table.c.user_hash == user_hash)
    if name is not None:
         # We match the job name from `job_info` for the jobs submitted after
         # #1982, and from `spot` for the jobs submitted before #1982, whose
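Note: the from_protobuf/to_protobuf helpers added above are inverses on every defined state, with the UNSPECIFIED sentinel mapping to None rather than raising. A round-trip sanity check (a sketch; it assumes the generated managed_jobsv1_pb2 module is importable):

from sky.jobs import state as managed_job_state
from sky.schemas.generated import managed_jobsv1_pb2 as pb

# Python enum -> proto enum -> Python enum round-trips to the same member.
status = managed_job_state.ManagedJobStatus.RUNNING
proto_value = status.to_protobuf()
assert proto_value == pb.MANAGED_JOB_STATUS_RUNNING
assert managed_job_state.ManagedJobStatus.from_protobuf(proto_value) is status

# UNSPECIFIED deliberately decodes to None instead of raising.
assert managed_job_state.ManagedJobStatus.from_protobuf(
    pb.MANAGED_JOB_STATUS_UNSPECIFIED) is None
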
sky/jobs/utils.py
CHANGED
@@ -16,8 +16,8 @@ import textwrap
 import time
 import traceback
 import typing
-from typing import (Any, Deque, Dict, List, Literal, Optional, Set,
-                    Tuple, Union)
+from typing import (Any, Deque, Dict, Iterable, List, Literal, Optional, Set,
+                    TextIO, Tuple, Union)
 
 import colorama
 import filelock
@@ -33,6 +33,7 @@ from sky.backends import cloud_vm_ray_backend
 from sky.jobs import constants as managed_job_constants
 from sky.jobs import scheduler
 from sky.jobs import state as managed_job_state
+from sky.schemas.api import responses
 from sky.skylet import constants
 from sky.skylet import job_lib
 from sky.skylet import log_lib
@@ -51,16 +52,23 @@ from sky.utils import subprocess_utils
 from sky.utils import ux_utils
 
 if typing.TYPE_CHECKING:
+    from google.protobuf import descriptor
+    from google.protobuf import json_format
     import grpc
     import psutil
 
     import sky
     from sky import dag as dag_lib
     from sky.schemas.generated import jobsv1_pb2
+    from sky.schemas.generated import managed_jobsv1_pb2
 else:
+    json_format = adaptors_common.LazyImport('google.protobuf.json_format')
+    descriptor = adaptors_common.LazyImport('google.protobuf.descriptor')
    psutil = adaptors_common.LazyImport('psutil')
     grpc = adaptors_common.LazyImport('grpc')
     jobsv1_pb2 = adaptors_common.LazyImport('sky.schemas.generated.jobsv1_pb2')
+    managed_jobsv1_pb2 = adaptors_common.LazyImport(
+        'sky.schemas.generated.managed_jobsv1_pb2')
 
 logger = sky_logging.init_logger(__name__)
 
@@ -169,7 +177,7 @@ def _validate_consolidation_mode_config(
     if all_jobs:
         nonterminal_jobs = (
             managed_job_state.get_nonterminal_job_ids_by_name(
-                None, all_users=True))
+                None, None, all_users=True))
         if nonterminal_jobs:
             with ux_utils.print_exception_no_traceback():
                 raise exceptions.InconsistentConsolidationModeError(
@@ -698,14 +706,15 @@ def generate_managed_job_cluster_name(task_name: str, job_id: int) -> str:
 
 def cancel_jobs_by_id(job_ids: Optional[List[int]],
                       all_users: bool = False,
-                      current_workspace: Optional[str] = None) -> str:
+                      current_workspace: Optional[str] = None,
+                      user_hash: Optional[str] = None) -> str:
     """Cancel jobs by id.
 
     If job_ids is None, cancel all jobs.
     """
     if job_ids is None:
         job_ids = managed_job_state.get_nonterminal_job_ids_by_name(
-            None, all_users)
+            None, user_hash, all_users)
     job_ids = list(set(job_ids))
     if not job_ids:
         return 'No job to cancel.'
@@ -1241,6 +1250,24 @@ def dump_managed_job_queue(
     user_hashes: Optional[List[Optional[str]]] = None,
     statuses: Optional[List[str]] = None,
 ) -> str:
+    return message_utils.encode_payload(
+        get_managed_job_queue(skip_finished, accessible_workspaces, job_ids,
+                              workspace_match, name_match, pool_match, page,
+                              limit, user_hashes, statuses))
+
+
+def get_managed_job_queue(
+    skip_finished: bool = False,
+    accessible_workspaces: Optional[List[str]] = None,
+    job_ids: Optional[List[int]] = None,
+    workspace_match: Optional[str] = None,
+    name_match: Optional[str] = None,
+    pool_match: Optional[str] = None,
+    page: Optional[int] = None,
+    limit: Optional[int] = None,
+    user_hashes: Optional[List[Optional[str]]] = None,
+    statuses: Optional[List[str]] = None,
+) -> Dict[str, Any]:
     # Make sure to get all jobs - some logic below (e.g. high priority job
     # detection) requires a full view of the jobs table.
     jobs = managed_job_state.get_managed_jobs()
@@ -1371,12 +1398,12 @@ def dump_managed_job_queue(
         else:
             job['details'] = None
 
-    return message_utils.encode_payload({
+    return {
         'jobs': jobs,
         'total': total,
         'total_no_filter': total_no_filter,
         'status_counts': status_counts
-    })
+    }
 
 
 def filter_jobs(
@@ -1491,7 +1518,7 @@ def load_managed_job_queue(
 
 
 def _get_job_status_from_tasks(
-        job_tasks: List[Dict[str, Any]]
+        job_tasks: Union[List[responses.ManagedJobRecord], List[Dict[str, Any]]]
 ) -> Tuple[managed_job_state.ManagedJobStatus, int]:
     """Get the current task status and the current task id for a job."""
     managed_task_status = managed_job_state.ManagedJobStatus.SUCCEEDED
@@ -1824,6 +1851,58 @@ def format_job_table(
     return output
 
 
+def decode_managed_job_protos(
+    job_protos: Iterable['managed_jobsv1_pb2.ManagedJobInfo']
+) -> List[Dict[str, Any]]:
+    """Decode job protos to dicts. Similar to load_managed_job_queue."""
+    user_hash_to_user = global_user_state.get_users(
+        set(job.user_hash for job in job_protos if job.user_hash))
+
+    jobs = []
+    for job_proto in job_protos:
+        job_dict = _job_proto_to_dict(job_proto)
+        user_hash = job_dict.get('user_hash', None)
+        if user_hash is not None:
+            # Skip jobs that do not have user_hash info.
+            # TODO(cooperc): Remove check before 0.12.0.
+            user = user_hash_to_user.get(user_hash, None)
+            job_dict['user_name'] = user.name if user is not None else None
+        jobs.append(job_dict)
+    return jobs
+
+
+def _job_proto_to_dict(
+        job_proto: 'managed_jobsv1_pb2.ManagedJobInfo') -> Dict[str, Any]:
+    job_dict = json_format.MessageToDict(
+        job_proto,
+        always_print_fields_with_no_presence=True,
+        # Our API returns fields in snake_case.
+        preserving_proto_field_name=True,
+        use_integers_for_enums=True)
+    for field in job_proto.DESCRIPTOR.fields:
+        # Ensure optional fields are present with None values for
+        # backwards compatibility with older clients.
+        if field.has_presence and field.name not in job_dict:
+            job_dict[field.name] = None
+        # json_format.MessageToDict is meant for encoding to JSON,
+        # and Protobuf encodes int64 as decimal strings in JSON,
+        # so we need to convert them back to ints.
+        # https://protobuf.dev/programming-guides/json/#field-representation
+        if field.type == descriptor.FieldDescriptor.TYPE_INT64:
+            job_dict[field.name] = int(job_dict[field.name])
+    job_dict['status'] = managed_job_state.ManagedJobStatus.from_protobuf(
+        job_dict['status'])
+    # For backwards compatibility, convert schedule_state to a string,
+    # as we don't have the logic to handle it in our request
+    # encoder/decoder, unlike status.
+    schedule_state_enum = (
+        managed_job_state.ManagedJobScheduleState.from_protobuf(
+            job_dict['schedule_state']))
+    job_dict['schedule_state'] = (schedule_state_enum.value
+                                  if schedule_state_enum is not None else None)
+    return job_dict
+
+
 class ManagedJobCodeGen:
     """Code generator for managed job utility functions.
 
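Note: the int64 comment in _job_proto_to_dict refers to the proto3 JSON mapping, where 64-bit integers are emitted as decimal strings. A small illustration (assuming the generated module is importable; job_id is an int64 field of ManagedJobInfo per the proto below):

from google.protobuf import json_format
from sky.schemas.generated import managed_jobsv1_pb2 as pb

info = pb.ManagedJobInfo(job_id=42, job_name='train')
d = json_format.MessageToDict(info, preserving_proto_field_name=True)
assert d['job_id'] == '42'     # int64 arrives as a decimal string
assert int(d['job_id']) == 42  # hence the int() cast in _job_proto_to_dict
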
sky/provision/kubernetes/instance.py
CHANGED
@@ -847,7 +847,7 @@ def _create_namespaced_pod_with_retries(namespace: str, pod_spec: dict,
 def _wait_for_deployment_pod(context,
                              namespace,
                              deployment,
-                             timeout=
+                             timeout=300) -> List:
     label_selector = ','.join([
         f'{key}={value}'
         for key, value in deployment.spec.selector.match_labels.items()
sky/schemas/api/responses.py
CHANGED
@@ -5,8 +5,11 @@ from typing import Any, Dict, List, Optional
 
 import pydantic
 
+from sky import data
 from sky import models
+from sky.jobs import state as job_state
 from sky.server import common
+from sky.skylet import job_lib
 from sky.utils import status_lib
 
 
@@ -121,7 +124,77 @@ class StatusResponse(ResponseBaseModel):
     cluster_name_on_cloud: Optional[str] = None
 
 
+class ClusterJobRecord(ResponseBaseModel):
+    """Response for the cluster job queue endpoint."""
+    job_id: int
+    job_name: str
+    username: str
+    user_hash: str
+    submitted_at: float
+    # None if the job has not started yet.
+    start_at: Optional[float] = None
+    # None if the job has not ended yet.
+    end_at: Optional[float] = None
+    resources: str
+    status: job_lib.JobStatus
+    log_path: str
+    metadata: Dict[str, Any] = {}
+
+
 class UploadStatus(enum.Enum):
     """Status of the upload."""
     UPLOADING = 'uploading'
     COMPLETED = 'completed'
+
+
+class StorageRecord(ResponseBaseModel):
+    """Response for the storage list endpoint."""
+    name: str
+    launched_at: int
+    store: List[data.StoreType]
+    last_use: str
+    status: status_lib.StorageStatus
+
+
+# TODO (syang) figure out which fields are always present
+# and therefore can be non-optional.
+class ManagedJobRecord(ResponseBaseModel):
+    """A single managed job record."""
+    job_id: Optional[int] = None
+    task_id: Optional[int] = None
+    job_name: Optional[str] = None
+    task_name: Optional[str] = None
+    job_duration: Optional[float] = None
+    workspace: Optional[str] = None
+    status: Optional[job_state.ManagedJobStatus] = None
+    schedule_state: Optional[str] = None
+    resources: Optional[str] = None
+    cluster_resources: Optional[str] = None
+    cluster_resources_full: Optional[str] = None
+    cloud: Optional[str] = None
+    region: Optional[str] = None
+    zone: Optional[str] = None
+    infra: Optional[str] = None
+    recovery_count: Optional[int] = None
+    details: Optional[str] = None
+    failure_reason: Optional[str] = None
+    user_name: Optional[str] = None
+    user_hash: Optional[str] = None
+    submitted_at: Optional[float] = None
+    start_at: Optional[float] = None
+    end_at: Optional[float] = None
+    user_yaml: Optional[str] = None
+    entrypoint: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    controller_pid: Optional[int] = None
+    dag_yaml_path: Optional[str] = None
+    env_file_path: Optional[str] = None
+    last_recovered_at: Optional[float] = None
+    run_timestamp: Optional[str] = None
+    priority: Optional[int] = None
+    original_user_yaml_path: Optional[str] = None
+    pool: Optional[str] = None
+    pool_hash: Optional[str] = None
+    current_cluster_name: Optional[str] = None
+    job_id_on_pool_cluster: Optional[int] = None
+    accelerators: Optional[Dict[str, int]] = None
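Note: every ManagedJobRecord field is optional, so partially populated rows from older controllers still validate. A usage sketch (assuming ResponseBaseModel is a pydantic v2 BaseModel, as the pydantic import suggests):

from sky.schemas.api import responses

record = responses.ManagedJobRecord(job_id=7, job_name='sweep')
assert record.recovery_count is None           # unset fields default to None
payload = record.model_dump(exclude_none=True)  # compact dict for encoders
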
sky/schemas/generated/managed_jobsv1_pb2.py
ADDED
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: sky/schemas/generated/managed_jobsv1.proto
+# Protobuf Python Version: 5.26.1
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*sky/schemas/generated/managed_jobsv1.proto\x12\x0fmanaged_jobs.v1\"\x15\n\x06JobIds\x12\x0b\n\x03ids\x18\x01 \x03(\x03\"\x1c\n\nUserHashes\x12\x0e\n\x06hashes\x18\x01 \x03(\t\"\x1c\n\x08Statuses\x12\x10\n\x08statuses\x18\x01 \x03(\t\"\x13\n\x11GetVersionRequest\"0\n\x12GetVersionResponse\x12\x1a\n\x12\x63ontroller_version\x18\x01 \x01(\t\"\xec\x03\n\x12GetJobTableRequest\x12\x15\n\rskip_finished\x18\x01 \x01(\x08\x12\x1d\n\x15\x61\x63\x63\x65ssible_workspaces\x18\x02 \x03(\t\x12-\n\x07job_ids\x18\x03 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x88\x01\x01\x12\x1c\n\x0fworkspace_match\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x17\n\nname_match\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x17\n\npool_match\x18\x06 \x01(\tH\x03\x88\x01\x01\x12\x11\n\x04page\x18\x07 \x01(\x05H\x04\x88\x01\x01\x12\x12\n\x05limit\x18\x08 \x01(\x05H\x05\x88\x01\x01\x12\x35\n\x0buser_hashes\x18\t \x01(\x0b\x32\x1b.managed_jobs.v1.UserHashesH\x06\x88\x01\x01\x12\x30\n\x08statuses\x18\n \x01(\x0b\x32\x19.managed_jobs.v1.StatusesH\x07\x88\x01\x01\x12#\n\x1bshow_jobs_without_user_hash\x18\x0b \x01(\x08\x42\n\n\x08_job_idsB\x12\n\x10_workspace_matchB\r\n\x0b_name_matchB\r\n\x0b_pool_matchB\x07\n\x05_pageB\x08\n\x06_limitB\x0e\n\x0c_user_hashesB\x0b\n\t_statuses\"\xa9\x08\n\x0eManagedJobInfo\x12\x0e\n\x06job_id\x18\x01 \x01(\x03\x12\x0f\n\x07task_id\x18\x02 \x01(\x03\x12\x10\n\x08job_name\x18\x03 \x01(\t\x12\x11\n\ttask_name\x18\x04 \x01(\t\x12\x14\n\x0cjob_duration\x18\x05 \x01(\x01\x12\x16\n\tworkspace\x18\x06 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x06status\x18\x07 \x01(\x0e\x32!.managed_jobs.v1.ManagedJobStatus\x12@\n\x0eschedule_state\x18\x08 \x01(\x0e\x32(.managed_jobs.v1.ManagedJobScheduleState\x12\x11\n\tresources\x18\t \x01(\t\x12\x19\n\x11\x63luster_resources\x18\n \x01(\t\x12\x1e\n\x16\x63luster_resources_full\x18\x0b \x01(\t\x12\r\n\x05\x63loud\x18\x0c \x01(\t\x12\x0e\n\x06region\x18\r \x01(\t\x12\r\n\x05infra\x18\x0e \x01(\t\x12G\n\x0c\x61\x63\x63\x65lerators\x18\x0f \x03(\x0b\x32\x31.managed_jobs.v1.ManagedJobInfo.AcceleratorsEntry\x12\x16\n\x0erecovery_count\x18\x10 \x01(\x05\x12\x14\n\x07\x64\x65tails\x18\x11 \x01(\tH\x01\x88\x01\x01\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x12 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tuser_name\x18\x13 \x01(\tH\x03\x88\x01\x01\x12\x16\n\tuser_hash\x18\x14 \x01(\tH\x04\x88\x01\x01\x12\x19\n\x0csubmitted_at\x18\x15 \x01(\x01H\x05\x88\x01\x01\x12\x15\n\x08start_at\x18\x16 \x01(\x01H\x06\x88\x01\x01\x12\x13\n\x06\x65nd_at\x18\x17 \x01(\x01H\x07\x88\x01\x01\x12\x16\n\tuser_yaml\x18\x18 \x01(\tH\x08\x88\x01\x01\x12\x17\n\nentrypoint\x18\x19 \x01(\tH\t\x88\x01\x01\x12?\n\x08metadata\x18\x1a \x03(\x0b\x32-.managed_jobs.v1.ManagedJobInfo.MetadataEntry\x12\x11\n\x04pool\x18\x1b \x01(\tH\n\x88\x01\x01\x12\x16\n\tpool_hash\x18\x1c \x01(\tH\x0b\x88\x01\x01\x1a\x33\n\x11\x41\x63\x63\x65leratorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_workspaceB\n\n\x08_detailsB\x11\n\x0f_failure_reasonB\x0c\n\n_user_nameB\x0c\n\n_user_hashB\x0f\n\r_submitted_atB\x0b\n\t_start_atB\t\n\x07_end_atB\x0c\n\n_user_yamlB\r\n\x0b_entrypointB\x07\n\x05_poolB\x0c\n\n_pool_hash\"\xf0\x01\n\x13GetJobTableResponse\x12-\n\x04jobs\x18\x01 \x03(\x0b\x32\x1f.managed_jobs.v1.ManagedJobInfo\x12\r\n\x05total\x18\x02 \x01(\x05\x12\x17\n\x0ftotal_no_filter\x18\x03 \x01(\x05\x12M\n\rstatus_counts\x18\x04 \x03(\x0b\x32\x36.managed_jobs.v1.GetJobTableResponse.StatusCountsEntry\x1a\x33\n\x11StatusCountsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"?\n\x19GetAllJobIdsByNameRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_job_name\"-\n\x1aGetAllJobIdsByNameResponse\x12\x0f\n\x07job_ids\x18\x01 \x03(\x03\"\xd7\x01\n\x11\x43\x61ncelJobsRequest\x12\x19\n\x11\x63urrent_workspace\x18\x01 \x01(\t\x12\x16\n\tuser_hash\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\tall_users\x18\x03 \x01(\x08H\x00\x12*\n\x07job_ids\x18\x04 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x12\x12\n\x08job_name\x18\x05 \x01(\tH\x00\x12\x13\n\tpool_name\x18\x06 \x01(\tH\x00\x42\x17\n\x15\x63\x61ncellation_criteriaB\x0c\n\n_user_hash\"%\n\x12\x43\x61ncelJobsResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\"\x97\x01\n\x11StreamLogsRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x06job_id\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x0e\n\x06\x66ollow\x18\x03 \x01(\x08\x12\x12\n\ncontroller\x18\x04 \x01(\x08\x12\x11\n\x04tail\x18\x05 \x01(\x05H\x02\x88\x01\x01\x42\x0b\n\t_job_nameB\t\n\x07_job_idB\x07\n\x05_tail\"L\n\x12StreamLogsResponse\x12\x10\n\x08log_line\x18\x01 \x01(\t\x12\x16\n\texit_code\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_exit_code*\x85\x04\n\x10ManagedJobStatus\x12\"\n\x1eMANAGED_JOB_STATUS_UNSPECIFIED\x10\x00\x12\x1e\n\x1aMANAGED_JOB_STATUS_PENDING\x10\x01\x12 \n\x1cMANAGED_JOB_STATUS_SUBMITTED\x10\x02\x12\x1f\n\x1bMANAGED_JOB_STATUS_STARTING\x10\x03\x12\x1e\n\x1aMANAGED_JOB_STATUS_RUNNING\x10\x04\x12!\n\x1dMANAGED_JOB_STATUS_RECOVERING\x10\x05\x12!\n\x1dMANAGED_JOB_STATUS_CANCELLING\x10\x06\x12 \n\x1cMANAGED_JOB_STATUS_SUCCEEDED\x10\x07\x12 \n\x1cMANAGED_JOB_STATUS_CANCELLED\x10\x08\x12\x1d\n\x19MANAGED_JOB_STATUS_FAILED\x10\t\x12#\n\x1fMANAGED_JOB_STATUS_FAILED_SETUP\x10\n\x12\'\n#MANAGED_JOB_STATUS_FAILED_PRECHECKS\x10\x0b\x12)\n%MANAGED_JOB_STATUS_FAILED_NO_RESOURCE\x10\x0c\x12(\n$MANAGED_JOB_STATUS_FAILED_CONTROLLER\x10\r*\x8f\x03\n\x17ManagedJobScheduleState\x12*\n&MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED\x10\x00\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_INVALID\x10\x01\x12\'\n#MANAGED_JOB_SCHEDULE_STATE_INACTIVE\x10\x02\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_WAITING\x10\x03\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING\x10\x04\x12(\n$MANAGED_JOB_SCHEDULE_STATE_LAUNCHING\x10\x05\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF\x10\x06\x12$\n MANAGED_JOB_SCHEDULE_STATE_ALIVE\x10\x07\x12#\n\x1fMANAGED_JOB_SCHEDULE_STATE_DONE\x10\x08\x32\xe4\x03\n\x12ManagedJobsService\x12U\n\nGetVersion\x12\".managed_jobs.v1.GetVersionRequest\x1a#.managed_jobs.v1.GetVersionResponse\x12X\n\x0bGetJobTable\x12#.managed_jobs.v1.GetJobTableRequest\x1a$.managed_jobs.v1.GetJobTableResponse\x12m\n\x12GetAllJobIdsByName\x12*.managed_jobs.v1.GetAllJobIdsByNameRequest\x1a+.managed_jobs.v1.GetAllJobIdsByNameResponse\x12U\n\nCancelJobs\x12\".managed_jobs.v1.CancelJobsRequest\x1a#.managed_jobs.v1.CancelJobsResponse\x12W\n\nStreamLogs\x12\".managed_jobs.v1.StreamLogsRequest\x1a#.managed_jobs.v1.StreamLogsResponse0\x01\x62\x06proto3')
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sky.schemas.generated.managed_jobsv1_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+  DESCRIPTOR._loaded_options = None
+  _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._loaded_options = None
+  _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_options = b'8\001'
+  _globals['_MANAGEDJOBINFO_METADATAENTRY']._loaded_options = None
+  _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_options = b'8\001'
+  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._loaded_options = None
+  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_options = b'8\001'
+  _globals['_MANAGEDJOBSTATUS']._serialized_start=2625
+  _globals['_MANAGEDJOBSTATUS']._serialized_end=3142
+  _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_start=3145
+  _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_end=3544
+  _globals['_JOBIDS']._serialized_start=63
+  _globals['_JOBIDS']._serialized_end=84
+  _globals['_USERHASHES']._serialized_start=86
+  _globals['_USERHASHES']._serialized_end=114
+  _globals['_STATUSES']._serialized_start=116
+  _globals['_STATUSES']._serialized_end=144
+  _globals['_GETVERSIONREQUEST']._serialized_start=146
+  _globals['_GETVERSIONREQUEST']._serialized_end=165
+  _globals['_GETVERSIONRESPONSE']._serialized_start=167
+  _globals['_GETVERSIONRESPONSE']._serialized_end=215
+  _globals['_GETJOBTABLEREQUEST']._serialized_start=218
+  _globals['_GETJOBTABLEREQUEST']._serialized_end=710
+  _globals['_MANAGEDJOBINFO']._serialized_start=713
+  _globals['_MANAGEDJOBINFO']._serialized_end=1778
+  _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_start=1512
+  _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_end=1563
+  _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_start=1565
+  _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_end=1612
+  _globals['_GETJOBTABLERESPONSE']._serialized_start=1781
+  _globals['_GETJOBTABLERESPONSE']._serialized_end=2021
+  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_start=1970
+  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_end=2021
+  _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_start=2023
+  _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_end=2086
+  _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_start=2088
+  _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_end=2133
+  _globals['_CANCELJOBSREQUEST']._serialized_start=2136
+  _globals['_CANCELJOBSREQUEST']._serialized_end=2351
+  _globals['_CANCELJOBSRESPONSE']._serialized_start=2353
+  _globals['_CANCELJOBSRESPONSE']._serialized_end=2390
+  _globals['_STREAMLOGSREQUEST']._serialized_start=2393
+  _globals['_STREAMLOGSREQUEST']._serialized_end=2544
+  _globals['_STREAMLOGSRESPONSE']._serialized_start=2546
+  _globals['_STREAMLOGSRESPONSE']._serialized_end=2622
+  _globals['_MANAGEDJOBSSERVICE']._serialized_start=3547
+  _globals['_MANAGEDJOBSSERVICE']._serialized_end=4031
+# @@protoc_insertion_point(module_scope)