skypilot-nightly 1.0.0.dev20251029__py3-none-any.whl → 1.0.0.dev20251101__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic; see the package's registry page for more details.

Files changed (68):
  1. sky/__init__.py +2 -2
  2. sky/adaptors/aws.py +25 -7
  3. sky/client/cli/command.py +47 -23
  4. sky/clouds/aws.py +59 -11
  5. sky/dashboard/out/404.html +1 -1
  6. sky/dashboard/out/_next/static/chunks/2755.d6dc6d530fed0b61.js +26 -0
  7. sky/dashboard/out/_next/static/chunks/{webpack-485984ca04e021d0.js → webpack-e38d5319cd10a3a0.js} +1 -1
  8. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  9. sky/dashboard/out/clusters/[cluster].html +1 -1
  10. sky/dashboard/out/clusters.html +1 -1
  11. sky/dashboard/out/config.html +1 -1
  12. sky/dashboard/out/index.html +1 -1
  13. sky/dashboard/out/infra/[context].html +1 -1
  14. sky/dashboard/out/infra.html +1 -1
  15. sky/dashboard/out/jobs/[job].html +1 -1
  16. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  17. sky/dashboard/out/jobs.html +1 -1
  18. sky/dashboard/out/users.html +1 -1
  19. sky/dashboard/out/volumes.html +1 -1
  20. sky/dashboard/out/workspace/new.html +1 -1
  21. sky/dashboard/out/workspaces/[name].html +1 -1
  22. sky/dashboard/out/workspaces.html +1 -1
  23. sky/data/mounting_utils.py +32 -2
  24. sky/jobs/constants.py +2 -0
  25. sky/jobs/controller.py +62 -67
  26. sky/jobs/file_content_utils.py +80 -0
  27. sky/jobs/log_gc.py +201 -0
  28. sky/jobs/scheduler.py +15 -2
  29. sky/jobs/server/core.py +85 -13
  30. sky/jobs/server/server.py +12 -11
  31. sky/jobs/server/utils.py +28 -10
  32. sky/jobs/state.py +216 -40
  33. sky/jobs/utils.py +60 -22
  34. sky/metrics/utils.py +18 -0
  35. sky/schemas/api/responses.py +1 -0
  36. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  37. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  38. sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
  39. sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
  40. sky/serve/server/server.py +8 -7
  41. sky/server/common.py +21 -15
  42. sky/server/constants.py +1 -1
  43. sky/server/daemons.py +23 -17
  44. sky/server/requests/executor.py +7 -3
  45. sky/server/requests/request_names.py +80 -0
  46. sky/server/server.py +103 -35
  47. sky/skylet/constants.py +6 -1
  48. sky/skylet/events.py +7 -0
  49. sky/skylet/services.py +18 -7
  50. sky/ssh_node_pools/server.py +5 -4
  51. sky/task.py +4 -42
  52. sky/templates/kubernetes-ray.yml.j2 +1 -1
  53. sky/templates/websocket_proxy.py +140 -12
  54. sky/users/permission.py +4 -1
  55. sky/utils/db/migration_utils.py +1 -1
  56. sky/utils/resource_checker.py +4 -1
  57. sky/utils/schemas.py +23 -4
  58. sky/volumes/server/server.py +4 -3
  59. sky/workspaces/server.py +7 -6
  60. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/METADATA +36 -36
  61. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/RECORD +67 -62
  62. sky/dashboard/out/_next/static/chunks/2755.a239c652bf8684dd.js +0 -26
  63. /sky/dashboard/out/_next/static/{DabuSAKsc_y0wyJxpTIdQ → 8ixeA0NVQJN8HUdijid8b}/_buildManifest.js +0 -0
  64. /sky/dashboard/out/_next/static/{DabuSAKsc_y0wyJxpTIdQ → 8ixeA0NVQJN8HUdijid8b}/_ssgManifest.js +0 -0
  65. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/WHEEL +0 -0
  66. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/entry_points.txt +0 -0
  67. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/licenses/LICENSE +0 -0
  68. {skypilot_nightly-1.0.0.dev20251029.dist-info → skypilot_nightly-1.0.0.dev20251101.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,38 @@
1
+ """Adding columns for the GC time of task logs and controller logs.
2
+
3
+ Revision ID: 005
4
+ Revises: 004
5
+ Create Date: 2025-10-20
6
+
7
+ """
8
+ # pylint: disable=invalid-name
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+ from sky.utils.db import db_utils
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = '005'
18
+ down_revision: Union[str, Sequence[str], None] = '004'
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
+ def upgrade():
24
+ """Add columns for logs gc."""
25
+ with op.get_context().autocommit_block():
26
+ db_utils.add_column_to_table_alembic('job_info',
27
+ 'controller_logs_cleaned_at',
28
+ sa.Float(),
29
+ server_default=None)
30
+ db_utils.add_column_to_table_alembic('spot',
31
+ 'logs_cleaned_at',
32
+ sa.Float(),
33
+ server_default=None)
34
+
35
+
36
+ def downgrade():
37
+ """Remove columns for logs gc."""
38
+ pass
@@ -14,7 +14,7 @@ _sym_db = _symbol_database.Default()
14
14
 
15
15
 
16
16
 
17
- DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*sky/schemas/generated/managed_jobsv1.proto\x12\x0fmanaged_jobs.v1\"\x15\n\x06JobIds\x12\x0b\n\x03ids\x18\x01 \x03(\x03\"\x1c\n\nUserHashes\x12\x0e\n\x06hashes\x18\x01 \x03(\t\"\x1c\n\x08Statuses\x12\x10\n\x08statuses\x18\x01 \x03(\t\"\x13\n\x11GetVersionRequest\"0\n\x12GetVersionResponse\x12\x1a\n\x12\x63ontroller_version\x18\x01 \x01(\t\"\xec\x03\n\x12GetJobTableRequest\x12\x15\n\rskip_finished\x18\x01 \x01(\x08\x12\x1d\n\x15\x61\x63\x63\x65ssible_workspaces\x18\x02 \x03(\t\x12-\n\x07job_ids\x18\x03 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x88\x01\x01\x12\x1c\n\x0fworkspace_match\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x17\n\nname_match\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x17\n\npool_match\x18\x06 \x01(\tH\x03\x88\x01\x01\x12\x11\n\x04page\x18\x07 \x01(\x05H\x04\x88\x01\x01\x12\x12\n\x05limit\x18\x08 \x01(\x05H\x05\x88\x01\x01\x12\x35\n\x0buser_hashes\x18\t \x01(\x0b\x32\x1b.managed_jobs.v1.UserHashesH\x06\x88\x01\x01\x12\x30\n\x08statuses\x18\n \x01(\x0b\x32\x19.managed_jobs.v1.StatusesH\x07\x88\x01\x01\x12#\n\x1bshow_jobs_without_user_hash\x18\x0b \x01(\x08\x42\n\n\x08_job_idsB\x12\n\x10_workspace_matchB\r\n\x0b_name_matchB\r\n\x0b_pool_matchB\x07\n\x05_pageB\x08\n\x06_limitB\x0e\n\x0c_user_hashesB\x0b\n\t_statuses\"\xa9\x08\n\x0eManagedJobInfo\x12\x0e\n\x06job_id\x18\x01 \x01(\x03\x12\x0f\n\x07task_id\x18\x02 \x01(\x03\x12\x10\n\x08job_name\x18\x03 \x01(\t\x12\x11\n\ttask_name\x18\x04 \x01(\t\x12\x14\n\x0cjob_duration\x18\x05 \x01(\x01\x12\x16\n\tworkspace\x18\x06 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x06status\x18\x07 \x01(\x0e\x32!.managed_jobs.v1.ManagedJobStatus\x12@\n\x0eschedule_state\x18\x08 \x01(\x0e\x32(.managed_jobs.v1.ManagedJobScheduleState\x12\x11\n\tresources\x18\t \x01(\t\x12\x19\n\x11\x63luster_resources\x18\n \x01(\t\x12\x1e\n\x16\x63luster_resources_full\x18\x0b \x01(\t\x12\r\n\x05\x63loud\x18\x0c \x01(\t\x12\x0e\n\x06region\x18\r \x01(\t\x12\r\n\x05infra\x18\x0e 
\x01(\t\x12G\n\x0c\x61\x63\x63\x65lerators\x18\x0f \x03(\x0b\x32\x31.managed_jobs.v1.ManagedJobInfo.AcceleratorsEntry\x12\x16\n\x0erecovery_count\x18\x10 \x01(\x05\x12\x14\n\x07\x64\x65tails\x18\x11 \x01(\tH\x01\x88\x01\x01\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x12 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tuser_name\x18\x13 \x01(\tH\x03\x88\x01\x01\x12\x16\n\tuser_hash\x18\x14 \x01(\tH\x04\x88\x01\x01\x12\x19\n\x0csubmitted_at\x18\x15 \x01(\x01H\x05\x88\x01\x01\x12\x15\n\x08start_at\x18\x16 \x01(\x01H\x06\x88\x01\x01\x12\x13\n\x06\x65nd_at\x18\x17 \x01(\x01H\x07\x88\x01\x01\x12\x16\n\tuser_yaml\x18\x18 \x01(\tH\x08\x88\x01\x01\x12\x17\n\nentrypoint\x18\x19 \x01(\tH\t\x88\x01\x01\x12?\n\x08metadata\x18\x1a \x03(\x0b\x32-.managed_jobs.v1.ManagedJobInfo.MetadataEntry\x12\x11\n\x04pool\x18\x1b \x01(\tH\n\x88\x01\x01\x12\x16\n\tpool_hash\x18\x1c \x01(\tH\x0b\x88\x01\x01\x1a\x33\n\x11\x41\x63\x63\x65leratorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_workspaceB\n\n\x08_detailsB\x11\n\x0f_failure_reasonB\x0c\n\n_user_nameB\x0c\n\n_user_hashB\x0f\n\r_submitted_atB\x0b\n\t_start_atB\t\n\x07_end_atB\x0c\n\n_user_yamlB\r\n\x0b_entrypointB\x07\n\x05_poolB\x0c\n\n_pool_hash\"\xf0\x01\n\x13GetJobTableResponse\x12-\n\x04jobs\x18\x01 \x03(\x0b\x32\x1f.managed_jobs.v1.ManagedJobInfo\x12\r\n\x05total\x18\x02 \x01(\x05\x12\x17\n\x0ftotal_no_filter\x18\x03 \x01(\x05\x12M\n\rstatus_counts\x18\x04 \x03(\x0b\x32\x36.managed_jobs.v1.GetJobTableResponse.StatusCountsEntry\x1a\x33\n\x11StatusCountsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"?\n\x19GetAllJobIdsByNameRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_job_name\"-\n\x1aGetAllJobIdsByNameResponse\x12\x0f\n\x07job_ids\x18\x01 \x03(\x03\"\xd7\x01\n\x11\x43\x61ncelJobsRequest\x12\x19\n\x11\x63urrent_workspace\x18\x01 
\x01(\t\x12\x16\n\tuser_hash\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\tall_users\x18\x03 \x01(\x08H\x00\x12*\n\x07job_ids\x18\x04 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x12\x12\n\x08job_name\x18\x05 \x01(\tH\x00\x12\x13\n\tpool_name\x18\x06 \x01(\tH\x00\x42\x17\n\x15\x63\x61ncellation_criteriaB\x0c\n\n_user_hash\"%\n\x12\x43\x61ncelJobsResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\"\x97\x01\n\x11StreamLogsRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x06job_id\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x0e\n\x06\x66ollow\x18\x03 \x01(\x08\x12\x12\n\ncontroller\x18\x04 \x01(\x08\x12\x11\n\x04tail\x18\x05 \x01(\x05H\x02\x88\x01\x01\x42\x0b\n\t_job_nameB\t\n\x07_job_idB\x07\n\x05_tail\"L\n\x12StreamLogsResponse\x12\x10\n\x08log_line\x18\x01 \x01(\t\x12\x16\n\texit_code\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_exit_code*\x85\x04\n\x10ManagedJobStatus\x12\"\n\x1eMANAGED_JOB_STATUS_UNSPECIFIED\x10\x00\x12\x1e\n\x1aMANAGED_JOB_STATUS_PENDING\x10\x01\x12 \n\x1cMANAGED_JOB_STATUS_SUBMITTED\x10\x02\x12\x1f\n\x1bMANAGED_JOB_STATUS_STARTING\x10\x03\x12\x1e\n\x1aMANAGED_JOB_STATUS_RUNNING\x10\x04\x12!\n\x1dMANAGED_JOB_STATUS_RECOVERING\x10\x05\x12!\n\x1dMANAGED_JOB_STATUS_CANCELLING\x10\x06\x12 \n\x1cMANAGED_JOB_STATUS_SUCCEEDED\x10\x07\x12 \n\x1cMANAGED_JOB_STATUS_CANCELLED\x10\x08\x12\x1d\n\x19MANAGED_JOB_STATUS_FAILED\x10\t\x12#\n\x1fMANAGED_JOB_STATUS_FAILED_SETUP\x10\n\x12\'\n#MANAGED_JOB_STATUS_FAILED_PRECHECKS\x10\x0b\x12)\n%MANAGED_JOB_STATUS_FAILED_NO_RESOURCE\x10\x0c\x12(\n$MANAGED_JOB_STATUS_FAILED_CONTROLLER\x10\r*\x8f\x03\n\x17ManagedJobScheduleState\x12*\n&MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED\x10\x00\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_INVALID\x10\x01\x12\'\n#MANAGED_JOB_SCHEDULE_STATE_INACTIVE\x10\x02\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_WAITING\x10\x03\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING\x10\x04\x12(\n$MANAGED_JOB_SCHEDULE_STATE_LAUNCHING\x10\x05\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF\x10\x06\x12$\n 
MANAGED_JOB_SCHEDULE_STATE_ALIVE\x10\x07\x12#\n\x1fMANAGED_JOB_SCHEDULE_STATE_DONE\x10\x08\x32\xe4\x03\n\x12ManagedJobsService\x12U\n\nGetVersion\x12\".managed_jobs.v1.GetVersionRequest\x1a#.managed_jobs.v1.GetVersionResponse\x12X\n\x0bGetJobTable\x12#.managed_jobs.v1.GetJobTableRequest\x1a$.managed_jobs.v1.GetJobTableResponse\x12m\n\x12GetAllJobIdsByName\x12*.managed_jobs.v1.GetAllJobIdsByNameRequest\x1a+.managed_jobs.v1.GetAllJobIdsByNameResponse\x12U\n\nCancelJobs\x12\".managed_jobs.v1.CancelJobsRequest\x1a#.managed_jobs.v1.CancelJobsResponse\x12W\n\nStreamLogs\x12\".managed_jobs.v1.StreamLogsRequest\x1a#.managed_jobs.v1.StreamLogsResponse0\x01\x62\x06proto3')
17
+ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n*sky/schemas/generated/managed_jobsv1.proto\x12\x0fmanaged_jobs.v1\"\x15\n\x06JobIds\x12\x0b\n\x03ids\x18\x01 \x03(\x03\"\x1c\n\nUserHashes\x12\x0e\n\x06hashes\x18\x01 \x03(\t\"\x1c\n\x08Statuses\x12\x10\n\x08statuses\x18\x01 \x03(\t\"\x18\n\x06\x46ields\x12\x0e\n\x06\x66ields\x18\x01 \x03(\t\" \n\nWorkspaces\x12\x12\n\nworkspaces\x18\x01 \x03(\t\"\x13\n\x11GetVersionRequest\"0\n\x12GetVersionResponse\x12\x1a\n\x12\x63ontroller_version\x18\x01 \x01(\t\"\xe1\x04\n\x12GetJobTableRequest\x12\x15\n\rskip_finished\x18\x01 \x01(\x08\x12?\n\x15\x61\x63\x63\x65ssible_workspaces\x18\x02 \x01(\x0b\x32\x1b.managed_jobs.v1.WorkspacesH\x00\x88\x01\x01\x12-\n\x07job_ids\x18\x03 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x01\x88\x01\x01\x12\x1c\n\x0fworkspace_match\x18\x04 \x01(\tH\x02\x88\x01\x01\x12\x17\n\nname_match\x18\x05 \x01(\tH\x03\x88\x01\x01\x12\x17\n\npool_match\x18\x06 \x01(\tH\x04\x88\x01\x01\x12\x11\n\x04page\x18\x07 \x01(\x05H\x05\x88\x01\x01\x12\x12\n\x05limit\x18\x08 \x01(\x05H\x06\x88\x01\x01\x12\x35\n\x0buser_hashes\x18\t \x01(\x0b\x32\x1b.managed_jobs.v1.UserHashesH\x07\x88\x01\x01\x12\x30\n\x08statuses\x18\n \x01(\x0b\x32\x19.managed_jobs.v1.StatusesH\x08\x88\x01\x01\x12#\n\x1bshow_jobs_without_user_hash\x18\x0b \x01(\x08\x12,\n\x06\x66ields\x18\x0c \x01(\x0b\x32\x17.managed_jobs.v1.FieldsH\t\x88\x01\x01\x42\x18\n\x16_accessible_workspacesB\n\n\x08_job_idsB\x12\n\x10_workspace_matchB\r\n\x0b_name_matchB\r\n\x0b_pool_matchB\x07\n\x05_pageB\x08\n\x06_limitB\x0e\n\x0c_user_hashesB\x0b\n\t_statusesB\t\n\x07_fields\"\xcb\x08\n\x0eManagedJobInfo\x12\x0e\n\x06job_id\x18\x01 \x01(\x03\x12\x0f\n\x07task_id\x18\x02 \x01(\x03\x12\x10\n\x08job_name\x18\x03 \x01(\t\x12\x11\n\ttask_name\x18\x04 \x01(\t\x12\x14\n\x0cjob_duration\x18\x05 \x01(\x01\x12\x16\n\tworkspace\x18\x06 \x01(\tH\x00\x88\x01\x01\x12\x31\n\x06status\x18\x07 \x01(\x0e\x32!.managed_jobs.v1.ManagedJobStatus\x12@\n\x0eschedule_state\x18\x08 
\x01(\x0e\x32(.managed_jobs.v1.ManagedJobScheduleState\x12\x11\n\tresources\x18\t \x01(\t\x12\x19\n\x11\x63luster_resources\x18\n \x01(\t\x12\x1e\n\x16\x63luster_resources_full\x18\x0b \x01(\t\x12\r\n\x05\x63loud\x18\x0c \x01(\t\x12\x0e\n\x06region\x18\r \x01(\t\x12\r\n\x05infra\x18\x0e \x01(\t\x12G\n\x0c\x61\x63\x63\x65lerators\x18\x0f \x03(\x0b\x32\x31.managed_jobs.v1.ManagedJobInfo.AcceleratorsEntry\x12\x16\n\x0erecovery_count\x18\x10 \x01(\x05\x12\x14\n\x07\x64\x65tails\x18\x11 \x01(\tH\x01\x88\x01\x01\x12\x1b\n\x0e\x66\x61ilure_reason\x18\x12 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tuser_name\x18\x13 \x01(\tH\x03\x88\x01\x01\x12\x16\n\tuser_hash\x18\x14 \x01(\tH\x04\x88\x01\x01\x12\x19\n\x0csubmitted_at\x18\x15 \x01(\x01H\x05\x88\x01\x01\x12\x15\n\x08start_at\x18\x16 \x01(\x01H\x06\x88\x01\x01\x12\x13\n\x06\x65nd_at\x18\x17 \x01(\x01H\x07\x88\x01\x01\x12\x16\n\tuser_yaml\x18\x18 \x01(\tH\x08\x88\x01\x01\x12\x17\n\nentrypoint\x18\x19 \x01(\tH\t\x88\x01\x01\x12?\n\x08metadata\x18\x1a \x03(\x0b\x32-.managed_jobs.v1.ManagedJobInfo.MetadataEntry\x12\x11\n\x04pool\x18\x1b \x01(\tH\n\x88\x01\x01\x12\x16\n\tpool_hash\x18\x1c \x01(\tH\x0b\x88\x01\x01\x12\x14\n\x07_job_id\x18\x1d \x01(\x03H\x0c\x88\x01\x01\x1a\x33\n\x11\x41\x63\x63\x65leratorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x02:\x02\x38\x01\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x0c\n\n_workspaceB\n\n\x08_detailsB\x11\n\x0f_failure_reasonB\x0c\n\n_user_nameB\x0c\n\n_user_hashB\x0f\n\r_submitted_atB\x0b\n\t_start_atB\t\n\x07_end_atB\x0c\n\n_user_yamlB\r\n\x0b_entrypointB\x07\n\x05_poolB\x0c\n\n_pool_hashB\n\n\x08X_job_id\"\xf0\x01\n\x13GetJobTableResponse\x12-\n\x04jobs\x18\x01 \x03(\x0b\x32\x1f.managed_jobs.v1.ManagedJobInfo\x12\r\n\x05total\x18\x02 \x01(\x05\x12\x17\n\x0ftotal_no_filter\x18\x03 \x01(\x05\x12M\n\rstatus_counts\x18\x04 
\x03(\x0b\x32\x36.managed_jobs.v1.GetJobTableResponse.StatusCountsEntry\x1a\x33\n\x11StatusCountsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x05:\x02\x38\x01\"?\n\x19GetAllJobIdsByNameRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x0b\n\t_job_name\"-\n\x1aGetAllJobIdsByNameResponse\x12\x0f\n\x07job_ids\x18\x01 \x03(\x03\"\xd7\x01\n\x11\x43\x61ncelJobsRequest\x12\x19\n\x11\x63urrent_workspace\x18\x01 \x01(\t\x12\x16\n\tuser_hash\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x13\n\tall_users\x18\x03 \x01(\x08H\x00\x12*\n\x07job_ids\x18\x04 \x01(\x0b\x32\x17.managed_jobs.v1.JobIdsH\x00\x12\x12\n\x08job_name\x18\x05 \x01(\tH\x00\x12\x13\n\tpool_name\x18\x06 \x01(\tH\x00\x42\x17\n\x15\x63\x61ncellation_criteriaB\x0c\n\n_user_hash\"%\n\x12\x43\x61ncelJobsResponse\x12\x0f\n\x07message\x18\x01 \x01(\t\"\x97\x01\n\x11StreamLogsRequest\x12\x15\n\x08job_name\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x13\n\x06job_id\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x0e\n\x06\x66ollow\x18\x03 \x01(\x08\x12\x12\n\ncontroller\x18\x04 \x01(\x08\x12\x11\n\x04tail\x18\x05 \x01(\x05H\x02\x88\x01\x01\x42\x0b\n\t_job_nameB\t\n\x07_job_idB\x07\n\x05_tail\"L\n\x12StreamLogsResponse\x12\x10\n\x08log_line\x18\x01 \x01(\t\x12\x16\n\texit_code\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x0c\n\n_exit_code*\x85\x04\n\x10ManagedJobStatus\x12\"\n\x1eMANAGED_JOB_STATUS_UNSPECIFIED\x10\x00\x12\x1e\n\x1aMANAGED_JOB_STATUS_PENDING\x10\x01\x12 \n\x1cMANAGED_JOB_STATUS_SUBMITTED\x10\x02\x12\x1f\n\x1bMANAGED_JOB_STATUS_STARTING\x10\x03\x12\x1e\n\x1aMANAGED_JOB_STATUS_RUNNING\x10\x04\x12!\n\x1dMANAGED_JOB_STATUS_RECOVERING\x10\x05\x12!\n\x1dMANAGED_JOB_STATUS_CANCELLING\x10\x06\x12 \n\x1cMANAGED_JOB_STATUS_SUCCEEDED\x10\x07\x12 
\n\x1cMANAGED_JOB_STATUS_CANCELLED\x10\x08\x12\x1d\n\x19MANAGED_JOB_STATUS_FAILED\x10\t\x12#\n\x1fMANAGED_JOB_STATUS_FAILED_SETUP\x10\n\x12\'\n#MANAGED_JOB_STATUS_FAILED_PRECHECKS\x10\x0b\x12)\n%MANAGED_JOB_STATUS_FAILED_NO_RESOURCE\x10\x0c\x12(\n$MANAGED_JOB_STATUS_FAILED_CONTROLLER\x10\r*\x8f\x03\n\x17ManagedJobScheduleState\x12*\n&MANAGED_JOB_SCHEDULE_STATE_UNSPECIFIED\x10\x00\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_INVALID\x10\x01\x12\'\n#MANAGED_JOB_SCHEDULE_STATE_INACTIVE\x10\x02\x12&\n\"MANAGED_JOB_SCHEDULE_STATE_WAITING\x10\x03\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_WAITING\x10\x04\x12(\n$MANAGED_JOB_SCHEDULE_STATE_LAUNCHING\x10\x05\x12,\n(MANAGED_JOB_SCHEDULE_STATE_ALIVE_BACKOFF\x10\x06\x12$\n MANAGED_JOB_SCHEDULE_STATE_ALIVE\x10\x07\x12#\n\x1fMANAGED_JOB_SCHEDULE_STATE_DONE\x10\x08\x32\xe4\x03\n\x12ManagedJobsService\x12U\n\nGetVersion\x12\".managed_jobs.v1.GetVersionRequest\x1a#.managed_jobs.v1.GetVersionResponse\x12X\n\x0bGetJobTable\x12#.managed_jobs.v1.GetJobTableRequest\x1a$.managed_jobs.v1.GetJobTableResponse\x12m\n\x12GetAllJobIdsByName\x12*.managed_jobs.v1.GetAllJobIdsByNameRequest\x1a+.managed_jobs.v1.GetAllJobIdsByNameResponse\x12U\n\nCancelJobs\x12\".managed_jobs.v1.CancelJobsRequest\x1a#.managed_jobs.v1.CancelJobsResponse\x12W\n\nStreamLogs\x12\".managed_jobs.v1.StreamLogsRequest\x1a#.managed_jobs.v1.StreamLogsResponse0\x01\x62\x06proto3')
18
18
 
19
19
  _globals = globals()
20
20
  _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -27,44 +27,48 @@ if not _descriptor._USE_C_DESCRIPTORS:
27
27
  _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_options = b'8\001'
28
28
  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._loaded_options = None
29
29
  _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_options = b'8\001'
30
- _globals['_MANAGEDJOBSTATUS']._serialized_start=2625
31
- _globals['_MANAGEDJOBSTATUS']._serialized_end=3142
32
- _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_start=3145
33
- _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_end=3544
30
+ _globals['_MANAGEDJOBSTATUS']._serialized_start=2836
31
+ _globals['_MANAGEDJOBSTATUS']._serialized_end=3353
32
+ _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_start=3356
33
+ _globals['_MANAGEDJOBSCHEDULESTATE']._serialized_end=3755
34
34
  _globals['_JOBIDS']._serialized_start=63
35
35
  _globals['_JOBIDS']._serialized_end=84
36
36
  _globals['_USERHASHES']._serialized_start=86
37
37
  _globals['_USERHASHES']._serialized_end=114
38
38
  _globals['_STATUSES']._serialized_start=116
39
39
  _globals['_STATUSES']._serialized_end=144
40
- _globals['_GETVERSIONREQUEST']._serialized_start=146
41
- _globals['_GETVERSIONREQUEST']._serialized_end=165
42
- _globals['_GETVERSIONRESPONSE']._serialized_start=167
43
- _globals['_GETVERSIONRESPONSE']._serialized_end=215
44
- _globals['_GETJOBTABLEREQUEST']._serialized_start=218
45
- _globals['_GETJOBTABLEREQUEST']._serialized_end=710
46
- _globals['_MANAGEDJOBINFO']._serialized_start=713
47
- _globals['_MANAGEDJOBINFO']._serialized_end=1778
48
- _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_start=1512
49
- _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_end=1563
50
- _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_start=1565
51
- _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_end=1612
52
- _globals['_GETJOBTABLERESPONSE']._serialized_start=1781
53
- _globals['_GETJOBTABLERESPONSE']._serialized_end=2021
54
- _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_start=1970
55
- _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_end=2021
56
- _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_start=2023
57
- _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_end=2086
58
- _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_start=2088
59
- _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_end=2133
60
- _globals['_CANCELJOBSREQUEST']._serialized_start=2136
61
- _globals['_CANCELJOBSREQUEST']._serialized_end=2351
62
- _globals['_CANCELJOBSRESPONSE']._serialized_start=2353
63
- _globals['_CANCELJOBSRESPONSE']._serialized_end=2390
64
- _globals['_STREAMLOGSREQUEST']._serialized_start=2393
65
- _globals['_STREAMLOGSREQUEST']._serialized_end=2544
66
- _globals['_STREAMLOGSRESPONSE']._serialized_start=2546
67
- _globals['_STREAMLOGSRESPONSE']._serialized_end=2622
68
- _globals['_MANAGEDJOBSSERVICE']._serialized_start=3547
69
- _globals['_MANAGEDJOBSSERVICE']._serialized_end=4031
40
+ _globals['_FIELDS']._serialized_start=146
41
+ _globals['_FIELDS']._serialized_end=170
42
+ _globals['_WORKSPACES']._serialized_start=172
43
+ _globals['_WORKSPACES']._serialized_end=204
44
+ _globals['_GETVERSIONREQUEST']._serialized_start=206
45
+ _globals['_GETVERSIONREQUEST']._serialized_end=225
46
+ _globals['_GETVERSIONRESPONSE']._serialized_start=227
47
+ _globals['_GETVERSIONRESPONSE']._serialized_end=275
48
+ _globals['_GETJOBTABLEREQUEST']._serialized_start=278
49
+ _globals['_GETJOBTABLEREQUEST']._serialized_end=887
50
+ _globals['_MANAGEDJOBINFO']._serialized_start=890
51
+ _globals['_MANAGEDJOBINFO']._serialized_end=1989
52
+ _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_start=1711
53
+ _globals['_MANAGEDJOBINFO_ACCELERATORSENTRY']._serialized_end=1762
54
+ _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_start=1764
55
+ _globals['_MANAGEDJOBINFO_METADATAENTRY']._serialized_end=1811
56
+ _globals['_GETJOBTABLERESPONSE']._serialized_start=1992
57
+ _globals['_GETJOBTABLERESPONSE']._serialized_end=2232
58
+ _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_start=2181
59
+ _globals['_GETJOBTABLERESPONSE_STATUSCOUNTSENTRY']._serialized_end=2232
60
+ _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_start=2234
61
+ _globals['_GETALLJOBIDSBYNAMEREQUEST']._serialized_end=2297
62
+ _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_start=2299
63
+ _globals['_GETALLJOBIDSBYNAMERESPONSE']._serialized_end=2344
64
+ _globals['_CANCELJOBSREQUEST']._serialized_start=2347
65
+ _globals['_CANCELJOBSREQUEST']._serialized_end=2562
66
+ _globals['_CANCELJOBSRESPONSE']._serialized_start=2564
67
+ _globals['_CANCELJOBSRESPONSE']._serialized_end=2601
68
+ _globals['_STREAMLOGSREQUEST']._serialized_start=2604
69
+ _globals['_STREAMLOGSREQUEST']._serialized_end=2755
70
+ _globals['_STREAMLOGSRESPONSE']._serialized_start=2757
71
+ _globals['_STREAMLOGSRESPONSE']._serialized_end=2833
72
+ _globals['_MANAGEDJOBSSERVICE']._serialized_start=3758
73
+ _globals['_MANAGEDJOBSSERVICE']._serialized_end=4242
70
74
  # @@protoc_insertion_point(module_scope)
@@ -76,6 +76,18 @@ class Statuses(_message.Message):
76
76
  statuses: _containers.RepeatedScalarFieldContainer[str]
77
77
  def __init__(self, statuses: _Optional[_Iterable[str]] = ...) -> None: ...
78
78
 
79
+ class Fields(_message.Message):
80
+ __slots__ = ("fields",)
81
+ FIELDS_FIELD_NUMBER: _ClassVar[int]
82
+ fields: _containers.RepeatedScalarFieldContainer[str]
83
+ def __init__(self, fields: _Optional[_Iterable[str]] = ...) -> None: ...
84
+
85
+ class Workspaces(_message.Message):
86
+ __slots__ = ("workspaces",)
87
+ WORKSPACES_FIELD_NUMBER: _ClassVar[int]
88
+ workspaces: _containers.RepeatedScalarFieldContainer[str]
89
+ def __init__(self, workspaces: _Optional[_Iterable[str]] = ...) -> None: ...
90
+
79
91
  class GetVersionRequest(_message.Message):
80
92
  __slots__ = ()
81
93
  def __init__(self) -> None: ...
@@ -87,7 +99,7 @@ class GetVersionResponse(_message.Message):
87
99
  def __init__(self, controller_version: _Optional[str] = ...) -> None: ...
88
100
 
89
101
  class GetJobTableRequest(_message.Message):
90
- __slots__ = ("skip_finished", "accessible_workspaces", "job_ids", "workspace_match", "name_match", "pool_match", "page", "limit", "user_hashes", "statuses", "show_jobs_without_user_hash")
102
+ __slots__ = ("skip_finished", "accessible_workspaces", "job_ids", "workspace_match", "name_match", "pool_match", "page", "limit", "user_hashes", "statuses", "show_jobs_without_user_hash", "fields")
91
103
  SKIP_FINISHED_FIELD_NUMBER: _ClassVar[int]
92
104
  ACCESSIBLE_WORKSPACES_FIELD_NUMBER: _ClassVar[int]
93
105
  JOB_IDS_FIELD_NUMBER: _ClassVar[int]
@@ -99,8 +111,9 @@ class GetJobTableRequest(_message.Message):
99
111
  USER_HASHES_FIELD_NUMBER: _ClassVar[int]
100
112
  STATUSES_FIELD_NUMBER: _ClassVar[int]
101
113
  SHOW_JOBS_WITHOUT_USER_HASH_FIELD_NUMBER: _ClassVar[int]
114
+ FIELDS_FIELD_NUMBER: _ClassVar[int]
102
115
  skip_finished: bool
103
- accessible_workspaces: _containers.RepeatedScalarFieldContainer[str]
116
+ accessible_workspaces: Workspaces
104
117
  job_ids: JobIds
105
118
  workspace_match: str
106
119
  name_match: str
@@ -110,10 +123,11 @@ class GetJobTableRequest(_message.Message):
110
123
  user_hashes: UserHashes
111
124
  statuses: Statuses
112
125
  show_jobs_without_user_hash: bool
113
- def __init__(self, skip_finished: bool = ..., accessible_workspaces: _Optional[_Iterable[str]] = ..., job_ids: _Optional[_Union[JobIds, _Mapping]] = ..., workspace_match: _Optional[str] = ..., name_match: _Optional[str] = ..., pool_match: _Optional[str] = ..., page: _Optional[int] = ..., limit: _Optional[int] = ..., user_hashes: _Optional[_Union[UserHashes, _Mapping]] = ..., statuses: _Optional[_Union[Statuses, _Mapping]] = ..., show_jobs_without_user_hash: bool = ...) -> None: ...
126
+ fields: Fields
127
+ def __init__(self, skip_finished: bool = ..., accessible_workspaces: _Optional[_Union[Workspaces, _Mapping]] = ..., job_ids: _Optional[_Union[JobIds, _Mapping]] = ..., workspace_match: _Optional[str] = ..., name_match: _Optional[str] = ..., pool_match: _Optional[str] = ..., page: _Optional[int] = ..., limit: _Optional[int] = ..., user_hashes: _Optional[_Union[UserHashes, _Mapping]] = ..., statuses: _Optional[_Union[Statuses, _Mapping]] = ..., show_jobs_without_user_hash: bool = ..., fields: _Optional[_Union[Fields, _Mapping]] = ...) -> None: ...
114
128
 
115
129
  class ManagedJobInfo(_message.Message):
116
- __slots__ = ("job_id", "task_id", "job_name", "task_name", "job_duration", "workspace", "status", "schedule_state", "resources", "cluster_resources", "cluster_resources_full", "cloud", "region", "infra", "accelerators", "recovery_count", "details", "failure_reason", "user_name", "user_hash", "submitted_at", "start_at", "end_at", "user_yaml", "entrypoint", "metadata", "pool", "pool_hash")
130
+ __slots__ = ("job_id", "task_id", "job_name", "task_name", "job_duration", "workspace", "status", "schedule_state", "resources", "cluster_resources", "cluster_resources_full", "cloud", "region", "infra", "accelerators", "recovery_count", "details", "failure_reason", "user_name", "user_hash", "submitted_at", "start_at", "end_at", "user_yaml", "entrypoint", "metadata", "pool", "pool_hash", "_job_id")
117
131
  class AcceleratorsEntry(_message.Message):
118
132
  __slots__ = ("key", "value")
119
133
  KEY_FIELD_NUMBER: _ClassVar[int]
@@ -156,6 +170,7 @@ class ManagedJobInfo(_message.Message):
156
170
  METADATA_FIELD_NUMBER: _ClassVar[int]
157
171
  POOL_FIELD_NUMBER: _ClassVar[int]
158
172
  POOL_HASH_FIELD_NUMBER: _ClassVar[int]
173
+ _JOB_ID_FIELD_NUMBER: _ClassVar[int]
159
174
  job_id: int
160
175
  task_id: int
161
176
  job_name: str
@@ -184,7 +199,8 @@ class ManagedJobInfo(_message.Message):
184
199
  metadata: _containers.ScalarMap[str, str]
185
200
  pool: str
186
201
  pool_hash: str
187
- def __init__(self, job_id: _Optional[int] = ..., task_id: _Optional[int] = ..., job_name: _Optional[str] = ..., task_name: _Optional[str] = ..., job_duration: _Optional[float] = ..., workspace: _Optional[str] = ..., status: _Optional[_Union[ManagedJobStatus, str]] = ..., schedule_state: _Optional[_Union[ManagedJobScheduleState, str]] = ..., resources: _Optional[str] = ..., cluster_resources: _Optional[str] = ..., cluster_resources_full: _Optional[str] = ..., cloud: _Optional[str] = ..., region: _Optional[str] = ..., infra: _Optional[str] = ..., accelerators: _Optional[_Mapping[str, float]] = ..., recovery_count: _Optional[int] = ..., details: _Optional[str] = ..., failure_reason: _Optional[str] = ..., user_name: _Optional[str] = ..., user_hash: _Optional[str] = ..., submitted_at: _Optional[float] = ..., start_at: _Optional[float] = ..., end_at: _Optional[float] = ..., user_yaml: _Optional[str] = ..., entrypoint: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., pool: _Optional[str] = ..., pool_hash: _Optional[str] = ...) -> None: ...
202
+ _job_id: int
203
+ def __init__(self, job_id: _Optional[int] = ..., task_id: _Optional[int] = ..., job_name: _Optional[str] = ..., task_name: _Optional[str] = ..., job_duration: _Optional[float] = ..., workspace: _Optional[str] = ..., status: _Optional[_Union[ManagedJobStatus, str]] = ..., schedule_state: _Optional[_Union[ManagedJobScheduleState, str]] = ..., resources: _Optional[str] = ..., cluster_resources: _Optional[str] = ..., cluster_resources_full: _Optional[str] = ..., cloud: _Optional[str] = ..., region: _Optional[str] = ..., infra: _Optional[str] = ..., accelerators: _Optional[_Mapping[str, float]] = ..., recovery_count: _Optional[int] = ..., details: _Optional[str] = ..., failure_reason: _Optional[str] = ..., user_name: _Optional[str] = ..., user_hash: _Optional[str] = ..., submitted_at: _Optional[float] = ..., start_at: _Optional[float] = ..., end_at: _Optional[float] = ..., user_yaml: _Optional[str] = ..., entrypoint: _Optional[str] = ..., metadata: _Optional[_Mapping[str, str]] = ..., pool: _Optional[str] = ..., pool_hash: _Optional[str] = ..., _job_id: _Optional[int] = ...) -> None: ...
188
204
 
189
205
  class GetJobTableResponse(_message.Message):
190
206
  __slots__ = ("jobs", "total", "total_no_filter", "status_counts")
@@ -10,6 +10,7 @@ from sky.server import common as server_common
10
10
  from sky.server import stream_utils
11
11
  from sky.server.requests import executor
12
12
  from sky.server.requests import payloads
13
+ from sky.server.requests import request_names
13
14
  from sky.server.requests import requests as api_requests
14
15
  from sky.skylet import constants
15
16
  from sky.utils import common
@@ -25,7 +26,7 @@ async def up(
25
26
  ) -> None:
26
27
  await executor.schedule_request_async(
27
28
  request_id=request.state.request_id,
28
- request_name='serve.up',
29
+ request_name=request_names.RequestName.SERVE_UP,
29
30
  request_body=up_body,
30
31
  func=core.up,
31
32
  schedule_type=api_requests.ScheduleType.LONG,
@@ -40,7 +41,7 @@ async def update(
40
41
  ) -> None:
41
42
  await executor.schedule_request_async(
42
43
  request_id=request.state.request_id,
43
- request_name='serve.update',
44
+ request_name=request_names.RequestName.SERVE_UPDATE,
44
45
  request_body=update_body,
45
46
  func=core.update,
46
47
  schedule_type=api_requests.ScheduleType.SHORT,
@@ -55,7 +56,7 @@ async def down(
55
56
  ) -> None:
56
57
  await executor.schedule_request_async(
57
58
  request_id=request.state.request_id,
58
- request_name='serve.down',
59
+ request_name=request_names.RequestName.SERVE_DOWN,
59
60
  request_body=down_body,
60
61
  func=core.down,
61
62
  schedule_type=api_requests.ScheduleType.SHORT,
@@ -70,7 +71,7 @@ async def terminate_replica(
70
71
  ) -> None:
71
72
  await executor.schedule_request_async(
72
73
  request_id=request.state.request_id,
73
- request_name='serve.terminate_replica',
74
+ request_name=request_names.RequestName.SERVE_TERMINATE_REPLICA,
74
75
  request_body=terminate_replica_body,
75
76
  func=core.terminate_replica,
76
77
  schedule_type=api_requests.ScheduleType.SHORT,
@@ -85,7 +86,7 @@ async def status(
85
86
  ) -> None:
86
87
  await executor.schedule_request_async(
87
88
  request_id=request.state.request_id,
88
- request_name='serve.status',
89
+ request_name=request_names.RequestName.SERVE_STATUS,
89
90
  request_body=status_body,
90
91
  func=core.status,
91
92
  schedule_type=api_requests.ScheduleType.SHORT,
@@ -101,7 +102,7 @@ async def tail_logs(
101
102
  executor.check_request_thread_executor_available()
102
103
  request_task = await executor.prepare_request_async(
103
104
  request_id=request.state.request_id,
104
- request_name='serve.logs',
105
+ request_name=request_names.RequestName.SERVE_LOGS,
105
106
  request_body=log_body,
106
107
  func=core.tail_logs,
107
108
  schedule_type=api_requests.ScheduleType.SHORT,
@@ -134,7 +135,7 @@ async def download_logs(
134
135
  download_logs_body.local_dir = str(logs_dir_on_api_server)
135
136
  await executor.schedule_request_async(
136
137
  request_id=request.state.request_id,
137
- request_name='serve.sync_down_logs',
138
+ request_name=request_names.RequestName.SERVE_SYNC_DOWN_LOGS,
138
139
  request_body=download_logs_body,
139
140
  func=core.sync_down_logs,
140
141
  schedule_type=api_requests.ScheduleType.SHORT,
sky/server/common.py CHANGED
@@ -539,19 +539,27 @@ def _start_api_server(deploy: bool = False,
539
539
  'is not a local URL')
540
540
 
541
541
  # Check available memory before starting the server.
542
- avail_mem_size_gb: float = common_utils.get_mem_size_gb()
543
- # pylint: disable=import-outside-toplevel
544
- import sky.jobs.utils as job_utils
545
- max_memory = (server_constants.MIN_AVAIL_MEM_GB_CONSOLIDATION_MODE
546
- if job_utils.is_consolidation_mode(on_api_restart=True)
547
- else server_constants.MIN_AVAIL_MEM_GB)
548
- if avail_mem_size_gb <= max_memory:
549
- logger.warning(
550
- f'{colorama.Fore.YELLOW}Your SkyPilot API server machine only '
551
- f'has {avail_mem_size_gb:.1f}GB memory available. '
552
- f'At least {max_memory}GB is recommended to support higher '
553
- 'load with better performance.'
554
- f'{colorama.Style.RESET_ALL}')
542
+ # Skip this warning if postgres is used, as:
543
+ # 1) that's almost certainly a remote API server;
544
+ # 2) the actual consolidation mode config is stashed in the database,
545
+ # and the value of `job_utils.is_consolidation_mode` will not be
546
+ # the actual value in the db, but only None as in this case, the
547
+ # whole YAML config is really just `db: <URI>`.
548
+ if skypilot_config.get_nested(('db',), None) is None:
549
+ avail_mem_size_gb: float = common_utils.get_mem_size_gb()
550
+ # pylint: disable=import-outside-toplevel
551
+ import sky.jobs.utils as job_utils
552
+ max_memory = (server_constants.MIN_AVAIL_MEM_GB_CONSOLIDATION_MODE
553
+ if job_utils.is_consolidation_mode(
554
+ on_api_restart=True) else
555
+ server_constants.MIN_AVAIL_MEM_GB)
556
+ if avail_mem_size_gb <= max_memory:
557
+ logger.warning(
558
+ f'{colorama.Fore.YELLOW}Your SkyPilot API server machine '
559
+ f'only has {avail_mem_size_gb:.1f}GB memory available. '
560
+ f'At least {max_memory}GB is recommended to support higher '
561
+ 'load with better performance.'
562
+ f'{colorama.Style.RESET_ALL}')
555
563
 
556
564
  args = [sys.executable, *API_SERVER_CMD.split()]
557
565
  if deploy:
@@ -560,8 +568,6 @@ def _start_api_server(deploy: bool = False,
560
568
  args += [f'--host={host}']
561
569
  if metrics_port is not None:
562
570
  args += [f'--metrics-port={metrics_port}']
563
- # Use this argument to disable the internal signal file check.
564
- args += ['--start-with-python']
565
571
 
566
572
  if foreground:
567
573
  # Replaces the current process with the API server
sky/server/constants.py CHANGED
@@ -10,7 +10,7 @@ from sky.skylet import constants
10
10
  # based on version info is needed.
11
11
  # For more details and code guidelines, refer to:
12
12
  # https://docs.skypilot.co/en/latest/developers/CONTRIBUTING.html#backward-compatibility-guidelines
13
- API_VERSION = 21
13
+ API_VERSION = 22
14
14
 
15
15
  # The minimum peer API version that the code should still work with.
16
16
  # Notes (dev):
sky/server/daemons.py CHANGED
@@ -7,6 +7,7 @@ from typing import Callable
7
7
  from sky import sky_logging
8
8
  from sky import skypilot_config
9
9
  from sky.server import constants as server_constants
10
+ from sky.server.requests import request_names
10
11
  from sky.utils import annotations
11
12
  from sky.utils import common_utils
12
13
  from sky.utils import env_options
@@ -26,7 +27,7 @@ class InternalRequestDaemon:
26
27
  """Internal daemon that runs an event in the background."""
27
28
 
28
29
  id: str
29
- name: str
30
+ name: request_names.RequestName
30
31
  event_fn: Callable[[], None]
31
32
  default_log_level: str = 'INFO'
32
33
  should_skip: Callable[[], bool] = _default_should_skip
@@ -195,26 +196,31 @@ INTERNAL_REQUEST_DAEMONS = [
195
196
  # This status refresh daemon can cause the autostopp'ed/autodown'ed cluster
196
197
  # set to updated status automatically, without showing users the hint of
197
198
  # cluster being stopped or down when `sky status -r` is called.
198
- InternalRequestDaemon(id='skypilot-status-refresh-daemon',
199
- name='status-refresh',
200
- event_fn=refresh_cluster_status_event,
201
- default_log_level='DEBUG'),
199
+ InternalRequestDaemon(
200
+ id='skypilot-status-refresh-daemon',
201
+ name=request_names.RequestName.REQUEST_DAEMON_STATUS_REFRESH,
202
+ event_fn=refresh_cluster_status_event,
203
+ default_log_level='DEBUG'),
202
204
  # Volume status refresh daemon to update the volume status periodically.
203
- InternalRequestDaemon(id='skypilot-volume-status-refresh-daemon',
204
- name='volume-refresh',
205
- event_fn=refresh_volume_status_event),
205
+ InternalRequestDaemon(
206
+ id='skypilot-volume-status-refresh-daemon',
207
+ name=request_names.RequestName.REQUEST_DAEMON_VOLUME_REFRESH,
208
+ event_fn=refresh_volume_status_event),
206
209
  InternalRequestDaemon(id='managed-job-status-refresh-daemon',
207
- name='managed-job-status-refresh',
210
+ name=request_names.RequestName.
211
+ REQUEST_DAEMON_MANAGED_JOB_STATUS_REFRESH,
208
212
  event_fn=managed_job_status_refresh_event,
209
213
  should_skip=should_skip_managed_job_status_refresh),
210
- InternalRequestDaemon(id='sky-serve-status-refresh-daemon',
211
- name='sky-serve-status-refresh',
212
- event_fn=sky_serve_status_refresh_event,
213
- should_skip=should_skip_sky_serve_status_refresh),
214
- InternalRequestDaemon(id='pool-status-refresh-daemon',
215
- name='pool-status-refresh',
216
- event_fn=pool_status_refresh_event,
217
- should_skip=should_skip_pool_status_refresh),
214
+ InternalRequestDaemon(
215
+ id='sky-serve-status-refresh-daemon',
216
+ name=request_names.RequestName.REQUEST_DAEMON_SKY_SERVE_STATUS_REFRESH,
217
+ event_fn=sky_serve_status_refresh_event,
218
+ should_skip=should_skip_sky_serve_status_refresh),
219
+ InternalRequestDaemon(
220
+ id='pool-status-refresh-daemon',
221
+ name=request_names.RequestName.REQUEST_DAEMON_POOL_STATUS_REFRESH,
222
+ event_fn=pool_status_refresh_event,
223
+ should_skip=should_skip_pool_status_refresh),
218
224
  ]
219
225
 
220
226
 
@@ -47,6 +47,7 @@ from sky.server import metrics as metrics_lib
47
47
  from sky.server.requests import payloads
48
48
  from sky.server.requests import preconditions
49
49
  from sky.server.requests import process
50
+ from sky.server.requests import request_names
50
51
  from sky.server.requests import requests as api_requests
51
52
  from sky.server.requests import threads
52
53
  from sky.server.requests.queues import local_queue
@@ -395,7 +396,10 @@ def _request_execution_wrapper(request_id: str,
395
396
  rss_begin = proc.memory_info().rss
396
397
  db_utils.set_max_connections(num_db_connections_per_worker)
397
398
  # Handle the SIGTERM signal to abort the request processing gracefully.
398
- signal.signal(signal.SIGTERM, _sigterm_handler)
399
+ # Only set up signal handlers in the main thread, as signal.signal() raises
400
+ # ValueError if called from a non-main thread (e.g., in tests).
401
+ if threading.current_thread() is threading.main_thread():
402
+ signal.signal(signal.SIGTERM, _sigterm_handler)
399
403
 
400
404
  logger.info(f'Running request {request_id} with pid {pid}')
401
405
 
@@ -688,7 +692,7 @@ async def _execute_request_coroutine(request: api_requests.Request):
688
692
 
689
693
  async def prepare_request_async(
690
694
  request_id: str,
691
- request_name: str,
695
+ request_name: request_names.RequestName,
692
696
  request_body: payloads.RequestBody,
693
697
  func: Callable[P, Any],
694
698
  request_cluster_name: Optional[str] = None,
@@ -721,7 +725,7 @@ async def prepare_request_async(
721
725
 
722
726
 
723
727
  async def schedule_request_async(request_id: str,
724
- request_name: str,
728
+ request_name: request_names.RequestName,
725
729
  request_body: payloads.RequestBody,
726
730
  func: Callable[P, Any],
727
731
  request_cluster_name: Optional[str] = None,
@@ -0,0 +1,80 @@
1
+ """Request names."""
2
+ import enum
3
+
4
+
5
+ class RequestName(str, enum.Enum):
6
+ """Enum of all the request names."""
7
+ # General requests
8
+ CHECK = 'check'
9
+ ENABLED_CLOUDS = 'enabled_clouds'
10
+ REALTIME_KUBERNETES_GPU_AVAILABILITY = (
11
+ 'realtime_kubernetes_gpu_availability')
12
+ KUBERNETES_NODE_INFO = 'kubernetes_node_info'
13
+ STATUS_KUBERNETES = 'status_kubernetes'
14
+ LIST_ACCELERATORS = 'list_accelerators'
15
+ LIST_ACCELERATOR_COUNTS = 'list_accelerator_counts'
16
+ OPTIMIZE = 'optimize'
17
+ # Cluster requests
18
+ CLUSTER_LAUNCH = 'launch'
19
+ CLUSTER_EXEC = 'exec'
20
+ CLUSTER_STOP = 'stop'
21
+ CLUSTER_STATUS = 'status'
22
+ CLUSTER_ENDPOINTS = 'endpoints'
23
+ CLUSTER_DOWN = 'down'
24
+ CLUSTER_START = 'start'
25
+ CLUSTER_AUTOSTOP = 'autostop'
26
+ CLUSTER_QUEUE = 'queue'
27
+ CLUSTER_JOB_STATUS = 'job_status'
28
+ CLUSTER_JOB_CANCEL = 'cancel'
29
+ CLUSTER_JOB_LOGS = 'logs'
30
+ CLUSTER_JOB_DOWNLOAD_LOGS = 'download_logs'
31
+ CLUSTER_COST_REPORT = 'cost_report'
32
+ # Storage requests
33
+ STORAGE_LS = 'storage_ls'
34
+ STORAGE_DELETE = 'storage_delete'
35
+ # Local requests
36
+ LOCAL_UP = 'local_up'
37
+ LOCAL_DOWN = 'local_down'
38
+ # API requests
39
+ API_CANCEL = 'api_cancel'
40
+ ALL_CONTEXTS = 'all_contexts'
41
+ # Managed jobs requests
42
+ JOBS_LAUNCH = 'jobs.launch'
43
+ JOBS_QUEUE = 'jobs.queue'
44
+ JOBS_QUEUE_V2 = 'jobs.queue_v2'
45
+ JOBS_CANCEL = 'jobs.cancel'
46
+ JOBS_LOGS = 'jobs.logs'
47
+ JOBS_DOWNLOAD_LOGS = 'jobs.download_logs'
48
+ JOBS_POOL_APPLY = 'jobs.pool_apply'
49
+ JOBS_POOL_DOWN = 'jobs.pool_down'
50
+ JOBS_POOL_STATUS = 'jobs.pool_status'
51
+ JOBS_POOL_LOGS = 'jobs.pool_logs'
52
+ JOBS_POOL_SYNC_DOWN_LOGS = 'jobs.pool_sync_down_logs'
53
+ # Serve requests
54
+ SERVE_UP = 'serve.up'
55
+ SERVE_UPDATE = 'serve.update'
56
+ SERVE_DOWN = 'serve.down'
57
+ SERVE_TERMINATE_REPLICA = 'serve.terminate_replica'
58
+ SERVE_STATUS = 'serve.status'
59
+ SERVE_LOGS = 'serve.logs'
60
+ SERVE_SYNC_DOWN_LOGS = 'serve.sync_down_logs'
61
+ # Volumes requests
62
+ VOLUME_LIST = 'volume_list'
63
+ VOLUME_DELETE = 'volume_delete'
64
+ VOLUME_APPLY = 'volume_apply'
65
+ # Workspaces requests
66
+ WORKSPACES_GET = 'workspaces.get'
67
+ WORKSPACES_UPDATE = 'workspaces.update'
68
+ WORKSPACES_CREATE = 'workspaces.create'
69
+ WORKSPACES_DELETE = 'workspaces.delete'
70
+ WORKSPACES_GET_CONFIG = 'workspaces.get_config'
71
+ WORKSPACES_UPDATE_CONFIG = 'workspaces.update_config'
72
+ # SSH node pools requests
73
+ SSH_NODE_POOLS_UP = 'ssh_node_pools.up'
74
+ SSH_NODE_POOLS_DOWN = 'ssh_node_pools.down'
75
+ # Internal request daemons
76
+ REQUEST_DAEMON_STATUS_REFRESH = 'status-refresh'
77
+ REQUEST_DAEMON_VOLUME_REFRESH = 'volume-refresh'
78
+ REQUEST_DAEMON_MANAGED_JOB_STATUS_REFRESH = 'managed-job-status-refresh'
79
+ REQUEST_DAEMON_SKY_SERVE_STATUS_REFRESH = 'sky-serve-status-refresh'
80
+ REQUEST_DAEMON_POOL_STATUS_REFRESH = 'pool-status-refresh'