skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +10 -2
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +588 -184
- sky/backends/cloud_vm_ray_backend.py +1088 -904
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +73 -43
- sky/client/cli/command.py +675 -412
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +132 -63
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +6 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +40 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +33 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +174 -165
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +162 -29
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +37 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +97 -26
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +382 -418
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +30 -9
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +5 -3
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +381 -145
- sky/server/requests/payloads.py +71 -18
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +507 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +85 -20
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +420 -172
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +62 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +111 -26
- sky/skylet/events.py +64 -10
- sky/skylet/job_lib.py +141 -104
- sky/skylet/log_lib.py +233 -5
- sky/skylet/log_lib.pyi +40 -2
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +22 -1
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +196 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/permission.py +60 -43
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +22 -0
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +88 -27
- sky/utils/command_runner.pyi +36 -3
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +37 -4
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +107 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""Rpc Utilities for SkyServe"""
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
from sky import backends
|
|
7
|
+
from sky.adaptors import common as adaptors_common
|
|
8
|
+
from sky.backends import backend_utils
|
|
9
|
+
from sky.serve import serve_utils
|
|
10
|
+
|
|
11
|
+
if typing.TYPE_CHECKING:
|
|
12
|
+
from sky.schemas.generated import servev1_pb2
|
|
13
|
+
else:
|
|
14
|
+
servev1_pb2 = adaptors_common.LazyImport(
|
|
15
|
+
'sky.schemas.generated.servev1_pb2')
|
|
16
|
+
|
|
17
|
+
# ======================= gRPC Converters for Sky Serve =======================
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GetServiceStatusRequestConverter:
|
|
21
|
+
"""Converter for GetServiceStatusRequest"""
|
|
22
|
+
|
|
23
|
+
@classmethod
|
|
24
|
+
def to_proto(cls, service_names: Optional[List[str]],
|
|
25
|
+
pool: bool) -> 'servev1_pb2.GetServiceStatusRequest':
|
|
26
|
+
request = servev1_pb2.GetServiceStatusRequest()
|
|
27
|
+
request.pool = pool
|
|
28
|
+
if service_names is not None:
|
|
29
|
+
request.service_names.names.extend(service_names)
|
|
30
|
+
return request
|
|
31
|
+
|
|
32
|
+
@classmethod
|
|
33
|
+
def from_proto(
|
|
34
|
+
cls, proto: 'servev1_pb2.GetServiceStatusRequest'
|
|
35
|
+
) -> Tuple[Optional[List[str]], bool]:
|
|
36
|
+
pool = proto.pool
|
|
37
|
+
if proto.HasField('service_names'):
|
|
38
|
+
service_names = list(proto.service_names.names)
|
|
39
|
+
else:
|
|
40
|
+
service_names = None
|
|
41
|
+
return service_names, pool
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class GetServiceStatusResponseConverter:
|
|
45
|
+
"""Converter for GetServiceStatusResponse"""
|
|
46
|
+
|
|
47
|
+
@classmethod
|
|
48
|
+
def to_proto(
|
|
49
|
+
cls,
|
|
50
|
+
statuses: List[Dict[str,
|
|
51
|
+
str]]) -> 'servev1_pb2.GetServiceStatusResponse':
|
|
52
|
+
response = servev1_pb2.GetServiceStatusResponse()
|
|
53
|
+
for status in statuses:
|
|
54
|
+
added = response.statuses.add()
|
|
55
|
+
added.status.update(status)
|
|
56
|
+
return response
|
|
57
|
+
|
|
58
|
+
@classmethod
|
|
59
|
+
def from_proto(
|
|
60
|
+
cls, proto: 'servev1_pb2.GetServiceStatusResponse'
|
|
61
|
+
) -> List[Dict[str, str]]:
|
|
62
|
+
pickled = [dict(status.status) for status in proto.statuses]
|
|
63
|
+
return pickled
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TerminateServicesRequestConverter:
|
|
67
|
+
"""Converter for TerminateServicesRequest"""
|
|
68
|
+
|
|
69
|
+
@classmethod
|
|
70
|
+
def to_proto(cls, service_names: Optional[List[str]], purge: bool,
|
|
71
|
+
pool: bool) -> 'servev1_pb2.TerminateServicesRequest':
|
|
72
|
+
request = servev1_pb2.TerminateServicesRequest()
|
|
73
|
+
request.purge = purge
|
|
74
|
+
request.pool = pool
|
|
75
|
+
if service_names is not None:
|
|
76
|
+
request.service_names.names.extend(service_names)
|
|
77
|
+
return request
|
|
78
|
+
|
|
79
|
+
@classmethod
|
|
80
|
+
def from_proto(
|
|
81
|
+
cls, proto: 'servev1_pb2.TerminateServicesRequest'
|
|
82
|
+
) -> Tuple[Optional[List[str]], bool, bool]:
|
|
83
|
+
purge = proto.purge
|
|
84
|
+
pool = proto.pool
|
|
85
|
+
if proto.HasField('service_names'):
|
|
86
|
+
service_names = list(proto.service_names.names)
|
|
87
|
+
else:
|
|
88
|
+
service_names = None
|
|
89
|
+
return service_names, purge, pool
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ========================= gRPC Runner for Sky Serve =========================
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class RpcRunner:
|
|
96
|
+
"""gRPC Runner for Sky Serve
|
|
97
|
+
|
|
98
|
+
The RPC runner does not catch errors, and assumes that backend handle has
|
|
99
|
+
grpc enabled.
|
|
100
|
+
|
|
101
|
+
Common exceptions raised:
|
|
102
|
+
exceptions.FetchClusterInfoError
|
|
103
|
+
exceptions.SkyletInternalError
|
|
104
|
+
grpc.RpcError
|
|
105
|
+
grpc.FutureTimeoutError
|
|
106
|
+
AssertionError
|
|
107
|
+
"""
|
|
108
|
+
|
|
109
|
+
@classmethod
|
|
110
|
+
def get_service_status(cls, handle: backends.CloudVmRayResourceHandle,
|
|
111
|
+
service_names: Optional[List[str]],
|
|
112
|
+
pool: bool) -> List[Dict[str, Any]]:
|
|
113
|
+
assert handle.is_grpc_enabled_with_flag
|
|
114
|
+
request = GetServiceStatusRequestConverter.to_proto(service_names, pool)
|
|
115
|
+
response = backend_utils.invoke_skylet_with_retries(
|
|
116
|
+
lambda: backends.SkyletClient(handle.get_grpc_channel()
|
|
117
|
+
).get_service_status(request))
|
|
118
|
+
pickled = GetServiceStatusResponseConverter.from_proto(response)
|
|
119
|
+
return serve_utils.unpickle_service_status(pickled)
|
|
120
|
+
|
|
121
|
+
@classmethod
|
|
122
|
+
def add_version(cls, handle: backends.CloudVmRayResourceHandle,
|
|
123
|
+
service_name: str) -> int:
|
|
124
|
+
assert handle.is_grpc_enabled_with_flag
|
|
125
|
+
request = servev1_pb2.AddVersionRequest(service_name=service_name)
|
|
126
|
+
response = backend_utils.invoke_skylet_with_retries(
|
|
127
|
+
lambda: backends.SkyletClient(handle.get_grpc_channel()
|
|
128
|
+
).add_serve_version(request))
|
|
129
|
+
return response.version
|
|
130
|
+
|
|
131
|
+
@classmethod
|
|
132
|
+
def terminate_services(cls, handle: backends.CloudVmRayResourceHandle,
|
|
133
|
+
service_names: Optional[List[str]], purge: bool,
|
|
134
|
+
pool: bool) -> str:
|
|
135
|
+
assert handle.is_grpc_enabled_with_flag
|
|
136
|
+
request = TerminateServicesRequestConverter.to_proto(
|
|
137
|
+
service_names, purge, pool)
|
|
138
|
+
response = backend_utils.invoke_skylet_with_retries(
|
|
139
|
+
lambda: backends.SkyletClient(handle.get_grpc_channel()
|
|
140
|
+
).terminate_services(request))
|
|
141
|
+
return response.message
|
|
142
|
+
|
|
143
|
+
@classmethod
|
|
144
|
+
def terminate_replica(cls, handle: backends.CloudVmRayResourceHandle,
|
|
145
|
+
service_name: str, replica_id: int,
|
|
146
|
+
purge: bool) -> str:
|
|
147
|
+
assert handle.is_grpc_enabled_with_flag
|
|
148
|
+
request = servev1_pb2.TerminateReplicaRequest(service_name=service_name,
|
|
149
|
+
replica_id=replica_id,
|
|
150
|
+
purge=purge)
|
|
151
|
+
response = backend_utils.invoke_skylet_with_retries(
|
|
152
|
+
lambda: backends.SkyletClient(handle.get_grpc_channel()
|
|
153
|
+
).terminate_replica(request))
|
|
154
|
+
return response.message
|
|
155
|
+
|
|
156
|
+
@classmethod
|
|
157
|
+
def wait_service_registration(cls,
|
|
158
|
+
handle: backends.CloudVmRayResourceHandle,
|
|
159
|
+
service_name: str, job_id: int,
|
|
160
|
+
pool: bool) -> int:
|
|
161
|
+
assert handle.is_grpc_enabled_with_flag
|
|
162
|
+
request = servev1_pb2.WaitServiceRegistrationRequest(
|
|
163
|
+
service_name=service_name, job_id=job_id, pool=pool)
|
|
164
|
+
response = backend_utils.invoke_skylet_with_retries(
|
|
165
|
+
lambda: backends.SkyletClient(handle.get_grpc_channel()
|
|
166
|
+
).wait_service_registration(request))
|
|
167
|
+
return response.lb_port
|
|
168
|
+
|
|
169
|
+
@classmethod
|
|
170
|
+
def update_service(cls, handle: backends.CloudVmRayResourceHandle,
|
|
171
|
+
service_name: str, version: int,
|
|
172
|
+
mode: serve_utils.UpdateMode, pool: bool) -> None:
|
|
173
|
+
assert handle.is_grpc_enabled_with_flag
|
|
174
|
+
request = servev1_pb2.UpdateServiceRequest(service_name=service_name,
|
|
175
|
+
version=version,
|
|
176
|
+
mode=mode.value,
|
|
177
|
+
pool=pool)
|
|
178
|
+
backend_utils.invoke_skylet_with_retries(lambda: backends.SkyletClient(
|
|
179
|
+
handle.get_grpc_channel()).update_service(request))
|
sky/serve/serve_state.py
CHANGED
|
@@ -86,6 +86,7 @@ version_specs_table = sqlalchemy.Table(
|
|
|
86
86
|
sqlalchemy.Column('service_name', sqlalchemy.Text, primary_key=True),
|
|
87
87
|
sqlalchemy.Column('version', sqlalchemy.Integer, primary_key=True),
|
|
88
88
|
sqlalchemy.Column('spec', sqlalchemy.LargeBinary),
|
|
89
|
+
sqlalchemy.Column('yaml_content', sqlalchemy.Text, server_default=None),
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
serve_ha_recovery_script_table = sqlalchemy.Table(
|
|
@@ -472,6 +473,7 @@ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
|
|
|
472
473
|
'controller_pid': r['controller_pid'],
|
|
473
474
|
'hash': r['hash'],
|
|
474
475
|
'entrypoint': r['entrypoint'],
|
|
476
|
+
'yaml_content': r.get('yaml_content'),
|
|
475
477
|
}
|
|
476
478
|
latest_spec = get_spec(r['name'], current_version)
|
|
477
479
|
if latest_spec is not None:
|
|
@@ -480,21 +482,48 @@ def _get_service_from_row(r: 'row.RowMapping') -> Dict[str, Any]:
|
|
|
480
482
|
return record
|
|
481
483
|
|
|
482
484
|
|
|
485
|
+
def _build_services_with_latest_version_query(
|
|
486
|
+
service_name: Optional[str] = None) -> sqlalchemy.sql.Select:
|
|
487
|
+
"""Builds a query joining services with their latest version and yaml.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
service_name: If provided, filter to this service only.
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
A SQLAlchemy selectable for fetching rows, including columns:
|
|
494
|
+
- max_version (latest version per service)
|
|
495
|
+
- services_table.*
|
|
496
|
+
- yaml_content (from version_specs_table for latest version)
|
|
497
|
+
"""
|
|
498
|
+
subquery = sqlalchemy.select(
|
|
499
|
+
version_specs_table.c.service_name,
|
|
500
|
+
sqlalchemy.func.max(version_specs_table.c.version).label('max_version'),
|
|
501
|
+
).group_by(version_specs_table.c.service_name).alias('v')
|
|
502
|
+
|
|
503
|
+
query = sqlalchemy.select(
|
|
504
|
+
subquery.c.max_version,
|
|
505
|
+
services_table,
|
|
506
|
+
version_specs_table.c.yaml_content,
|
|
507
|
+
).select_from(
|
|
508
|
+
services_table.join(
|
|
509
|
+
subquery, services_table.c.name == subquery.c.service_name).join(
|
|
510
|
+
version_specs_table,
|
|
511
|
+
sqlalchemy.and_(
|
|
512
|
+
version_specs_table.c.service_name == services_table.c.name,
|
|
513
|
+
version_specs_table.c.version == subquery.c.max_version,
|
|
514
|
+
),
|
|
515
|
+
))
|
|
516
|
+
if service_name is not None:
|
|
517
|
+
query = query.where(services_table.c.name == service_name)
|
|
518
|
+
return query
|
|
519
|
+
|
|
520
|
+
|
|
483
521
|
@init_db
|
|
484
522
|
def get_services() -> List[Dict[str, Any]]:
|
|
485
523
|
"""Get all existing service records."""
|
|
486
524
|
assert _SQLALCHEMY_ENGINE is not None
|
|
487
525
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
488
|
-
|
|
489
|
-
version_specs_table.c.service_name,
|
|
490
|
-
sqlalchemy.func.max(
|
|
491
|
-
version_specs_table.c.version).label('max_version')).group_by(
|
|
492
|
-
version_specs_table.c.service_name).alias('v')
|
|
493
|
-
|
|
494
|
-
query = sqlalchemy.select(
|
|
495
|
-
subquery.c.max_version, services_table).select_from(
|
|
496
|
-
services_table.join(
|
|
497
|
-
subquery, services_table.c.name == subquery.c.service_name))
|
|
526
|
+
query = _build_services_with_latest_version_query()
|
|
498
527
|
rows = session.execute(query).fetchall()
|
|
499
528
|
records = []
|
|
500
529
|
for row in rows:
|
|
@@ -517,20 +546,7 @@ def get_service_from_name(service_name: str) -> Optional[Dict[str, Any]]:
|
|
|
517
546
|
"""Get all existing service records."""
|
|
518
547
|
assert _SQLALCHEMY_ENGINE is not None
|
|
519
548
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
520
|
-
|
|
521
|
-
version_specs_table.c.service_name,
|
|
522
|
-
sqlalchemy.func.max(
|
|
523
|
-
version_specs_table.c.version).label('max_version')
|
|
524
|
-
).where(version_specs_table.c.service_name == service_name).group_by(
|
|
525
|
-
version_specs_table.c.service_name).alias('v')
|
|
526
|
-
|
|
527
|
-
query = sqlalchemy.select(
|
|
528
|
-
subquery.c.max_version, services_table).select_from(
|
|
529
|
-
services_table.join(
|
|
530
|
-
subquery,
|
|
531
|
-
services_table.c.name == subquery.c.service_name)).where(
|
|
532
|
-
services_table.c.name == service_name)
|
|
533
|
-
|
|
549
|
+
query = _build_services_with_latest_version_query(service_name)
|
|
534
550
|
rows = session.execute(query).fetchall()
|
|
535
551
|
for row in rows:
|
|
536
552
|
return _get_service_from_row(row._mapping) # pylint: disable=protected-access
|
|
@@ -686,22 +702,6 @@ def total_number_terminating_replicas() -> int:
|
|
|
686
702
|
return terminating_count
|
|
687
703
|
|
|
688
704
|
|
|
689
|
-
@init_db
|
|
690
|
-
def total_number_scheduled_to_terminate_replicas() -> int:
|
|
691
|
-
"""Returns the total number of terminating replicas."""
|
|
692
|
-
assert _SQLALCHEMY_ENGINE is not None
|
|
693
|
-
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
694
|
-
rows = session.execute(sqlalchemy.select(
|
|
695
|
-
replicas_table.c.replica_info)).fetchall()
|
|
696
|
-
terminating_count = 0
|
|
697
|
-
for row in rows:
|
|
698
|
-
replica_info: 'replica_managers.ReplicaInfo' = pickle.loads(row[0])
|
|
699
|
-
if (replica_info.status_property.sky_down_status ==
|
|
700
|
-
common_utils.ProcessStatus.SCHEDULED):
|
|
701
|
-
terminating_count += 1
|
|
702
|
-
return terminating_count
|
|
703
|
-
|
|
704
|
-
|
|
705
705
|
def get_replicas_at_status(
|
|
706
706
|
service_name: str,
|
|
707
707
|
status: ReplicaStatus,
|
|
@@ -737,7 +737,8 @@ def add_version(service_name: str) -> int:
|
|
|
737
737
|
|
|
738
738
|
@init_db
|
|
739
739
|
def add_or_update_version(service_name: str, version: int,
|
|
740
|
-
spec: 'service_spec.SkyServiceSpec'
|
|
740
|
+
spec: 'service_spec.SkyServiceSpec',
|
|
741
|
+
yaml_content: str) -> None:
|
|
741
742
|
assert _SQLALCHEMY_ENGINE is not None
|
|
742
743
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
743
744
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
@@ -750,27 +751,22 @@ def add_or_update_version(service_name: str, version: int,
|
|
|
750
751
|
raise ValueError('Unsupported database dialect')
|
|
751
752
|
|
|
752
753
|
insert_stmt = insert_func(version_specs_table).values(
|
|
753
|
-
service_name=service_name,
|
|
754
|
+
service_name=service_name,
|
|
755
|
+
version=version,
|
|
756
|
+
spec=pickle.dumps(spec),
|
|
757
|
+
yaml_content=yaml_content)
|
|
754
758
|
|
|
755
759
|
insert_stmt = insert_stmt.on_conflict_do_update(
|
|
756
760
|
index_elements=['service_name', 'version'],
|
|
757
|
-
set_={
|
|
761
|
+
set_={
|
|
762
|
+
'spec': insert_stmt.excluded.spec,
|
|
763
|
+
'yaml_content': insert_stmt.excluded.yaml_content
|
|
764
|
+
})
|
|
758
765
|
|
|
759
766
|
session.execute(insert_stmt)
|
|
760
767
|
session.commit()
|
|
761
768
|
|
|
762
769
|
|
|
763
|
-
@init_db
|
|
764
|
-
def remove_service_versions(service_name: str) -> None:
|
|
765
|
-
"""Removes a replica from the database."""
|
|
766
|
-
assert _SQLALCHEMY_ENGINE is not None
|
|
767
|
-
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
768
|
-
session.execute(
|
|
769
|
-
sqlalchemy.delete(version_specs_table).where(
|
|
770
|
-
version_specs_table.c.service_name == service_name))
|
|
771
|
-
session.commit()
|
|
772
|
-
|
|
773
|
-
|
|
774
770
|
@init_db
|
|
775
771
|
def get_spec(service_name: str,
|
|
776
772
|
version: int) -> Optional['service_spec.SkyServiceSpec']:
|
|
@@ -785,6 +781,19 @@ def get_spec(service_name: str,
|
|
|
785
781
|
return pickle.loads(result[0]) if result else None
|
|
786
782
|
|
|
787
783
|
|
|
784
|
+
@init_db
|
|
785
|
+
def get_yaml_content(service_name: str, version: int) -> Optional[str]:
|
|
786
|
+
"""Gets the yaml content of a version."""
|
|
787
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
788
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
789
|
+
result = session.execute(
|
|
790
|
+
sqlalchemy.select(version_specs_table.c.yaml_content).where(
|
|
791
|
+
sqlalchemy.and_(
|
|
792
|
+
version_specs_table.c.service_name == service_name,
|
|
793
|
+
version_specs_table.c.version == version))).fetchone()
|
|
794
|
+
return result[0] if result else None
|
|
795
|
+
|
|
796
|
+
|
|
788
797
|
@init_db
|
|
789
798
|
def delete_version(service_name: str, version: int) -> None:
|
|
790
799
|
"""Deletes a version from the database."""
|