skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251203__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +10 -2
- sky/adaptors/aws.py +81 -16
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +588 -184
- sky/backends/cloud_vm_ray_backend.py +1088 -904
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +633 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -0
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +24 -28
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/check.py +73 -43
- sky/client/cli/command.py +675 -412
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +132 -63
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +6 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +40 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +4 -2
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +33 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +174 -165
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/96_E2yl3QAiIJGOYCkSpB/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.20a8540fe697d5ee.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-7b45f9fbb6308557.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-8f27d1c10c98def8.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-9146207c4567fdfd.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-792db96d918c98c9.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-abfcac9c137aa543.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-ee39056f9851a3ff.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-c0b5935149902e6f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-aed0ea19df7cf961.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-d66997e2bfc837cf.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-9faf940b253e3e06.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-2072b48b617989c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-f42674164aa73423.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-84a40f8c7c627fe4.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-531b2f8c4bf89f82.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-64e05f17bf2cf8ce.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +162 -29
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/optimizer.py +1 -1
- sky/provision/__init__.py +37 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +2 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +97 -26
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +382 -418
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +30 -9
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/vast/instance.py +5 -3
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/requests/executor.py +381 -145
- sky/server/requests/payloads.py +71 -18
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +121 -0
- sky/server/requests/requests.py +507 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +85 -20
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +420 -172
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +62 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +106 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +111 -26
- sky/skylet/events.py +64 -10
- sky/skylet/job_lib.py +141 -104
- sky/skylet/log_lib.py +233 -5
- sky/skylet/log_lib.pyi +40 -2
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +22 -1
- sky/skylet/subprocess_daemon.py +104 -29
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/server.py +9 -8
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +196 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/vast-ray.yml.j2 +1 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/permission.py +60 -43
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +22 -0
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +88 -27
- sky/utils/command_runner.pyi +36 -3
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +37 -4
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/{deploy_remote_cluster.py → deploy_ssh_node_pools.py} +258 -380
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +213 -194
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +107 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/METADATA +331 -65
- skypilot_nightly-1.0.0.dev20251203.dist-info/RECORD +611 -0
- skypilot_nightly-1.0.0.dev20251203.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → 96_E2yl3QAiIJGOYCkSpB}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251203.dist-info}/licenses/LICENSE +0 -0
sky/setup_files/dependencies.py
CHANGED
|
@@ -48,9 +48,18 @@ install_requires = [
|
|
|
48
48
|
# (https://github.com/yaml/pyyaml/issues/601)
|
|
49
49
|
# <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
|
|
50
50
|
'pyyaml > 3.13, != 5.4.*',
|
|
51
|
+
'ijson',
|
|
52
|
+
'orjson',
|
|
51
53
|
'requests',
|
|
54
|
+
# SkyPilot inherits from uvicorn.Server to customize the behavior of
|
|
55
|
+
# uvicorn, so we need to pin uvicorn version to avoid potential break
|
|
56
|
+
# changes.
|
|
57
|
+
# Notes for current version check:
|
|
58
|
+
# - uvicorn 0.33.0 is the latest version that supports Python 3.8
|
|
59
|
+
# - uvicorn 0.36.0 removes setup_event_loop thus breaks SkyPilot's custom
|
|
60
|
+
# behavior.
|
|
61
|
+
'uvicorn[standard] >=0.33.0, <0.36.0',
|
|
52
62
|
'fastapi',
|
|
53
|
-
'uvicorn[standard]',
|
|
54
63
|
# Some pydantic versions are not compatible with ray. Adopted from ray's
|
|
55
64
|
# setup.py:
|
|
56
65
|
# https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
|
|
@@ -63,6 +72,8 @@ install_requires = [
|
|
|
63
72
|
'setproctitle',
|
|
64
73
|
'sqlalchemy',
|
|
65
74
|
'psycopg2-binary',
|
|
75
|
+
'aiosqlite',
|
|
76
|
+
'asyncpg',
|
|
66
77
|
# TODO(hailong): These three dependencies should be removed after we make
|
|
67
78
|
# the client-side actually not importing them.
|
|
68
79
|
'casbin',
|
|
@@ -70,13 +81,12 @@ install_requires = [
|
|
|
70
81
|
# Required for API server metrics
|
|
71
82
|
'prometheus_client>=0.8.0',
|
|
72
83
|
'passlib',
|
|
73
|
-
'bcrypt',
|
|
84
|
+
'bcrypt==4.0.1',
|
|
74
85
|
'pyjwt',
|
|
75
86
|
'gitpython',
|
|
76
87
|
'types-paramiko',
|
|
77
88
|
'alembic',
|
|
78
89
|
'aiohttp',
|
|
79
|
-
'aiosqlite',
|
|
80
90
|
'anyio',
|
|
81
91
|
]
|
|
82
92
|
|
|
@@ -94,6 +104,10 @@ GRPC = 'grpcio>=1.63.0'
|
|
|
94
104
|
PROTOBUF = 'protobuf>=5.26.1, < 7.0.0'
|
|
95
105
|
|
|
96
106
|
server_dependencies = [
|
|
107
|
+
# TODO: Some of these dependencies are also specified in install_requires,
|
|
108
|
+
# so they are redundant here. We should figure out if they are only needed
|
|
109
|
+
# on the server (should remove from install_requires), or if they are needed
|
|
110
|
+
# on the client (should remove from here).
|
|
97
111
|
'casbin',
|
|
98
112
|
'sqlalchemy_adapter',
|
|
99
113
|
'passlib',
|
|
@@ -103,14 +117,16 @@ server_dependencies = [
|
|
|
103
117
|
GRPC,
|
|
104
118
|
PROTOBUF,
|
|
105
119
|
'aiosqlite',
|
|
120
|
+
'greenlet',
|
|
106
121
|
]
|
|
107
122
|
|
|
108
123
|
local_ray = [
|
|
109
124
|
# Lower version of ray will cause dependency conflict for
|
|
110
125
|
# click/grpcio/protobuf.
|
|
111
|
-
#
|
|
126
|
+
# Ray 2.6.1+ resolved cluster launcher bugs
|
|
127
|
+
# and grpcio issues on Apple Silicon.
|
|
112
128
|
# https://github.com/ray-project/ray/releases/tag/ray-2.6.1
|
|
113
|
-
'ray[default] >= 2.
|
|
129
|
+
'ray[default] >= 2.6.1',
|
|
114
130
|
]
|
|
115
131
|
|
|
116
132
|
remote = [
|
|
@@ -132,11 +148,19 @@ aws_dependencies = [
|
|
|
132
148
|
'colorama < 0.4.5',
|
|
133
149
|
]
|
|
134
150
|
|
|
151
|
+
# Kubernetes 32.0.0 has an authentication bug:
|
|
152
|
+
# https://github.com/kubernetes-client/python/issues/2333
|
|
153
|
+
kubernetes_dependencies = [
|
|
154
|
+
'kubernetes>=20.0.0,!=32.0.0',
|
|
155
|
+
'websockets',
|
|
156
|
+
'python-dateutil',
|
|
157
|
+
]
|
|
158
|
+
|
|
135
159
|
# azure-cli cannot be installed normally by uv, so we need to work around it in
|
|
136
160
|
# a few places.
|
|
137
161
|
AZURE_CLI = 'azure-cli>=2.65.0'
|
|
138
162
|
|
|
139
|
-
|
|
163
|
+
cloud_dependencies: Dict[str, List[str]] = {
|
|
140
164
|
'aws': aws_dependencies,
|
|
141
165
|
# TODO(zongheng): azure-cli is huge and takes a long time to install.
|
|
142
166
|
# Tracked in: https://github.com/Azure/azure-cli/issues/7387
|
|
@@ -172,20 +196,23 @@ extras_require: Dict[str, List[str]] = {
|
|
|
172
196
|
'docker': ['docker'] + local_ray,
|
|
173
197
|
'lambda': [], # No dependencies needed for lambda
|
|
174
198
|
'cloudflare': aws_dependencies,
|
|
199
|
+
'coreweave': aws_dependencies + kubernetes_dependencies,
|
|
175
200
|
'scp': local_ray,
|
|
176
201
|
'oci': ['oci'],
|
|
177
|
-
|
|
178
|
-
'
|
|
179
|
-
'kubernetes>=20.0.0,!=32.0.0', 'websockets', 'python-dateutil'
|
|
180
|
-
],
|
|
181
|
-
'ssh': ['kubernetes>=20.0.0,!=32.0.0', 'websockets', 'python-dateutil'],
|
|
182
|
-
'remote': remote,
|
|
202
|
+
'kubernetes': kubernetes_dependencies,
|
|
203
|
+
'ssh': kubernetes_dependencies,
|
|
183
204
|
# For the container registry auth api. Reference:
|
|
184
205
|
# https://github.com/runpod/runpod-python/releases/tag/1.6.1
|
|
185
|
-
|
|
206
|
+
# RunPod needs a TOML parser to read ~/.runpod/config.toml. On Python 3.11+
|
|
207
|
+
# stdlib provides tomllib; on lower versions we depend on tomli explicitly.
|
|
208
|
+
# Instead of installing tomli conditionally, we install it explicitly.
|
|
209
|
+
# This is because the conditional installation of tomli does not work
|
|
210
|
+
# with controller package installation code.
|
|
211
|
+
'runpod': ['runpod>=1.6.1', 'tomli'],
|
|
186
212
|
'fluidstack': [], # No dependencies needed for fluidstack
|
|
187
213
|
'cudo': ['cudo-compute>=0.1.10'],
|
|
188
214
|
'paperspace': [], # No dependencies needed for paperspace
|
|
215
|
+
'primeintellect': [], # No dependencies needed for primeintellect
|
|
189
216
|
'do': ['pydo>=0.3.0', 'azure-core>=1.24.0', 'azure-common'],
|
|
190
217
|
'vast': ['vastai-sdk>=0.1.12'],
|
|
191
218
|
'vsphere': [
|
|
@@ -198,19 +225,24 @@ extras_require: Dict[str, List[str]] = {
|
|
|
198
225
|
# 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' pylint: disable=line-too-long
|
|
199
226
|
],
|
|
200
227
|
'nebius': [
|
|
201
|
-
|
|
228
|
+
# Nebius requires grpcio and protobuf, so we need to include
|
|
229
|
+
# our constraints here.
|
|
230
|
+
'nebius>=0.3.12',
|
|
231
|
+
GRPC,
|
|
232
|
+
PROTOBUF,
|
|
202
233
|
] + aws_dependencies,
|
|
203
234
|
'hyperbolic': [], # No dependencies needed for hyperbolic
|
|
204
|
-
'
|
|
235
|
+
'seeweb': ['ecsapi==0.4.0'],
|
|
236
|
+
'shadeform': [], # No dependencies needed for shadeform
|
|
205
237
|
}
|
|
206
238
|
|
|
207
239
|
# Calculate which clouds should be included in the [all] installation.
|
|
208
|
-
clouds_for_all = set(
|
|
209
|
-
clouds_for_all.remove('remote')
|
|
240
|
+
clouds_for_all = set(cloud_dependencies)
|
|
210
241
|
|
|
211
242
|
if sys.version_info < (3, 10):
|
|
212
243
|
# Nebius needs python3.10. If python 3.9 [all] will not install nebius
|
|
213
244
|
clouds_for_all.remove('nebius')
|
|
245
|
+
clouds_for_all.remove('seeweb')
|
|
214
246
|
|
|
215
247
|
if sys.version_info >= (3, 12):
|
|
216
248
|
# The version of ray we use does not work with >= 3.12, so avoid clouds
|
|
@@ -220,5 +252,16 @@ if sys.version_info >= (3, 12):
|
|
|
220
252
|
# TODO: Remove once https://github.com/vast-ai/vast-sdk/pull/6 is released
|
|
221
253
|
clouds_for_all.remove('vast')
|
|
222
254
|
|
|
223
|
-
|
|
224
|
-
|
|
255
|
+
cloud_extras = {
|
|
256
|
+
cloud: dependencies + server_dependencies
|
|
257
|
+
for cloud, dependencies in cloud_dependencies.items()
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
extras_require: Dict[str, List[str]] = {
|
|
261
|
+
# Include server_dependencies with each cloud.
|
|
262
|
+
**cloud_extras,
|
|
263
|
+
'all': list(set().union(*[cloud_extras[cloud] for cloud in clouds_for_all])
|
|
264
|
+
),
|
|
265
|
+
'remote': remote,
|
|
266
|
+
'server': server_dependencies,
|
|
267
|
+
}
|
sky/setup_files/setup.py
CHANGED
|
@@ -148,47 +148,47 @@ if os.path.exists(readme_filepath):
|
|
|
148
148
|
long_description = io.open(readme_filepath, 'r', encoding='utf-8').read()
|
|
149
149
|
long_description = parse_readme(long_description)
|
|
150
150
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
setuptools.setup(
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
)
|
|
151
|
+
if __name__ == '__main__':
|
|
152
|
+
atexit.register(revert_commit_hash)
|
|
153
|
+
replace_commit_hash()
|
|
154
|
+
setuptools.setup(
|
|
155
|
+
# NOTE: this affects the package.whl wheel name. When changing this (if
|
|
156
|
+
# ever), you must grep for '.whl' and change all corresponding wheel paths
|
|
157
|
+
# (templates/*.j2 and wheel_utils.py).
|
|
158
|
+
name='skypilot-nightly',
|
|
159
|
+
version=find_version(),
|
|
160
|
+
packages=setuptools.find_packages(),
|
|
161
|
+
author='SkyPilot Team',
|
|
162
|
+
license='Apache 2.0',
|
|
163
|
+
readme='README.md',
|
|
164
|
+
description='SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.',
|
|
165
|
+
long_description=long_description,
|
|
166
|
+
long_description_content_type='text/markdown',
|
|
167
|
+
setup_requires=['wheel'],
|
|
168
|
+
requires_python='>=3.7',
|
|
169
|
+
install_requires=dependencies['install_requires'],
|
|
170
|
+
extras_require=dependencies['extras_require'],
|
|
171
|
+
entry_points={
|
|
172
|
+
'console_scripts': ['sky = sky.cli:cli'],
|
|
173
|
+
},
|
|
174
|
+
include_package_data=True,
|
|
175
|
+
classifiers=[
|
|
176
|
+
'Programming Language :: Python :: 3.7',
|
|
177
|
+
'Programming Language :: Python :: 3.8',
|
|
178
|
+
'Programming Language :: Python :: 3.9',
|
|
179
|
+
'Programming Language :: Python :: 3.10',
|
|
180
|
+
'Programming Language :: Python :: 3.11',
|
|
181
|
+
'Programming Language :: Python :: 3.12',
|
|
182
|
+
'Programming Language :: Python :: 3.13',
|
|
183
|
+
'License :: OSI Approved :: Apache Software License',
|
|
184
|
+
'Operating System :: OS Independent',
|
|
185
|
+
'Topic :: Software Development :: Libraries :: Python Modules',
|
|
186
|
+
'Topic :: System :: Distributed Computing',
|
|
187
|
+
],
|
|
188
|
+
project_urls={
|
|
189
|
+
'Homepage': 'https://github.com/skypilot-org/skypilot',
|
|
190
|
+
'Issues': 'https://github.com/skypilot-org/skypilot/issues',
|
|
191
|
+
'Discussion': 'https://github.com/skypilot-org/skypilot/discussions',
|
|
192
|
+
'Documentation': 'https://docs.skypilot.co/',
|
|
193
|
+
},
|
|
194
|
+
)
|
sky/sky_logging.py
CHANGED
|
@@ -85,7 +85,7 @@ class EnvAwareHandler(rich_utils.RichSafeStreamHandler):
|
|
|
85
85
|
@level.setter
|
|
86
86
|
def level(self, level):
|
|
87
87
|
# pylint: disable=protected-access
|
|
88
|
-
self._level = logging._checkLevel(level)
|
|
88
|
+
self._level = logging._checkLevel(level) # type: ignore[attr-defined]
|
|
89
89
|
|
|
90
90
|
|
|
91
91
|
_root_logger = logging.getLogger('sky')
|
|
@@ -109,7 +109,6 @@ def _setup_logger():
|
|
|
109
109
|
global _default_handler
|
|
110
110
|
if _default_handler is None:
|
|
111
111
|
_default_handler = EnvAwareHandler(sys.stdout)
|
|
112
|
-
_default_handler.flush = sys.stdout.flush # type: ignore
|
|
113
112
|
if env_options.Options.SHOW_DEBUG_INFO.get():
|
|
114
113
|
_default_handler.setLevel(logging.DEBUG)
|
|
115
114
|
else:
|
|
@@ -129,7 +128,6 @@ def _setup_logger():
|
|
|
129
128
|
for logger_name in _SENSITIVE_LOGGER:
|
|
130
129
|
logger = logging.getLogger(logger_name)
|
|
131
130
|
handler_to_logger = EnvAwareHandler(sys.stdout, sensitive=True)
|
|
132
|
-
handler_to_logger.flush = sys.stdout.flush # type: ignore
|
|
133
131
|
logger.addHandler(handler_to_logger)
|
|
134
132
|
logger.setLevel(logging.INFO)
|
|
135
133
|
if _show_logging_prefix():
|
|
@@ -148,7 +146,8 @@ def reload_logger():
|
|
|
148
146
|
such as SKYPILOT_DEBUG.
|
|
149
147
|
"""
|
|
150
148
|
global _default_handler
|
|
151
|
-
|
|
149
|
+
if _default_handler is not None:
|
|
150
|
+
_root_logger.removeHandler(_default_handler)
|
|
152
151
|
_default_handler = None
|
|
153
152
|
_setup_logger()
|
|
154
153
|
|
|
@@ -212,12 +211,21 @@ def logging_enabled(logger: logging.Logger, level: int) -> bool:
|
|
|
212
211
|
|
|
213
212
|
|
|
214
213
|
@contextlib.contextmanager
|
|
215
|
-
def silent():
|
|
214
|
+
def silent(should_silence: bool = True):
|
|
216
215
|
"""Make all sky_logging.print() and logger.{info, warning...} silent.
|
|
217
216
|
|
|
218
217
|
We preserve the ERROR level logging, so that errors are
|
|
219
218
|
still printed.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
should_silence: Whether to actually suppress the logging. If False, this
|
|
222
|
+
is a no-op context manager. Provided for convenience when we want to
|
|
223
|
+
suppress logging conditionally.
|
|
220
224
|
"""
|
|
225
|
+
if not should_silence:
|
|
226
|
+
yield
|
|
227
|
+
return
|
|
228
|
+
|
|
221
229
|
global print
|
|
222
230
|
previous_level = _root_logger.level
|
|
223
231
|
previous_is_silent = is_silent()
|
sky/skylet/attempt_skylet.py
CHANGED
|
@@ -1,51 +1,133 @@
|
|
|
1
1
|
"""Restarts skylet if version does not match"""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import signal
|
|
4
5
|
import subprocess
|
|
6
|
+
from typing import List, Optional, Tuple
|
|
7
|
+
|
|
8
|
+
import psutil
|
|
5
9
|
|
|
6
10
|
from sky.skylet import constants
|
|
11
|
+
from sky.skylet import runtime_utils
|
|
12
|
+
|
|
13
|
+
VERSION_FILE = runtime_utils.get_runtime_dir_path(constants.SKYLET_VERSION_FILE)
|
|
14
|
+
SKYLET_LOG_FILE = runtime_utils.get_runtime_dir_path(constants.SKYLET_LOG_FILE)
|
|
15
|
+
PID_FILE = runtime_utils.get_runtime_dir_path(constants.SKYLET_PID_FILE)
|
|
16
|
+
PORT_FILE = runtime_utils.get_runtime_dir_path(constants.SKYLET_PORT_FILE)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _is_running_skylet_process(pid: int) -> bool:
|
|
20
|
+
if pid <= 0:
|
|
21
|
+
return False
|
|
22
|
+
try:
|
|
23
|
+
process = psutil.Process(pid)
|
|
24
|
+
if not process.is_running():
|
|
25
|
+
return False
|
|
26
|
+
# Check if command line contains the skylet module identifier
|
|
27
|
+
cmdline = process.cmdline()
|
|
28
|
+
return any('sky.skylet.skylet' in arg for arg in cmdline)
|
|
29
|
+
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess,
|
|
30
|
+
OSError) as e:
|
|
31
|
+
print(f'Error checking if skylet process {pid} is running: {e}')
|
|
32
|
+
return False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _find_running_skylet_pids() -> List[int]:
|
|
36
|
+
if os.path.exists(PID_FILE):
|
|
37
|
+
try:
|
|
38
|
+
with open(PID_FILE, 'r', encoding='utf-8') as pid_file:
|
|
39
|
+
pid = int(pid_file.read().strip())
|
|
40
|
+
if _is_running_skylet_process(pid):
|
|
41
|
+
return [pid]
|
|
42
|
+
except (OSError, ValueError, IOError) as e:
|
|
43
|
+
# Don't fallback to grep-based detection as the existence of the
|
|
44
|
+
# PID file implies that we are on the new version, and there is
|
|
45
|
+
# possibility of there being multiple skylet processes running,
|
|
46
|
+
# and we don't want to accidentally kill the wrong skylet(s).
|
|
47
|
+
print(f'Error reading PID file {PID_FILE}: {e}')
|
|
48
|
+
return []
|
|
49
|
+
else:
|
|
50
|
+
# Fall back to grep-based detection for backward compatibility.
|
|
51
|
+
pids = []
|
|
52
|
+
# We use -m to grep instead of {constants.SKY_PYTHON_CMD} -m to grep
|
|
53
|
+
# because need to handle the backward compatibility of the old skylet
|
|
54
|
+
# started before #3326, which does not use the full path to python.
|
|
55
|
+
proc = subprocess.run(
|
|
56
|
+
'ps aux | grep -v "grep" | grep "sky.skylet.skylet" | grep " -m"',
|
|
57
|
+
shell=True,
|
|
58
|
+
check=False,
|
|
59
|
+
capture_output=True,
|
|
60
|
+
text=True)
|
|
61
|
+
if proc.returncode == 0:
|
|
62
|
+
# Parse the output to extract PIDs (column 2)
|
|
63
|
+
for line in proc.stdout.strip().split('\n'):
|
|
64
|
+
if line:
|
|
65
|
+
parts = line.split()
|
|
66
|
+
if len(parts) >= 2:
|
|
67
|
+
try:
|
|
68
|
+
pids.append(int(parts[1]))
|
|
69
|
+
except ValueError:
|
|
70
|
+
continue
|
|
71
|
+
return pids
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _check_version_match() -> Tuple[bool, Optional[str]]:
|
|
75
|
+
"""Check if the version file matches the current skylet version.
|
|
7
76
|
|
|
8
|
-
|
|
77
|
+
Returns:
|
|
78
|
+
Tuple of (version_match: bool, version: str or None)
|
|
79
|
+
"""
|
|
80
|
+
version: Optional[str] = None
|
|
81
|
+
if os.path.exists(VERSION_FILE):
|
|
82
|
+
try:
|
|
83
|
+
with open(VERSION_FILE, 'r', encoding='utf-8') as f:
|
|
84
|
+
version = f.read().strip()
|
|
85
|
+
return version == constants.SKYLET_VERSION, version
|
|
86
|
+
except (OSError, IOError):
|
|
87
|
+
pass
|
|
88
|
+
return False, version
|
|
9
89
|
|
|
10
90
|
|
|
11
91
|
def restart_skylet():
|
|
12
92
|
# Kills old skylet if it is running.
|
|
13
93
|
# TODO(zhwu): make the killing graceful, e.g., use a signal to tell
|
|
14
94
|
# skylet to exit, instead of directly killing it.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
95
|
+
|
|
96
|
+
# Find and kill running skylet processes
|
|
97
|
+
for pid in _find_running_skylet_pids():
|
|
98
|
+
try:
|
|
99
|
+
os.kill(pid, signal.SIGKILL)
|
|
100
|
+
except (OSError, ProcessLookupError):
|
|
101
|
+
# Process died between detection and kill
|
|
102
|
+
pass
|
|
103
|
+
# Clean up the PID file
|
|
104
|
+
try:
|
|
105
|
+
os.remove(PID_FILE)
|
|
106
|
+
except OSError:
|
|
107
|
+
pass # Best effort cleanup
|
|
108
|
+
|
|
109
|
+
port = constants.SKYLET_GRPC_PORT
|
|
23
110
|
subprocess.run(
|
|
24
111
|
# We have made sure that `attempt_skylet.py` is executed with the
|
|
25
112
|
# skypilot runtime env activated, so that skylet can access the cloud
|
|
26
113
|
# CLI tools.
|
|
27
|
-
f'nohup {constants.SKY_PYTHON_CMD} -m sky.skylet.skylet'
|
|
28
|
-
'
|
|
114
|
+
f'nohup {constants.SKY_PYTHON_CMD} -m sky.skylet.skylet '
|
|
115
|
+
f'--port={port} '
|
|
116
|
+
f'>> {SKYLET_LOG_FILE} 2>&1 & echo $! > {PID_FILE}',
|
|
29
117
|
shell=True,
|
|
30
118
|
check=True)
|
|
119
|
+
|
|
120
|
+
with open(PORT_FILE, 'w', encoding='utf-8') as pf:
|
|
121
|
+
pf.write(str(port))
|
|
122
|
+
|
|
31
123
|
with open(VERSION_FILE, 'w', encoding='utf-8') as v_f:
|
|
32
124
|
v_f.write(constants.SKYLET_VERSION)
|
|
33
125
|
|
|
34
126
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
shell=True,
|
|
38
|
-
check=False)
|
|
39
|
-
|
|
40
|
-
running = (proc.returncode == 0)
|
|
127
|
+
# Check if our skylet is running
|
|
128
|
+
running = bool(_find_running_skylet_pids())
|
|
41
129
|
|
|
42
|
-
version_match =
|
|
43
|
-
found_version = None
|
|
44
|
-
if os.path.exists(VERSION_FILE):
|
|
45
|
-
with open(VERSION_FILE, 'r', encoding='utf-8') as f:
|
|
46
|
-
found_version = f.read().strip()
|
|
47
|
-
if found_version == constants.SKYLET_VERSION:
|
|
48
|
-
version_match = True
|
|
130
|
+
version_match, found_version = _check_version_match()
|
|
49
131
|
|
|
50
132
|
version_string = (f' (found version {found_version}, new version '
|
|
51
133
|
f'{constants.SKYLET_VERSION})')
|
sky/skylet/configs.py
CHANGED
|
@@ -5,6 +5,7 @@ import pathlib
|
|
|
5
5
|
import threading
|
|
6
6
|
from typing import Callable, Optional, Union
|
|
7
7
|
|
|
8
|
+
from sky.skylet import runtime_utils
|
|
8
9
|
from sky.utils.db import db_utils
|
|
9
10
|
|
|
10
11
|
_DB_PATH = None
|
|
@@ -29,7 +30,8 @@ def init_db(func: Callable):
|
|
|
29
30
|
|
|
30
31
|
with _db_init_lock:
|
|
31
32
|
if _DB_PATH is None:
|
|
32
|
-
_DB_PATH =
|
|
33
|
+
_DB_PATH = runtime_utils.get_runtime_dir_path(
|
|
34
|
+
'.sky/skylet_config.db')
|
|
33
35
|
os.makedirs(pathlib.Path(_DB_PATH).parents[0], exist_ok=True)
|
|
34
36
|
with db_utils.safe_cursor(
|
|
35
37
|
_DB_PATH
|