skypilot-nightly 1.0.0.dev20250509__py3-none-any.whl → 1.0.0.dev20251107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +22 -6
- sky/adaptors/aws.py +25 -7
- sky/adaptors/common.py +24 -1
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/hyperbolic.py +8 -0
- sky/adaptors/kubernetes.py +149 -18
- sky/adaptors/nebius.py +170 -17
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/runpod.py +68 -0
- sky/adaptors/seeweb.py +167 -0
- sky/adaptors/shadeform.py +89 -0
- sky/admin_policy.py +187 -4
- sky/authentication.py +179 -225
- sky/backends/__init__.py +4 -2
- sky/backends/backend.py +22 -9
- sky/backends/backend_utils.py +1299 -380
- sky/backends/cloud_vm_ray_backend.py +1715 -518
- sky/backends/docker_utils.py +1 -1
- sky/backends/local_docker_backend.py +11 -6
- sky/backends/wheel_utils.py +37 -9
- sky/{clouds/service_catalog → catalog}/__init__.py +21 -19
- sky/{clouds/service_catalog → catalog}/aws_catalog.py +27 -8
- sky/{clouds/service_catalog → catalog}/azure_catalog.py +10 -7
- sky/{clouds/service_catalog → catalog}/common.py +89 -48
- sky/{clouds/service_catalog → catalog}/cudo_catalog.py +8 -5
- sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +30 -40
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +38 -38
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +42 -15
- sky/catalog/data_fetchers/fetch_hyperbolic.py +136 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +1 -0
- sky/catalog/data_fetchers/fetch_nebius.py +335 -0
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +1 -1
- sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
- sky/{clouds/service_catalog → catalog}/do_catalog.py +5 -2
- sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/gcp_catalog.py +41 -15
- sky/catalog/hyperbolic_catalog.py +136 -0
- sky/{clouds/service_catalog → catalog}/ibm_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +36 -24
- sky/{clouds/service_catalog → catalog}/lambda_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/nebius_catalog.py +9 -7
- sky/{clouds/service_catalog → catalog}/oci_catalog.py +9 -6
- sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +5 -2
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/{clouds/service_catalog → catalog}/runpod_catalog.py +11 -4
- sky/{clouds/service_catalog → catalog}/scp_catalog.py +9 -6
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/ssh_catalog.py +167 -0
- sky/{clouds/service_catalog → catalog}/vast_catalog.py +6 -3
- sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +5 -2
- sky/check.py +491 -203
- sky/cli.py +5 -6005
- sky/client/{cli.py → cli/command.py} +2477 -1885
- sky/client/cli/deprecation_utils.py +99 -0
- sky/client/cli/flags.py +359 -0
- sky/client/cli/table_utils.py +320 -0
- sky/client/common.py +70 -32
- sky/client/oauth.py +82 -0
- sky/client/sdk.py +1203 -297
- sky/client/sdk_async.py +833 -0
- sky/client/service_account_auth.py +47 -0
- sky/cloud_stores.py +73 -0
- sky/clouds/__init__.py +13 -0
- sky/clouds/aws.py +358 -93
- sky/clouds/azure.py +105 -83
- sky/clouds/cloud.py +127 -36
- sky/clouds/cudo.py +68 -50
- sky/clouds/do.py +66 -48
- sky/clouds/fluidstack.py +63 -44
- sky/clouds/gcp.py +339 -110
- sky/clouds/hyperbolic.py +293 -0
- sky/clouds/ibm.py +70 -49
- sky/clouds/kubernetes.py +563 -162
- sky/clouds/lambda_cloud.py +74 -54
- sky/clouds/nebius.py +206 -80
- sky/clouds/oci.py +88 -66
- sky/clouds/paperspace.py +61 -44
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +164 -74
- sky/clouds/scp.py +89 -83
- sky/clouds/seeweb.py +466 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/ssh.py +263 -0
- sky/clouds/utils/aws_utils.py +10 -4
- sky/clouds/utils/gcp_utils.py +87 -11
- sky/clouds/utils/oci_utils.py +38 -14
- sky/clouds/utils/scp_utils.py +177 -124
- sky/clouds/vast.py +99 -77
- sky/clouds/vsphere.py +51 -40
- sky/core.py +349 -139
- sky/dag.py +15 -0
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
- sky/dashboard/out/_next/static/chunks/1272-1ef0bf0237faccdb.js +1 -0
- sky/dashboard/out/_next/static/chunks/1871-74503c8e80fd253b.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.ad6adaa2a0fa9768.js +1 -0
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +1 -0
- sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.a830b5c9e7867c92.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
- sky/dashboard/out/_next/static/chunks/6130-2be46d70a38f1e82.js +1 -0
- sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6601-06114c982db410b6.js +1 -0
- sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
- sky/dashboard/out/_next/static/chunks/6989-01359c57e018caa4.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
- sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
- sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
- sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
- sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-bde01e4a2beec258.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a37d2063af475a1c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-d44859594e6f8064.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/config-dfb9bf07b13045f4.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-c0b5935149902e6f.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-aed0ea19df7cf961.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-6edeb7d06032adfc.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-479dde13399cf270.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-5ab3b907622cf0fe.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-b84b948ff357c43e.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-c5a3eeee1c218af1.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-22b23febb3e89ce1.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -0
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -0
- sky/dashboard/out/infra.html +1 -0
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -0
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -0
- sky/dashboard/out/volumes.html +1 -0
- sky/dashboard/out/workspace/new.html +1 -0
- sky/dashboard/out/workspaces/[name].html +1 -0
- sky/dashboard/out/workspaces.html +1 -0
- sky/data/data_utils.py +137 -1
- sky/data/mounting_utils.py +269 -84
- sky/data/storage.py +1451 -1807
- sky/data/storage_utils.py +43 -57
- sky/exceptions.py +132 -2
- sky/execution.py +206 -63
- sky/global_user_state.py +2374 -586
- sky/jobs/__init__.py +5 -0
- sky/jobs/client/sdk.py +242 -65
- sky/jobs/client/sdk_async.py +143 -0
- sky/jobs/constants.py +9 -8
- sky/jobs/controller.py +839 -277
- sky/jobs/file_content_utils.py +80 -0
- sky/jobs/log_gc.py +201 -0
- sky/jobs/recovery_strategy.py +398 -152
- sky/jobs/scheduler.py +315 -189
- sky/jobs/server/core.py +829 -255
- sky/jobs/server/server.py +156 -115
- sky/jobs/server/utils.py +136 -0
- sky/jobs/state.py +2092 -701
- sky/jobs/utils.py +1242 -160
- sky/logs/__init__.py +21 -0
- sky/logs/agent.py +108 -0
- sky/logs/aws.py +243 -0
- sky/logs/gcp.py +91 -0
- sky/metrics/__init__.py +0 -0
- sky/metrics/utils.py +443 -0
- sky/models.py +78 -1
- sky/optimizer.py +164 -70
- sky/provision/__init__.py +90 -4
- sky/provision/aws/config.py +147 -26
- sky/provision/aws/instance.py +135 -50
- sky/provision/azure/instance.py +10 -5
- sky/provision/common.py +13 -1
- sky/provision/cudo/cudo_machine_type.py +1 -1
- sky/provision/cudo/cudo_utils.py +14 -8
- sky/provision/cudo/cudo_wrapper.py +72 -71
- sky/provision/cudo/instance.py +10 -6
- sky/provision/do/instance.py +10 -6
- sky/provision/do/utils.py +4 -3
- sky/provision/docker_utils.py +114 -23
- sky/provision/fluidstack/instance.py +13 -8
- sky/provision/gcp/__init__.py +1 -0
- sky/provision/gcp/config.py +301 -19
- sky/provision/gcp/constants.py +218 -0
- sky/provision/gcp/instance.py +36 -8
- sky/provision/gcp/instance_utils.py +18 -4
- sky/provision/gcp/volume_utils.py +247 -0
- sky/provision/hyperbolic/__init__.py +12 -0
- sky/provision/hyperbolic/config.py +10 -0
- sky/provision/hyperbolic/instance.py +437 -0
- sky/provision/hyperbolic/utils.py +373 -0
- sky/provision/instance_setup.py +93 -14
- sky/provision/kubernetes/__init__.py +5 -0
- sky/provision/kubernetes/config.py +9 -52
- sky/provision/kubernetes/constants.py +17 -0
- sky/provision/kubernetes/instance.py +789 -247
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +27 -17
- sky/provision/kubernetes/network_utils.py +40 -43
- sky/provision/kubernetes/utils.py +1192 -531
- sky/provision/kubernetes/volume.py +282 -0
- sky/provision/lambda_cloud/instance.py +22 -16
- sky/provision/nebius/constants.py +50 -0
- sky/provision/nebius/instance.py +19 -6
- sky/provision/nebius/utils.py +196 -91
- sky/provision/oci/instance.py +10 -5
- sky/provision/paperspace/instance.py +10 -7
- sky/provision/paperspace/utils.py +1 -1
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +110 -36
- sky/provision/runpod/__init__.py +5 -0
- sky/provision/runpod/instance.py +27 -6
- sky/provision/runpod/utils.py +51 -18
- sky/provision/runpod/volume.py +180 -0
- sky/provision/scp/__init__.py +15 -0
- sky/provision/scp/config.py +93 -0
- sky/provision/scp/instance.py +531 -0
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +807 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/ssh/__init__.py +18 -0
- sky/provision/vast/instance.py +13 -8
- sky/provision/vast/utils.py +10 -7
- sky/provision/vsphere/common/vim_utils.py +1 -2
- sky/provision/vsphere/instance.py +15 -10
- sky/provision/vsphere/vsphere_utils.py +9 -19
- sky/py.typed +0 -0
- sky/resources.py +844 -118
- sky/schemas/__init__.py +0 -0
- sky/schemas/api/__init__.py +0 -0
- sky/schemas/api/responses.py +225 -0
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/global_user_state/002_add_workspace_to_cluster_history.py +35 -0
- sky/schemas/db/global_user_state/003_fix_initial_revision.py +61 -0
- sky/schemas/db/global_user_state/004_is_managed.py +34 -0
- sky/schemas/db/global_user_state/005_cluster_event.py +32 -0
- sky/schemas/db/global_user_state/006_provision_log.py +41 -0
- sky/schemas/db/global_user_state/007_cluster_event_request_id.py +34 -0
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/serve_state/001_initial_schema.py +67 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +42 -0
- sky/schemas/db/spot_jobs/003_pool_hash.py +34 -0
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/generated/__init__.py +0 -0
- sky/schemas/generated/autostopv1_pb2.py +36 -0
- sky/schemas/generated/autostopv1_pb2.pyi +43 -0
- sky/schemas/generated/autostopv1_pb2_grpc.py +146 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +74 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +357 -5
- sky/serve/client/impl.py +310 -0
- sky/serve/client/sdk.py +47 -139
- sky/serve/client/sdk_async.py +130 -0
- sky/serve/constants.py +10 -8
- sky/serve/controller.py +64 -19
- sky/serve/load_balancer.py +106 -60
- sky/serve/load_balancing_policies.py +115 -1
- sky/serve/replica_managers.py +273 -162
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +554 -251
- sky/serve/serve_utils.py +733 -220
- sky/serve/server/core.py +66 -711
- sky/serve/server/impl.py +1093 -0
- sky/serve/server/server.py +21 -18
- sky/serve/service.py +133 -48
- sky/serve/service_spec.py +135 -16
- sky/serve/spot_placer.py +3 -0
- sky/server/auth/__init__.py +0 -0
- sky/server/auth/authn.py +50 -0
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +200 -0
- sky/server/common.py +475 -181
- sky/server/config.py +81 -23
- sky/server/constants.py +44 -6
- sky/server/daemons.py +229 -0
- sky/server/html/token_page.html +185 -0
- sky/server/metrics.py +160 -0
- sky/server/requests/executor.py +528 -138
- sky/server/requests/payloads.py +351 -17
- sky/server/requests/preconditions.py +21 -17
- sky/server/requests/process.py +112 -29
- sky/server/requests/request_names.py +120 -0
- sky/server/requests/requests.py +817 -224
- sky/server/requests/serializers/decoders.py +82 -31
- sky/server/requests/serializers/encoders.py +140 -22
- sky/server/requests/threads.py +106 -0
- sky/server/rest.py +417 -0
- sky/server/server.py +1290 -284
- sky/server/state.py +20 -0
- sky/server/stream_utils.py +345 -57
- sky/server/uvicorn.py +217 -3
- sky/server/versions.py +270 -0
- sky/setup_files/MANIFEST.in +5 -0
- sky/setup_files/alembic.ini +156 -0
- sky/setup_files/dependencies.py +136 -31
- sky/setup_files/setup.py +44 -42
- sky/sky_logging.py +102 -5
- sky/skylet/attempt_skylet.py +1 -0
- sky/skylet/autostop_lib.py +129 -8
- sky/skylet/configs.py +27 -20
- sky/skylet/constants.py +171 -19
- sky/skylet/events.py +105 -21
- sky/skylet/job_lib.py +335 -104
- sky/skylet/log_lib.py +297 -18
- sky/skylet/log_lib.pyi +44 -1
- sky/skylet/ray_patches/__init__.py +17 -3
- sky/skylet/ray_patches/autoscaler.py.diff +18 -0
- sky/skylet/ray_patches/cli.py.diff +19 -0
- sky/skylet/ray_patches/command_runner.py.diff +17 -0
- sky/skylet/ray_patches/log_monitor.py.diff +20 -0
- sky/skylet/ray_patches/resource_demand_scheduler.py.diff +32 -0
- sky/skylet/ray_patches/updater.py.diff +18 -0
- sky/skylet/ray_patches/worker.py.diff +41 -0
- sky/skylet/services.py +564 -0
- sky/skylet/skylet.py +63 -4
- sky/skylet/subprocess_daemon.py +103 -29
- sky/skypilot_config.py +506 -99
- sky/ssh_node_pools/__init__.py +1 -0
- sky/ssh_node_pools/core.py +135 -0
- sky/ssh_node_pools/server.py +233 -0
- sky/task.py +621 -137
- sky/templates/aws-ray.yml.j2 +10 -3
- sky/templates/azure-ray.yml.j2 +1 -1
- sky/templates/do-ray.yml.j2 +1 -1
- sky/templates/gcp-ray.yml.j2 +57 -0
- sky/templates/hyperbolic-ray.yml.j2 +67 -0
- sky/templates/jobs-controller.yaml.j2 +27 -24
- sky/templates/kubernetes-loadbalancer.yml.j2 +2 -0
- sky/templates/kubernetes-ray.yml.j2 +607 -51
- sky/templates/lambda-ray.yml.j2 +1 -1
- sky/templates/nebius-ray.yml.j2 +33 -12
- sky/templates/paperspace-ray.yml.j2 +1 -1
- sky/templates/primeintellect-ray.yml.j2 +71 -0
- sky/templates/runpod-ray.yml.j2 +9 -1
- sky/templates/scp-ray.yml.j2 +3 -50
- sky/templates/seeweb-ray.yml.j2 +108 -0
- sky/templates/shadeform-ray.yml.j2 +72 -0
- sky/templates/sky-serve-controller.yaml.j2 +22 -2
- sky/templates/websocket_proxy.py +178 -18
- sky/usage/usage_lib.py +18 -11
- sky/users/__init__.py +0 -0
- sky/users/model.conf +15 -0
- sky/users/permission.py +387 -0
- sky/users/rbac.py +121 -0
- sky/users/server.py +720 -0
- sky/users/token_service.py +218 -0
- sky/utils/accelerator_registry.py +34 -5
- sky/utils/admin_policy_utils.py +84 -38
- sky/utils/annotations.py +16 -5
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/auth_utils.py +153 -0
- sky/utils/benchmark_utils.py +60 -0
- sky/utils/cli_utils/status_utils.py +159 -86
- sky/utils/cluster_utils.py +31 -9
- sky/utils/command_runner.py +354 -68
- sky/utils/command_runner.pyi +93 -3
- sky/utils/common.py +35 -8
- sky/utils/common_utils.py +310 -87
- sky/utils/config_utils.py +87 -5
- sky/utils/context.py +402 -0
- sky/utils/context_utils.py +222 -0
- sky/utils/controller_utils.py +264 -89
- sky/utils/dag_utils.py +31 -12
- sky/utils/db/__init__.py +0 -0
- sky/utils/db/db_utils.py +470 -0
- sky/utils/db/migration_utils.py +133 -0
- sky/utils/directory_utils.py +12 -0
- sky/utils/env_options.py +13 -0
- sky/utils/git.py +567 -0
- sky/utils/git_clone.sh +460 -0
- sky/utils/infra_utils.py +195 -0
- sky/utils/kubernetes/cleanup-tunnel.sh +62 -0
- sky/utils/kubernetes/config_map_utils.py +133 -0
- sky/utils/kubernetes/create_cluster.sh +13 -27
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/deploy_remote_cluster.py +1299 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +22 -31
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/generate_kubeconfig.sh +4 -1
- sky/utils/kubernetes/gpu_labeler.py +5 -5
- sky/utils/kubernetes/kubernetes_deploy_utils.py +354 -47
- sky/utils/kubernetes/ssh-tunnel.sh +379 -0
- sky/utils/kubernetes/ssh_utils.py +221 -0
- sky/utils/kubernetes_enums.py +8 -15
- sky/utils/lock_events.py +94 -0
- sky/utils/locks.py +368 -0
- sky/utils/log_utils.py +300 -6
- sky/utils/perf_utils.py +22 -0
- sky/utils/resource_checker.py +298 -0
- sky/utils/resources_utils.py +249 -32
- sky/utils/rich_utils.py +213 -37
- sky/utils/schemas.py +905 -147
- sky/utils/serialize_utils.py +16 -0
- sky/utils/status_lib.py +10 -0
- sky/utils/subprocess_utils.py +38 -15
- sky/utils/tempstore.py +70 -0
- sky/utils/timeline.py +24 -52
- sky/utils/ux_utils.py +84 -15
- sky/utils/validator.py +11 -1
- sky/utils/volume.py +86 -0
- sky/utils/yaml_utils.py +111 -0
- sky/volumes/__init__.py +13 -0
- sky/volumes/client/__init__.py +0 -0
- sky/volumes/client/sdk.py +149 -0
- sky/volumes/server/__init__.py +0 -0
- sky/volumes/server/core.py +258 -0
- sky/volumes/server/server.py +122 -0
- sky/volumes/volume.py +212 -0
- sky/workspaces/__init__.py +0 -0
- sky/workspaces/core.py +655 -0
- sky/workspaces/server.py +101 -0
- sky/workspaces/utils.py +56 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/METADATA +675 -0
- skypilot_nightly-1.0.0.dev20251107.dist-info/RECORD +594 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +1 -1
- sky/benchmark/benchmark_state.py +0 -256
- sky/benchmark/benchmark_utils.py +0 -641
- sky/clouds/service_catalog/constants.py +0 -7
- sky/dashboard/out/_next/static/LksQgChY5izXjokL3LcEu/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/236-f49500b82ad5392d.js +0 -6
- sky/dashboard/out/_next/static/chunks/312-c3c8845990db8ffc.js +0 -15
- sky/dashboard/out/_next/static/chunks/37-0a572fe0dbb89c4d.js +0 -6
- sky/dashboard/out/_next/static/chunks/678-206dddca808e6d16.js +0 -59
- sky/dashboard/out/_next/static/chunks/845-0f8017370869e269.js +0 -1
- sky/dashboard/out/_next/static/chunks/979-7bf73a4c7cea0f5c.js +0 -1
- sky/dashboard/out/_next/static/chunks/fd9d1056-2821b0f0cabcd8bd.js +0 -1
- sky/dashboard/out/_next/static/chunks/framework-87d061ee6ed71b28.js +0 -33
- sky/dashboard/out/_next/static/chunks/main-app-241eb28595532291.js +0 -1
- sky/dashboard/out/_next/static/chunks/main-e0e2335212e72357.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-e6b013bc3f77ad60.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/_error-1be831200e60c5c0.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-e15db85d0ea1fbe1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-f383db7389368ea7.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-a93b93e10b8b074e.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-f9f039532ca8cbc4.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-03f279c6741fb48b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-a75029b67aab6a2e.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-830f59b8404e96b8.js +0 -1
- sky/dashboard/out/_next/static/css/c6933bbb2ce7f4dd.css +0 -3
- sky/jobs/dashboard/dashboard.py +0 -223
- sky/jobs/dashboard/static/favicon.ico +0 -0
- sky/jobs/dashboard/templates/index.html +0 -831
- sky/jobs/server/dashboard_utils.py +0 -69
- sky/skylet/providers/scp/__init__.py +0 -2
- sky/skylet/providers/scp/config.py +0 -149
- sky/skylet/providers/scp/node_provider.py +0 -578
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/db_utils.py +0 -100
- sky/utils/kubernetes/deploy_remote_cluster.sh +0 -308
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250509.dist-info/METADATA +0 -361
- skypilot_nightly-1.0.0.dev20250509.dist-info/RECORD +0 -396
- /sky/{clouds/service_catalog → catalog}/config.py +0 -0
- /sky/{benchmark → catalog/data_fetchers}/__init__.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
- /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
- /sky/{clouds/service_catalog/data_fetchers → client/cli}/__init__.py +0 -0
- /sky/dashboard/out/_next/static/{LksQgChY5izXjokL3LcEu → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250509.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/utils/dag_utils.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
"""Utilities for loading and dumping DAGs from/to YAML files."""
|
|
2
2
|
import copy
|
|
3
|
-
from typing import Any, Dict, List, Optional, Tuple
|
|
3
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from sky import dag as dag_lib
|
|
6
6
|
from sky import sky_logging
|
|
7
7
|
from sky import task as task_lib
|
|
8
8
|
from sky.utils import cluster_utils
|
|
9
|
-
from sky.utils import common_utils
|
|
10
9
|
from sky.utils import registry
|
|
11
10
|
from sky.utils import ux_utils
|
|
11
|
+
from sky.utils import yaml_utils
|
|
12
12
|
|
|
13
13
|
logger = sky_logging.init_logger(__name__)
|
|
14
14
|
|
|
@@ -66,7 +66,9 @@ def convert_entrypoint_to_dag(entrypoint: Any) -> 'dag_lib.Dag':
|
|
|
66
66
|
|
|
67
67
|
def _load_chain_dag(
|
|
68
68
|
configs: List[Dict[str, Any]],
|
|
69
|
-
env_overrides: Optional[List[Tuple[str, str]]] = None
|
|
69
|
+
env_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
70
|
+
secrets_overrides: Optional[List[Tuple[str,
|
|
71
|
+
str]]] = None) -> dag_lib.Dag:
|
|
70
72
|
"""Loads a chain DAG from a list of YAML configs."""
|
|
71
73
|
dag_name = None
|
|
72
74
|
if set(configs[0].keys()) == {'name'}:
|
|
@@ -84,7 +86,8 @@ def _load_chain_dag(
|
|
|
84
86
|
for task_config in configs:
|
|
85
87
|
if task_config is None:
|
|
86
88
|
continue
|
|
87
|
-
task = task_lib.Task.from_yaml_config(task_config, env_overrides
|
|
89
|
+
task = task_lib.Task.from_yaml_config(task_config, env_overrides,
|
|
90
|
+
secrets_overrides)
|
|
88
91
|
if current_task is not None:
|
|
89
92
|
current_task >> task # pylint: disable=pointless-statement
|
|
90
93
|
current_task = task
|
|
@@ -95,6 +98,7 @@ def _load_chain_dag(
|
|
|
95
98
|
def load_chain_dag_from_yaml(
|
|
96
99
|
path: str,
|
|
97
100
|
env_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
101
|
+
secret_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
98
102
|
) -> dag_lib.Dag:
|
|
99
103
|
"""Loads a chain DAG from a YAML file.
|
|
100
104
|
|
|
@@ -105,17 +109,22 @@ def load_chain_dag_from_yaml(
|
|
|
105
109
|
the task's 'envs' section. If it is a chain dag, the envs will be updated
|
|
106
110
|
for all tasks in the chain.
|
|
107
111
|
|
|
112
|
+
'secret_overrides' is a list of (key, value) pairs that will be used to
|
|
113
|
+
update the task's 'secrets' section. If it is a chain dag, the secrets will
|
|
114
|
+
be updated for all tasks in the chain.
|
|
115
|
+
|
|
108
116
|
Returns:
|
|
109
117
|
A chain Dag with 1 or more tasks (an empty entrypoint would create a
|
|
110
118
|
trivial task).
|
|
111
119
|
"""
|
|
112
|
-
configs =
|
|
113
|
-
return _load_chain_dag(configs, env_overrides)
|
|
120
|
+
configs = yaml_utils.read_yaml_all(path)
|
|
121
|
+
return _load_chain_dag(configs, env_overrides, secret_overrides)
|
|
114
122
|
|
|
115
123
|
|
|
116
124
|
def load_chain_dag_from_yaml_str(
|
|
117
125
|
yaml_str: str,
|
|
118
126
|
env_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
127
|
+
secrets_overrides: Optional[List[Tuple[str, str]]] = None,
|
|
119
128
|
) -> dag_lib.Dag:
|
|
120
129
|
"""Loads a chain DAG from a YAML string.
|
|
121
130
|
|
|
@@ -126,19 +135,25 @@ def load_chain_dag_from_yaml_str(
|
|
|
126
135
|
the task's 'envs' section. If it is a chain dag, the envs will be updated
|
|
127
136
|
for all tasks in the chain.
|
|
128
137
|
|
|
138
|
+
'secrets_overrides' is a list of (key, value) pairs that will be used to
|
|
139
|
+
update the task's 'secrets' section. If it is a chain dag, the secrets will
|
|
140
|
+
be updated for all tasks in the chain.
|
|
141
|
+
|
|
129
142
|
Returns:
|
|
130
143
|
A chain Dag with 1 or more tasks (an empty entrypoint would create a
|
|
131
144
|
trivial task).
|
|
132
145
|
"""
|
|
133
|
-
configs =
|
|
134
|
-
return _load_chain_dag(configs, env_overrides)
|
|
146
|
+
configs = yaml_utils.read_yaml_all_str(yaml_str)
|
|
147
|
+
return _load_chain_dag(configs, env_overrides, secrets_overrides)
|
|
135
148
|
|
|
136
149
|
|
|
137
|
-
def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag
|
|
150
|
+
def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag,
|
|
151
|
+
use_user_specified_yaml: bool = False) -> str:
|
|
138
152
|
"""Dumps a chain DAG to a YAML string.
|
|
139
153
|
|
|
140
154
|
Args:
|
|
141
155
|
dag: the DAG to dump.
|
|
156
|
+
use_user_specified_yaml: forwarded to each task's to_yaml_config().
|
|
142
157
|
|
|
143
158
|
Returns:
|
|
144
159
|
The YAML string.
|
|
@@ -146,8 +161,10 @@ def dump_chain_dag_to_yaml_str(dag: dag_lib.Dag) -> str:
|
|
|
146
161
|
assert dag.is_chain(), dag
|
|
147
162
|
configs = [{'name': dag.name}]
|
|
148
163
|
for task in dag.tasks:
|
|
149
|
-
configs.append(
|
|
150
|
-
|
|
164
|
+
configs.append(
|
|
165
|
+
task.to_yaml_config(
|
|
166
|
+
use_user_specified_yaml=use_user_specified_yaml))
|
|
167
|
+
return yaml_utils.dump_yaml_str(configs)
|
|
151
168
|
|
|
152
169
|
|
|
153
170
|
def dump_chain_dag_to_yaml(dag: dag_lib.Dag, path: str) -> None:
|
|
@@ -195,7 +212,9 @@ def fill_default_config_in_dag_for_job_launch(dag: dag_lib.Dag) -> None:
|
|
|
195
212
|
assert default_strategy is not None
|
|
196
213
|
for resources in list(task_.resources):
|
|
197
214
|
original_job_recovery = resources.job_recovery
|
|
198
|
-
job_recovery = {
|
|
215
|
+
job_recovery: Dict[str, Optional[Union[str, int]]] = {
|
|
216
|
+
'strategy': default_strategy
|
|
217
|
+
}
|
|
199
218
|
if isinstance(original_job_recovery, str):
|
|
200
219
|
job_recovery['strategy'] = original_job_recovery
|
|
201
220
|
elif isinstance(original_job_recovery, dict):
|
sky/utils/db/__init__.py
ADDED
|
File without changes
|
sky/utils/db/db_utils.py
ADDED
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
"""Utils for sky databases."""
|
|
2
|
+
import asyncio
|
|
3
|
+
import contextlib
|
|
4
|
+
import enum
|
|
5
|
+
import os
|
|
6
|
+
import pathlib
|
|
7
|
+
import sqlite3
|
|
8
|
+
import threading
|
|
9
|
+
import typing
|
|
10
|
+
from typing import Any, Callable, Dict, Iterable, Literal, Optional, Union
|
|
11
|
+
|
|
12
|
+
import aiosqlite
|
|
13
|
+
import aiosqlite.context
|
|
14
|
+
import sqlalchemy
|
|
15
|
+
from sqlalchemy import exc as sqlalchemy_exc
|
|
16
|
+
from sqlalchemy.ext import asyncio as sqlalchemy_async
|
|
17
|
+
|
|
18
|
+
from sky import sky_logging
|
|
19
|
+
from sky.skylet import constants
|
|
20
|
+
|
|
21
|
+
logger = sky_logging.init_logger(__name__)
|
|
22
|
+
if typing.TYPE_CHECKING:
|
|
23
|
+
from sqlalchemy.orm import Session
|
|
24
|
+
|
|
25
|
+
# This parameter (passed to sqlite3.connect) controls how long we will wait to
|
|
26
|
+
# obtain a database lock (not necessarily during connection, but whenever it is
|
|
27
|
+
# needed). It is not a connection timeout.
|
|
28
|
+
# Even in WAL mode, only a single writer is allowed at a time. Other writers
|
|
29
|
+
# will block until the write lock can be obtained. This behavior is described in
|
|
30
|
+
# the SQLite documentation for WAL: https://www.sqlite.org/wal.html
|
|
31
|
+
# Python's default timeout is 5s. In normal usage, lock contention is very low,
|
|
32
|
+
# and this is more than sufficient. However, in some highly concurrent cases,
|
|
33
|
+
# such as a jobs controller suddenly recovering thousands of jobs at once, we
|
|
34
|
+
# can see a small number of processes that take much longer to obtain the lock.
|
|
35
|
+
# In contrived highly contentious cases, around 0.1% of transactions will take
|
|
36
|
+
# >30s to take the lock. We have not seen cases that take >60s. For cases up to
|
|
37
|
+
# 1000x parallelism, this is thus thought to be a conservative setting.
|
|
38
|
+
# For more info, see the PR description for #4552.
|
|
39
|
+
_DB_TIMEOUT_S = 60
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class UniqueConstraintViolationError(Exception):
    """Error signalling that a unique constraint was violated.

    Attributes:
        value: The input value that triggered the violation.
        message: Human-readable explanation of the error.
    """

    def __init__(self, value, message='Unique constraint violation'):
        # Only the message is forwarded to Exception, so ``args`` holds the
        # message alone; the offending value lives on the instance.
        super().__init__(message)
        self.value = value
        self.message = message

    def __str__(self):
        description = f'UniqueConstraintViolationError: {self.message} '
        return description + f'(Value: {self.value})'
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SQLAlchemyDialect(enum.Enum):
    """Enumeration of SQL dialect names: SQLite and PostgreSQL."""
    # NOTE(review): values presumably match SQLAlchemy's ``dialect.name``
    # strings -- confirm against the callers that compare them.
    SQLITE = 'sqlite'
    POSTGRESQL = 'postgresql'
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@contextlib.contextmanager
def safe_cursor(db_path: str):
    """Yield a newly created, auto-committing, auto-closing cursor.

    A new sqlite3 connection to ``db_path`` is opened with the module-wide
    lock timeout. When the ``with`` body finishes (normally or by raising),
    the cursor is closed and the connection is committed, then closed.

    Args:
        db_path: Path of the SQLite database file to open.

    Yields:
        A ``sqlite3.Cursor`` bound to the freshly opened connection.
    """
    conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
    try:
        cursor = conn.cursor()
        try:
            yield cursor
        finally:
            cursor.close()
            conn.commit()
    finally:
        # Release the connection even when cursor creation or the commit
        # itself fails (e.g. "database is locked"); otherwise the handle
        # would leak until garbage collection.
        conn.close()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def add_column_to_table(
    cursor: 'sqlite3.Cursor',
    conn: 'sqlite3.Connection',
    table_name: str,
    column_name: str,
    column_type: str,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a SQLite table unless it is already there.

    Args:
        cursor: Cursor used to run the schema and data statements.
        conn: Connection that is committed before returning.
        table_name: Table to alter.
        column_name: Name of the column to add.
        column_type: Column type (and any trailing clauses) appended
            verbatim to the ``ADD COLUMN`` statement.
        copy_from: If given, an existing column (or SQL expression) whose
            value is copied into the new column for every row.
        value_to_replace_existing_entries: If given, value written to rows
            whose new column is still NULL after the optional copy.
    """
    # Field 1 of each ``PRAGMA table_info`` row is the column name.
    table_info = cursor.execute(f'PRAGMA table_info({table_name})')
    already_present = any(info[1] == column_name for info in table_info)

    if not already_present:
        try:
            cursor.execute(f'ALTER TABLE {table_name} '
                           f'ADD COLUMN {column_name} {column_type}')
            if copy_from is not None:
                cursor.execute(f'UPDATE {table_name} '
                               f'SET {column_name} = {copy_from}')
            if value_to_replace_existing_entries is not None:
                fill_nulls_sql = (f'UPDATE {table_name} '
                                  f'SET {column_name} = (?) '
                                  f'WHERE {column_name} IS NULL')
                cursor.execute(fill_nulls_sql,
                               (value_to_replace_existing_entries,))
        except sqlite3.OperationalError as err:
            # Another thread may have added the same column between the
            # check above and the ALTER; that outcome is acceptable.
            if 'duplicate column name' not in str(err):
                raise
    conn.commit()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def add_all_tables_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
):
    """Create every table registered on ``metadata`` in the database.

    Each table is created with ``checkfirst=True``. An "already exists"
    error from the database is ignored; any other operational or
    programming error propagates.

    Args:
        metadata: Metadata object whose tables should be created.
        engine: Engine bound to the target database.
    """
    for table_def in metadata.tables.values():
        try:
            table_def.create(bind=engine, checkfirst=True)
        except (sqlalchemy_exc.OperationalError,
                sqlalchemy_exc.ProgrammingError) as err:
            if 'already exists' not in str(err):
                raise
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def add_table_to_db_sqlalchemy(
    metadata: sqlalchemy.MetaData,
    engine: sqlalchemy.Engine,
    table_name: str,
):
    """Add a specific table to the database.

    The table is created with ``checkfirst=True``. An "already exists"
    error from the database is ignored; any other operational or
    programming error propagates.

    Args:
        metadata: Metadata object holding the table definition.
        engine: Engine bound to the target database.
        table_name: Key of the table in ``metadata.tables``.

    Raises:
        KeyError: If ``table_name`` is not registered on ``metadata``.
    """
    # A missing table name surfaces as the lookup's own KeyError; no
    # catch-and-re-raise wrapper is needed for that.
    table = metadata.tables[table_name]

    try:
        table.create(bind=engine, checkfirst=True)
    except (sqlalchemy_exc.OperationalError,
            sqlalchemy_exc.ProgrammingError) as e:
        if 'already exists' not in str(e):
            raise
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def add_column_to_table_sqlalchemy(
    session: 'Session',
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    default_statement: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
):
    """Add a column to a table through a SQLAlchemy session.

    Args:
        session: Session used to execute the statements; it is committed
            before returning.
        table_name: Table to alter.
        column_name: Name of the column to add.
        column_type: SQLAlchemy type, rendered for the session's dialect.
        default_statement: Optional SQL fragment appended after the column
            type in the ``ADD COLUMN`` statement.
        copy_from: If given, an existing column (or SQL expression) whose
            value is copied into the new column for every row.
        value_to_replace_existing_entries: If given, value written to rows
            whose new column is still NULL after the optional copy.
    """
    # The rendered type depends on the dialect: e.g. LargeBinary is BLOB on
    # sqlite but BYTEA on postgres.
    rendered_type = column_type.compile(dialect=session.bind.dialect)
    if default_statement is None:
        default_clause = ''
    else:
        default_clause = f' {default_statement}'

    try:
        alter_stmt = sqlalchemy.text(f'ALTER TABLE {table_name} '
                                     f'ADD COLUMN {column_name} {rendered_type}'
                                     f'{default_clause}')
        session.execute(alter_stmt)
        if copy_from is not None:
            copy_stmt = sqlalchemy.text(f'UPDATE {table_name} '
                                        f'SET {column_name} = {copy_from}')
            session.execute(copy_stmt)
        if value_to_replace_existing_entries is not None:
            fill_nulls_stmt = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            session.execute(
                fill_nulls_stmt,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.OperationalError as err:
        # sqlite reports an existing column this way.
        if 'duplicate column name' not in str(err):
            raise
    except sqlalchemy_exc.ProgrammingError as err:
        # postgresql reports an existing column this way.
        if 'already exists' not in str(err):
            raise
    session.commit()
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def add_column_to_table_alembic(
    table_name: str,
    column_name: str,
    column_type: sqlalchemy.types.TypeEngine,
    server_default: Optional[str] = None,
    copy_from: Optional[str] = None,
    value_to_replace_existing_entries: Optional[Any] = None,
    index: Optional[bool] = None,
):
    """Add a column to a table via Alembic operations.

    Mirrors the interface of add_column_to_table_sqlalchemy, but runs inside
    Alembic's migration context. Errors indicating that the column already
    exists are ignored.

    Args:
        table_name: Name of the table to add column to
        column_name: Name of the new column
        column_type: SQLAlchemy column type
        server_default: Server-side default value for the column
        copy_from: Column name to copy values from (for existing rows)
        value_to_replace_existing_entries: Default value for existing NULL
            entries
        index: If True, create an index on this column. If None, no index
            is created.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    try:
        new_column = sqlalchemy.Column(column_name,
                                       column_type,
                                       server_default=server_default,
                                       index=index)
        op.add_column(table_name, new_column)

        # Backfill existing rows from another column, if requested.
        if copy_from is not None:
            copy_stmt = sqlalchemy.text(
                f'UPDATE {table_name} SET {column_name} = {copy_from}')
            op.execute(copy_stmt)

        # Fill remaining NULLs; the value is bound as a parameter rather
        # than interpolated into the SQL text.
        if value_to_replace_existing_entries is not None:
            bind = op.get_bind()
            fill_nulls_stmt = sqlalchemy.text(
                f'UPDATE {table_name} '
                f'SET {column_name} = :replacement_value '
                f'WHERE {column_name} IS NULL')
            bind.execute(
                fill_nulls_stmt,
                {'replacement_value': value_to_replace_existing_entries})
    except sqlalchemy_exc.ProgrammingError as err:
        # Anything other than "column already exists" is a real failure.
        if 'already exists' not in str(err).lower():
            raise
    except sqlalchemy_exc.OperationalError as err:
        # Anything other than "column already exists" is a real failure.
        if 'duplicate column name' not in str(err).lower():
            raise
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def drop_column_from_table_alembic(
    table_name: str,
    column_name: str,
):
    """Drop a column from a table via Alembic operations.

    Does nothing when the column is not present on the table.

    Args:
        table_name: Name of the table to drop column from.
        column_name: Name of the column to drop.
    """
    from alembic import op  # pylint: disable=import-outside-toplevel

    # Inspect the live schema first so a missing column is a silent no-op.
    inspector = sqlalchemy.inspect(op.get_bind())
    existing_columns = {
        col_info['name'] for col_info in inspector.get_columns(table_name)
    }
    if column_name not in existing_columns:
        return

    try:
        op.drop_column(table_name, column_name)
    except (sqlalchemy_exc.ProgrammingError,
            sqlalchemy_exc.OperationalError) as err:
        # The column may have been dropped after the check above; only
        # re-raise errors that are not about a missing column.
        if 'does not exist' not in str(err).lower():
            raise
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class SQLiteConn(threading.local):
    """Thread-local connection to the sqlite3 database.

    Holds a synchronous sqlite3 connection and cursor, created eagerly in
    ``__init__``, plus a lazily created aiosqlite connection used by the
    ``*_async`` methods. Both connections use ``_DB_TIMEOUT_S`` as their
    lock-wait timeout.
    """

    def __init__(self, db_path: str, create_table: Callable):
        """Open the sync connection and run the schema-creation callback.

        Args:
            db_path: Path of the SQLite database file.
            create_table: Callable invoked as ``create_table(cursor, conn)``
                right after the connection is opened.
        """
        super().__init__()
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path, timeout=_DB_TIMEOUT_S)
        self.cursor = self.conn.cursor()
        create_table(self.cursor, self.conn)
        self._async_conn: Optional[aiosqlite.Connection] = None
        self._async_conn_lock: Optional[asyncio.Lock] = None

    async def _get_async_conn(self) -> aiosqlite.Connection:
        """Get the shared aiosqlite connection for current thread.

        Typically, external caller should not get the connection directly,
        instead, SQLiteConn.{operation}_async methods should be used. This
        is to avoid txn interleaving on the shared aiosqlite connection.
        E.g.
        coroutine 1:
            A: await write(row1)
            B: cursor = await conn.execute(read_row1)
            C: await cursor.fetchall()
        coroutine 2:
            D: await write(row2)
            E: cursor = await conn.execute(read_row2)
            F: await cursor.fetchall()
        The A -> B -> D -> E -> C time sequence will cause B and D read at the
        same snapshot point when B started, thus cause coroutine2 lost the
        read-after-write consistency. When you are adding new async operations
        to SQLiteConn, make sure the txn pattern does not cause this issue.
        """
        # Python 3.8 binds current event loop to asyncio.Lock(), which requires
        # a loop available in current thread. Lazy-init the lock to avoid this
        # dependency. The correctness is guaranteed since SQLiteConn is
        # thread-local so there is no race condition between check and init.
        if self._async_conn_lock is None:
            self._async_conn_lock = asyncio.Lock()
        if self._async_conn is None:
            async with self._async_conn_lock:
                # Re-check under the lock: another coroutine may have
                # connected while this one was waiting.
                if self._async_conn is None:
                    # Init logic like requests.init_db_within_lock will handle
                    # initialization like setting the WAL mode, so we do not
                    # duplicate that logic here.
                    # The lock-wait timeout is a per-connection setting, so
                    # pass it explicitly; otherwise the async connection
                    # would fall back to sqlite3's much shorter default
                    # while the sync connection waits _DB_TIMEOUT_S.
                    self._async_conn = await aiosqlite.connect(
                        self.db_path, timeout=_DB_TIMEOUT_S)
        return self._async_conn

    async def execute_and_commit_async(self,
                                       sql: str,
                                       parameters: Optional[
                                           Iterable[Any]] = None) -> None:
        """Execute the sql and commit the transaction in a sync block.

        Args:
            sql: SQL statement to execute.
            parameters: Optional bind parameters; None means no parameters.
        """
        conn = await self._get_async_conn()

        if parameters is None:
            parameters = []

        def exec_and_commit(sql: str, parameters: Optional[Iterable[Any]]):
            # Execute and commit inside one callable handed to the
            # connection, so no other coroutine's statement can be
            # interleaved between the two steps.
            # pylint: disable=protected-access
            conn._conn.execute(sql, parameters)
            conn._conn.commit()

        # pylint: disable=protected-access
        await conn._execute(exec_and_commit, sql, parameters)

    @aiosqlite.context.contextmanager
    async def execute_fetchall_async(self,
                                     sql: str,
                                     parameters: Optional[Iterable[Any]] = None
                                    ) -> Iterable[sqlite3.Row]:
        """Execute the sql and return all resulting rows.

        Args:
            sql: SQL statement to execute.
            parameters: Optional bind parameters, forwarded unchanged.
        """
        conn = await self._get_async_conn()
        return await conn.execute_fetchall(sql, parameters)

    async def execute_get_returning_value_async(
            self,
            sql: str,
            parameters: Optional[Iterable[Any]] = None
    ) -> Optional[sqlite3.Row]:
        """Execute the sql, commit, and return the first result row.

        Args:
            sql: SQL statement to execute.
            parameters: Optional bind parameters; None means no parameters.

        Returns:
            The first row produced by the statement, or None if there is
            none.
        """
        conn = await self._get_async_conn()

        if parameters is None:
            parameters = []

        def exec_and_get_returning_value(sql: str,
                                         parameters: Optional[Iterable[Any]]):
            # Fetch the row before committing, all within one callable, so
            # the statement, fetch and commit are not interleaved with
            # other coroutines' work on the shared connection.
            # pylint: disable=protected-access
            row = conn._conn.execute(sql, parameters).fetchone()
            conn._conn.commit()
            return row

        # pylint: disable=protected-access
        return await conn._execute(exec_and_get_returning_value, sql,
                                   parameters)

    async def close(self):
        """Close the async connection (if it was opened) and the sync one."""
        try:
            if self._async_conn is not None:
                await self._async_conn.close()
        finally:
            # Always release the sync connection, even if closing the async
            # connection raised.
            self.conn.close()
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
# Pool size used by get_engine() when it creates a Postgres engine; 0 makes
# get_engine() use NullPool instead of a QueuePool.
_max_connections = 0
# Synchronous engines created by get_engine(), keyed by connection string
# (Postgres) and by database file path (SQLite).
_postgres_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}
_sqlite_engine_cache: Dict[str, sqlalchemy.engine.Engine] = {}

# Held by get_engine() while it creates and caches an engine.
_db_creation_lock = threading.Lock()
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def set_max_connections(max_connections: int) -> None:
    """Set the module-wide maximum connection count.

    The value is read by get_engine() when it creates a Postgres engine.
    """
    global _max_connections
    _max_connections = max_connections
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def get_max_connections() -> int:
    """Return the module-wide maximum connection count."""
    return _max_connections
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
@typing.overload
def get_engine(
        db_name: Optional[str],
        async_engine: Literal[False] = False) -> sqlalchemy.engine.Engine:
    ...


@typing.overload
def get_engine(db_name: Optional[str],
               async_engine: Literal[True]) -> sqlalchemy_async.AsyncEngine:
    ...


def get_engine(
    db_name: Optional[str],
    async_engine: bool = False
) -> Union[sqlalchemy.engine.Engine, sqlalchemy_async.AsyncEngine]:
    """Get the engine for the given database name.

    A Postgres engine is used when this process is marked as the SkyPilot
    server and a DB connection URI is set in the environment; otherwise a
    SQLite database under ``~/.sky`` is used. Synchronous engines are cached
    and reused; async engines are created fresh on every call.

    Args:
        db_name: The name of the database. ONLY used for SQLite. On Postgres,
            we use a single database, which we get from the connection string.
        async_engine: Whether to return an async engine.

    Returns:
        A cached synchronous ``Engine``, or a new ``AsyncEngine`` when
        ``async_engine`` is True.
    """
    conn_string = None
    if os.environ.get(constants.ENV_VAR_IS_SKYPILOT_SERVER) is not None:
        conn_string = os.environ.get(constants.ENV_VAR_DB_CONNECTION_URI)
    if conn_string:
        if async_engine:
            # Only the leading scheme should be rewritten, so limit the
            # replacement to the first occurrence.
            conn_string = conn_string.replace('postgresql://',
                                              'postgresql+asyncpg://', 1)
            # This is an AsyncEngine, instead of a (normal, synchronous) Engine,
            # so we should not put it in the cache. Instead, just return.
            return sqlalchemy_async.create_async_engine(
                conn_string, poolclass=sqlalchemy.NullPool)
        with _db_creation_lock:
            if conn_string not in _postgres_engine_cache:
                logger.debug('Creating a new postgres engine with '
                             f'maximum {_max_connections} connections')
                if _max_connections == 0:
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string, poolclass=sqlalchemy.pool.NullPool))
                else:
                    _postgres_engine_cache[conn_string] = (
                        sqlalchemy.create_engine(
                            conn_string,
                            poolclass=sqlalchemy.pool.QueuePool,
                            pool_size=_max_connections,
                            max_overflow=max(0, 5 - _max_connections),
                            pool_pre_ping=True,
                            pool_recycle=1800))
            engine = _postgres_engine_cache[conn_string]
    else:
        assert db_name is not None, 'db_name must be provided for SQLite'
        db_path = os.path.expanduser(f'~/.sky/{db_name}.db')
        pathlib.Path(db_path).parents[0].mkdir(parents=True, exist_ok=True)
        if async_engine:
            # This is an AsyncEngine, instead of a (normal, synchronous) Engine,
            # so we should not put it in the cache. Instead, just return.
            # NOTE(review): this timeout (30) differs from _DB_TIMEOUT_S used
            # by the raw sqlite3 connections -- confirm this is intentional.
            return sqlalchemy_async.create_async_engine(
                'sqlite+aiosqlite:///' + db_path, connect_args={'timeout': 30})
        # Guard the check-then-insert with the same lock as the Postgres
        # path so concurrent first callers do not each create an engine.
        with _db_creation_lock:
            if db_path not in _sqlite_engine_cache:
                _sqlite_engine_cache[db_path] = sqlalchemy.create_engine(
                    'sqlite:///' + db_path)
            engine = _sqlite_engine_cache[db_path]
    return engine
|