skypilot-nightly 1.0.0.dev20250905__py3-none-any.whl → 1.0.0.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +12 -2
- sky/adaptors/aws.py +27 -22
- sky/adaptors/common.py +25 -2
- sky/adaptors/coreweave.py +278 -0
- sky/adaptors/do.py +8 -2
- sky/adaptors/gcp.py +11 -0
- sky/adaptors/ibm.py +5 -2
- sky/adaptors/kubernetes.py +64 -0
- sky/adaptors/nebius.py +3 -1
- sky/adaptors/primeintellect.py +1 -0
- sky/adaptors/seeweb.py +183 -0
- sky/adaptors/shadeform.py +89 -0
- sky/adaptors/slurm.py +478 -0
- sky/admin_policy.py +20 -0
- sky/authentication.py +157 -263
- sky/backends/__init__.py +3 -2
- sky/backends/backend.py +11 -3
- sky/backends/backend_utils.py +630 -185
- sky/backends/cloud_vm_ray_backend.py +1111 -928
- sky/backends/local_docker_backend.py +9 -5
- sky/backends/task_codegen.py +971 -0
- sky/backends/wheel_utils.py +18 -0
- sky/catalog/__init__.py +8 -3
- sky/catalog/aws_catalog.py +4 -0
- sky/catalog/common.py +19 -1
- sky/catalog/data_fetchers/fetch_aws.py +102 -80
- sky/catalog/data_fetchers/fetch_gcp.py +30 -3
- sky/catalog/data_fetchers/fetch_nebius.py +9 -6
- sky/catalog/data_fetchers/fetch_runpod.py +698 -0
- sky/catalog/data_fetchers/fetch_seeweb.py +329 -0
- sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
- sky/catalog/kubernetes_catalog.py +36 -32
- sky/catalog/primeintellect_catalog.py +95 -0
- sky/catalog/runpod_catalog.py +5 -1
- sky/catalog/seeweb_catalog.py +184 -0
- sky/catalog/shadeform_catalog.py +165 -0
- sky/catalog/slurm_catalog.py +243 -0
- sky/check.py +87 -46
- sky/client/cli/command.py +1004 -434
- sky/client/cli/flags.py +4 -2
- sky/{volumes/utils.py → client/cli/table_utils.py} +111 -13
- sky/client/cli/utils.py +79 -0
- sky/client/common.py +12 -2
- sky/client/sdk.py +188 -65
- sky/client/sdk_async.py +34 -33
- sky/cloud_stores.py +82 -3
- sky/clouds/__init__.py +8 -0
- sky/clouds/aws.py +337 -129
- sky/clouds/azure.py +24 -18
- sky/clouds/cloud.py +47 -13
- sky/clouds/cudo.py +16 -13
- sky/clouds/do.py +9 -7
- sky/clouds/fluidstack.py +12 -5
- sky/clouds/gcp.py +14 -7
- sky/clouds/hyperbolic.py +12 -5
- sky/clouds/ibm.py +12 -5
- sky/clouds/kubernetes.py +80 -45
- sky/clouds/lambda_cloud.py +12 -5
- sky/clouds/nebius.py +23 -9
- sky/clouds/oci.py +19 -12
- sky/clouds/paperspace.py +4 -1
- sky/clouds/primeintellect.py +317 -0
- sky/clouds/runpod.py +85 -24
- sky/clouds/scp.py +12 -8
- sky/clouds/seeweb.py +477 -0
- sky/clouds/shadeform.py +400 -0
- sky/clouds/slurm.py +578 -0
- sky/clouds/ssh.py +6 -3
- sky/clouds/utils/scp_utils.py +61 -50
- sky/clouds/vast.py +43 -27
- sky/clouds/vsphere.py +14 -16
- sky/core.py +296 -195
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/KYAhEFa3FTfq4JyKVgo-s/_buildManifest.js +1 -0
- sky/dashboard/out/_next/static/chunks/1141-9c810f01ff4f398a.js +11 -0
- sky/dashboard/out/_next/static/chunks/1871-7e202677c42f43fe.js +6 -0
- sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
- sky/dashboard/out/_next/static/chunks/2755.edd818326d489a1d.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.ddda8c6c6f9f24dc.js +1 -0
- sky/dashboard/out/_next/static/chunks/3785.7e245f318f9d1121.js +1 -0
- sky/dashboard/out/_next/static/chunks/{6601-06114c982db410b6.js → 3800-b589397dc09c5b4e.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3850-fd5696f3bbbaddae.js +1 -0
- sky/dashboard/out/_next/static/chunks/4725.172ede95d1b21022.js +1 -0
- sky/dashboard/out/_next/static/chunks/4937.a2baa2df5572a276.js +15 -0
- sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
- sky/dashboard/out/_next/static/chunks/6856-da20c5fd999f319c.js +1 -0
- sky/dashboard/out/_next/static/chunks/6990-09cbf02d3cd518c3.js +1 -0
- sky/dashboard/out/_next/static/chunks/7359-c8d04e06886000b3.js +30 -0
- sky/dashboard/out/_next/static/chunks/7615-019513abc55b3b47.js +1 -0
- sky/dashboard/out/_next/static/chunks/8640.5b9475a2d18c5416.js +16 -0
- sky/dashboard/out/_next/static/chunks/8969-452f9d5cbdd2dc73.js +1 -0
- sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
- sky/dashboard/out/_next/static/chunks/9353-8369df1cf105221c.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.a536cf6b1fa42355.js +31 -0
- sky/dashboard/out/_next/static/chunks/9847.3aaca6bb33455140.js +30 -0
- sky/dashboard/out/_next/static/chunks/pages/_app-68b647e26f9d2793.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-33f525539665fdfd.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-a7565f586ef86467.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters-9e5d47818b9bdadd.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{config-dfb9bf07b13045f4.js → config-718cdc365de82689.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-6563820e094f68ca.js → [context]-12c559ec4d81fdbd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{infra-aabba60d57826e0f.js → infra-d187cd0413d72475.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-895847b6cf200b04.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-8d0f4655400b4eb9.js +21 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs-e5a98f17f8513a96.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/plugins/[...slug]-4f46050ca065d8f8.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/users-2f7646eb77785a2c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/volumes-ef19d49c6d0e8500.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-af76bb06dbb3954f.js → [name]-96e0f298308da7e2.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-7598c33a746cdc91.js → workspaces-cb4da3abe08ebf19.js} +1 -1
- sky/dashboard/out/_next/static/chunks/webpack-fba3de387ff6bb08.js +1 -0
- sky/dashboard/out/_next/static/css/c5a4cfd2600fc715.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/plugins/[...slug].html +1 -0
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +177 -30
- sky/data/storage.py +200 -19
- sky/data/storage_utils.py +10 -45
- sky/exceptions.py +18 -7
- sky/execution.py +74 -31
- sky/global_user_state.py +605 -191
- sky/jobs/__init__.py +2 -0
- sky/jobs/client/sdk.py +101 -4
- sky/jobs/client/sdk_async.py +31 -5
- sky/jobs/constants.py +15 -8
- sky/jobs/controller.py +726 -284
- sky/jobs/file_content_utils.py +128 -0
- sky/jobs/log_gc.py +193 -0
- sky/jobs/recovery_strategy.py +250 -100
- sky/jobs/scheduler.py +271 -173
- sky/jobs/server/core.py +367 -114
- sky/jobs/server/server.py +81 -35
- sky/jobs/server/utils.py +89 -35
- sky/jobs/state.py +1498 -620
- sky/jobs/utils.py +771 -306
- sky/logs/agent.py +40 -5
- sky/logs/aws.py +9 -19
- sky/metrics/utils.py +282 -39
- sky/models.py +2 -0
- sky/optimizer.py +7 -6
- sky/provision/__init__.py +38 -1
- sky/provision/aws/config.py +34 -13
- sky/provision/aws/instance.py +5 -2
- sky/provision/azure/instance.py +5 -3
- sky/provision/common.py +22 -0
- sky/provision/cudo/instance.py +4 -3
- sky/provision/do/instance.py +4 -3
- sky/provision/docker_utils.py +112 -28
- sky/provision/fluidstack/instance.py +6 -5
- sky/provision/gcp/config.py +6 -1
- sky/provision/gcp/instance.py +4 -2
- sky/provision/hyperbolic/instance.py +4 -2
- sky/provision/instance_setup.py +66 -20
- sky/provision/kubernetes/__init__.py +2 -0
- sky/provision/kubernetes/config.py +7 -44
- sky/provision/kubernetes/constants.py +0 -1
- sky/provision/kubernetes/instance.py +609 -213
- sky/provision/kubernetes/manifests/fusermount-server-daemonset.yaml +1 -2
- sky/provision/kubernetes/network.py +12 -8
- sky/provision/kubernetes/network_utils.py +8 -25
- sky/provision/kubernetes/utils.py +422 -422
- sky/provision/kubernetes/volume.py +150 -18
- sky/provision/lambda_cloud/instance.py +16 -13
- sky/provision/nebius/instance.py +6 -2
- sky/provision/nebius/utils.py +103 -86
- sky/provision/oci/instance.py +4 -2
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/primeintellect/__init__.py +10 -0
- sky/provision/primeintellect/config.py +11 -0
- sky/provision/primeintellect/instance.py +454 -0
- sky/provision/primeintellect/utils.py +398 -0
- sky/provision/provisioner.py +45 -15
- sky/provision/runpod/__init__.py +2 -0
- sky/provision/runpod/instance.py +4 -3
- sky/provision/runpod/volume.py +69 -13
- sky/provision/scp/instance.py +307 -130
- sky/provision/seeweb/__init__.py +11 -0
- sky/provision/seeweb/config.py +13 -0
- sky/provision/seeweb/instance.py +812 -0
- sky/provision/shadeform/__init__.py +11 -0
- sky/provision/shadeform/config.py +12 -0
- sky/provision/shadeform/instance.py +351 -0
- sky/provision/shadeform/shadeform_utils.py +83 -0
- sky/provision/slurm/__init__.py +12 -0
- sky/provision/slurm/config.py +13 -0
- sky/provision/slurm/instance.py +572 -0
- sky/provision/slurm/utils.py +583 -0
- sky/provision/vast/instance.py +9 -4
- sky/provision/vast/utils.py +10 -6
- sky/provision/volume.py +164 -0
- sky/provision/vsphere/common/ssl_helper.py +1 -1
- sky/provision/vsphere/common/vapiconnect.py +2 -1
- sky/provision/vsphere/common/vim_utils.py +3 -2
- sky/provision/vsphere/instance.py +8 -6
- sky/provision/vsphere/vsphere_utils.py +8 -1
- sky/resources.py +11 -3
- sky/schemas/api/responses.py +107 -6
- sky/schemas/db/global_user_state/008_skylet_ssh_tunnel_metadata.py +34 -0
- sky/schemas/db/global_user_state/009_last_activity_and_launched_at.py +89 -0
- sky/schemas/db/global_user_state/010_save_ssh_key.py +66 -0
- sky/schemas/db/global_user_state/011_is_ephemeral.py +34 -0
- sky/schemas/db/kv_cache/001_initial_schema.py +29 -0
- sky/schemas/db/serve_state/002_yaml_content.py +34 -0
- sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
- sky/schemas/db/spot_jobs/002_cluster_pool.py +3 -3
- sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
- sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
- sky/schemas/db/spot_jobs/006_controller_pid_started_at.py +34 -0
- sky/schemas/db/spot_jobs/007_config_file_content.py +34 -0
- sky/schemas/generated/jobsv1_pb2.py +86 -0
- sky/schemas/generated/jobsv1_pb2.pyi +254 -0
- sky/schemas/generated/jobsv1_pb2_grpc.py +542 -0
- sky/schemas/generated/managed_jobsv1_pb2.py +76 -0
- sky/schemas/generated/managed_jobsv1_pb2.pyi +278 -0
- sky/schemas/generated/managed_jobsv1_pb2_grpc.py +278 -0
- sky/schemas/generated/servev1_pb2.py +58 -0
- sky/schemas/generated/servev1_pb2.pyi +115 -0
- sky/schemas/generated/servev1_pb2_grpc.py +322 -0
- sky/serve/autoscalers.py +2 -0
- sky/serve/client/impl.py +55 -21
- sky/serve/constants.py +4 -3
- sky/serve/controller.py +17 -11
- sky/serve/load_balancing_policies.py +1 -1
- sky/serve/replica_managers.py +219 -142
- sky/serve/serve_rpc_utils.py +179 -0
- sky/serve/serve_state.py +63 -54
- sky/serve/serve_utils.py +145 -109
- sky/serve/server/core.py +46 -25
- sky/serve/server/impl.py +311 -162
- sky/serve/server/server.py +21 -19
- sky/serve/service.py +84 -68
- sky/serve/service_spec.py +45 -7
- sky/server/auth/loopback.py +38 -0
- sky/server/auth/oauth2_proxy.py +12 -7
- sky/server/common.py +47 -24
- sky/server/config.py +62 -28
- sky/server/constants.py +9 -1
- sky/server/daemons.py +109 -38
- sky/server/metrics.py +76 -96
- sky/server/middleware_utils.py +166 -0
- sky/server/plugins.py +222 -0
- sky/server/requests/executor.py +384 -145
- sky/server/requests/payloads.py +83 -19
- sky/server/requests/preconditions.py +15 -13
- sky/server/requests/request_names.py +123 -0
- sky/server/requests/requests.py +511 -157
- sky/server/requests/serializers/decoders.py +48 -17
- sky/server/requests/serializers/encoders.py +102 -20
- sky/server/requests/serializers/return_value_serializers.py +60 -0
- sky/server/requests/threads.py +117 -0
- sky/server/rest.py +116 -24
- sky/server/server.py +497 -179
- sky/server/server_utils.py +30 -0
- sky/server/stream_utils.py +219 -45
- sky/server/uvicorn.py +30 -19
- sky/setup_files/MANIFEST.in +6 -1
- sky/setup_files/alembic.ini +8 -0
- sky/setup_files/dependencies.py +64 -19
- sky/setup_files/setup.py +44 -44
- sky/sky_logging.py +13 -5
- sky/skylet/attempt_skylet.py +116 -24
- sky/skylet/configs.py +3 -1
- sky/skylet/constants.py +139 -29
- sky/skylet/events.py +74 -14
- sky/skylet/executor/__init__.py +1 -0
- sky/skylet/executor/slurm.py +189 -0
- sky/skylet/job_lib.py +143 -105
- sky/skylet/log_lib.py +252 -8
- sky/skylet/log_lib.pyi +47 -7
- sky/skylet/providers/ibm/node_provider.py +12 -8
- sky/skylet/providers/ibm/vpc_provider.py +13 -12
- sky/skylet/runtime_utils.py +21 -0
- sky/skylet/services.py +524 -0
- sky/skylet/skylet.py +27 -2
- sky/skylet/subprocess_daemon.py +104 -28
- sky/skypilot_config.py +99 -79
- sky/ssh_node_pools/constants.py +12 -0
- sky/ssh_node_pools/core.py +40 -3
- sky/ssh_node_pools/deploy/__init__.py +4 -0
- sky/ssh_node_pools/deploy/deploy.py +952 -0
- sky/ssh_node_pools/deploy/tunnel_utils.py +199 -0
- sky/ssh_node_pools/deploy/utils.py +173 -0
- sky/ssh_node_pools/server.py +20 -21
- sky/{utils/kubernetes/ssh_utils.py → ssh_node_pools/utils.py} +9 -6
- sky/task.py +221 -104
- sky/templates/aws-ray.yml.j2 +1 -0
- sky/templates/azure-ray.yml.j2 +1 -0
- sky/templates/cudo-ray.yml.j2 +1 -0
- sky/templates/do-ray.yml.j2 +1 -0
- sky/templates/fluidstack-ray.yml.j2 +1 -0
- sky/templates/gcp-ray.yml.j2 +1 -0
- sky/templates/hyperbolic-ray.yml.j2 +1 -0
- sky/templates/ibm-ray.yml.j2 +2 -1
- sky/templates/jobs-controller.yaml.j2 +3 -0
- sky/templates/kubernetes-ray.yml.j2 +204 -55
- sky/templates/lambda-ray.yml.j2 +1 -0
- sky/templates/nebius-ray.yml.j2 +3 -0
- sky/templates/oci-ray.yml.j2 +1 -0
- sky/templates/paperspace-ray.yml.j2 +1 -0
- sky/templates/primeintellect-ray.yml.j2 +72 -0
- sky/templates/runpod-ray.yml.j2 +1 -0
- sky/templates/scp-ray.yml.j2 +1 -0
- sky/templates/seeweb-ray.yml.j2 +171 -0
- sky/templates/shadeform-ray.yml.j2 +73 -0
- sky/templates/slurm-ray.yml.j2 +85 -0
- sky/templates/vast-ray.yml.j2 +2 -0
- sky/templates/vsphere-ray.yml.j2 +1 -0
- sky/templates/websocket_proxy.py +188 -43
- sky/usage/usage_lib.py +16 -4
- sky/users/model.conf +1 -1
- sky/users/permission.py +84 -44
- sky/users/rbac.py +31 -3
- sky/utils/accelerator_registry.py +6 -3
- sky/utils/admin_policy_utils.py +18 -5
- sky/utils/annotations.py +128 -6
- sky/utils/asyncio_utils.py +78 -0
- sky/utils/atomic.py +1 -1
- sky/utils/auth_utils.py +153 -0
- sky/utils/cli_utils/status_utils.py +12 -7
- sky/utils/cluster_utils.py +28 -6
- sky/utils/command_runner.py +283 -30
- sky/utils/command_runner.pyi +63 -7
- sky/utils/common.py +3 -1
- sky/utils/common_utils.py +55 -7
- sky/utils/config_utils.py +1 -14
- sky/utils/context.py +127 -40
- sky/utils/context_utils.py +73 -18
- sky/utils/controller_utils.py +229 -70
- sky/utils/db/db_utils.py +95 -18
- sky/utils/db/kv_cache.py +149 -0
- sky/utils/db/migration_utils.py +24 -7
- sky/utils/env_options.py +4 -0
- sky/utils/git.py +559 -1
- sky/utils/kubernetes/create_cluster.sh +15 -30
- sky/utils/kubernetes/delete_cluster.sh +10 -7
- sky/utils/kubernetes/generate_kind_config.py +6 -66
- sky/utils/kubernetes/gpu_labeler.py +13 -3
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +2 -1
- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +16 -16
- sky/utils/kubernetes/kubernetes_deploy_utils.py +187 -260
- sky/utils/kubernetes/rsync_helper.sh +11 -3
- sky/utils/kubernetes/ssh-tunnel.sh +7 -376
- sky/utils/kubernetes_enums.py +7 -15
- sky/utils/lock_events.py +4 -4
- sky/utils/locks.py +128 -31
- sky/utils/log_utils.py +0 -319
- sky/utils/resource_checker.py +13 -10
- sky/utils/resources_utils.py +53 -29
- sky/utils/rich_utils.py +8 -4
- sky/utils/schemas.py +138 -52
- sky/utils/subprocess_utils.py +17 -4
- sky/utils/thread_utils.py +91 -0
- sky/utils/timeline.py +2 -1
- sky/utils/ux_utils.py +35 -1
- sky/utils/volume.py +88 -4
- sky/utils/yaml_utils.py +9 -0
- sky/volumes/client/sdk.py +48 -10
- sky/volumes/server/core.py +59 -22
- sky/volumes/server/server.py +46 -17
- sky/volumes/volume.py +54 -42
- sky/workspaces/core.py +57 -21
- sky/workspaces/server.py +13 -12
- sky_templates/README.md +3 -0
- sky_templates/__init__.py +3 -0
- sky_templates/ray/__init__.py +0 -0
- sky_templates/ray/start_cluster +183 -0
- sky_templates/ray/stop_cluster +75 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/METADATA +343 -65
- skypilot_nightly-1.0.0.dev20251210.dist-info/RECORD +629 -0
- skypilot_nightly-1.0.0.dev20251210.dist-info/top_level.txt +2 -0
- sky/client/cli/git.py +0 -549
- sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
- sky/dashboard/out/_next/static/chunks/1141-943efc7aff0f0c06.js +0 -1
- sky/dashboard/out/_next/static/chunks/1836-37fede578e2da5f8.js +0 -40
- sky/dashboard/out/_next/static/chunks/3015-86cabed5d4669ad0.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.c80326aec9bfed40.js +0 -6
- sky/dashboard/out/_next/static/chunks/3785.4872a2f3aa489880.js +0 -1
- sky/dashboard/out/_next/static/chunks/3850-ff4a9a69d978632b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4045.b30465273dc5e468.js +0 -21
- sky/dashboard/out/_next/static/chunks/4676-9da7fdbde90b5549.js +0 -10
- sky/dashboard/out/_next/static/chunks/4725.10f7a9a5d3ea8208.js +0 -1
- sky/dashboard/out/_next/static/chunks/5339.3fda4a4010ff4e06.js +0 -51
- sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
- sky/dashboard/out/_next/static/chunks/649.b9d7f7d10c1b8c53.js +0 -45
- sky/dashboard/out/_next/static/chunks/6856-dca7962af4814e1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +0 -1
- sky/dashboard/out/_next/static/chunks/7325.b4bc99ce0892dcd5.js +0 -6
- sky/dashboard/out/_next/static/chunks/754-d0da8ab45f9509e9.js +0 -18
- sky/dashboard/out/_next/static/chunks/7669.1f5d9a402bf5cc42.js +0 -36
- sky/dashboard/out/_next/static/chunks/8969-0be3036bf86f8256.js +0 -1
- sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
- sky/dashboard/out/_next/static/chunks/9037-fa1737818d0a0969.js +0 -6
- sky/dashboard/out/_next/static/chunks/pages/_app-ce361c6959bc2001.js +0 -34
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/clusters-469814d711d63b1b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js +0 -11
- sky/dashboard/out/_next/static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js +0 -16
- sky/dashboard/out/_next/static/chunks/pages/jobs-1f70d9faa564804f.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/users-018bf31cda52e11b.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/volumes-739726d6b823f532.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-4fe903277b57b523.js +0 -1
- sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
- sky/dashboard/out/_next/static/mS-4qZPSkRuA1u-g2wQhg/_buildManifest.js +0 -1
- sky/templates/kubernetes-ssh-jump.yml.j2 +0 -94
- sky/utils/kubernetes/cleanup-tunnel.sh +0 -62
- sky/utils/kubernetes/deploy_remote_cluster.py +0 -1299
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +0 -191
- skypilot_nightly-1.0.0.dev20250905.dist-info/RECORD +0 -547
- skypilot_nightly-1.0.0.dev20250905.dist-info/top_level.txt +0 -1
- /sky/dashboard/out/_next/static/{mS-4qZPSkRuA1u-g2wQhg → KYAhEFa3FTfq4JyKVgo-s}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250905.dist-info → skypilot_nightly-1.0.0.dev20251210.dist-info}/licenses/LICENSE +0 -0
sky/global_user_state.py
CHANGED
|
@@ -24,13 +24,15 @@ from sqlalchemy import exc as sqlalchemy_exc
|
|
|
24
24
|
from sqlalchemy import orm
|
|
25
25
|
from sqlalchemy.dialects import postgresql
|
|
26
26
|
from sqlalchemy.dialects import sqlite
|
|
27
|
+
from sqlalchemy.ext import asyncio as sql_async
|
|
27
28
|
from sqlalchemy.ext import declarative
|
|
28
29
|
|
|
29
30
|
from sky import models
|
|
30
31
|
from sky import sky_logging
|
|
31
32
|
from sky import skypilot_config
|
|
32
|
-
from sky.
|
|
33
|
+
from sky.metrics import utils as metrics_lib
|
|
33
34
|
from sky.skylet import constants
|
|
35
|
+
from sky.utils import annotations
|
|
34
36
|
from sky.utils import common_utils
|
|
35
37
|
from sky.utils import context_utils
|
|
36
38
|
from sky.utils import registry
|
|
@@ -51,6 +53,7 @@ _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
|
|
|
51
53
|
_ALLOWED_CLOUDS_KEY_PREFIX = 'allowed_clouds_'
|
|
52
54
|
|
|
53
55
|
_SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
|
|
56
|
+
_SQLALCHEMY_ENGINE_ASYNC: Optional[sql_async.AsyncEngine] = None
|
|
54
57
|
_SQLALCHEMY_ENGINE_LOCK = threading.Lock()
|
|
55
58
|
|
|
56
59
|
DEFAULT_CLUSTER_EVENT_RETENTION_HOURS = 24.0
|
|
@@ -118,6 +121,9 @@ cluster_table = sqlalchemy.Table(
|
|
|
118
121
|
sqlalchemy.Column('provision_log_path',
|
|
119
122
|
sqlalchemy.Text,
|
|
120
123
|
server_default=None),
|
|
124
|
+
sqlalchemy.Column('skylet_ssh_tunnel_metadata',
|
|
125
|
+
sqlalchemy.LargeBinary,
|
|
126
|
+
server_default=None),
|
|
121
127
|
)
|
|
122
128
|
|
|
123
129
|
storage_table = sqlalchemy.Table(
|
|
@@ -145,6 +151,7 @@ volume_table = sqlalchemy.Table(
|
|
|
145
151
|
server_default=None),
|
|
146
152
|
sqlalchemy.Column('last_use', sqlalchemy.Text),
|
|
147
153
|
sqlalchemy.Column('status', sqlalchemy.Text),
|
|
154
|
+
sqlalchemy.Column('is_ephemeral', sqlalchemy.Integer, server_default='0'),
|
|
148
155
|
)
|
|
149
156
|
|
|
150
157
|
# Table for Cluster History
|
|
@@ -180,6 +187,14 @@ cluster_history_table = sqlalchemy.Table(
|
|
|
180
187
|
sqlalchemy.Column('provision_log_path',
|
|
181
188
|
sqlalchemy.Text,
|
|
182
189
|
server_default=None),
|
|
190
|
+
sqlalchemy.Column('last_activity_time',
|
|
191
|
+
sqlalchemy.Integer,
|
|
192
|
+
server_default=None,
|
|
193
|
+
index=True),
|
|
194
|
+
sqlalchemy.Column('launched_at',
|
|
195
|
+
sqlalchemy.Integer,
|
|
196
|
+
server_default=None,
|
|
197
|
+
index=True),
|
|
183
198
|
)
|
|
184
199
|
|
|
185
200
|
|
|
@@ -293,6 +308,20 @@ def create_table(engine: sqlalchemy.engine.Engine):
|
|
|
293
308
|
migration_utils.GLOBAL_USER_STATE_VERSION)
|
|
294
309
|
|
|
295
310
|
|
|
311
|
+
def initialize_and_get_db_async() -> sql_async.AsyncEngine:
|
|
312
|
+
global _SQLALCHEMY_ENGINE_ASYNC
|
|
313
|
+
if _SQLALCHEMY_ENGINE_ASYNC is not None:
|
|
314
|
+
return _SQLALCHEMY_ENGINE_ASYNC
|
|
315
|
+
with _SQLALCHEMY_ENGINE_LOCK:
|
|
316
|
+
if _SQLALCHEMY_ENGINE_ASYNC is not None:
|
|
317
|
+
return _SQLALCHEMY_ENGINE_ASYNC
|
|
318
|
+
|
|
319
|
+
_SQLALCHEMY_ENGINE_ASYNC = db_utils.get_engine('state',
|
|
320
|
+
async_engine=True)
|
|
321
|
+
initialize_and_get_db()
|
|
322
|
+
return _SQLALCHEMY_ENGINE_ASYNC
|
|
323
|
+
|
|
324
|
+
|
|
296
325
|
# We wrap the sqlalchemy engine initialization in a thread
|
|
297
326
|
# lock to ensure that multiple threads do not initialize the
|
|
298
327
|
# engine which could result in a rare race condition where
|
|
@@ -315,9 +344,29 @@ def initialize_and_get_db() -> sqlalchemy.engine.Engine:
|
|
|
315
344
|
|
|
316
345
|
# return engine
|
|
317
346
|
_SQLALCHEMY_ENGINE = engine
|
|
347
|
+
# Cache the result of _sqlite_supports_returning()
|
|
348
|
+
# ahead of time, as it won't change throughout
|
|
349
|
+
# the lifetime of the engine.
|
|
350
|
+
_sqlite_supports_returning()
|
|
318
351
|
return _SQLALCHEMY_ENGINE
|
|
319
352
|
|
|
320
353
|
|
|
354
|
+
def _init_db_async(func):
|
|
355
|
+
"""Initialize the async database."""
|
|
356
|
+
|
|
357
|
+
@functools.wraps(func)
|
|
358
|
+
async def wrapper(*args, **kwargs):
|
|
359
|
+
if _SQLALCHEMY_ENGINE_ASYNC is None:
|
|
360
|
+
# this may happen multiple times since there is no locking
|
|
361
|
+
# here but that's fine, this is just a short circuit for the
|
|
362
|
+
# common case.
|
|
363
|
+
await context_utils.to_thread(initialize_and_get_db_async)
|
|
364
|
+
|
|
365
|
+
return await func(*args, **kwargs)
|
|
366
|
+
|
|
367
|
+
return wrapper
|
|
368
|
+
|
|
369
|
+
|
|
321
370
|
def _init_db(func):
|
|
322
371
|
"""Initialize the database."""
|
|
323
372
|
|
|
@@ -329,19 +378,51 @@ def _init_db(func):
|
|
|
329
378
|
return wrapper
|
|
330
379
|
|
|
331
380
|
|
|
381
|
+
@annotations.lru_cache(scope='global', maxsize=1)
|
|
382
|
+
def _sqlite_supports_returning() -> bool:
|
|
383
|
+
"""Check if SQLite (3.35.0+) and SQLAlchemy (2.0+) support RETURNING.
|
|
384
|
+
|
|
385
|
+
See https://sqlite.org/lang_returning.html and
|
|
386
|
+
https://docs.sqlalchemy.org/en/20/dialects/sqlite.html#insert-update-delete-returning # pylint: disable=line-too-long
|
|
387
|
+
"""
|
|
388
|
+
sqlalchemy_version_parts = sqlalchemy.__version__.split('.')
|
|
389
|
+
assert len(sqlalchemy_version_parts) >= 1, \
|
|
390
|
+
f'Invalid SQLAlchemy version: {sqlalchemy.__version__}'
|
|
391
|
+
sqlalchemy_major = int(sqlalchemy_version_parts[0])
|
|
392
|
+
if sqlalchemy_major < 2:
|
|
393
|
+
return False
|
|
394
|
+
|
|
395
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
396
|
+
if (_SQLALCHEMY_ENGINE.dialect.name !=
|
|
397
|
+
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
398
|
+
return False
|
|
399
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
400
|
+
result = session.execute(sqlalchemy.text('SELECT sqlite_version()'))
|
|
401
|
+
version_str = result.scalar()
|
|
402
|
+
version_parts = version_str.split('.')
|
|
403
|
+
assert len(version_parts) >= 2, \
|
|
404
|
+
f'Invalid version string: {version_str}'
|
|
405
|
+
major, minor = int(version_parts[0]), int(version_parts[1])
|
|
406
|
+
return (major > 3) or (major == 3 and minor >= 35)
|
|
407
|
+
|
|
408
|
+
|
|
332
409
|
@_init_db
|
|
333
410
|
@metrics_lib.time_me
|
|
334
|
-
def add_or_update_user(
|
|
335
|
-
|
|
411
|
+
def add_or_update_user(
|
|
412
|
+
user: models.User,
|
|
413
|
+
allow_duplicate_name: bool = True,
|
|
414
|
+
return_user: bool = False
|
|
415
|
+
) -> typing.Union[bool, typing.Tuple[bool, models.User]]:
|
|
336
416
|
"""Store the mapping from user hash to user name for display purposes.
|
|
337
417
|
|
|
338
418
|
Returns:
|
|
339
|
-
|
|
419
|
+
If return_user=False: bool (whether the user is newly added)
|
|
420
|
+
If return_user=True: Tuple[bool, models.User]
|
|
340
421
|
"""
|
|
341
422
|
assert _SQLALCHEMY_ENGINE is not None
|
|
342
423
|
|
|
343
424
|
if user.name is None:
|
|
344
|
-
return False
|
|
425
|
+
return (False, user) if return_user else False
|
|
345
426
|
|
|
346
427
|
# Set created_at if not already set
|
|
347
428
|
created_at = user.created_at
|
|
@@ -353,7 +434,7 @@ def add_or_update_user(user: models.User,
|
|
|
353
434
|
existing_user = session.query(user_table).filter(
|
|
354
435
|
user_table.c.name == user.name).first()
|
|
355
436
|
if existing_user is not None:
|
|
356
|
-
return False
|
|
437
|
+
return (False, user) if return_user else False
|
|
357
438
|
|
|
358
439
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
359
440
|
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
@@ -367,24 +448,57 @@ def add_or_update_user(user: models.User,
|
|
|
367
448
|
name=user.name,
|
|
368
449
|
password=user.password,
|
|
369
450
|
created_at=created_at)
|
|
451
|
+
use_returning = return_user and _sqlite_supports_returning()
|
|
452
|
+
if use_returning:
|
|
453
|
+
insert_stmnt = insert_stmnt.returning(
|
|
454
|
+
user_table.c.id,
|
|
455
|
+
user_table.c.name,
|
|
456
|
+
user_table.c.password,
|
|
457
|
+
user_table.c.created_at,
|
|
458
|
+
)
|
|
370
459
|
result = session.execute(insert_stmnt)
|
|
371
460
|
|
|
372
|
-
|
|
373
|
-
|
|
461
|
+
row = None
|
|
462
|
+
if use_returning:
|
|
463
|
+
# With RETURNING, check if we got a row back.
|
|
464
|
+
row = result.fetchone()
|
|
465
|
+
was_inserted = row is not None
|
|
466
|
+
else:
|
|
467
|
+
# Without RETURNING, use rowcount.
|
|
468
|
+
was_inserted = result.rowcount > 0
|
|
374
469
|
|
|
375
470
|
if not was_inserted:
|
|
376
471
|
# User existed, so update it (but don't update created_at)
|
|
472
|
+
update_values = {user_table.c.name: user.name}
|
|
377
473
|
if user.password:
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
474
|
+
update_values[user_table.c.password] = user.password
|
|
475
|
+
|
|
476
|
+
update_stmnt = sqlalchemy.update(user_table).where(
|
|
477
|
+
user_table.c.id == user.id).values(update_values)
|
|
478
|
+
if use_returning:
|
|
479
|
+
update_stmnt = update_stmnt.returning(
|
|
480
|
+
user_table.c.id, user_table.c.name,
|
|
481
|
+
user_table.c.password, user_table.c.created_at)
|
|
482
|
+
|
|
483
|
+
result = session.execute(update_stmnt)
|
|
484
|
+
if use_returning:
|
|
485
|
+
row = result.fetchone()
|
|
385
486
|
|
|
386
487
|
session.commit()
|
|
387
|
-
|
|
488
|
+
|
|
489
|
+
if return_user:
|
|
490
|
+
if row is None:
|
|
491
|
+
# row=None means the sqlite used has no RETURNING support,
|
|
492
|
+
# so we need to do a separate query
|
|
493
|
+
row = session.query(user_table).filter_by(
|
|
494
|
+
id=user.id).first()
|
|
495
|
+
updated_user = models.User(id=row.id,
|
|
496
|
+
name=row.name,
|
|
497
|
+
password=row.password,
|
|
498
|
+
created_at=row.created_at)
|
|
499
|
+
return was_inserted, updated_user
|
|
500
|
+
else:
|
|
501
|
+
return was_inserted
|
|
388
502
|
|
|
389
503
|
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
390
504
|
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
@@ -409,6 +523,9 @@ def add_or_update_user(user: models.User,
|
|
|
409
523
|
upsert_stmnt = insert_stmnt.on_conflict_do_update(
|
|
410
524
|
index_elements=[user_table.c.id], set_=set_).returning(
|
|
411
525
|
user_table.c.id,
|
|
526
|
+
user_table.c.name,
|
|
527
|
+
user_table.c.password,
|
|
528
|
+
user_table.c.created_at,
|
|
412
529
|
# This will be True for INSERT, False for UPDATE
|
|
413
530
|
sqlalchemy.literal_column('(xmax = 0)').label('was_inserted'
|
|
414
531
|
))
|
|
@@ -416,10 +533,17 @@ def add_or_update_user(user: models.User,
|
|
|
416
533
|
result = session.execute(upsert_stmnt)
|
|
417
534
|
row = result.fetchone()
|
|
418
535
|
|
|
419
|
-
|
|
536
|
+
was_inserted = bool(row.was_inserted) if row else False
|
|
420
537
|
session.commit()
|
|
421
538
|
|
|
422
|
-
|
|
539
|
+
if return_user:
|
|
540
|
+
updated_user = models.User(id=row.id,
|
|
541
|
+
name=row.name,
|
|
542
|
+
password=row.password,
|
|
543
|
+
created_at=row.created_at)
|
|
544
|
+
return was_inserted, updated_user
|
|
545
|
+
else:
|
|
546
|
+
return was_inserted
|
|
423
547
|
else:
|
|
424
548
|
raise ValueError('Unsupported database dialect')
|
|
425
549
|
|
|
@@ -440,7 +564,7 @@ def get_user(user_id: str) -> Optional[models.User]:
|
|
|
440
564
|
|
|
441
565
|
@_init_db
|
|
442
566
|
@metrics_lib.time_me
|
|
443
|
-
def
|
|
567
|
+
def get_users(user_ids: Set[str]) -> Dict[str, models.User]:
|
|
444
568
|
assert _SQLALCHEMY_ENGINE is not None
|
|
445
569
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
446
570
|
rows = session.query(user_table).filter(
|
|
@@ -512,7 +636,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
512
636
|
config_hash: Optional[str] = None,
|
|
513
637
|
task_config: Optional[Dict[str, Any]] = None,
|
|
514
638
|
is_managed: bool = False,
|
|
515
|
-
provision_log_path: Optional[str] = None
|
|
639
|
+
provision_log_path: Optional[str] = None,
|
|
640
|
+
existing_cluster_hash: Optional[str] = None):
|
|
516
641
|
"""Adds or updates cluster_name -> cluster_handle mapping.
|
|
517
642
|
|
|
518
643
|
Args:
|
|
@@ -528,8 +653,12 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
528
653
|
is_managed: Whether the cluster is launched by the
|
|
529
654
|
controller.
|
|
530
655
|
provision_log_path: Absolute path to provision.log, if available.
|
|
656
|
+
existing_cluster_hash: If specified, the cluster will be updated
|
|
657
|
+
only if the cluster_hash matches. If a cluster does not exist,
|
|
658
|
+
it will not be inserted and an error will be raised.
|
|
531
659
|
"""
|
|
532
660
|
assert _SQLALCHEMY_ENGINE is not None
|
|
661
|
+
|
|
533
662
|
# FIXME: launched_at will be changed when `sky launch -c` is called.
|
|
534
663
|
handle = pickle.dumps(cluster_handle)
|
|
535
664
|
cluster_launched_at = int(time.time()) if is_launch else None
|
|
@@ -625,32 +754,44 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
625
754
|
session.rollback()
|
|
626
755
|
raise ValueError('Unsupported database dialect')
|
|
627
756
|
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
set_={
|
|
757
|
+
if existing_cluster_hash is not None:
|
|
758
|
+
count = session.query(cluster_table).filter_by(
|
|
759
|
+
name=cluster_name, cluster_hash=existing_cluster_hash).update({
|
|
760
|
+
**conditional_values, cluster_table.c.handle: handle,
|
|
761
|
+
cluster_table.c.status: status.value,
|
|
762
|
+
cluster_table.c.status_updated_at: status_updated_at
|
|
763
|
+
})
|
|
764
|
+
assert count <= 1
|
|
765
|
+
if count == 0:
|
|
766
|
+
raise ValueError(f'Cluster {cluster_name} with hash '
|
|
767
|
+
f'{existing_cluster_hash} not found.')
|
|
768
|
+
else:
|
|
769
|
+
insert_stmnt = insert_func(cluster_table).values(
|
|
770
|
+
name=cluster_name,
|
|
643
771
|
**conditional_values,
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
#
|
|
647
|
-
#
|
|
648
|
-
|
|
649
|
-
#
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
772
|
+
handle=handle,
|
|
773
|
+
status=status.value,
|
|
774
|
+
# set metadata to server default ('{}')
|
|
775
|
+
# set owner to server default (null)
|
|
776
|
+
cluster_hash=cluster_hash,
|
|
777
|
+
# set storage_mounts_metadata to server default (null)
|
|
778
|
+
status_updated_at=status_updated_at,
|
|
779
|
+
is_managed=int(is_managed),
|
|
780
|
+
)
|
|
781
|
+
insert_or_update_stmt = insert_stmnt.on_conflict_do_update(
|
|
782
|
+
index_elements=[cluster_table.c.name],
|
|
783
|
+
set_={
|
|
784
|
+
**conditional_values,
|
|
785
|
+
cluster_table.c.handle: handle,
|
|
786
|
+
cluster_table.c.status: status.value,
|
|
787
|
+
# do not update metadata value
|
|
788
|
+
# do not update owner value
|
|
789
|
+
cluster_table.c.cluster_hash: cluster_hash,
|
|
790
|
+
# do not update storage_mounts_metadata
|
|
791
|
+
cluster_table.c.status_updated_at: status_updated_at,
|
|
792
|
+
# do not update user_hash
|
|
793
|
+
})
|
|
794
|
+
session.execute(insert_or_update_stmt)
|
|
654
795
|
|
|
655
796
|
# Modify cluster history table
|
|
656
797
|
launched_nodes = getattr(cluster_handle, 'launched_nodes', None)
|
|
@@ -668,6 +809,10 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
668
809
|
conditional_values.get('last_creation_command'),
|
|
669
810
|
}
|
|
670
811
|
|
|
812
|
+
# Calculate last_activity_time and launched_at from usage_intervals
|
|
813
|
+
last_activity_time = _get_cluster_last_activity_time(usage_intervals)
|
|
814
|
+
launched_at = _get_cluster_launch_time(usage_intervals)
|
|
815
|
+
|
|
671
816
|
insert_stmnt = insert_func(cluster_history_table).values(
|
|
672
817
|
cluster_hash=cluster_hash,
|
|
673
818
|
name=cluster_name,
|
|
@@ -678,6 +823,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
678
823
|
user_hash=user_hash,
|
|
679
824
|
workspace=history_workspace,
|
|
680
825
|
provision_log_path=provision_log_path,
|
|
826
|
+
last_activity_time=last_activity_time,
|
|
827
|
+
launched_at=launched_at,
|
|
681
828
|
**creation_info,
|
|
682
829
|
)
|
|
683
830
|
do_update_stmt = insert_stmnt.on_conflict_do_update(
|
|
@@ -694,6 +841,8 @@ def add_or_update_cluster(cluster_name: str,
|
|
|
694
841
|
cluster_history_table.c.user_hash: history_hash,
|
|
695
842
|
cluster_history_table.c.workspace: history_workspace,
|
|
696
843
|
cluster_history_table.c.provision_log_path: provision_log_path,
|
|
844
|
+
cluster_history_table.c.last_activity_time: last_activity_time,
|
|
845
|
+
cluster_history_table.c.launched_at: launched_at,
|
|
697
846
|
**creation_info,
|
|
698
847
|
})
|
|
699
848
|
session.execute(do_update_stmt)
|
|
@@ -990,7 +1139,8 @@ def get_handle_from_cluster_name(
|
|
|
990
1139
|
assert _SQLALCHEMY_ENGINE is not None
|
|
991
1140
|
assert cluster_name is not None, 'cluster_name cannot be None'
|
|
992
1141
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
993
|
-
row = session.query(
|
|
1142
|
+
row = (session.query(
|
|
1143
|
+
cluster_table.c.handle).filter_by(name=cluster_name).first())
|
|
994
1144
|
if row is None:
|
|
995
1145
|
return None
|
|
996
1146
|
return pickle.loads(row.handle)
|
|
@@ -998,21 +1148,95 @@ def get_handle_from_cluster_name(
|
|
|
998
1148
|
|
|
999
1149
|
@_init_db
|
|
1000
1150
|
@metrics_lib.time_me
|
|
1001
|
-
def
|
|
1151
|
+
def get_handles_from_cluster_names(
|
|
1152
|
+
cluster_names: Set[str]
|
|
1153
|
+
) -> Dict[str, Optional['backends.ResourceHandle']]:
|
|
1154
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1155
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1156
|
+
rows = session.query(cluster_table.c.name,
|
|
1157
|
+
cluster_table.c.handle).filter(
|
|
1158
|
+
cluster_table.c.name.in_(cluster_names)).all()
|
|
1159
|
+
return {
|
|
1160
|
+
row.name: pickle.loads(row.handle) if row is not None else None
|
|
1161
|
+
for row in rows
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
@_init_db
|
|
1166
|
+
@metrics_lib.time_me
|
|
1167
|
+
def get_cluster_name_to_handle_map(
|
|
1168
|
+
is_managed: Optional[bool] = None,
|
|
1169
|
+
) -> Dict[str, Optional['backends.ResourceHandle']]:
|
|
1170
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1171
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1172
|
+
query = session.query(cluster_table.c.name, cluster_table.c.handle)
|
|
1173
|
+
if is_managed is not None:
|
|
1174
|
+
query = query.filter(cluster_table.c.is_managed == int(is_managed))
|
|
1175
|
+
rows = query.all()
|
|
1176
|
+
name_to_handle = {}
|
|
1177
|
+
for row in rows:
|
|
1178
|
+
if row.handle and len(row.handle) > 0:
|
|
1179
|
+
name_to_handle[row.name] = pickle.loads(row.handle)
|
|
1180
|
+
else:
|
|
1181
|
+
name_to_handle[row.name] = None
|
|
1182
|
+
return name_to_handle
|
|
1183
|
+
|
|
1184
|
+
|
|
1185
|
+
@_init_db_async
|
|
1186
|
+
@metrics_lib.time_me
|
|
1187
|
+
async def get_status_from_cluster_name_async(
|
|
1188
|
+
cluster_name: str) -> Optional[status_lib.ClusterStatus]:
|
|
1189
|
+
"""Get the status of a cluster."""
|
|
1190
|
+
assert _SQLALCHEMY_ENGINE_ASYNC is not None
|
|
1191
|
+
assert cluster_name is not None, 'cluster_name cannot be None'
|
|
1192
|
+
async with sql_async.AsyncSession(_SQLALCHEMY_ENGINE_ASYNC) as session:
|
|
1193
|
+
result = await session.execute(
|
|
1194
|
+
sqlalchemy.select(cluster_table.c.status).where(
|
|
1195
|
+
cluster_table.c.name == cluster_name))
|
|
1196
|
+
row = result.first()
|
|
1197
|
+
|
|
1198
|
+
if row is None:
|
|
1199
|
+
return None
|
|
1200
|
+
return status_lib.ClusterStatus(row[0])
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
@_init_db
|
|
1204
|
+
@metrics_lib.time_me
|
|
1205
|
+
def get_status_from_cluster_name(
|
|
1206
|
+
cluster_name: str) -> Optional[status_lib.ClusterStatus]:
|
|
1207
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1208
|
+
assert cluster_name is not None, 'cluster_name cannot be None'
|
|
1209
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1210
|
+
row = session.query(
|
|
1211
|
+
cluster_table.c.status).filter_by(name=cluster_name).first()
|
|
1212
|
+
if row is None:
|
|
1213
|
+
return None
|
|
1214
|
+
return status_lib.ClusterStatus[row.status]
|
|
1215
|
+
|
|
1216
|
+
|
|
1217
|
+
@_init_db
|
|
1218
|
+
@metrics_lib.time_me
|
|
1219
|
+
def get_glob_cluster_names(
|
|
1220
|
+
cluster_name: str,
|
|
1221
|
+
workspaces_filter: Optional[Set[str]] = None) -> List[str]:
|
|
1002
1222
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1003
1223
|
assert cluster_name is not None, 'cluster_name cannot be None'
|
|
1004
1224
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1005
1225
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
1006
1226
|
db_utils.SQLAlchemyDialect.SQLITE.value):
|
|
1007
|
-
|
|
1008
|
-
cluster_table.c.name.op('GLOB')(cluster_name))
|
|
1227
|
+
query = session.query(cluster_table.c.name).filter(
|
|
1228
|
+
cluster_table.c.name.op('GLOB')(cluster_name))
|
|
1009
1229
|
elif (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
1010
1230
|
db_utils.SQLAlchemyDialect.POSTGRESQL.value):
|
|
1011
|
-
|
|
1231
|
+
query = session.query(cluster_table.c.name).filter(
|
|
1012
1232
|
cluster_table.c.name.op('SIMILAR TO')(
|
|
1013
|
-
_glob_to_similar(cluster_name)))
|
|
1233
|
+
_glob_to_similar(cluster_name)))
|
|
1014
1234
|
else:
|
|
1015
1235
|
raise ValueError('Unsupported database dialect')
|
|
1236
|
+
if workspaces_filter is not None:
|
|
1237
|
+
query = query.filter(
|
|
1238
|
+
cluster_table.c.workspace.in_(workspaces_filter))
|
|
1239
|
+
rows = query.all()
|
|
1016
1240
|
return [row.name for row in rows]
|
|
1017
1241
|
|
|
1018
1242
|
|
|
@@ -1056,7 +1280,8 @@ def set_cluster_autostop_value(cluster_name: str, idle_minutes: int,
|
|
|
1056
1280
|
def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
|
|
1057
1281
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1058
1282
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1059
|
-
row = session.query(
|
|
1283
|
+
row = session.query(
|
|
1284
|
+
cluster_table.c.launched_at).filter_by(name=cluster_name).first()
|
|
1060
1285
|
if row is None or row.launched_at is None:
|
|
1061
1286
|
return None
|
|
1062
1287
|
return int(row.launched_at)
|
|
@@ -1067,7 +1292,8 @@ def get_cluster_launch_time(cluster_name: str) -> Optional[int]:
|
|
|
1067
1292
|
def get_cluster_info(cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
1068
1293
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1069
1294
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1070
|
-
row = session.query(
|
|
1295
|
+
row = session.query(
|
|
1296
|
+
cluster_table.c.metadata).filter_by(name=cluster_name).first()
|
|
1071
1297
|
if row is None or row.metadata is None:
|
|
1072
1298
|
return None
|
|
1073
1299
|
return json.loads(row.metadata)
|
|
@@ -1147,7 +1373,8 @@ def get_cluster_storage_mounts_metadata(
|
|
|
1147
1373
|
cluster_name: str) -> Optional[Dict[str, Any]]:
|
|
1148
1374
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1149
1375
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1150
|
-
row = session.query(cluster_table).filter_by(
|
|
1376
|
+
row = (session.query(cluster_table.c.storage_mounts_metadata).filter_by(
|
|
1377
|
+
name=cluster_name).first())
|
|
1151
1378
|
if row is None or row.storage_mounts_metadata is None:
|
|
1152
1379
|
return None
|
|
1153
1380
|
return pickle.loads(row.storage_mounts_metadata)
|
|
@@ -1170,6 +1397,39 @@ def set_cluster_storage_mounts_metadata(
|
|
|
1170
1397
|
raise ValueError(f'Cluster {cluster_name} not found.')
|
|
1171
1398
|
|
|
1172
1399
|
|
|
1400
|
+
@_init_db
|
|
1401
|
+
@metrics_lib.time_me
|
|
1402
|
+
def get_cluster_skylet_ssh_tunnel_metadata(
|
|
1403
|
+
cluster_name: str) -> Optional[Tuple[int, int]]:
|
|
1404
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1405
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1406
|
+
row = session.query(
|
|
1407
|
+
cluster_table.c.skylet_ssh_tunnel_metadata).filter_by(
|
|
1408
|
+
name=cluster_name).first()
|
|
1409
|
+
if row is None or row.skylet_ssh_tunnel_metadata is None:
|
|
1410
|
+
return None
|
|
1411
|
+
return pickle.loads(row.skylet_ssh_tunnel_metadata)
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
@_init_db
|
|
1415
|
+
@metrics_lib.time_me
|
|
1416
|
+
def set_cluster_skylet_ssh_tunnel_metadata(
|
|
1417
|
+
cluster_name: str,
|
|
1418
|
+
skylet_ssh_tunnel_metadata: Optional[Tuple[int, int]]) -> None:
|
|
1419
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1420
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1421
|
+
value = pickle.dumps(
|
|
1422
|
+
skylet_ssh_tunnel_metadata
|
|
1423
|
+
) if skylet_ssh_tunnel_metadata is not None else None
|
|
1424
|
+
count = session.query(cluster_table).filter_by(
|
|
1425
|
+
name=cluster_name).update(
|
|
1426
|
+
{cluster_table.c.skylet_ssh_tunnel_metadata: value})
|
|
1427
|
+
session.commit()
|
|
1428
|
+
assert count <= 1, count
|
|
1429
|
+
if count == 0:
|
|
1430
|
+
raise ValueError(f'Cluster {cluster_name} not found.')
|
|
1431
|
+
|
|
1432
|
+
|
|
1173
1433
|
@_init_db
|
|
1174
1434
|
@metrics_lib.time_me
|
|
1175
1435
|
def _get_cluster_usage_intervals(
|
|
@@ -1179,23 +1439,24 @@ def _get_cluster_usage_intervals(
|
|
|
1179
1439
|
if cluster_hash is None:
|
|
1180
1440
|
return None
|
|
1181
1441
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1182
|
-
row = session.query(cluster_history_table).filter_by(
|
|
1442
|
+
row = session.query(cluster_history_table.c.usage_intervals).filter_by(
|
|
1183
1443
|
cluster_hash=cluster_hash).first()
|
|
1184
1444
|
if row is None or row.usage_intervals is None:
|
|
1185
1445
|
return None
|
|
1186
1446
|
return pickle.loads(row.usage_intervals)
|
|
1187
1447
|
|
|
1188
1448
|
|
|
1189
|
-
def _get_cluster_launch_time(
|
|
1190
|
-
usage_intervals
|
|
1449
|
+
def _get_cluster_launch_time(
|
|
1450
|
+
usage_intervals: Optional[List[Tuple[int,
|
|
1451
|
+
Optional[int]]]]) -> Optional[int]:
|
|
1191
1452
|
if usage_intervals is None:
|
|
1192
1453
|
return None
|
|
1193
1454
|
return usage_intervals[0][0]
|
|
1194
1455
|
|
|
1195
1456
|
|
|
1196
|
-
def _get_cluster_duration(
|
|
1457
|
+
def _get_cluster_duration(
|
|
1458
|
+
usage_intervals: Optional[List[Tuple[int, Optional[int]]]]) -> int:
|
|
1197
1459
|
total_duration = 0
|
|
1198
|
-
usage_intervals = _get_cluster_usage_intervals(cluster_hash)
|
|
1199
1460
|
|
|
1200
1461
|
if usage_intervals is None:
|
|
1201
1462
|
return total_duration
|
|
@@ -1212,17 +1473,33 @@ def _get_cluster_duration(cluster_hash: str) -> int:
|
|
|
1212
1473
|
return total_duration
|
|
1213
1474
|
|
|
1214
1475
|
|
|
1476
|
+
def _get_cluster_last_activity_time(
|
|
1477
|
+
usage_intervals: Optional[List[Tuple[int,
|
|
1478
|
+
Optional[int]]]]) -> Optional[int]:
|
|
1479
|
+
last_activity_time = None
|
|
1480
|
+
if usage_intervals:
|
|
1481
|
+
last_interval = usage_intervals[-1]
|
|
1482
|
+
last_activity_time = (last_interval[1] if last_interval[1] is not None
|
|
1483
|
+
else last_interval[0])
|
|
1484
|
+
return last_activity_time
|
|
1485
|
+
|
|
1486
|
+
|
|
1215
1487
|
@_init_db
|
|
1216
1488
|
@metrics_lib.time_me
|
|
1217
1489
|
def _set_cluster_usage_intervals(
|
|
1218
1490
|
cluster_hash: str, usage_intervals: List[Tuple[int,
|
|
1219
1491
|
Optional[int]]]) -> None:
|
|
1220
1492
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1493
|
+
|
|
1494
|
+
# Calculate last_activity_time from usage_intervals
|
|
1495
|
+
last_activity_time = _get_cluster_last_activity_time(usage_intervals)
|
|
1496
|
+
|
|
1221
1497
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1222
1498
|
count = session.query(cluster_history_table).filter_by(
|
|
1223
1499
|
cluster_hash=cluster_hash).update({
|
|
1224
1500
|
cluster_history_table.c.usage_intervals:
|
|
1225
|
-
pickle.dumps(usage_intervals)
|
|
1501
|
+
pickle.dumps(usage_intervals),
|
|
1502
|
+
cluster_history_table.c.last_activity_time: last_activity_time,
|
|
1226
1503
|
})
|
|
1227
1504
|
session.commit()
|
|
1228
1505
|
assert count <= 1, count
|
|
@@ -1253,7 +1530,8 @@ def set_owner_identity_for_cluster(cluster_name: str,
|
|
|
1253
1530
|
def _get_hash_for_existing_cluster(cluster_name: str) -> Optional[str]:
|
|
1254
1531
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1255
1532
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1256
|
-
row = session.query(
|
|
1533
|
+
row = (session.query(
|
|
1534
|
+
cluster_table.c.cluster_hash).filter_by(name=cluster_name).first())
|
|
1257
1535
|
if row is None or row.cluster_hash is None:
|
|
1258
1536
|
return None
|
|
1259
1537
|
return row.cluster_hash
|
|
@@ -1265,8 +1543,10 @@ def get_launched_resources_from_cluster_hash(
|
|
|
1265
1543
|
cluster_hash: str) -> Optional[Tuple[int, Any]]:
|
|
1266
1544
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1267
1545
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1268
|
-
row = session.query(
|
|
1269
|
-
|
|
1546
|
+
row = session.query(
|
|
1547
|
+
cluster_history_table.c.num_nodes,
|
|
1548
|
+
cluster_history_table.c.launched_resources).filter_by(
|
|
1549
|
+
cluster_hash=cluster_hash).first()
|
|
1270
1550
|
if row is None:
|
|
1271
1551
|
return None
|
|
1272
1552
|
num_nodes = row.num_nodes
|
|
@@ -1310,17 +1590,46 @@ def _load_storage_mounts_metadata(
|
|
|
1310
1590
|
@metrics_lib.time_me
|
|
1311
1591
|
@context_utils.cancellation_guard
|
|
1312
1592
|
def get_cluster_from_name(
|
|
1313
|
-
cluster_name: Optional[str]
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1593
|
+
cluster_name: Optional[str],
|
|
1594
|
+
*,
|
|
1595
|
+
include_user_info: bool = True,
|
|
1596
|
+
summary_response: bool = False) -> Optional[Dict[str, Any]]:
|
|
1597
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1598
|
+
query_fields = [
|
|
1599
|
+
cluster_table.c.name,
|
|
1600
|
+
cluster_table.c.launched_at,
|
|
1601
|
+
cluster_table.c.handle,
|
|
1602
|
+
cluster_table.c.last_use,
|
|
1603
|
+
cluster_table.c.status,
|
|
1604
|
+
cluster_table.c.autostop,
|
|
1605
|
+
cluster_table.c.to_down,
|
|
1606
|
+
cluster_table.c.owner,
|
|
1607
|
+
cluster_table.c.metadata,
|
|
1608
|
+
cluster_table.c.cluster_hash,
|
|
1609
|
+
cluster_table.c.cluster_ever_up,
|
|
1610
|
+
cluster_table.c.status_updated_at,
|
|
1611
|
+
cluster_table.c.user_hash,
|
|
1612
|
+
cluster_table.c.config_hash,
|
|
1613
|
+
cluster_table.c.workspace,
|
|
1614
|
+
cluster_table.c.is_managed,
|
|
1615
|
+
]
|
|
1616
|
+
if not summary_response:
|
|
1617
|
+
query_fields.extend([
|
|
1618
|
+
cluster_table.c.last_creation_yaml,
|
|
1619
|
+
cluster_table.c.last_creation_command,
|
|
1620
|
+
])
|
|
1621
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1622
|
+
query = session.query(*query_fields)
|
|
1623
|
+
row = query.filter_by(name=cluster_name).first()
|
|
1317
1624
|
if row is None:
|
|
1318
1625
|
return None
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1626
|
+
if include_user_info:
|
|
1627
|
+
user_hash = _get_user_hash_or_current_user(row.user_hash)
|
|
1628
|
+
user = get_user(user_hash)
|
|
1629
|
+
user_name = user.name if user is not None else None
|
|
1630
|
+
if not summary_response:
|
|
1631
|
+
last_event = get_last_cluster_event(
|
|
1632
|
+
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE)
|
|
1324
1633
|
# TODO: use namedtuple instead of dict
|
|
1325
1634
|
record = {
|
|
1326
1635
|
'name': row.name,
|
|
@@ -1333,30 +1642,45 @@ def get_cluster_from_name(
|
|
|
1333
1642
|
'owner': _load_owner(row.owner),
|
|
1334
1643
|
'metadata': json.loads(row.metadata),
|
|
1335
1644
|
'cluster_hash': row.cluster_hash,
|
|
1336
|
-
'storage_mounts_metadata': _load_storage_mounts_metadata(
|
|
1337
|
-
row.storage_mounts_metadata),
|
|
1338
1645
|
'cluster_ever_up': bool(row.cluster_ever_up),
|
|
1339
1646
|
'status_updated_at': row.status_updated_at,
|
|
1340
|
-
'user_hash': user_hash,
|
|
1341
|
-
'user_name': user_name,
|
|
1342
|
-
'config_hash': row.config_hash,
|
|
1343
1647
|
'workspace': row.workspace,
|
|
1344
|
-
'last_creation_yaml': row.last_creation_yaml,
|
|
1345
|
-
'last_creation_command': row.last_creation_command,
|
|
1346
1648
|
'is_managed': bool(row.is_managed),
|
|
1347
|
-
'
|
|
1649
|
+
'config_hash': row.config_hash,
|
|
1348
1650
|
}
|
|
1651
|
+
if not summary_response:
|
|
1652
|
+
record['last_creation_yaml'] = row.last_creation_yaml
|
|
1653
|
+
record['last_creation_command'] = row.last_creation_command
|
|
1654
|
+
record['last_event'] = last_event
|
|
1655
|
+
if include_user_info:
|
|
1656
|
+
record['user_hash'] = user_hash
|
|
1657
|
+
record['user_name'] = user_name
|
|
1349
1658
|
|
|
1350
1659
|
return record
|
|
1351
1660
|
|
|
1352
1661
|
|
|
1662
|
+
@_init_db
|
|
1663
|
+
@metrics_lib.time_me
|
|
1664
|
+
@context_utils.cancellation_guard
|
|
1665
|
+
def cluster_with_name_exists(cluster_name: str) -> bool:
|
|
1666
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1667
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1668
|
+
row = session.query(
|
|
1669
|
+
cluster_table.c.name).filter_by(name=cluster_name).first()
|
|
1670
|
+
if row is None:
|
|
1671
|
+
return False
|
|
1672
|
+
return True
|
|
1673
|
+
|
|
1674
|
+
|
|
1353
1675
|
@_init_db
|
|
1354
1676
|
@metrics_lib.time_me
|
|
1355
1677
|
def get_clusters(
|
|
1356
1678
|
*, # keyword only separator
|
|
1357
1679
|
exclude_managed_clusters: bool = False,
|
|
1358
|
-
workspaces_filter: Optional[
|
|
1680
|
+
workspaces_filter: Optional[Dict[str, Any]] = None,
|
|
1359
1681
|
user_hashes_filter: Optional[Set[str]] = None,
|
|
1682
|
+
cluster_names: Optional[List[str]] = None,
|
|
1683
|
+
summary_response: bool = False,
|
|
1360
1684
|
) -> List[Dict[str, Any]]:
|
|
1361
1685
|
"""Get clusters from the database.
|
|
1362
1686
|
|
|
@@ -1367,13 +1691,41 @@ def get_clusters(
|
|
|
1367
1691
|
that has workspace field set to one of the values.
|
|
1368
1692
|
user_hashes_filter: If specified, only include clusters
|
|
1369
1693
|
that has user_hash field set to one of the values.
|
|
1694
|
+
cluster_names: If specified, only include clusters
|
|
1695
|
+
that has name field set to one of the values.
|
|
1370
1696
|
"""
|
|
1371
1697
|
# is a cluster has a null user_hash,
|
|
1372
1698
|
# we treat it as belonging to the current user.
|
|
1373
1699
|
current_user_hash = common_utils.get_user_hash()
|
|
1374
1700
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1375
|
-
|
|
1376
|
-
|
|
1701
|
+
query_fields = [
|
|
1702
|
+
cluster_table.c.name,
|
|
1703
|
+
cluster_table.c.launched_at,
|
|
1704
|
+
cluster_table.c.handle,
|
|
1705
|
+
cluster_table.c.status,
|
|
1706
|
+
cluster_table.c.autostop,
|
|
1707
|
+
cluster_table.c.to_down,
|
|
1708
|
+
cluster_table.c.cluster_hash,
|
|
1709
|
+
cluster_table.c.cluster_ever_up,
|
|
1710
|
+
cluster_table.c.user_hash,
|
|
1711
|
+
cluster_table.c.workspace,
|
|
1712
|
+
user_table.c.name.label('user_name'),
|
|
1713
|
+
]
|
|
1714
|
+
if not summary_response:
|
|
1715
|
+
query_fields.extend([
|
|
1716
|
+
cluster_table.c.last_creation_yaml,
|
|
1717
|
+
cluster_table.c.last_creation_command,
|
|
1718
|
+
cluster_table.c.config_hash,
|
|
1719
|
+
cluster_table.c.owner,
|
|
1720
|
+
cluster_table.c.metadata,
|
|
1721
|
+
cluster_table.c.last_use,
|
|
1722
|
+
cluster_table.c.status_updated_at,
|
|
1723
|
+
])
|
|
1724
|
+
if not exclude_managed_clusters:
|
|
1725
|
+
query_fields.append(cluster_table.c.is_managed)
|
|
1726
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1727
|
+
query = session.query(*query_fields).outerjoin(
|
|
1728
|
+
user_table, cluster_table.c.user_hash == user_table.c.id)
|
|
1377
1729
|
if exclude_managed_clusters:
|
|
1378
1730
|
query = query.filter(cluster_table.c.is_managed == int(False))
|
|
1379
1731
|
if workspaces_filter is not None:
|
|
@@ -1385,71 +1737,84 @@ def get_clusters(
|
|
|
1385
1737
|
# If current_user_hash is in user_hashes_filter, we include
|
|
1386
1738
|
# clusters that have a null user_hash.
|
|
1387
1739
|
query = query.filter(
|
|
1388
|
-
cluster_table.c.user_hash.in_(user_hashes_filter) |
|
|
1389
|
-
|
|
1740
|
+
(cluster_table.c.user_hash.in_(user_hashes_filter) |
|
|
1741
|
+
(cluster_table.c.user_hash is None)))
|
|
1390
1742
|
else:
|
|
1391
1743
|
query = query.filter(
|
|
1392
1744
|
cluster_table.c.user_hash.in_(user_hashes_filter))
|
|
1745
|
+
if cluster_names is not None:
|
|
1746
|
+
query = query.filter(cluster_table.c.name.in_(cluster_names))
|
|
1393
1747
|
query = query.order_by(sqlalchemy.desc(cluster_table.c.launched_at))
|
|
1394
1748
|
rows = query.all()
|
|
1395
1749
|
records = []
|
|
1396
1750
|
|
|
1397
|
-
#
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
user_hashes = set(row_to_user_hash.values())
|
|
1406
|
-
user_hash_to_user = _get_users(user_hashes)
|
|
1751
|
+
# Check if we need to fetch the current user's name,
|
|
1752
|
+
# for backwards compatibility, if user_hash is None.
|
|
1753
|
+
current_user_name = None
|
|
1754
|
+
needs_current_user = any(row.user_hash is None for row in rows)
|
|
1755
|
+
if needs_current_user:
|
|
1756
|
+
current_user = get_user(current_user_hash)
|
|
1757
|
+
current_user_name = (current_user.name
|
|
1758
|
+
if current_user is not None else None)
|
|
1407
1759
|
|
|
1408
1760
|
# get last cluster event for each row
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1761
|
+
if not summary_response:
|
|
1762
|
+
cluster_hashes = {row.cluster_hash for row in rows}
|
|
1763
|
+
last_cluster_event_dict = _get_last_cluster_event_multiple(
|
|
1764
|
+
cluster_hashes, ClusterEventType.STATUS_CHANGE)
|
|
1412
1765
|
|
|
1413
|
-
# get user for each row
|
|
1414
1766
|
for row in rows:
|
|
1415
|
-
user_hash = row_to_user_hash[row.cluster_hash]
|
|
1416
|
-
user = user_hash_to_user.get(user_hash, None)
|
|
1417
|
-
user_name = user.name if user is not None else None
|
|
1418
|
-
last_event = last_cluster_event_dict.get(row.cluster_hash, None)
|
|
1419
1767
|
# TODO: use namedtuple instead of dict
|
|
1420
1768
|
record = {
|
|
1421
1769
|
'name': row.name,
|
|
1422
1770
|
'launched_at': row.launched_at,
|
|
1423
1771
|
'handle': pickle.loads(row.handle),
|
|
1424
|
-
'last_use': row.last_use,
|
|
1425
1772
|
'status': status_lib.ClusterStatus[row.status],
|
|
1426
1773
|
'autostop': row.autostop,
|
|
1427
1774
|
'to_down': bool(row.to_down),
|
|
1428
|
-
'owner': _load_owner(row.owner),
|
|
1429
|
-
'metadata': json.loads(row.metadata),
|
|
1430
1775
|
'cluster_hash': row.cluster_hash,
|
|
1431
|
-
'storage_mounts_metadata': _load_storage_mounts_metadata(
|
|
1432
|
-
row.storage_mounts_metadata),
|
|
1433
1776
|
'cluster_ever_up': bool(row.cluster_ever_up),
|
|
1434
|
-
'
|
|
1435
|
-
|
|
1436
|
-
'user_name': user_name
|
|
1437
|
-
|
|
1777
|
+
'user_hash': (row.user_hash
|
|
1778
|
+
if row.user_hash is not None else current_user_hash),
|
|
1779
|
+
'user_name': (row.user_name
|
|
1780
|
+
if row.user_name is not None else current_user_name),
|
|
1438
1781
|
'workspace': row.workspace,
|
|
1439
|
-
'
|
|
1440
|
-
|
|
1441
|
-
'is_managed': bool(row.is_managed),
|
|
1442
|
-
'last_event': last_event,
|
|
1782
|
+
'is_managed': False
|
|
1783
|
+
if exclude_managed_clusters else bool(row.is_managed),
|
|
1443
1784
|
}
|
|
1785
|
+
if not summary_response:
|
|
1786
|
+
record['last_creation_yaml'] = row.last_creation_yaml
|
|
1787
|
+
record['last_creation_command'] = row.last_creation_command
|
|
1788
|
+
record['last_event'] = last_cluster_event_dict.get(
|
|
1789
|
+
row.cluster_hash, None)
|
|
1790
|
+
record['config_hash'] = row.config_hash
|
|
1791
|
+
record['owner'] = _load_owner(row.owner)
|
|
1792
|
+
record['metadata'] = json.loads(row.metadata)
|
|
1793
|
+
record['last_use'] = row.last_use
|
|
1794
|
+
record['status_updated_at'] = row.status_updated_at
|
|
1444
1795
|
|
|
1445
1796
|
records.append(record)
|
|
1446
1797
|
return records
|
|
1447
1798
|
|
|
1448
1799
|
|
|
1800
|
+
@_init_db
|
|
1801
|
+
@metrics_lib.time_me
|
|
1802
|
+
def get_cluster_names(exclude_managed_clusters: bool = False,) -> List[str]:
|
|
1803
|
+
assert _SQLALCHEMY_ENGINE is not None
|
|
1804
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1805
|
+
query = session.query(cluster_table.c.name)
|
|
1806
|
+
if exclude_managed_clusters:
|
|
1807
|
+
query = query.filter(cluster_table.c.is_managed == int(False))
|
|
1808
|
+
rows = query.all()
|
|
1809
|
+
return [row[0] for row in rows]
|
|
1810
|
+
|
|
1811
|
+
|
|
1449
1812
|
@_init_db
|
|
1450
1813
|
@metrics_lib.time_me
|
|
1451
1814
|
def get_clusters_from_history(
|
|
1452
|
-
days: Optional[int] = None
|
|
1815
|
+
days: Optional[int] = None,
|
|
1816
|
+
abbreviate_response: bool = False,
|
|
1817
|
+
cluster_hashes: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
|
1453
1818
|
"""Get cluster reports from history.
|
|
1454
1819
|
|
|
1455
1820
|
Args:
|
|
@@ -1462,69 +1827,103 @@ def get_clusters_from_history(
|
|
|
1462
1827
|
List of cluster records with history information.
|
|
1463
1828
|
"""
|
|
1464
1829
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1465
|
-
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1466
|
-
# Explicitly select columns from both tables to avoid ambiguity
|
|
1467
|
-
query = session.query(
|
|
1468
|
-
cluster_history_table.c.cluster_hash, cluster_history_table.c.name,
|
|
1469
|
-
cluster_history_table.c.num_nodes,
|
|
1470
|
-
cluster_history_table.c.requested_resources,
|
|
1471
|
-
cluster_history_table.c.launched_resources,
|
|
1472
|
-
cluster_history_table.c.usage_intervals,
|
|
1473
|
-
cluster_history_table.c.user_hash,
|
|
1474
|
-
cluster_history_table.c.last_creation_yaml,
|
|
1475
|
-
cluster_history_table.c.last_creation_command,
|
|
1476
|
-
cluster_history_table.c.workspace.label('history_workspace'),
|
|
1477
|
-
cluster_table.c.status, cluster_table.c.workspace,
|
|
1478
|
-
cluster_table.c.status_updated_at).select_from(
|
|
1479
|
-
cluster_history_table.join(cluster_table,
|
|
1480
|
-
cluster_history_table.c.cluster_hash
|
|
1481
|
-
== cluster_table.c.cluster_hash,
|
|
1482
|
-
isouter=True))
|
|
1483
1830
|
|
|
1484
|
-
|
|
1831
|
+
current_user_hash = common_utils.get_user_hash()
|
|
1485
1832
|
|
|
1486
1833
|
# Prepare filtering parameters
|
|
1487
|
-
cutoff_time =
|
|
1834
|
+
cutoff_time = 0
|
|
1488
1835
|
if days is not None:
|
|
1489
1836
|
cutoff_time = int(time.time()) - (days * 24 * 60 * 60)
|
|
1490
1837
|
|
|
1838
|
+
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1839
|
+
# Explicitly select columns from both tables to avoid ambiguity
|
|
1840
|
+
if abbreviate_response:
|
|
1841
|
+
query = session.query(
|
|
1842
|
+
cluster_history_table.c.cluster_hash,
|
|
1843
|
+
cluster_history_table.c.name, cluster_history_table.c.num_nodes,
|
|
1844
|
+
cluster_history_table.c.launched_resources,
|
|
1845
|
+
cluster_history_table.c.usage_intervals,
|
|
1846
|
+
cluster_history_table.c.user_hash,
|
|
1847
|
+
cluster_history_table.c.workspace.label('history_workspace'),
|
|
1848
|
+
cluster_history_table.c.last_activity_time,
|
|
1849
|
+
cluster_history_table.c.launched_at, cluster_table.c.status,
|
|
1850
|
+
cluster_table.c.workspace)
|
|
1851
|
+
else:
|
|
1852
|
+
query = session.query(
|
|
1853
|
+
cluster_history_table.c.cluster_hash,
|
|
1854
|
+
cluster_history_table.c.name, cluster_history_table.c.num_nodes,
|
|
1855
|
+
cluster_history_table.c.launched_resources,
|
|
1856
|
+
cluster_history_table.c.usage_intervals,
|
|
1857
|
+
cluster_history_table.c.user_hash,
|
|
1858
|
+
cluster_history_table.c.last_creation_yaml,
|
|
1859
|
+
cluster_history_table.c.last_creation_command,
|
|
1860
|
+
cluster_history_table.c.workspace.label('history_workspace'),
|
|
1861
|
+
cluster_history_table.c.last_activity_time,
|
|
1862
|
+
cluster_history_table.c.launched_at, cluster_table.c.status,
|
|
1863
|
+
cluster_table.c.workspace)
|
|
1864
|
+
|
|
1865
|
+
query = query.select_from(
|
|
1866
|
+
cluster_history_table.join(cluster_table,
|
|
1867
|
+
cluster_history_table.c.cluster_hash ==
|
|
1868
|
+
cluster_table.c.cluster_hash,
|
|
1869
|
+
isouter=True))
|
|
1870
|
+
|
|
1871
|
+
# Only include clusters that are either active (status is not None)
|
|
1872
|
+
# or are within the cutoff time (cutoff_time <= last_activity_time).
|
|
1873
|
+
# If days is not specified, we include all clusters by setting
|
|
1874
|
+
# cutoff_time to 0.
|
|
1875
|
+
query = query.filter(
|
|
1876
|
+
(cluster_table.c.status.isnot(None) |
|
|
1877
|
+
(cluster_history_table.c.last_activity_time >= cutoff_time)))
|
|
1878
|
+
|
|
1879
|
+
# Order by launched_at descending (most recent first)
|
|
1880
|
+
query = query.order_by(
|
|
1881
|
+
sqlalchemy.desc(cluster_history_table.c.launched_at))
|
|
1882
|
+
|
|
1883
|
+
if cluster_hashes is not None:
|
|
1884
|
+
query = query.filter(
|
|
1885
|
+
cluster_history_table.c.cluster_hash.in_(cluster_hashes))
|
|
1886
|
+
rows = query.all()
|
|
1887
|
+
|
|
1888
|
+
usage_intervals_dict = {}
|
|
1889
|
+
row_to_user_hash = {}
|
|
1890
|
+
for row in rows:
|
|
1891
|
+
row_usage_intervals: List[Tuple[int, Optional[int]]] = []
|
|
1892
|
+
if row.usage_intervals:
|
|
1893
|
+
try:
|
|
1894
|
+
row_usage_intervals = pickle.loads(row.usage_intervals)
|
|
1895
|
+
except (pickle.PickleError, AttributeError):
|
|
1896
|
+
pass
|
|
1897
|
+
usage_intervals_dict[row.cluster_hash] = row_usage_intervals
|
|
1898
|
+
user_hash = (row.user_hash
|
|
1899
|
+
if row.user_hash is not None else current_user_hash)
|
|
1900
|
+
row_to_user_hash[row.cluster_hash] = user_hash
|
|
1901
|
+
|
|
1902
|
+
user_hashes = set(row_to_user_hash.values())
|
|
1903
|
+
user_hash_to_user = get_users(user_hashes)
|
|
1904
|
+
cluster_hashes = set(row_to_user_hash.keys())
|
|
1905
|
+
if not abbreviate_response:
|
|
1906
|
+
last_cluster_event_dict = _get_last_cluster_event_multiple(
|
|
1907
|
+
cluster_hashes, ClusterEventType.STATUS_CHANGE)
|
|
1908
|
+
|
|
1491
1909
|
records = []
|
|
1492
1910
|
for row in rows:
|
|
1493
|
-
user_hash =
|
|
1494
|
-
|
|
1495
|
-
|
|
1911
|
+
user_hash = row_to_user_hash[row.cluster_hash]
|
|
1912
|
+
user = user_hash_to_user.get(user_hash, None)
|
|
1913
|
+
user_name = user.name if user is not None else None
|
|
1914
|
+
if not abbreviate_response:
|
|
1915
|
+
last_event = last_cluster_event_dict.get(row.cluster_hash, None)
|
|
1916
|
+
launched_at = row.launched_at
|
|
1917
|
+
usage_intervals: Optional[List[Tuple[
|
|
1918
|
+
int,
|
|
1919
|
+
Optional[int]]]] = usage_intervals_dict.get(row.cluster_hash, None)
|
|
1920
|
+
duration = _get_cluster_duration(usage_intervals)
|
|
1496
1921
|
|
|
1497
1922
|
# Parse status
|
|
1498
1923
|
status = None
|
|
1499
1924
|
if row.status:
|
|
1500
1925
|
status = status_lib.ClusterStatus[row.status]
|
|
1501
1926
|
|
|
1502
|
-
# Apply filtering: always include active clusters, filter historical
|
|
1503
|
-
# ones by time
|
|
1504
|
-
if cutoff_time is not None and status is None: # Historical cluster
|
|
1505
|
-
# For historical clusters, check if they were used recently
|
|
1506
|
-
# Use the most recent activity from usage_intervals to determine
|
|
1507
|
-
# last use
|
|
1508
|
-
usage_intervals = []
|
|
1509
|
-
if row.usage_intervals:
|
|
1510
|
-
try:
|
|
1511
|
-
usage_intervals = pickle.loads(row.usage_intervals)
|
|
1512
|
-
except (pickle.PickleError, AttributeError):
|
|
1513
|
-
usage_intervals = []
|
|
1514
|
-
|
|
1515
|
-
# Find the most recent activity time from usage_intervals
|
|
1516
|
-
last_activity_time = None
|
|
1517
|
-
if usage_intervals:
|
|
1518
|
-
# Get the end time of the last interval (or start time if
|
|
1519
|
-
# still running)
|
|
1520
|
-
last_interval = usage_intervals[-1]
|
|
1521
|
-
last_activity_time = (last_interval[1] if last_interval[1]
|
|
1522
|
-
is not None else last_interval[0])
|
|
1523
|
-
|
|
1524
|
-
# Skip historical clusters that haven't been used recently
|
|
1525
|
-
if last_activity_time is None or last_activity_time < cutoff_time:
|
|
1526
|
-
continue
|
|
1527
|
-
|
|
1528
1927
|
# Parse launched resources safely
|
|
1529
1928
|
launched_resources = None
|
|
1530
1929
|
if row.launched_resources:
|
|
@@ -1533,17 +1932,6 @@ def get_clusters_from_history(
|
|
|
1533
1932
|
except (pickle.PickleError, AttributeError):
|
|
1534
1933
|
launched_resources = None
|
|
1535
1934
|
|
|
1536
|
-
# Parse usage intervals safely
|
|
1537
|
-
usage_intervals = []
|
|
1538
|
-
if row.usage_intervals:
|
|
1539
|
-
try:
|
|
1540
|
-
usage_intervals = pickle.loads(row.usage_intervals)
|
|
1541
|
-
except (pickle.PickleError, AttributeError):
|
|
1542
|
-
usage_intervals = []
|
|
1543
|
-
|
|
1544
|
-
# Get user name from user hash
|
|
1545
|
-
user = get_user(user_hash)
|
|
1546
|
-
user_name = user.name if user is not None else None
|
|
1547
1935
|
workspace = (row.history_workspace
|
|
1548
1936
|
if row.history_workspace else row.workspace)
|
|
1549
1937
|
|
|
@@ -1559,11 +1947,11 @@ def get_clusters_from_history(
|
|
|
1559
1947
|
'user_hash': user_hash,
|
|
1560
1948
|
'user_name': user_name,
|
|
1561
1949
|
'workspace': workspace,
|
|
1562
|
-
'last_creation_yaml': row.last_creation_yaml,
|
|
1563
|
-
'last_creation_command': row.last_creation_command,
|
|
1564
|
-
'last_event': get_last_cluster_event(
|
|
1565
|
-
row.cluster_hash, event_type=ClusterEventType.STATUS_CHANGE),
|
|
1566
1950
|
}
|
|
1951
|
+
if not abbreviate_response:
|
|
1952
|
+
record['last_creation_yaml'] = row.last_creation_yaml
|
|
1953
|
+
record['last_creation_command'] = row.last_creation_command
|
|
1954
|
+
record['last_event'] = last_event
|
|
1567
1955
|
|
|
1568
1956
|
records.append(record)
|
|
1569
1957
|
|
|
@@ -1846,10 +2234,14 @@ def get_volume_names_start_with(starts_with: str) -> List[str]:
|
|
|
1846
2234
|
|
|
1847
2235
|
@_init_db
|
|
1848
2236
|
@metrics_lib.time_me
|
|
1849
|
-
def get_volumes() -> List[Dict[str, Any]]:
|
|
2237
|
+
def get_volumes(is_ephemeral: Optional[bool] = None) -> List[Dict[str, Any]]:
|
|
1850
2238
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1851
2239
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1852
|
-
|
|
2240
|
+
if is_ephemeral is None:
|
|
2241
|
+
rows = session.query(volume_table).all()
|
|
2242
|
+
else:
|
|
2243
|
+
rows = session.query(volume_table).filter_by(
|
|
2244
|
+
is_ephemeral=int(is_ephemeral)).all()
|
|
1853
2245
|
records = []
|
|
1854
2246
|
for row in rows:
|
|
1855
2247
|
records.append({
|
|
@@ -1861,6 +2253,7 @@ def get_volumes() -> List[Dict[str, Any]]:
|
|
|
1861
2253
|
'last_attached_at': row.last_attached_at,
|
|
1862
2254
|
'last_use': row.last_use,
|
|
1863
2255
|
'status': status_lib.VolumeStatus[row.status],
|
|
2256
|
+
'is_ephemeral': bool(row.is_ephemeral),
|
|
1864
2257
|
})
|
|
1865
2258
|
return records
|
|
1866
2259
|
|
|
@@ -1887,14 +2280,23 @@ def get_volume_by_name(name: str) -> Optional[Dict[str, Any]]:
|
|
|
1887
2280
|
|
|
1888
2281
|
@_init_db
|
|
1889
2282
|
@metrics_lib.time_me
|
|
1890
|
-
def add_volume(
|
|
1891
|
-
|
|
2283
|
+
def add_volume(
|
|
2284
|
+
name: str,
|
|
2285
|
+
config: models.VolumeConfig,
|
|
2286
|
+
status: status_lib.VolumeStatus,
|
|
2287
|
+
is_ephemeral: bool = False,
|
|
2288
|
+
) -> None:
|
|
1892
2289
|
assert _SQLALCHEMY_ENGINE is not None
|
|
1893
2290
|
volume_launched_at = int(time.time())
|
|
1894
2291
|
handle = pickle.dumps(config)
|
|
1895
2292
|
last_use = common_utils.get_current_command()
|
|
1896
2293
|
user_hash = common_utils.get_current_user().id
|
|
1897
2294
|
active_workspace = skypilot_config.get_active_workspace()
|
|
2295
|
+
if is_ephemeral:
|
|
2296
|
+
last_attached_at = int(time.time())
|
|
2297
|
+
status = status_lib.VolumeStatus.IN_USE
|
|
2298
|
+
else:
|
|
2299
|
+
last_attached_at = None
|
|
1898
2300
|
|
|
1899
2301
|
with orm.Session(_SQLALCHEMY_ENGINE) as session:
|
|
1900
2302
|
if (_SQLALCHEMY_ENGINE.dialect.name ==
|
|
@@ -1911,9 +2313,10 @@ def add_volume(name: str, config: models.VolumeConfig,
|
|
|
1911
2313
|
handle=handle,
|
|
1912
2314
|
user_hash=user_hash,
|
|
1913
2315
|
workspace=active_workspace,
|
|
1914
|
-
last_attached_at=
|
|
2316
|
+
last_attached_at=last_attached_at,
|
|
1915
2317
|
last_use=last_use,
|
|
1916
2318
|
status=status.value,
|
|
2319
|
+
is_ephemeral=int(is_ephemeral),
|
|
1917
2320
|
)
|
|
1918
2321
|
do_update_stmt = insert_stmnt.on_conflict_do_nothing()
|
|
1919
2322
|
session.execute(do_update_stmt)
|
|
@@ -2184,11 +2587,22 @@ def _set_cluster_yaml_from_file(cluster_yaml_path: str,
|
|
|
2184
2587
|
# on the local file system and migrate it to the database.
|
|
2185
2588
|
# TODO(syang): remove this check once we have a way to migrate the
|
|
2186
2589
|
# cluster from file to database. Remove on v0.12.0.
|
|
2187
|
-
if cluster_yaml_path is not None
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2590
|
+
if cluster_yaml_path is not None:
|
|
2591
|
+
# First try the exact path
|
|
2592
|
+
path_to_read = None
|
|
2593
|
+
if os.path.exists(cluster_yaml_path):
|
|
2594
|
+
path_to_read = cluster_yaml_path
|
|
2595
|
+
# Fallback: try with .debug suffix (when debug logging was enabled)
|
|
2596
|
+
# Debug logging causes YAML files to be saved with .debug suffix
|
|
2597
|
+
# but the path stored in the handle doesn't include it
|
|
2598
|
+
debug_path = cluster_yaml_path + '.debug'
|
|
2599
|
+
if os.path.exists(debug_path):
|
|
2600
|
+
path_to_read = debug_path
|
|
2601
|
+
if path_to_read is not None:
|
|
2602
|
+
with open(path_to_read, 'r', encoding='utf-8') as f:
|
|
2603
|
+
yaml_str = f.read()
|
|
2604
|
+
set_cluster_yaml(cluster_name, yaml_str)
|
|
2605
|
+
return yaml_str
|
|
2192
2606
|
return None
|
|
2193
2607
|
|
|
2194
2608
|
|