dstack 0.0.9__py3-none-any.whl → 0.20.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/__init__.py +80 -0
- dstack/_internal/cli/commands/apply.py +100 -0
- dstack/_internal/cli/commands/attach.py +161 -0
- dstack/_internal/cli/commands/completion.py +22 -0
- dstack/_internal/cli/commands/delete.py +44 -0
- dstack/_internal/cli/commands/event.py +168 -0
- dstack/_internal/cli/commands/fleet.py +161 -0
- dstack/_internal/cli/commands/gateway.py +159 -0
- dstack/_internal/cli/commands/init.py +64 -0
- dstack/_internal/cli/commands/login.py +352 -0
- dstack/_internal/cli/commands/logs.py +62 -0
- dstack/_internal/cli/commands/metrics.py +153 -0
- dstack/_internal/cli/commands/offer.py +146 -0
- dstack/_internal/cli/commands/project.py +259 -0
- dstack/_internal/cli/commands/ps.py +81 -0
- dstack/_internal/cli/commands/run.py +69 -0
- dstack/_internal/cli/commands/secrets.py +92 -0
- dstack/_internal/cli/commands/server.py +96 -0
- dstack/_internal/cli/commands/stop.py +26 -0
- dstack/_internal/cli/commands/volume.py +117 -0
- dstack/_internal/cli/main.py +101 -0
- dstack/_internal/cli/models/gateways.py +16 -0
- dstack/_internal/cli/models/offers.py +47 -0
- dstack/_internal/cli/models/runs.py +16 -0
- dstack/_internal/cli/services/args.py +31 -0
- dstack/_internal/cli/services/completion.py +91 -0
- dstack/_internal/cli/services/configurators/__init__.py +86 -0
- dstack/_internal/cli/services/configurators/base.py +103 -0
- dstack/_internal/cli/services/configurators/fleet.py +475 -0
- dstack/_internal/cli/services/configurators/gateway.py +231 -0
- dstack/_internal/cli/services/configurators/run.py +882 -0
- dstack/_internal/cli/services/configurators/volume.py +222 -0
- dstack/_internal/cli/services/events.py +68 -0
- dstack/_internal/cli/services/profile.py +182 -0
- dstack/_internal/cli/services/repos.py +71 -0
- dstack/_internal/cli/services/resources.py +54 -0
- dstack/_internal/cli/utils/common.py +159 -0
- dstack/_internal/cli/utils/fleet.py +106 -0
- dstack/_internal/cli/utils/gateway.py +56 -0
- dstack/_internal/cli/utils/gpu.py +178 -0
- dstack/_internal/cli/utils/rich.py +156 -0
- dstack/_internal/cli/utils/run.py +517 -0
- dstack/_internal/cli/utils/secrets.py +25 -0
- dstack/_internal/cli/utils/updates.py +98 -0
- dstack/_internal/cli/utils/volume.py +58 -0
- dstack/_internal/compat.py +3 -0
- dstack/_internal/core/backends/amddevcloud/__init__.py +1 -0
- dstack/_internal/core/backends/amddevcloud/backend.py +16 -0
- dstack/_internal/core/backends/amddevcloud/compute.py +5 -0
- dstack/_internal/core/backends/amddevcloud/configurator.py +29 -0
- dstack/_internal/core/backends/aws/auth.py +30 -0
- dstack/_internal/core/backends/aws/backend.py +31 -0
- dstack/_internal/core/backends/aws/compute.py +1153 -0
- dstack/_internal/core/backends/aws/configurator.py +191 -0
- dstack/_internal/core/backends/aws/models.py +135 -0
- dstack/_internal/core/backends/aws/resources.py +700 -0
- dstack/_internal/core/backends/azure/auth.py +39 -0
- dstack/_internal/core/backends/azure/backend.py +21 -0
- dstack/_internal/core/backends/azure/compute.py +676 -0
- dstack/_internal/core/backends/azure/configurator.py +472 -0
- dstack/_internal/core/backends/azure/models.py +98 -0
- dstack/_internal/core/backends/azure/resources.py +116 -0
- dstack/_internal/core/backends/azure/utils.py +42 -0
- dstack/_internal/core/backends/base/backend.py +18 -0
- dstack/_internal/core/backends/base/compute.py +1101 -0
- dstack/_internal/core/backends/base/configurator.py +117 -0
- dstack/_internal/core/backends/base/models.py +24 -0
- dstack/_internal/core/backends/base/offers.py +232 -0
- dstack/_internal/core/backends/cloudrift/api_client.py +220 -0
- dstack/_internal/core/backends/cloudrift/backend.py +16 -0
- dstack/_internal/core/backends/cloudrift/compute.py +138 -0
- dstack/_internal/core/backends/cloudrift/configurator.py +72 -0
- dstack/_internal/core/backends/cloudrift/models.py +40 -0
- dstack/_internal/core/backends/configurators.py +181 -0
- dstack/_internal/core/backends/cudo/__init__.py +0 -0
- dstack/_internal/core/backends/cudo/api_client.py +111 -0
- dstack/_internal/core/backends/cudo/backend.py +16 -0
- dstack/_internal/core/backends/cudo/compute.py +174 -0
- dstack/_internal/core/backends/cudo/configurator.py +63 -0
- dstack/_internal/core/backends/cudo/models.py +37 -0
- dstack/_internal/core/backends/datacrunch/__init__.py +1 -0
- dstack/_internal/core/backends/datacrunch/backend.py +18 -0
- dstack/_internal/core/backends/datacrunch/compute.py +8 -0
- dstack/_internal/core/backends/datacrunch/configurator.py +17 -0
- dstack/_internal/core/backends/digitalocean/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean/backend.py +16 -0
- dstack/_internal/core/backends/digitalocean/compute.py +5 -0
- dstack/_internal/core/backends/digitalocean/configurator.py +31 -0
- dstack/_internal/core/backends/digitalocean_base/__init__.py +1 -0
- dstack/_internal/core/backends/digitalocean_base/api_client.py +104 -0
- dstack/_internal/core/backends/digitalocean_base/backend.py +5 -0
- dstack/_internal/core/backends/digitalocean_base/compute.py +174 -0
- dstack/_internal/core/backends/digitalocean_base/configurator.py +57 -0
- dstack/_internal/core/backends/digitalocean_base/models.py +43 -0
- dstack/_internal/core/backends/dstack/__init__.py +0 -0
- dstack/_internal/core/backends/dstack/models.py +26 -0
- dstack/_internal/core/backends/features.py +74 -0
- dstack/_internal/core/backends/gcp/__init__.py +0 -0
- dstack/_internal/core/backends/gcp/auth.py +57 -0
- dstack/_internal/core/backends/gcp/backend.py +17 -0
- dstack/_internal/core/backends/gcp/compute.py +1257 -0
- dstack/_internal/core/backends/gcp/configurator.py +206 -0
- dstack/_internal/core/backends/gcp/features/__init__.py +0 -0
- dstack/_internal/core/backends/gcp/features/tcpx.py +65 -0
- dstack/_internal/core/backends/gcp/models.py +160 -0
- dstack/_internal/core/backends/gcp/resources.py +585 -0
- dstack/_internal/core/backends/hotaisle/__init__.py +1 -0
- dstack/_internal/core/backends/hotaisle/api_client.py +101 -0
- dstack/_internal/core/backends/hotaisle/backend.py +16 -0
- dstack/_internal/core/backends/hotaisle/compute.py +188 -0
- dstack/_internal/core/backends/hotaisle/configurator.py +66 -0
- dstack/_internal/core/backends/hotaisle/models.py +45 -0
- dstack/_internal/core/backends/kubernetes/__init__.py +0 -0
- dstack/_internal/core/backends/kubernetes/backend.py +16 -0
- dstack/_internal/core/backends/kubernetes/compute.py +1077 -0
- dstack/_internal/core/backends/kubernetes/configurator.py +61 -0
- dstack/_internal/core/backends/kubernetes/models.py +71 -0
- dstack/_internal/core/backends/kubernetes/utils.py +81 -0
- dstack/_internal/core/backends/lambdalabs/__init__.py +0 -0
- dstack/_internal/core/backends/lambdalabs/api_client.py +87 -0
- dstack/_internal/core/backends/lambdalabs/backend.py +17 -0
- dstack/_internal/core/backends/lambdalabs/compute.py +233 -0
- dstack/_internal/core/backends/lambdalabs/configurator.py +65 -0
- dstack/_internal/core/backends/lambdalabs/models.py +37 -0
- dstack/_internal/core/backends/local/__init__.py +0 -0
- dstack/_internal/core/backends/local/backend.py +14 -0
- dstack/_internal/core/backends/local/compute.py +130 -0
- dstack/_internal/core/backends/models.py +158 -0
- dstack/_internal/core/backends/nebius/__init__.py +0 -0
- dstack/_internal/core/backends/nebius/backend.py +16 -0
- dstack/_internal/core/backends/nebius/compute.py +401 -0
- dstack/_internal/core/backends/nebius/configurator.py +98 -0
- dstack/_internal/core/backends/nebius/models.py +185 -0
- dstack/_internal/core/backends/nebius/resources.py +433 -0
- dstack/_internal/core/backends/oci/__init__.py +0 -0
- dstack/_internal/core/backends/oci/auth.py +21 -0
- dstack/_internal/core/backends/oci/backend.py +16 -0
- dstack/_internal/core/backends/oci/compute.py +209 -0
- dstack/_internal/core/backends/oci/configurator.py +156 -0
- dstack/_internal/core/backends/oci/exceptions.py +15 -0
- dstack/_internal/core/backends/oci/models.py +87 -0
- dstack/_internal/core/backends/oci/region.py +86 -0
- dstack/_internal/core/backends/oci/resources.py +836 -0
- dstack/_internal/core/backends/runpod/__init__.py +0 -0
- dstack/_internal/core/backends/runpod/api_client.py +627 -0
- dstack/_internal/core/backends/runpod/backend.py +16 -0
- dstack/_internal/core/backends/runpod/compute.py +444 -0
- dstack/_internal/core/backends/runpod/configurator.py +63 -0
- dstack/_internal/core/backends/runpod/models.py +54 -0
- dstack/_internal/core/backends/template/__init__.py +0 -0
- dstack/_internal/core/backends/template/backend.py.jinja +16 -0
- dstack/_internal/core/backends/template/compute.py.jinja +95 -0
- dstack/_internal/core/backends/template/configurator.py.jinja +69 -0
- dstack/_internal/core/backends/template/models.py.jinja +62 -0
- dstack/_internal/core/backends/tensordock/models.py +40 -0
- dstack/_internal/core/backends/vastai/__init__.py +0 -0
- dstack/_internal/core/backends/vastai/api_client.py +143 -0
- dstack/_internal/core/backends/vastai/backend.py +16 -0
- dstack/_internal/core/backends/vastai/compute.py +141 -0
- dstack/_internal/core/backends/vastai/configurator.py +69 -0
- dstack/_internal/core/backends/vastai/models.py +37 -0
- dstack/_internal/core/backends/verda/__init__.py +0 -0
- dstack/_internal/core/backends/verda/backend.py +16 -0
- dstack/_internal/core/backends/verda/compute.py +266 -0
- dstack/_internal/core/backends/verda/configurator.py +73 -0
- dstack/_internal/core/backends/verda/models.py +38 -0
- dstack/_internal/core/backends/vultr/__init__.py +0 -0
- dstack/_internal/core/backends/vultr/api_client.py +116 -0
- dstack/_internal/core/backends/vultr/backend.py +16 -0
- dstack/_internal/core/backends/vultr/compute.py +167 -0
- dstack/_internal/core/backends/vultr/configurator.py +71 -0
- dstack/_internal/core/backends/vultr/models.py +34 -0
- dstack/_internal/core/compatibility/__init__.py +0 -0
- dstack/_internal/core/compatibility/events.py +13 -0
- dstack/_internal/core/compatibility/fleets.py +58 -0
- dstack/_internal/core/compatibility/gateways.py +39 -0
- dstack/_internal/core/compatibility/gpus.py +13 -0
- dstack/_internal/core/compatibility/logs.py +14 -0
- dstack/_internal/core/compatibility/runs.py +86 -0
- dstack/_internal/core/compatibility/volumes.py +37 -0
- dstack/_internal/core/consts.py +8 -0
- dstack/_internal/core/errors.py +160 -0
- dstack/_internal/core/models/__init__.py +0 -0
- dstack/_internal/core/models/auth.py +28 -0
- dstack/_internal/core/models/backends/__init__.py +0 -0
- dstack/_internal/core/models/backends/base.py +48 -0
- dstack/_internal/core/models/common.py +143 -0
- dstack/_internal/core/models/compute_groups.py +39 -0
- dstack/_internal/core/models/config.py +28 -0
- dstack/_internal/core/models/configurations.py +1123 -0
- dstack/_internal/core/models/envs.py +149 -0
- dstack/_internal/core/models/events.py +98 -0
- dstack/_internal/core/models/files.py +67 -0
- dstack/_internal/core/models/fleets.py +437 -0
- dstack/_internal/core/models/gateways.py +146 -0
- dstack/_internal/core/models/gpus.py +45 -0
- dstack/_internal/core/models/health.py +28 -0
- dstack/_internal/core/models/instances.py +346 -0
- dstack/_internal/core/models/logs.py +27 -0
- dstack/_internal/core/models/metrics.py +14 -0
- dstack/_internal/core/models/placement.py +27 -0
- dstack/_internal/core/models/profiles.py +431 -0
- dstack/_internal/core/models/projects.py +46 -0
- dstack/_internal/core/models/repos/__init__.py +34 -0
- dstack/_internal/core/models/repos/base.py +36 -0
- dstack/_internal/core/models/repos/local.py +96 -0
- dstack/_internal/core/models/repos/remote.py +341 -0
- dstack/_internal/core/models/repos/virtual.py +85 -0
- dstack/_internal/core/models/resources.py +424 -0
- dstack/_internal/core/models/routers.py +24 -0
- dstack/_internal/core/models/runs.py +618 -0
- dstack/_internal/core/models/secrets.py +16 -0
- dstack/_internal/core/models/server.py +7 -0
- dstack/_internal/core/models/services.py +76 -0
- dstack/_internal/core/models/unix.py +53 -0
- dstack/_internal/core/models/users.py +60 -0
- dstack/_internal/core/models/volumes.py +221 -0
- dstack/_internal/core/services/__init__.py +16 -0
- dstack/_internal/core/services/api_client.py +15 -0
- dstack/_internal/core/services/configs/__init__.py +116 -0
- dstack/_internal/core/services/diff.py +71 -0
- dstack/_internal/core/services/logs.py +58 -0
- dstack/_internal/core/services/profiles.py +46 -0
- dstack/_internal/core/services/repos.py +236 -0
- dstack/_internal/core/services/ssh/__init__.py +27 -0
- dstack/_internal/core/services/ssh/attach.py +241 -0
- dstack/_internal/core/services/ssh/client.py +113 -0
- dstack/_internal/core/services/ssh/key_manager.py +53 -0
- dstack/_internal/core/services/ssh/ports.py +89 -0
- dstack/_internal/core/services/ssh/tunnel.py +337 -0
- dstack/_internal/proxy/__init__.py +8 -0
- dstack/_internal/proxy/gateway/__init__.py +0 -0
- dstack/_internal/proxy/gateway/app.py +89 -0
- dstack/_internal/proxy/gateway/auth.py +26 -0
- dstack/_internal/proxy/gateway/const.py +7 -0
- dstack/_internal/proxy/gateway/deps.py +73 -0
- dstack/_internal/proxy/gateway/main.py +17 -0
- dstack/_internal/proxy/gateway/models.py +23 -0
- dstack/_internal/proxy/gateway/repo/__init__.py +0 -0
- dstack/_internal/proxy/gateway/repo/repo.py +121 -0
- dstack/_internal/proxy/gateway/repo/state_v1.py +164 -0
- dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -0
- dstack/_internal/proxy/gateway/resources/nginx/entrypoint.jinja2 +27 -0
- dstack/_internal/proxy/gateway/resources/nginx/router_workers.jinja2 +23 -0
- dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +105 -0
- dstack/_internal/proxy/gateway/routers/__init__.py +0 -0
- dstack/_internal/proxy/gateway/routers/auth.py +10 -0
- dstack/_internal/proxy/gateway/routers/config.py +28 -0
- dstack/_internal/proxy/gateway/routers/registry.py +124 -0
- dstack/_internal/proxy/gateway/routers/stats.py +18 -0
- dstack/_internal/proxy/gateway/schemas/__init__.py +0 -0
- dstack/_internal/proxy/gateway/schemas/common.py +5 -0
- dstack/_internal/proxy/gateway/schemas/config.py +9 -0
- dstack/_internal/proxy/gateway/schemas/registry.py +63 -0
- dstack/_internal/proxy/gateway/schemas/stats.py +15 -0
- dstack/_internal/proxy/gateway/services/__init__.py +0 -0
- dstack/_internal/proxy/gateway/services/model_routers/__init__.py +18 -0
- dstack/_internal/proxy/gateway/services/model_routers/base.py +91 -0
- dstack/_internal/proxy/gateway/services/model_routers/sglang.py +269 -0
- dstack/_internal/proxy/gateway/services/nginx.py +455 -0
- dstack/_internal/proxy/gateway/services/registry.py +426 -0
- dstack/_internal/proxy/gateway/services/server_client.py +95 -0
- dstack/_internal/proxy/gateway/services/stats.py +170 -0
- dstack/_internal/proxy/gateway/testing/__init__.py +0 -0
- dstack/_internal/proxy/gateway/testing/common.py +13 -0
- dstack/_internal/proxy/lib/__init__.py +0 -0
- dstack/_internal/proxy/lib/auth.py +7 -0
- dstack/_internal/proxy/lib/deps.py +106 -0
- dstack/_internal/proxy/lib/errors.py +14 -0
- dstack/_internal/proxy/lib/models.py +112 -0
- dstack/_internal/proxy/lib/repo.py +27 -0
- dstack/_internal/proxy/lib/routers/__init__.py +0 -0
- dstack/_internal/proxy/lib/routers/model_proxy.py +102 -0
- dstack/_internal/proxy/lib/schemas/__init__.py +0 -0
- dstack/_internal/proxy/lib/schemas/model_proxy.py +77 -0
- dstack/_internal/proxy/lib/services/__init__.py +0 -0
- dstack/_internal/proxy/lib/services/model_proxy/__init__.py +0 -0
- dstack/_internal/proxy/lib/services/model_proxy/clients/__init__.py +0 -0
- dstack/_internal/proxy/lib/services/model_proxy/clients/base.py +18 -0
- dstack/_internal/proxy/lib/services/model_proxy/clients/openai.py +67 -0
- dstack/_internal/proxy/lib/services/model_proxy/clients/tgi.py +208 -0
- dstack/_internal/proxy/lib/services/model_proxy/model_proxy.py +23 -0
- dstack/_internal/proxy/lib/services/service_connection.py +160 -0
- dstack/_internal/proxy/lib/testing/__init__.py +0 -0
- dstack/_internal/proxy/lib/testing/auth.py +11 -0
- dstack/_internal/proxy/lib/testing/common.py +51 -0
- dstack/_internal/server/__init__.py +0 -0
- dstack/_internal/server/alembic.ini +100 -0
- dstack/_internal/server/app.py +432 -0
- dstack/_internal/server/background/__init__.py +142 -0
- dstack/_internal/server/background/tasks/__init__.py +0 -0
- dstack/_internal/server/background/tasks/common.py +24 -0
- dstack/_internal/server/background/tasks/process_compute_groups.py +167 -0
- dstack/_internal/server/background/tasks/process_events.py +17 -0
- dstack/_internal/server/background/tasks/process_fleets.py +289 -0
- dstack/_internal/server/background/tasks/process_gateways.py +188 -0
- dstack/_internal/server/background/tasks/process_idle_volumes.py +145 -0
- dstack/_internal/server/background/tasks/process_instances.py +1186 -0
- dstack/_internal/server/background/tasks/process_metrics.py +172 -0
- dstack/_internal/server/background/tasks/process_placement_groups.py +104 -0
- dstack/_internal/server/background/tasks/process_probes.py +164 -0
- dstack/_internal/server/background/tasks/process_prometheus_metrics.py +150 -0
- dstack/_internal/server/background/tasks/process_running_jobs.py +1238 -0
- dstack/_internal/server/background/tasks/process_runs.py +842 -0
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +1106 -0
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +108 -0
- dstack/_internal/server/background/tasks/process_volumes.py +129 -0
- dstack/_internal/server/compatibility/__init__.py +0 -0
- dstack/_internal/server/compatibility/common.py +20 -0
- dstack/_internal/server/compatibility/gpus.py +22 -0
- dstack/_internal/server/db.py +127 -0
- dstack/_internal/server/deps.py +19 -0
- dstack/_internal/server/main.py +4 -0
- dstack/_internal/server/migrations/__init__.py +0 -0
- dstack/_internal/server/migrations/env.py +112 -0
- dstack/_internal/server/migrations/script.py.mako +28 -0
- dstack/_internal/server/migrations/versions/006512f572b4_add_projects_original_name.py +38 -0
- dstack/_internal/server/migrations/versions/065588ec72b8_add_vultr_to_backendtype_enum.py +81 -0
- dstack/_internal/server/migrations/versions/06e977bc61c7_add_usermodel_deleted_and_original_name.py +45 -0
- dstack/_internal/server/migrations/versions/0e33559e16ed_update_instancestatus.py +64 -0
- dstack/_internal/server/migrations/versions/112753bc17dd_remove_nullable_fields.py +50 -0
- dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
- dstack/_internal/server/migrations/versions/14f2cb002fc2_add_jobmodel_removed_flag.py +44 -0
- dstack/_internal/server/migrations/versions/1a48dfe44a40_rework_termination_handling.py +42 -0
- dstack/_internal/server/migrations/versions/1aa9638ad963_added_email_index.py +31 -0
- dstack/_internal/server/migrations/versions/1e3fb39ef74b_add_remote_connection_details.py +26 -0
- dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
- dstack/_internal/server/migrations/versions/20166748b60c_add_jobmodel_disconnected_at.py +100 -0
- dstack/_internal/server/migrations/versions/22d74df9897e_add_events_and_event_targets.py +99 -0
- dstack/_internal/server/migrations/versions/23e01c56279a_make_blob_nullable.py +32 -0
- dstack/_internal/server/migrations/versions/2498ab323443_add_fleetmodel_consolidation_attempt_.py +44 -0
- dstack/_internal/server/migrations/versions/252d3743b641_.py +40 -0
- dstack/_internal/server/migrations/versions/25479f540245_add_probes.py +43 -0
- dstack/_internal/server/migrations/versions/27d3e55759fa_add_pools.py +152 -0
- dstack/_internal/server/migrations/versions/29826f417010_remove_instancemodel_retry_policy.py +34 -0
- dstack/_internal/server/migrations/versions/29c08c6a8cb3_.py +36 -0
- dstack/_internal/server/migrations/versions/35e90e1b0d3e_add_rolling_deployment_fields.py +42 -0
- dstack/_internal/server/migrations/versions/35f732ee4cf5_add_projectmodel_is_public.py +39 -0
- dstack/_internal/server/migrations/versions/3cf77fb8bcf1_store_repo_clone_url.py +85 -0
- dstack/_internal/server/migrations/versions/3d7f6c2ec000_add_jobmodel_registered.py +28 -0
- dstack/_internal/server/migrations/versions/3dbdce90d0e0_fix_code_uq_constraint.py +33 -0
- dstack/_internal/server/migrations/versions/48ad3ecbaea2_do_not_delete_projects_and_runs.py +46 -0
- dstack/_internal/server/migrations/versions/4ae1a5b0e7f1_add_run_list_index.py +34 -0
- dstack/_internal/server/migrations/versions/4b4319398164_introduce_runs_processing.py +144 -0
- dstack/_internal/server/migrations/versions/50dd7ea98639_index_status_columns.py +55 -0
- dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
- dstack/_internal/server/migrations/versions/54a77e19c64c_add_manager_project_role.py +67 -0
- dstack/_internal/server/migrations/versions/555138b1f77f_change_instancemodel_for_asynchronous_.py +61 -0
- dstack/_internal/server/migrations/versions/58aa5162dcc3_add_gatewaymodel_configuration.py +32 -0
- dstack/_internal/server/migrations/versions/5ad8debc8fe6_fixes_for_psql.py +329 -0
- dstack/_internal/server/migrations/versions/5ec538b70e71_replace_instansestatus.py +31 -0
- dstack/_internal/server/migrations/versions/5f1707c525d2_add_filearchivemodel.py +39 -0
- dstack/_internal/server/migrations/versions/5fd659afca82_add_ix_instances_fleet_id.py +31 -0
- dstack/_internal/server/migrations/versions/60e444118b6d_add_jobprometheusmetrics.py +40 -0
- dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
- dstack/_internal/server/migrations/versions/644b8a114187_add_secretmodel.py +49 -0
- dstack/_internal/server/migrations/versions/686fb8341ea5_add_user_emails.py +32 -0
- dstack/_internal/server/migrations/versions/6c1a9d6530ee_add_jobmodel_exit_status.py +26 -0
- dstack/_internal/server/migrations/versions/706e0acc3a7d_add_runmodel_desired_replica_counts.py +26 -0
- dstack/_internal/server/migrations/versions/710e5b3fac8f_add_encryption.py +54 -0
- dstack/_internal/server/migrations/versions/728b1488b1b4_add_instance_health.py +50 -0
- dstack/_internal/server/migrations/versions/74a1f55209bd_store_enums_as_strings.py +484 -0
- dstack/_internal/server/migrations/versions/7b24b1c8eba7_add_instancemodel_last_processed_at.py +68 -0
- dstack/_internal/server/migrations/versions/7ba3b59d7ca6_add_runmodel_resubmission_attempt.py +35 -0
- dstack/_internal/server/migrations/versions/7bc2586e8b9e_make_instancemodel_pool_id_optional.py +36 -0
- dstack/_internal/server/migrations/versions/7d1ec2b920ac_add_computegroupmodel.py +91 -0
- dstack/_internal/server/migrations/versions/803c7e9ed85d_add_jobmodel_job_runtime_data.py +32 -0
- dstack/_internal/server/migrations/versions/82b32a135ea2_.py +58 -0
- dstack/_internal/server/migrations/versions/866ec1d67184_replace_retrypolicy_limit_with_.py +93 -0
- dstack/_internal/server/migrations/versions/903c91e24634_add_instances_termination_reason_message.py +34 -0
- dstack/_internal/server/migrations/versions/91a12fff6c76_add_repocredsmodel.py +43 -0
- dstack/_internal/server/migrations/versions/91ac5e543037_extend_repos_creds_column.py +36 -0
- dstack/_internal/server/migrations/versions/98cd9c8b5927_add_volumemodel.py +73 -0
- dstack/_internal/server/migrations/versions/98d1b92988bc_add_jobterminationreason_terminated_due_.py +140 -0
- dstack/_internal/server/migrations/versions/99b4c8c954ea_add_termination_reason_message.py +71 -0
- dstack/_internal/server/migrations/versions/9eea6af28e10_added_fail_reason_for_instancemodel.py +36 -0
- dstack/_internal/server/migrations/versions/__init__.py +0 -0
- dstack/_internal/server/migrations/versions/a060e2440936_.py +206 -0
- dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
- dstack/_internal/server/migrations/versions/a7b46c073fa1_add_placementgroupmodel.py +58 -0
- dstack/_internal/server/migrations/versions/afbc600ff2b2_add_created_at_to_usermodel_and_.py +102 -0
- dstack/_internal/server/migrations/versions/b4d6ad60db08_add_instancemodel_unreachable.py +37 -0
- dstack/_internal/server/migrations/versions/b88d55c2a07d_replace_instancestatus_ready.py +21 -0
- dstack/_internal/server/migrations/versions/bc8ca4a505c6_store_backendtype_as_string.py +171 -0
- dstack/_internal/server/migrations/versions/bca2fdf130bf_add_runmodel_priority.py +34 -0
- dstack/_internal/server/migrations/versions/bfba43f6def2_.py +32 -0
- dstack/_internal/server/migrations/versions/c00090eaef21_support_fleets.py +108 -0
- dstack/_internal/server/migrations/versions/c154eece89da_add_fields_for_async_gateway_creation.py +74 -0
- dstack/_internal/server/migrations/versions/c20626d03cfb_add_jobmetricspoint.py +43 -0
- dstack/_internal/server/migrations/versions/c48df7985d57_add_instance_termination_retries.py +38 -0
- dstack/_internal/server/migrations/versions/c83d45f9a971_replace_string_with_text.py +150 -0
- dstack/_internal/server/migrations/versions/d0bb68e48b9f_add_project_owners_and_quotas.py +106 -0
- dstack/_internal/server/migrations/versions/d3e8af4786fa_gateway_compute_flag_deleted.py +34 -0
- dstack/_internal/server/migrations/versions/d4d9dc26cf58_add_ix_jobs_run_id.py +31 -0
- dstack/_internal/server/migrations/versions/d5863798bf41_add_volumemodel_last_job_processed_at.py +40 -0
- dstack/_internal/server/migrations/versions/d6b11105f659_add_usermodel_active.py +36 -0
- dstack/_internal/server/migrations/versions/da574e93fee0_add_jobmodel_volumes_detached_at.py +40 -0
- dstack/_internal/server/migrations/versions/dfffd6a1165c_add_fields_for_gateways_behind_alb.py +36 -0
- dstack/_internal/server/migrations/versions/e2d08cd1b8d9_add_jobmodel_fleet.py +41 -0
- dstack/_internal/server/migrations/versions/e3b7db07727f_add_gatewaycomputemodel_app_updated_at.py +61 -0
- dstack/_internal/server/migrations/versions/e6391ca6a264_separate_gateways_from_compute.py +72 -0
- dstack/_internal/server/migrations/versions/ea60480f82bb_add_membermodel_member_num.py +32 -0
- dstack/_internal/server/migrations/versions/ec02a26a256c_add_runmodel_next_triggered_at.py +38 -0
- dstack/_internal/server/migrations/versions/ed0ca30e13bb_migrate_instancestatus_provisioning.py +29 -0
- dstack/_internal/server/migrations/versions/fe72c4de8376_add_gateways.py +81 -0
- dstack/_internal/server/migrations/versions/ff1d94f65b08_user_ssh_key.py +34 -0
- dstack/_internal/server/migrations/versions/ffa99edd1988_add_jobterminationreason_max_duration_.py +81 -0
- dstack/_internal/server/models.py +930 -0
- dstack/_internal/server/routers/__init__.py +0 -0
- dstack/_internal/server/routers/auth.py +34 -0
- dstack/_internal/server/routers/backends.py +142 -0
- dstack/_internal/server/routers/events.py +60 -0
- dstack/_internal/server/routers/files.py +68 -0
- dstack/_internal/server/routers/fleets.py +202 -0
- dstack/_internal/server/routers/gateways.py +109 -0
- dstack/_internal/server/routers/gpus.py +32 -0
- dstack/_internal/server/routers/instances.py +77 -0
- dstack/_internal/server/routers/logs.py +34 -0
- dstack/_internal/server/routers/metrics.py +82 -0
- dstack/_internal/server/routers/projects.py +205 -0
- dstack/_internal/server/routers/prometheus.py +35 -0
- dstack/_internal/server/routers/repos.py +118 -0
- dstack/_internal/server/routers/runs.py +216 -0
- dstack/_internal/server/routers/secrets.py +86 -0
- dstack/_internal/server/routers/server.py +19 -0
- dstack/_internal/server/routers/users.py +158 -0
- dstack/_internal/server/routers/volumes.py +122 -0
- dstack/_internal/server/schemas/__init__.py +0 -0
- dstack/_internal/server/schemas/auth.py +83 -0
- dstack/_internal/server/schemas/backends.py +16 -0
- dstack/_internal/server/schemas/common.py +9 -0
- dstack/_internal/server/schemas/events.py +211 -0
- dstack/_internal/server/schemas/files.py +5 -0
- dstack/_internal/server/schemas/fleets.py +49 -0
- dstack/_internal/server/schemas/gateways.py +31 -0
- dstack/_internal/server/schemas/gpus.py +26 -0
- dstack/_internal/server/schemas/health/__init__.py +0 -0
- dstack/_internal/server/schemas/health/dcgm.py +56 -0
- dstack/_internal/server/schemas/instances.py +47 -0
- dstack/_internal/server/schemas/logs.py +17 -0
- dstack/_internal/server/schemas/projects.py +81 -0
- dstack/_internal/server/schemas/repos.py +24 -0
- dstack/_internal/server/schemas/runner.py +269 -0
- dstack/_internal/server/schemas/runs.py +66 -0
- dstack/_internal/server/schemas/secrets.py +16 -0
- dstack/_internal/server/schemas/users.py +72 -0
- dstack/_internal/server/schemas/volumes.py +29 -0
- dstack/_internal/server/security/__init__.py +0 -0
- dstack/_internal/server/security/permissions.py +251 -0
- dstack/_internal/server/services/__init__.py +0 -0
- dstack/_internal/server/services/auth.py +77 -0
- dstack/_internal/server/services/backends/__init__.py +404 -0
- dstack/_internal/server/services/backends/handlers.py +105 -0
- dstack/_internal/server/services/compute_groups.py +22 -0
- dstack/_internal/server/services/config.py +279 -0
- dstack/_internal/server/services/docker.py +162 -0
- dstack/_internal/server/services/encryption/__init__.py +102 -0
- dstack/_internal/server/services/encryption/keys/__init__.py +0 -0
- dstack/_internal/server/services/encryption/keys/aes.py +68 -0
- dstack/_internal/server/services/encryption/keys/base.py +19 -0
- dstack/_internal/server/services/encryption/keys/identity.py +28 -0
- dstack/_internal/server/services/events.py +477 -0
- dstack/_internal/server/services/files.py +91 -0
- dstack/_internal/server/services/fleets.py +1224 -0
- dstack/_internal/server/services/gateways/__init__.py +686 -0
- dstack/_internal/server/services/gateways/client.py +209 -0
- dstack/_internal/server/services/gateways/connection.py +139 -0
- dstack/_internal/server/services/gateways/pool.py +58 -0
- dstack/_internal/server/services/gpus.py +387 -0
- dstack/_internal/server/services/instances.py +731 -0
- dstack/_internal/server/services/jobs/__init__.py +840 -0
- dstack/_internal/server/services/jobs/configurators/__init__.py +0 -0
- dstack/_internal/server/services/jobs/configurators/base.py +469 -0
- dstack/_internal/server/services/jobs/configurators/dev.py +69 -0
- dstack/_internal/server/services/jobs/configurators/extensions/__init__.py +0 -0
- dstack/_internal/server/services/jobs/configurators/extensions/base.py +15 -0
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +42 -0
- dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +42 -0
- dstack/_internal/server/services/jobs/configurators/extensions/windsurf.py +43 -0
- dstack/_internal/server/services/jobs/configurators/service.py +28 -0
- dstack/_internal/server/services/jobs/configurators/task.py +39 -0
- dstack/_internal/server/services/locking.py +187 -0
- dstack/_internal/server/services/logging.py +29 -0
- dstack/_internal/server/services/logs/__init__.py +122 -0
- dstack/_internal/server/services/logs/aws.py +373 -0
- dstack/_internal/server/services/logs/base.py +47 -0
- dstack/_internal/server/services/logs/filelog.py +261 -0
- dstack/_internal/server/services/logs/fluentbit.py +329 -0
- dstack/_internal/server/services/logs/gcp.py +181 -0
- dstack/_internal/server/services/metrics.py +172 -0
- dstack/_internal/server/services/offers.py +249 -0
- dstack/_internal/server/services/permissions.py +37 -0
- dstack/_internal/server/services/placement.py +234 -0
- dstack/_internal/server/services/plugins.py +109 -0
- dstack/_internal/server/services/probes.py +10 -0
- dstack/_internal/server/services/projects.py +835 -0
- dstack/_internal/server/services/prometheus/__init__.py +0 -0
- dstack/_internal/server/services/prometheus/client_metrics.py +55 -0
- dstack/_internal/server/services/prometheus/custom_metrics.py +327 -0
- dstack/_internal/server/services/proxy/__init__.py +3 -0
- dstack/_internal/server/services/proxy/auth.py +12 -0
- dstack/_internal/server/services/proxy/deps.py +18 -0
- dstack/_internal/server/services/proxy/repo.py +189 -0
- dstack/_internal/server/services/proxy/routers/__init__.py +0 -0
- dstack/_internal/server/services/proxy/routers/service_proxy.py +49 -0
- dstack/_internal/server/services/proxy/services/__init__.py +0 -0
- dstack/_internal/server/services/proxy/services/service_proxy.py +135 -0
- dstack/_internal/server/services/repos.py +362 -0
- dstack/_internal/server/services/requirements/__init__.py +0 -0
- dstack/_internal/server/services/requirements/combine.py +260 -0
- dstack/_internal/server/services/resources.py +21 -0
- dstack/_internal/server/services/runner/__init__.py +0 -0
- dstack/_internal/server/services/runner/client.py +646 -0
- dstack/_internal/server/services/runner/ssh.py +128 -0
- dstack/_internal/server/services/runs/__init__.py +1026 -0
- dstack/_internal/server/services/runs/plan.py +703 -0
- dstack/_internal/server/services/runs/replicas.py +317 -0
- dstack/_internal/server/services/runs/spec.py +191 -0
- dstack/_internal/server/services/secrets.py +245 -0
- dstack/_internal/server/services/services/__init__.py +345 -0
- dstack/_internal/server/services/services/autoscalers.py +140 -0
- dstack/_internal/server/services/services/options.py +53 -0
- dstack/_internal/server/services/ssh.py +67 -0
- dstack/_internal/server/services/storage/__init__.py +37 -0
- dstack/_internal/server/services/storage/base.py +48 -0
- dstack/_internal/server/services/storage/gcs.py +66 -0
- dstack/_internal/server/services/storage/s3.py +69 -0
- dstack/_internal/server/services/users.py +461 -0
- dstack/_internal/server/services/volumes.py +496 -0
- dstack/_internal/server/settings.py +161 -0
- dstack/_internal/server/statics/00a6e1fb461ed2929fb9.png +0 -0
- dstack/_internal/server/statics/0cae4d9f0a36034984a7.png +0 -0
- dstack/_internal/server/statics/391de232cc0e30cae513.png +0 -0
- dstack/_internal/server/statics/4e0eead8c1a73689ef9d.svg +1 -0
- dstack/_internal/server/statics/544afa2f63428c2235b0.png +0 -0
- dstack/_internal/server/statics/54a4f50f74c6b9381530.svg +7 -0
- dstack/_internal/server/statics/68dd1360a7d2611e0132.svg +4 -0
- dstack/_internal/server/statics/69544b4c81973b54a66f.png +0 -0
- dstack/_internal/server/statics/77a8b02b17af19e39266.png +0 -0
- dstack/_internal/server/statics/83a93a8871c219104367.svg +9 -0
- dstack/_internal/server/statics/8f28bb8e9999e5e6a48b.svg +4 -0
- dstack/_internal/server/statics/9124086961ab8c366bc4.svg +9 -0
- dstack/_internal/server/statics/9a9ebaeb54b025dbac0a.svg +5 -0
- dstack/_internal/server/statics/a3428392dc534f3b15c4.svg +7 -0
- dstack/_internal/server/statics/ae22625574d69361f72c.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-144x144.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-192x192.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-256x256.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-36x36.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-384x384.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-48x48.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-512x512.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-72x72.png +0 -0
- dstack/_internal/server/statics/assets/android-chrome-96x96.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-1024x1024.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-114x114.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-120x120.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-144x144.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-152x152.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-167x167.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-180x180.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-57x57.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-60x60.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-72x72.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-76x76.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon-precomposed.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-icon.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1170x2532.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1179x2556.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1284x2778.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1290x2796.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1488x2266.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1640x2160.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2160x1640.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2266x1488.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2532x1170.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2556x1179.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2778x1284.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-2796x1290.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
- dstack/_internal/server/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
- dstack/_internal/server/statics/assets/browserconfig.xml +12 -0
- dstack/_internal/server/statics/assets/favicon-16x16.png +0 -0
- dstack/_internal/server/statics/assets/favicon-32x32.png +0 -0
- dstack/_internal/server/statics/assets/favicon-48x48.png +0 -0
- dstack/_internal/server/statics/assets/favicon.ico +0 -0
- dstack/{dashboard/statics/assets/manifest.json → _internal/server/statics/assets/manifest.webmanifest} +18 -9
- dstack/_internal/server/statics/assets/mstile-144x144.png +0 -0
- dstack/_internal/server/statics/assets/mstile-150x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x150.png +0 -0
- dstack/_internal/server/statics/assets/mstile-310x310.png +0 -0
- dstack/_internal/server/statics/assets/mstile-70x70.png +0 -0
- dstack/_internal/server/statics/assets/yandex-browser-50x50.png +0 -0
- dstack/_internal/server/statics/b7ae68f44193474fc578.png +0 -0
- dstack/_internal/server/statics/d2f008c75b2b5b191f3f.png +0 -0
- dstack/_internal/server/statics/d44c33e1b92e05c379fd.png +0 -0
- dstack/_internal/server/statics/dd43ff0552815179d7ab.png +0 -0
- dstack/_internal/server/statics/dd4e7166c0b9aac197d7.png +0 -0
- dstack/_internal/server/statics/e30b27916930d43d2271.png +0 -0
- dstack/_internal/server/statics/e467d7d60aae81ab198b.svg +6 -0
- dstack/_internal/server/statics/eb9b344b73818fe2b71a.png +0 -0
- dstack/_internal/server/statics/f517dd626eb964120de0.png +0 -0
- dstack/_internal/server/statics/f958aecddee5d8e3222c.png +0 -0
- dstack/_internal/server/statics/index.html +3 -0
- dstack/_internal/server/statics/logo-notext.svg +116 -0
- dstack/_internal/server/statics/main-2e6967bad9f29395eea6.css +3 -0
- dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js +155547 -0
- dstack/_internal/server/statics/main-7dc0f6d20b8b41659acc.js.map +1 -0
- dstack/{dashboard → _internal/server}/statics/manifest.json +2 -2
- dstack/_internal/server/statics/static/media/entraID.d65d1f3e9486a8e56d24fc07b3230885.svg +9 -0
- dstack/_internal/server/statics/static/media/google.b194b06fafd0a52aeb566922160ea514.svg +1 -0
- dstack/{dashboard/statics/static/media/logo.f9d7170678f68f796e270698633770ec.svg → _internal/server/statics/static/media/logo.f602feeb138844eda97c8cb641461448.svg} +8 -6
- dstack/_internal/server/statics/static/media/okta.12f178e6873a1100965f2a4dbd18fcec.svg +2 -0
- dstack/_internal/server/statics/static/media/theme.3994c817bb7dda191c1c9640dee0bf42.svg +3 -0
- dstack/_internal/server/testing/__init__.py +0 -0
- dstack/_internal/server/testing/common.py +1220 -0
- dstack/_internal/server/testing/conf.py +53 -0
- dstack/_internal/server/testing/matchers.py +31 -0
- dstack/_internal/server/utils/__init__.py +0 -0
- dstack/_internal/server/utils/common.py +55 -0
- dstack/_internal/server/utils/logging.py +51 -0
- dstack/_internal/server/utils/provisioning.py +368 -0
- dstack/_internal/server/utils/routers.py +166 -0
- dstack/_internal/server/utils/sentry_utils.py +24 -0
- dstack/_internal/settings.py +49 -0
- dstack/_internal/utils/__init__.py +0 -0
- dstack/_internal/utils/common.py +318 -0
- dstack/_internal/utils/cron.py +5 -0
- dstack/_internal/utils/crypto.py +40 -0
- dstack/_internal/utils/env.py +88 -0
- dstack/_internal/utils/event_loop.py +30 -0
- dstack/_internal/utils/files.py +69 -0
- dstack/_internal/utils/gpu.py +59 -0
- dstack/_internal/utils/hash.py +31 -0
- dstack/_internal/utils/interpolator.py +91 -0
- dstack/_internal/utils/json_schema.py +11 -0
- dstack/_internal/utils/json_utils.py +54 -0
- dstack/_internal/utils/logging.py +5 -0
- dstack/_internal/utils/nested_list.py +47 -0
- dstack/_internal/utils/network.py +50 -0
- dstack/_internal/utils/path.py +57 -0
- dstack/_internal/utils/random_names.py +258 -0
- dstack/_internal/utils/ssh.py +346 -0
- dstack/_internal/utils/tags.py +42 -0
- dstack/_internal/utils/typing.py +14 -0
- dstack/_internal/utils/version.py +22 -0
- dstack/api/__init__.py +46 -0
- dstack/api/_public/__init__.py +96 -0
- dstack/api/_public/backends.py +42 -0
- dstack/api/_public/common.py +5 -0
- dstack/api/_public/repos.py +202 -0
- dstack/api/_public/runs.py +714 -0
- dstack/api/server/__init__.py +206 -0
- dstack/api/server/_auth.py +30 -0
- dstack/api/server/_backends.py +38 -0
- dstack/api/server/_events.py +64 -0
- dstack/api/server/_files.py +18 -0
- dstack/api/server/_fleets.py +82 -0
- dstack/api/server/_gateways.py +54 -0
- dstack/api/server/_gpus.py +27 -0
- dstack/api/server/_group.py +22 -0
- dstack/api/server/_logs.py +15 -0
- dstack/api/server/_metrics.py +23 -0
- dstack/api/server/_projects.py +124 -0
- dstack/api/server/_repos.py +64 -0
- dstack/api/server/_runs.py +102 -0
- dstack/api/server/_secrets.py +36 -0
- dstack/api/server/_users.py +82 -0
- dstack/api/server/_volumes.py +39 -0
- dstack/api/server/utils.py +34 -0
- dstack/api/utils.py +105 -0
- dstack/core/__init__.py +0 -0
- dstack/plugins/__init__.py +8 -0
- dstack/plugins/_base.py +72 -0
- dstack/plugins/_models.py +8 -0
- dstack/plugins/_utils.py +19 -0
- dstack/plugins/builtin/__init__.py +0 -0
- dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
- dstack/plugins/builtin/rest_plugin/_models.py +48 -0
- dstack/plugins/builtin/rest_plugin/_plugin.py +147 -0
- dstack/version.py +3 -1
- dstack-0.20.7.dist-info/METADATA +519 -0
- dstack-0.20.7.dist-info/RECORD +720 -0
- {dstack-0.0.9.dist-info → dstack-0.20.7.dist-info}/WHEEL +1 -2
- dstack-0.20.7.dist-info/entry_points.txt +2 -0
- dstack-0.20.7.dist-info/licenses/LICENSE.md +353 -0
- dstack/aws/__init__.py +0 -180
- dstack/aws/artifacts.py +0 -111
- dstack/aws/config.py +0 -40
- dstack/aws/jobs.py +0 -245
- dstack/aws/logs.py +0 -186
- dstack/aws/repos.py +0 -137
- dstack/aws/run_names.py +0 -17
- dstack/aws/runners.py +0 -693
- dstack/aws/runs.py +0 -79
- dstack/aws/secrets.py +0 -99
- dstack/aws/tags.py +0 -138
- dstack/backend.py +0 -299
- dstack/cli/app.py +0 -41
- dstack/cli/artifacts.py +0 -87
- dstack/cli/common.py +0 -57
- dstack/cli/config.py +0 -194
- dstack/cli/dashboard.py +0 -26
- dstack/cli/delete.py +0 -49
- dstack/cli/init.py +0 -33
- dstack/cli/logs.py +0 -87
- dstack/cli/main.py +0 -81
- dstack/cli/restart.py +0 -43
- dstack/cli/run.py +0 -223
- dstack/cli/schema.py +0 -46
- dstack/cli/secrets.py +0 -97
- dstack/cli/status.py +0 -140
- dstack/cli/stop.py +0 -53
- dstack/cli/tags.py +0 -100
- dstack/config.py +0 -80
- dstack/dashboard/artifacts.py +0 -26
- dstack/dashboard/logs.py +0 -73
- dstack/dashboard/main.py +0 -45
- dstack/dashboard/repos.py +0 -41
- dstack/dashboard/runs.py +0 -140
- dstack/dashboard/secrets.py +0 -53
- dstack/dashboard/statics/4d6a4e032505c1efd23c.png +0 -0
- dstack/dashboard/statics/7e018c3e5566d7c349a8.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-144x144.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-192x192.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-256x256.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-36x36.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-384x384.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-48x48.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-512x512.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-72x72.png +0 -0
- dstack/dashboard/statics/assets/android-chrome-96x96.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-1024x1024.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-114x114.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-120x120.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-144x144.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-152x152.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-167x167.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-180x180.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-57x57.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-60x60.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-72x72.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-76x76.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon-precomposed.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-icon.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1125x2436.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1136x640.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2208.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1242x2688.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1334x750.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1536x2048.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1620x2160.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2224.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1668x2388.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-1792x828.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2048x1536.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2048x2732.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2160x1620.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2208x1242.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2224x1668.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2388x1668.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2436x1125.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2688x1242.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-2732x2048.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-640x1136.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-750x1334.png +0 -0
- dstack/dashboard/statics/assets/apple-touch-startup-image-828x1792.png +0 -0
- dstack/dashboard/statics/assets/browserconfig.xml +0 -15
- dstack/dashboard/statics/assets/coast-228x228.png +0 -0
- dstack/dashboard/statics/assets/favicon-16x16.png +0 -0
- dstack/dashboard/statics/assets/favicon-32x32.png +0 -0
- dstack/dashboard/statics/assets/favicon-48x48.png +0 -0
- dstack/dashboard/statics/assets/favicon.ico +0 -0
- dstack/dashboard/statics/assets/firefox_app_128x128.png +0 -0
- dstack/dashboard/statics/assets/firefox_app_512x512.png +0 -0
- dstack/dashboard/statics/assets/firefox_app_60x60.png +0 -0
- dstack/dashboard/statics/assets/manifest.webapp +0 -14
- dstack/dashboard/statics/assets/mstile-144x144.png +0 -0
- dstack/dashboard/statics/assets/mstile-150x150.png +0 -0
- dstack/dashboard/statics/assets/mstile-310x150.png +0 -0
- dstack/dashboard/statics/assets/mstile-310x310.png +0 -0
- dstack/dashboard/statics/assets/mstile-70x70.png +0 -0
- dstack/dashboard/statics/assets/yandex-browser-50x50.png +0 -0
- dstack/dashboard/statics/d0f71e48806e25d72553.png +0 -0
- dstack/dashboard/statics/index.html +0 -7
- dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js +0 -3
- dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.LICENSE.txt +0 -102
- dstack/dashboard/statics/main-1d87e34eb0454da8ebb4.js.map +0 -1
- dstack/dashboard/statics/main.css +0 -5058
- dstack/dashboard/statics/splash_thumbnail.png +0 -0
- dstack/dashboard/statics/static/media/check.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
- dstack/dashboard/statics/static/media/chevron-down.bfd8f22c4a5db4d443e76bca3b02f334.svg +0 -3
- dstack/dashboard/statics/static/media/chevron-up.bade0c5d82d741cead615813264140c9.svg +0 -3
- dstack/dashboard/statics/static/media/clock.583b744f29b9d143718a55e7c35fe38e.svg +0 -3
- dstack/dashboard/statics/static/media/close.a8bb9e47361b03a3b5084dad676ba1da.svg +0 -3
- dstack/dashboard/statics/static/media/content-copy.73f5f2a175094757758e315243a4111e.svg +0 -3
- dstack/dashboard/statics/static/media/delete-outline.6a8abf4e4f9cb777781967efd56efe9b.svg +0 -3
- dstack/dashboard/statics/static/media/dots-vertical.82fc618192e0c7dc4d615ff93269246a.svg +0 -3
- dstack/dashboard/statics/static/media/earth.1ad57c7f59f4be5c8bb2fa00439c3149.svg +0 -3
- dstack/dashboard/statics/static/media/email.320bc3af24a5f1bb41ebd85f66a5dd70.svg +0 -3
- dstack/dashboard/statics/static/media/external-link.99b88e699c15afb820a1779d9a2261ed.svg +0 -3
- dstack/dashboard/statics/static/media/eye-off-outline.5b4afb7ad624a44dd307518ff93d1faa.svg +0 -3
- dstack/dashboard/statics/static/media/eye-outline.ca41708feaaed1edb15c5fff021fbafe.svg +0 -3
- dstack/dashboard/statics/static/media/file-download-outline.3634b41923ba79b297ff294ef898661c.svg +0 -3
- dstack/dashboard/statics/static/media/folder-outline.33378387af61821dd1207e4b2d061a07.svg +0 -3
- dstack/dashboard/statics/static/media/github-circle.1bb85d171c31a3c2eebad07319377171.svg +0 -3
- dstack/dashboard/statics/static/media/infinity.915f92939afc0a37f94adba211ceb172.svg +0 -3
- dstack/dashboard/statics/static/media/layers.b4b02cea267a617d7aa44c2719250c89.svg +0 -3
- dstack/dashboard/statics/static/media/linkedin.1c52fae553eee54397f0e63a79455a5e.svg +0 -3
- dstack/dashboard/statics/static/media/loading.e466be7b2c1f0ac9e7e51ca929d0e37d.svg +0 -3
- dstack/dashboard/statics/static/media/lock.4a4c7768d0fa60c716609ddc483470ef.svg +0 -3
- dstack/dashboard/statics/static/media/magnify.0c803314d039d21f3cb1504ccd1437a4.svg +0 -3
- dstack/dashboard/statics/static/media/mark.3f68ffc787a15c0476793a6d18ecb71a.svg +0 -3
- dstack/dashboard/statics/static/media/menu-close.3ee84714181017c6ff837830297c8437.svg +0 -3
- dstack/dashboard/statics/static/media/menu.922f81e0972fbcbb5adcd8def20c86a3.svg +0 -3
- dstack/dashboard/statics/static/media/pencil.f706a3b9dcbff4959a91bf72e1e6324f.svg +0 -3
- dstack/dashboard/statics/static/media/refresh.a80edb948e98b322cd73b67814a57a48.svg +0 -3
- dstack/dashboard/statics/static/media/shape-plus.63b093c7f4b44c3def774f30fcfbceca.svg +0 -3
- dstack/dashboard/statics/static/media/slack.ec2fca99c6b944950ac65404ddd26880.svg +0 -4
- dstack/dashboard/statics/static/media/small-logo.b9cc8d09f646a553e65fa336dafd8b10.svg +0 -116
- dstack/dashboard/statics/static/media/source-branch.b8d22cfc42a7bed81f0fc08130818e85.svg +0 -3
- dstack/dashboard/statics/static/media/source-commit.be2bb53c081b9b6836adffccc0b8d3e6.svg +0 -3
- dstack/dashboard/statics/static/media/stop.11488ff1437ad929476be8924a3b7075.svg +0 -3
- dstack/dashboard/statics/static/media/tag-minus.15680a815b0b8d027e973c84832c05e6.svg +0 -3
- dstack/dashboard/statics/static/media/tag-outline.19b0bf86a8afd7d6d9c716e9a91d94ca.svg +0 -3
- dstack/dashboard/statics/static/media/twitter.4af18861c84a2f3044c7546b55d5739c.svg +0 -3
- dstack/dashboard/tags.py +0 -119
- dstack/jobs.py +0 -255
- dstack/providers/__init__.py +0 -316
- dstack/providers/_python/main.py +0 -88
- dstack/providers/_tensorboard/main.py +0 -93
- dstack/providers/_torchrun/main.py +0 -121
- dstack/providers/bash/main.py +0 -90
- dstack/providers/code/main.py +0 -95
- dstack/providers/docker/main.py +0 -79
- dstack/providers/lab/main.py +0 -95
- dstack/providers/notebook/main.py +0 -90
- dstack/random_name.py +0 -29
- dstack/repo.py +0 -135
- dstack/runners.py +0 -35
- dstack/util.py +0 -15
- dstack-0.0.9.dist-info/METADATA +0 -176
- dstack-0.0.9.dist-info/RECORD +0 -179
- dstack-0.0.9.dist-info/entry_points.txt +0 -3
- dstack-0.0.9.dist-info/top_level.txt +0 -2
- tests/test_config.py +0 -70
- /dstack/{cli → _internal}/__init__.py +0 -0
- /dstack/{dashboard → _internal/cli}/__init__.py +0 -0
- /dstack/{providers/_python → _internal/cli/models}/__init__.py +0 -0
- /dstack/{providers/_tensorboard → _internal/cli/services}/__init__.py +0 -0
- /dstack/{providers/_torchrun → _internal/cli/utils}/__init__.py +0 -0
- /dstack/{providers/bash → _internal/core}/__init__.py +0 -0
- /dstack/{providers/code → _internal/core/backends}/__init__.py +0 -0
- /dstack/{providers/docker → _internal/core/backends/aws}/__init__.py +0 -0
- /dstack/{providers/lab → _internal/core/backends/azure}/__init__.py +0 -0
- /dstack/{providers/notebook → _internal/core/backends/base}/__init__.py +0 -0
- {tests → dstack/_internal/core/backends/cloudrift}/__init__.py +0 -0
- /dstack/{dashboard → _internal/server}/statics/assets/yandex-browser-manifest.json +0 -0
- /dstack/{dashboard → _internal/server}/statics/robots.txt +0 -0
|
@@ -0,0 +1,1101 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import random
|
|
3
|
+
import re
|
|
4
|
+
import shlex
|
|
5
|
+
import string
|
|
6
|
+
import threading
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from collections.abc import Iterable, Iterator
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from functools import lru_cache
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Callable, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
import git
|
|
16
|
+
import requests
|
|
17
|
+
import yaml
|
|
18
|
+
from cachetools import Cache, TTLCache, cachedmethod
|
|
19
|
+
from gpuhunt import CPUArchitecture
|
|
20
|
+
|
|
21
|
+
from dstack._internal import settings
|
|
22
|
+
from dstack._internal.core.backends.base.models import JobConfiguration
|
|
23
|
+
from dstack._internal.core.backends.base.offers import OfferModifier, filter_offers_by_requirements
|
|
24
|
+
from dstack._internal.core.consts import (
|
|
25
|
+
DSTACK_RUNNER_HTTP_PORT,
|
|
26
|
+
DSTACK_RUNNER_SSH_PORT,
|
|
27
|
+
DSTACK_SHIM_HTTP_PORT,
|
|
28
|
+
)
|
|
29
|
+
from dstack._internal.core.models.backends.base import BackendType
|
|
30
|
+
from dstack._internal.core.models.compute_groups import ComputeGroup, ComputeGroupProvisioningData
|
|
31
|
+
from dstack._internal.core.models.gateways import (
|
|
32
|
+
GatewayComputeConfiguration,
|
|
33
|
+
GatewayProvisioningData,
|
|
34
|
+
)
|
|
35
|
+
from dstack._internal.core.models.instances import (
|
|
36
|
+
InstanceConfiguration,
|
|
37
|
+
InstanceOffer,
|
|
38
|
+
InstanceOfferWithAvailability,
|
|
39
|
+
SSHKey,
|
|
40
|
+
)
|
|
41
|
+
from dstack._internal.core.models.placement import PlacementGroup, PlacementGroupProvisioningData
|
|
42
|
+
from dstack._internal.core.models.routers import AnyRouterConfig
|
|
43
|
+
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
|
|
44
|
+
from dstack._internal.core.models.volumes import (
|
|
45
|
+
Volume,
|
|
46
|
+
VolumeAttachmentData,
|
|
47
|
+
VolumeProvisioningData,
|
|
48
|
+
)
|
|
49
|
+
from dstack._internal.core.services import is_valid_dstack_resource_name
|
|
50
|
+
from dstack._internal.utils.logging import get_logger
|
|
51
|
+
from dstack._internal.utils.path import PathLike
|
|
52
|
+
|
|
53
|
+
# Module-level logger, named after this module via `__name__`.
logger = get_logger(__name__)
|
|
54
|
+
|
|
55
|
+
# Names of the shim and runner binaries, and the shim restart interval in seconds.
DSTACK_SHIM_BINARY_NAME = "dstack-shim"
DSTACK_SHIM_RESTART_INTERVAL_SECONDS = 3
DSTACK_RUNNER_BINARY_NAME = "dstack-runner"

# Default private subnets in CIDR notation.
DEFAULT_PRIVATE_SUBNETS = ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")

# All NVIDIA architectures prior to Turing do not support Open Kernel Modules and require
# proprietary modules. This list is incomplete, update when necessary.
NVIDIA_GPUS_REQUIRING_PROPRIETARY_KERNEL_MODULES = frozenset(
    {
        "v100",
        "p100", "p40", "p4",
        "m60", "m40", "m4",
        "k80", "k40", "k20",
    }
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class GoArchType(str, Enum):
    """
    A subset of GOARCH values.

    Members are also `str` instances, so they compare equal to their raw string values.
    """

    AMD64 = "amd64"
    ARM64 = "arm64"

    def to_cpu_architecture(self) -> CPUArchitecture:
        """
        Returns the `CPUArchitecture` that corresponds to this GOARCH value.

        Raises `AssertionError` if a member has no mapping, which means a new member
        was added to the enum without extending this method.
        """
        if self is GoArchType.AMD64:
            return CPUArchitecture.X86
        if self is GoArchType.ARM64:
            return CPUArchitecture.ARM
        # Raise explicitly instead of `assert False`: assert statements are removed
        # under `python -O`, which would make this method silently return `None`.
        raise AssertionError(self)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass
class ComputeCache:
    """
    Bundles a `Cache` with a `threading.Lock`.
    """

    # The cache object; must be supplied by the caller.
    cache: Cache
    # Created per instance through `default_factory`, so instances never share a lock.
    # NOTE(review): presumably guards access to `cache` - confirm against the users of this class.
    lock: threading.Lock = field(default_factory=threading.Lock)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@dataclass
class ComputeTTLCache:
    """
    Bundles a `TTLCache` with a `threading.Lock`.
    """

    # The TTL cache object; must be supplied by the caller.
    cache: TTLCache
    # Created per instance through `default_factory`, so instances never share a lock.
    # NOTE(review): presumably guards access to `cache` - confirm against the users of this class.
    lock: threading.Lock = field(default_factory=threading.Lock)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class Compute(ABC):
    """
    The minimal interface every compute implementation has to provide.
    Computes that support additional features must subclass the matching
    `ComputeWith*` classes as well.
    """

    @abstractmethod
    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """
        Yields offers with availability that match `requirements`.
        For providers that are added to gpuhunt, the usual approach is to fetch offers with
        `base.offers.get_catalog_offers()` and then extend them with availability info.
        The method is called from async code in an executor: the call itself may block,
        but it must not block between yields.
        """

    @abstractmethod
    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Launches a new instance for the job and returns `JobProvisioningData` ASAP.
        If the IP address or SSH port are not known right away, return a partially filled
        `JobProvisioningData` and implement `update_provisioning_data()` instead of waiting.
        """

    @abstractmethod
    def terminate_instance(
        self,
        instance_id: str,
        region: str,
        backend_data: Optional[str] = None,
    ) -> None:
        """
        Terminates the instance identified by `instance_id`.
        A missing instance is not an error: return silently in that case.

        Must return ASAP. If some operation has to be waited for, raise `NotYetTerminated`;
        the method will then be called again after a few seconds.
        """

    def update_provisioning_data(
        self,
        provisioning_data: JobProvisioningData,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
    ):
        """
        Called when the `JobProvisioningData` returned from `run_job()`/`create_instance()`
        is incomplete, e.g. `hostname` or `ssh_port` is missing.
        Useful when getting complete provisioning data takes a long time.
        It should not wait but return immediately.
        If it raises `ProvisioningError`, no further attempts to update the provisioning data
        are made, and the run is terminated.
        """
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class ComputeWithAllOffersCached(ABC):
    """
    Provides common `get_offers()` implementation for backends
    whose offers do not depend on requirements.
    It caches all offers with availability and post-filters by requirements.
    """

    def __init__(self) -> None:
        super().__init__()
        self._offers_cache_lock = threading.Lock()
        # Holds a single entry: the full list of offers returned by
        # `get_all_offers_with_availability()`, kept for `ttl=180`.
        self._offers_cache = TTLCache(maxsize=1, ttl=180)

    @abstractmethod
    def get_all_offers_with_availability(self) -> List[InstanceOfferWithAvailability]:
        """
        Returns all backend offers with availability.
        """
        pass

    def get_offers_modifiers(self, requirements: Requirements) -> Iterable[OfferModifier]:
        """
        Returns functions that modify offers before they are filtered by requirements.
        A modifier function can return `None` to exclude the offer.
        E.g. can be used to set appropriate disk size based on requirements.
        """
        return []

    def get_offers_post_filter(
        self, requirements: Requirements
    ) -> Optional[Callable[[InstanceOfferWithAvailability], bool]]:
        """
        Returns a filter function to apply to offers based on requirements.
        This allows backends to implement custom post-filtering logic for specific requirements.
        """
        return None

    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """
        Returns the cached offers after applying, in order: the modifiers from
        `get_offers_modifiers()`, `filter_offers_by_requirements()`, and the optional
        predicate from `get_offers_post_filter()`.
        """
        cached_offers = self._get_all_offers_with_availability_cached()
        offers = self.__apply_modifiers(cached_offers, self.get_offers_modifiers(requirements))
        offers = filter_offers_by_requirements(offers, requirements)
        post_filter = self.get_offers_post_filter(requirements)
        if post_filter is not None:
            offers = (o for o in offers if post_filter(o))
        return offers

    @cachedmethod(
        cache=lambda self: self._offers_cache,
        lock=lambda self: self._offers_cache_lock,
    )
    def _get_all_offers_with_availability_cached(self) -> List[InstanceOfferWithAvailability]:
        """
        Cached wrapper around `get_all_offers_with_availability()`; the result is stored in
        `self._offers_cache` and access is guarded by `self._offers_cache_lock`.
        """
        return self.get_all_offers_with_availability()

    @staticmethod
    def __apply_modifiers(
        offers: Iterable[InstanceOfferWithAvailability], modifiers: Iterable[OfferModifier]
    ) -> Iterator[InstanceOfferWithAvailability]:
        """
        Passes every offer through all `modifiers` in order and yields the result.
        An offer is dropped as soon as one modifier returns `None` for it.
        """
        # `modifiers` is re-iterated for every offer. Materialize it once so that a one-shot
        # iterable (e.g. a generator returned by a subclass) is not exhausted after the
        # first offer, which would leave all later offers unmodified.
        modifiers = tuple(modifiers)
        for offer in offers:
            for modifier in modifiers:
                offer = modifier(offer)
                if offer is None:
                    break
            else:
                # Reached only when no modifier excluded the offer.
                yield offer
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
class ComputeWithFilteredOffersCached(ABC):
    """
    Mixin with a shared `get_offers()` implementation for backends
    whose offers depend on requirements.
    Offers are cached with the requirements acting as the cache key.
    """

    def __init__(self) -> None:
        super().__init__()
        # Both attributes are consumed by `cachedmethod` on `_get_offers_cached()`.
        self._offers_cache = TTLCache(maxsize=10, ttl=180)
        self._offers_cache_lock = threading.Lock()

    @abstractmethod
    def get_offers_by_requirements(
        self, requirements: Requirements
    ) -> List[InstanceOfferWithAvailability]:
        """
        Returns the backend offers (with availability) that match the given requirements.
        """
        pass

    def get_offers(self, requirements: Requirements) -> Iterator[InstanceOfferWithAvailability]:
        """
        Returns an iterator over the (possibly cached) offers for the requirements.
        """
        cached_offers = self._get_offers_cached(requirements)
        return iter(cached_offers)

    def _get_offers_cached_key(self, requirements: Requirements) -> int:
        # Requirements is not hashable, so the hash of its JSON form is used as the key
        serialized = requirements.json()
        return hash(serialized)

    @cachedmethod(
        cache=lambda self: self._offers_cache,
        key=_get_offers_cached_key,
        lock=lambda self: self._offers_cache_lock,
    )
    def _get_offers_cached(
        self, requirements: Requirements
    ) -> List[InstanceOfferWithAvailability]:
        """
        Cached wrapper around `get_offers_by_requirements()`.
        """
        return self.get_offers_by_requirements(requirements)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class ComputeWithCreateInstanceSupport(ABC):
    """
    Subclass and implement this to support fleets (creating instances without running a job).
    A compute that runs VMs would typically implement it,
    while a compute that runs containers would not.
    """

    @abstractmethod
    def create_instance(
        self,
        instance_offer: InstanceOfferWithAvailability,
        instance_config: InstanceConfiguration,
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Launches a new instance and returns `JobProvisioningData` as soon as possible.
        When the IP address or SSH port is not available without waiting, return
        a partially filled `JobProvisioningData` and implement `update_provisioning_data()`.
        """
        pass

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
        placement_group: Optional[PlacementGroup],
    ) -> JobProvisioningData:
        """
        Default `run_job()` for every backend that supports `create_instance()`.
        Override it only when custom `run_job()` behavior is needed.
        """
        run_spec = run.run_spec
        project_key = SSHKey(public=project_ssh_public_key.strip())
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_job_instance_name(run, job),
            user=run.user,
            ssh_keys=[project_key],
            volumes=volumes,
            reservation=run_spec.configuration.reservation,
            tags=run_spec.merged_profile.tags,
        )
        # Work on a copy so that narrowing the zones does not touch the caller's offer.
        restricted_offer = instance_offer.copy()
        self._restrict_instance_offer_az_to_volumes_az(restricted_offer, volumes)
        return self.create_instance(
            restricted_offer, instance_config, placement_group=placement_group
        )

    def _restrict_instance_offer_az_to_volumes_az(
        self,
        instance_offer: InstanceOfferWithAvailability,
        volumes: List[Volume],
    ):
        """
        Narrows `instance_offer.availability_zones` (in place) to the availability zone
        of the first volume, if that volume has one. Only the first volume is inspected.
        """
        if len(volumes) == 0:
            return
        provisioning_data = volumes[0].provisioning_data
        if provisioning_data is None:
            return
        volume_az = provisioning_data.availability_zone
        if volume_az is None:
            return
        offer_azs = instance_offer.availability_zones
        if offer_azs is None:
            # The offer carries no zone list: pin it to the volume's zone.
            offer_azs = [volume_az]
        instance_offer.availability_zones = [az for az in offer_azs if az == volume_az]
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class ComputeWithGroupProvisioningSupport(ABC):
    """
    Abstract mixin that declares `run_jobs()` and `terminate_compute_group()`.

    NOTE(review): presumably implemented by computes that provision several jobs
    as one compute group - confirm against the implementing backends.
    """

    @abstractmethod
    def run_jobs(
        self,
        run: Run,
        job_configurations: List[JobConfiguration],
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        placement_group: Optional[PlacementGroup],
    ) -> ComputeGroupProvisioningData:
        """
        Must be implemented by subclasses; returns `ComputeGroupProvisioningData`.
        """
        pass

    @abstractmethod
    def terminate_compute_group(self, compute_group: ComputeGroup):
        """
        Must be implemented by subclasses; receives the `ComputeGroup` to terminate.
        """
        pass
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
class ComputeWithPrivilegedSupport:
    """
    Marker mixin: subclass it to support runs with `privileged: true`.
    Every VM-based Compute (that is, a Compute that uses the shim) should subclass it.
    """
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class ComputeWithMultinodeSupport:
    """
    Marker mixin: subclass it to support multinode tasks and cluster fleets.
    Instances provisioned in the same project/region must be interconnected.
    """
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class ComputeWithReservationSupport:
    """
    Marker mixin: subclass it to support provisioning from reservations.

    A backend that supports reservations is expected to behave as follows:

    - `get_offers` honors `Requirements.reservation` when it is set and returns only
      the offers that can be provisioned in the configured reservation. If necessary,
      it may adjust offer properties such as `availability` and `availability_zones`.
    - `create_instance` honors `InstanceConfig.reservation` when it is set and
      provisions the instance in the configured reservation.
    """
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class ComputeWithPlacementGroupSupport(ABC):
    """
    Subclass and implement this to support placement groups.
    """

    @abstractmethod
    def create_placement_group(
        self,
        placement_group: PlacementGroup,
        master_instance_offer: InstanceOffer,
    ) -> PlacementGroupProvisioningData:
        """
        Creates a placement group.

        Args:
            placement_group: describes the placement group to create
            master_instance_offer: the first instance that dstack will try to add
                to the placement group
        """
        pass

    @abstractmethod
    def delete_placement_group(
        self,
        placement_group: PlacementGroup,
    ):
        """
        Deletes a placement group.
        A missing group is not an error: the method should return silently.
        """
        pass

    @abstractmethod
    def is_suitable_placement_group(
        self,
        placement_group: PlacementGroup,
        instance_offer: InstanceOffer,
    ) -> bool:
        """
        Tells whether the instance offer can be provisioned in the placement group.

        Should return immediately and must not perform API calls.
        """
        pass

    def are_placement_groups_compatible_with_reservations(self, backend_type: BackendType) -> bool:
        """
        Tells whether placement groups can be used for instances provisioned in reservations.

        Arguments:
            backend_type: equals the backend type of this compute, unless this compute
                is a proxy for other backends (dstack Sky)

        The default implementation always returns `True`.
        """
        return True
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class ComputeWithGatewaySupport(ABC):
    """
    Subclass and implement this to support gateways.
    """

    @abstractmethod
    def create_gateway(
        self,
        configuration: GatewayComputeConfiguration,
    ) -> GatewayProvisioningData:
        """
        Creates a gateway instance from the given configuration.
        """
        pass

    @abstractmethod
    def terminate_gateway(
        self,
        instance_id: str,
        configuration: GatewayComputeConfiguration,
        backend_data: Optional[str] = None,
    ):
        """
        Terminates a gateway instance. Usually this just forwards to `terminate_instance()`,
        but it may do extra work, such as deleting the load balancer of a gateway that has one.
        """
        pass
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
class ComputeWithPrivateGatewaySupport:
    """
    Marker mixin: subclass it to support private gateways.
    `create_gateway()` must then be able to create private gateways.
    """
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class ComputeWithVolumeSupport(ABC):
    """
    Subclass and implement this to support volumes.
    """

    @abstractmethod
    def register_volume(self, volume: Volume) -> VolumeProvisioningData:
        """
        Returns `VolumeProvisioningData` for a volume that already exists.
        This is how external volumes are added to dstack.
        """
        pass

    @abstractmethod
    def create_volume(self, volume: Volume) -> VolumeProvisioningData:
        """
        Creates a new volume.
        """
        raise NotImplementedError()

    @abstractmethod
    def delete_volume(self, volume: Volume):
        """
        Deletes a volume.
        """
        raise NotImplementedError()

    def attach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData
    ) -> VolumeAttachmentData:
        """
        Attaches a volume to the instance.
        Should raise `ComputeError()` when the volume is not found.
        Implement it only if the compute may return `VolumeProvisioningData.attachable`;
        otherwise `run_job()` should attach the volumes.
        """
        raise NotImplementedError()

    def detach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
    ):
        """
        Detaches a volume from the instance.
        Implement it only if the compute may return `VolumeProvisioningData.detachable`;
        otherwise volumes should be detached when the instance is terminated.
        """
        raise NotImplementedError()

    def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
        """
        Tells whether a volume has been detached from the instance.
        Override it to check the actual volume status if `detach_volume()` may fail
        to detach the volume. The caller triggers a force detach when the volume
        gets stuck detaching.

        The default implementation always returns `True`.
        """
        return True
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def get_dstack_working_dir(base_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the `.dstack` directory inside `base_path`
    (`/root` when `base_path` is not given).
    """
    parent = "/root" if base_path is None else base_path
    return str(Path(parent) / ".dstack")
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def get_dstack_shim_binary_path(bin_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the shim binary inside `bin_path`
    (`/usr/local/bin` when `bin_path` is not given).
    """
    directory = "/usr/local/bin" if bin_path is None else bin_path
    return str(Path(directory) / DSTACK_SHIM_BINARY_NAME)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def get_dstack_runner_binary_path(bin_path: Optional[PathLike] = None) -> str:
    """
    Returns the path of the runner binary inside `bin_path`
    (`/usr/local/bin` when `bin_path` is not given).
    """
    directory = "/usr/local/bin" if bin_path is None else bin_path
    return str(Path(directory) / DSTACK_RUNNER_BINARY_NAME)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def get_job_instance_name(run: Run, job: Job) -> str:
    """
    Returns the instance name for a job, which is the job name from its spec.
    `run` is accepted but not used.
    """
    job_spec = job.job_spec
    return job_spec.job_name
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
# Default `max_length` of the `generate_unique_*_name()` helpers below.
_DEFAULT_MAX_RESOURCE_NAME_LEN = 60
# Number of random characters `_generate_unique_backend_name_with_prefix()` appends to a name.
_CLOUD_RESOURCE_SUFFIX_LEN = 8
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def generate_unique_instance_name(
    instance_configuration: InstanceConfiguration,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique instance name that is valid across all backends,
    based on the instance and project names of the configuration.
    """
    instance_name = instance_configuration.instance_name
    project_name = instance_configuration.project_name
    return generate_unique_backend_name(
        resource_name=instance_name, project_name=project_name, max_length=max_length
    )
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def generate_unique_instance_name_for_job(
    run: Run,
    job: Job,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique instance name for a job that is valid across all backends,
    based on `get_job_instance_name()` and the run's project name.
    """
    instance_name = get_job_instance_name(run, job)
    project_name = run.project_name
    return generate_unique_backend_name(
        resource_name=instance_name, project_name=project_name, max_length=max_length
    )
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def generate_unique_gateway_instance_name(
    gateway_compute_configuration: GatewayComputeConfiguration,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique gateway instance name that is valid across all backends,
    based on the instance and project names of the gateway configuration.
    """
    instance_name = gateway_compute_configuration.instance_name
    project_name = gateway_compute_configuration.project_name
    return generate_unique_backend_name(
        resource_name=instance_name, project_name=project_name, max_length=max_length
    )
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
def generate_unique_volume_name(
    volume: Volume,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique volume name that is valid across all backends,
    based on the volume name and its project name.
    """
    volume_name = volume.name
    project_name = volume.project_name
    return generate_unique_backend_name(
        resource_name=volume_name, project_name=project_name, max_length=max_length
    )
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def generate_unique_placement_group_name(
    project_name: str,
    fleet_name: str,
    max_length: int = _DEFAULT_MAX_RESOURCE_NAME_LEN,
) -> str:
    """
    Builds a unique placement group name that is valid across all backends.
    The fleet name serves as the resource name.
    """
    unique_name = generate_unique_backend_name(
        resource_name=fleet_name, project_name=project_name, max_length=max_length
    )
    return unique_name
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def generate_unique_backend_name(
    resource_name: str,
    project_name: Optional[str],
    max_length: int,
) -> str:
    """
    Builds a unique resource name that is valid across all backends.
    Backend resource names have to be unique on every provisioning so that
    re-submitting/re-creating a resource does not cause conflicts
    on backends that require unique names (e.g. Azure, GCP).
    """
    # project_name is not guaranteed to be valid in all backends,
    # so it becomes part of the name only if it passes the validation.
    # resource_name is guaranteed to be valid in all backends.
    include_project = project_name is not None and is_valid_dstack_resource_name(project_name)
    if include_project:
        prefix = f"dstack-{project_name}-{resource_name}"
    else:
        prefix = f"dstack-{resource_name}"
    return _generate_unique_backend_name_with_prefix(prefix=prefix, max_length=max_length)
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def _generate_unique_backend_name_with_prefix(
    prefix: str,
    max_length: int,
) -> str:
    """
    Returns `<prefix>-<suffix>` where the suffix consists of
    `_CLOUD_RESOURCE_SUFFIX_LEN` random lowercase letters and digits.
    The prefix is cut so that, together with the dash and the suffix,
    the name takes at most `max_length` characters.
    """
    alphabet = string.ascii_lowercase + string.digits
    # Reserve room for the suffix and the dash that separates it from the prefix.
    truncated_prefix = prefix[: max_length - _CLOUD_RESOURCE_SUFFIX_LEN - 1]
    suffix_chars = [random.choice(alphabet) for _ in range(_CLOUD_RESOURCE_SUFFIX_LEN)]
    return truncated_prefix + "-" + "".join(suffix_chars)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def get_cloud_config(**config) -> str:
    """
    Dumps the keyword arguments as block-style YAML
    preceded by the `#cloud-config` header line.
    """
    body = yaml.dump(config, default_flow_style=False)
    return f"#cloud-config\n{body}"
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def get_user_data(
    authorized_keys: List[str],
    backend_specific_commands: Optional[List[str]] = None,
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    skip_firewall_setup: bool = False,
    firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
) -> str:
    """
    Builds the cloud-config user data for an instance.

    The backend-specific commands (if any) come first, followed by the commands from
    `get_shim_commands()`. All of them are chained with `&&` into a single `sh -c`
    entry of `runcmd`. `authorized_keys` become `ssh_authorized_keys`.
    """
    shim_commands = get_shim_commands(
        base_path=base_path,
        bin_path=bin_path,
        backend_shim_env=backend_shim_env,
        skip_firewall_setup=skip_firewall_setup,
        firewall_allow_from_subnets=firewall_allow_from_subnets,
    )
    all_commands = [*(backend_specific_commands or []), *shim_commands]
    script = " && ".join(all_commands)
    return get_cloud_config(
        runcmd=[["sh", "-c", script]],
        ssh_authorized_keys=authorized_keys,
    )
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def get_shim_env(
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    arch: Optional[str] = None,
) -> Dict[str, str]:
    """
    Returns the `DSTACK_SHIM_*`/`DSTACK_RUNNER_*` environment variables for the shim.
    Entries of `backend_shim_env`, when given, are applied last
    and override the defaults with the same name.
    """
    debug_level = "5"  # Debug
    shim_env = {
        "DSTACK_SHIM_HOME": get_dstack_working_dir(base_path),
        "DSTACK_SHIM_HTTP_PORT": str(DSTACK_SHIM_HTTP_PORT),
        "DSTACK_SHIM_LOG_LEVEL": debug_level,
        "DSTACK_RUNNER_DOWNLOAD_URL": get_dstack_runner_download_url(arch),
        "DSTACK_RUNNER_BINARY_PATH": get_dstack_runner_binary_path(bin_path),
        "DSTACK_RUNNER_HTTP_PORT": str(DSTACK_RUNNER_HTTP_PORT),
        "DSTACK_RUNNER_SSH_PORT": str(DSTACK_RUNNER_SSH_PORT),
        "DSTACK_RUNNER_LOG_LEVEL": debug_level,
    }
    if backend_shim_env is None:
        return shim_env
    shim_env.update(backend_shim_env)
    return shim_env
|
|
733
|
+
|
|
734
|
+
|
|
735
|
+
def get_shim_commands(
    *,
    is_privileged: bool = False,
    pjrt_device: Optional[str] = None,
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    backend_shim_env: Optional[Dict[str, str]] = None,
    arch: Optional[str] = None,
    skip_firewall_setup: bool = False,
    firewall_allow_from_subnets: Iterable[str] = DEFAULT_PRIVATE_SUBNETS,
) -> List[str]:
    """
    Returns the shell commands that prepare an instance and start the shim, in order:
    instance setup, shim pre-start commands, one `export` per shim
    environment variable, and the script that runs the shim.
    """
    commands = get_setup_cloud_instance_commands(
        skip_firewall_setup=skip_firewall_setup,
        firewall_allow_from_subnets=firewall_allow_from_subnets,
    )
    commands.extend(
        get_shim_pre_start_commands(base_path=base_path, bin_path=bin_path, arch=arch)
    )
    shim_env = get_shim_env(
        base_path=base_path,
        bin_path=bin_path,
        backend_shim_env=backend_shim_env,
        arch=arch,
    )
    commands.extend(f'export "{name}={value}"' for name, value in shim_env.items())
    commands.extend(
        get_run_shim_script(
            is_privileged=is_privileged,
            pjrt_device=pjrt_device,
            bin_path=bin_path,
        )
    )
    return commands
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def get_dstack_runner_version() -> Optional[str]:
    """
    Resolves the runner version. The first source that is set wins:
    `DSTACK_VERSION`, `DSTACK_RUNNER_VERSION`, the version fetched from
    `DSTACK_RUNNER_VERSION_URL`, and, when `DSTACK_USE_LATEST_FROM_BRANCH` is set,
    `get_latest_runner_build()`. Returns `None` if none of them applies.
    """
    pinned_version = settings.DSTACK_VERSION or settings.DSTACK_RUNNER_VERSION
    if pinned_version:
        return pinned_version
    version_url = settings.DSTACK_RUNNER_VERSION_URL
    if version_url:
        return _fetch_version(version_url)
    return get_latest_runner_build() if settings.DSTACK_USE_LATEST_FROM_BRANCH else None
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def get_dstack_shim_version() -> Optional[str]:
    """
    Resolves the shim version. The first source that is set wins:
    `DSTACK_VERSION`, `DSTACK_SHIM_VERSION`, `DSTACK_RUNNER_VERSION` (with a warning
    about the fallback), the version fetched from `DSTACK_SHIM_VERSION_URL`, and,
    when `DSTACK_USE_LATEST_FROM_BRANCH` is set, `get_latest_runner_build()`.
    Returns `None` if none of them applies.
    """
    pinned_version = settings.DSTACK_VERSION or settings.DSTACK_SHIM_VERSION
    if pinned_version:
        return pinned_version
    runner_version = settings.DSTACK_RUNNER_VERSION
    if runner_version:
        logger.warning(
            "DSTACK_SHIM_VERSION is not set, using DSTACK_RUNNER_VERSION."
            " Future versions will not fall back to DSTACK_RUNNER_VERSION."
            " Set DSTACK_SHIM_VERSION to supress this warning."
        )
        return runner_version
    version_url = settings.DSTACK_SHIM_VERSION_URL
    if version_url:
        return _fetch_version(version_url)
    return get_latest_runner_build() if settings.DSTACK_USE_LATEST_FROM_BRANCH else None
|
|
800
|
+
|
|
801
|
+
|
|
802
|
+
def normalize_arch(arch: Optional[str] = None) -> GoArchType:
    """
    Translates a free-form architecture string into the Go GOARCH format.
    Only 64-bit x86 and ARM are supported. A name without a word size
    (e.g., `x86`, `arm`) is treated as 64-bit.
    A missing or empty arch falls back to `amd64`.

    Raises:
        ValueError: for 32-bit and for unrecognized architectures.
    """
    if not arch:
        return GoArchType.AMD64
    normalized = arch.lower()
    is_32_bit = "32" in normalized or normalized in ("i386", "i686")
    if is_32_bit:
        raise ValueError(f"32-bit architectures are not supported: {arch}")
    if normalized.startswith(("x86", "amd")):
        return GoArchType.AMD64
    if normalized.startswith(("arm", "aarch")):
        return GoArchType.ARM64
    raise ValueError(f"Unsupported architecture: {arch}")
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def get_dstack_runner_download_url(
    arch: Optional[str] = None, version: Optional[str] = None
) -> str:
    """
    Returns the download URL of the runner binary.

    The URL template comes from `DSTACK_RUNNER_DOWNLOAD_URL`. When that is not set,
    an S3 URL is used: the `dstack-runner-downloads` bucket if `DSTACK_VERSION`
    is set, the `-stgn` bucket otherwise. A missing `version` is resolved with
    `get_dstack_runner_version()` and defaults to `latest`.
    """
    template = settings.DSTACK_RUNNER_DOWNLOAD_URL
    if not template:
        bucket = (
            "dstack-runner-downloads"
            if settings.DSTACK_VERSION is not None
            else "dstack-runner-downloads-stgn"
        )
        # Only the host part is interpolated here; `{version}` and `{arch}` stay
        # as placeholders for `_format_download_url()`.
        template = (
            f"https://{bucket}.s3.eu-west-1.amazonaws.com"
            + "/{version}/binaries/dstack-runner-linux-{arch}"
        )
    resolved_version = version
    if resolved_version is None:
        resolved_version = get_dstack_runner_version() or "latest"
    return _format_download_url(template, resolved_version, arch)
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def get_dstack_shim_download_url(arch: Optional[str] = None, version: Optional[str] = None) -> str:
    """
    Returns the download URL of the shim binary.

    The URL template comes from `DSTACK_SHIM_DOWNLOAD_URL`. When that is not set,
    an S3 URL is used: the `dstack-runner-downloads` bucket if `DSTACK_VERSION`
    is set, the `-stgn` bucket otherwise. A missing `version` is resolved with
    `get_dstack_shim_version()` and defaults to `latest`.
    """
    template = settings.DSTACK_SHIM_DOWNLOAD_URL
    if not template:
        bucket = (
            "dstack-runner-downloads"
            if settings.DSTACK_VERSION is not None
            else "dstack-runner-downloads-stgn"
        )
        # Only the host part is interpolated here; `{version}` and `{arch}` stay
        # as placeholders for `_format_download_url()`.
        template = (
            f"https://{bucket}.s3.eu-west-1.amazonaws.com"
            + "/{version}/binaries/dstack-shim-linux-{arch}"
        )
    resolved_version = version
    if resolved_version is None:
        resolved_version = get_dstack_shim_version() or "latest"
    return _format_download_url(template, resolved_version, arch)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def get_setup_cloud_instance_commands(
    skip_firewall_setup: bool,
    firewall_allow_from_subnets: Iterable[str],
) -> list[str]:
    """
    Returns the shell commands that set up a freshly started cloud instance:
    a best-effort patch of the Docker daemon config and, unless
    `skip_firewall_setup` is set, the `ufw` firewall rules.
    """
    # Workaround for https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
    # Attempts to patch /etc/docker/daemon.json while keeping any custom settings it may have.
    docker_cgroup_patch = (
        "/bin/sh -c '"  # wrap in /bin/sh to avoid interfering with other cloud init commands
        " grep -q nvidia /etc/docker/daemon.json"
        " && ! grep -q native.cgroupdriver /etc/docker/daemon.json"
        " && jq '\\''.\"exec-opts\" = ((.\"exec-opts\" // []) + [\"native.cgroupdriver=cgroupfs\"])'\\'' /etc/docker/daemon.json > /tmp/daemon.json"
        " && sudo mv /tmp/daemon.json /etc/docker/daemon.json"
        " && sudo service docker restart"
        " || true"
        "'"
    )
    commands = [docker_cgroup_patch]
    if skip_firewall_setup:
        return commands
    commands.extend(
        [
            "ufw --force reset",  # Some OS images have default rules like `allow 80`. Delete them
            "ufw default deny incoming",
            "ufw default allow outgoing",
            "ufw allow ssh",
        ]
    )
    commands.extend(f"ufw allow from {subnet}" for subnet in firewall_allow_from_subnets)
    commands.append("ufw --force enable")
    return commands
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
def get_shim_pre_start_commands(
    base_path: Optional[PathLike] = None,
    bin_path: Optional[PathLike] = None,
    arch: Optional[str] = None,
) -> List[str]:
    """
    Returns the shell commands that download the shim binary to a temporary file,
    move it to its final path, make it executable, and create the dstack working directory.
    """
    download_url = get_dstack_shim_download_url(arch)
    shim_path = get_dstack_shim_binary_path(bin_path)
    working_dir = get_dstack_working_dir(base_path)
    create_temp_file = f"dlpath=$(sudo mktemp -t {DSTACK_SHIM_BINARY_NAME}.XXXXXXXXXX)"
    # -sS -- disable progress meter and warnings, but still show errors (unlike bare -s)
    download = f'sudo curl -sS --compressed --connect-timeout 60 --max-time 240 --retry 1 --output "$dlpath" "{download_url}"'
    install = f'sudo mv "$dlpath" {shim_path}'
    make_executable = f"sudo chmod +x {shim_path}"
    create_working_dir = f"sudo mkdir {working_dir} -p"
    return [create_temp_file, download, install, make_executable, create_working_dir]
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def get_run_shim_script(
    is_privileged: bool,
    pjrt_device: Optional[str],
    bin_path: Optional[PathLike] = None,
) -> List[str]:
    """
    Returns a one-element list with the shell snippet that starts the shim.

    The snippet runs the shim binary in a background `nohup sh -c` loop: whenever
    the shim exits, it is started again after `DSTACK_SHIM_RESTART_INTERVAL_SECONDS`.
    `--privileged` is passed when `is_privileged` is set, and
    `--pjrt-device=<pjrt_device>` when `pjrt_device` is given.
    """
    dstack_shim_binary_path = get_dstack_shim_binary_path(bin_path)
    # Both flags collapse to an empty string when they do not apply.
    privileged_flag = "--privileged" if is_privileged else ""
    pjrt_device_env = f"--pjrt-device={pjrt_device}" if pjrt_device else ""
    # TODO: Use a proper process supervisor?
    # NOTE(review): the whitespace inside this literal is reproduced as visible in the
    # reviewed copy - confirm it against the original file before applying.
    return [
        f"""
nohup sh -c '
while true; do
{dstack_shim_binary_path} {privileged_flag} {pjrt_device_env}
sleep {DSTACK_SHIM_RESTART_INTERVAL_SECONDS}
done
' &
""",
    ]
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
def get_gateway_user_data(authorized_key: str, router: Optional[AnyRouterConfig] = None) -> str:
    """
    Builds the cloud-config user data for a gateway instance.

    It installs `nginx`, `python3.10-venv`, and `certbot` (via snap), raises
    `server_names_hash_bucket_size` in the nginx config, and runs the commands from
    `get_dstack_gateway_commands()` as the `ubuntu` user. `authorized_key`
    becomes the only entry of `ssh_authorized_keys`.
    """
    gateway_script = " && ".join(get_dstack_gateway_commands(router))
    link_certbot = ["ln", "-s", "/snap/bin/certbot", "/usr/bin/certbot"]
    raise_nginx_hash_bucket_size = [
        "sed",
        "-i",
        "s/# server_names_hash_bucket_size 64;/server_names_hash_bucket_size 128;/",
        "/etc/nginx/nginx.conf",
    ]
    run_gateway_setup = ["su", "ubuntu", "-c", gateway_script]
    return get_cloud_config(
        package_update=True,
        packages=["nginx", "python3.10-venv"],
        snap={"commands": [["install", "--classic", "certbot"]]},
        runcmd=[link_certbot, raise_nginx_hash_bucket_size, run_gateway_setup],
        ssh_authorized_keys=[authorized_key],
    )
|
|
947
|
+
|
|
948
|
+
|
|
949
|
+
def get_docker_commands(
    authorized_keys: list[str],
    bin_path: Optional[PathLike] = None,
) -> list[str]:
    """
    Returns the shell commands that install `sshd` and `curl` if they are missing,
    download the runner binary, and start it with the given SSH authorized keys.
    """
    runner_path = get_dstack_runner_binary_path(bin_path)

    # The preparation steps are wrapped into `( : ... : )` by the first and last entries.
    prepare_commands = [
        "( :",
        # See https://github.com/dstackai/dstack/issues/1769
        "unset LD_LIBRARY_PATH && unset LD_PRELOAD",
        # common functions
        'exists() { command -v "$1" > /dev/null 2>&1; }',
        # package manager detection/abstraction
        "install_pkg() { NAME=Distribution; test -f /etc/os-release && . /etc/os-release; echo $NAME not supported; exit 11; }",
        'if exists apt-get; then install_pkg() { apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y "$1"; }; fi',
        'if exists yum; then install_pkg() { yum install -y "$1"; }; fi',
        'if exists apk; then install_pkg() { apk add -U "$1"; }; fi',
        # check if sshd is here, install if not
        "if ! exists sshd; then install_pkg openssh-server; fi",
        # install curl if necessary
        "if ! exists curl; then install_pkg curl; fi",
        ": )",
    ]

    runner_args = [
        runner_path,
        "--log-level",
        "6",
        "start",
        "--temp-dir",
        "/tmp/runner",
        "--http-port",
        str(DSTACK_RUNNER_HTTP_PORT),
        "--ssh-port",
        str(DSTACK_RUNNER_SSH_PORT),
    ]
    for key in authorized_keys:
        runner_args.extend(["--ssh-authorized-key", key])

    download_url = get_dstack_runner_download_url()
    prepare_commands.extend(
        [
            f"curl --connect-timeout 60 --max-time 240 --retry 1 --output {runner_path} {download_url}",
            f"chmod +x {runner_path}",
            shlex.join(runner_args),
        ]
    )
    return prepare_commands
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
@lru_cache()  # Restart the server to find the latest build
def get_latest_runner_build() -> Optional[str]:
    """
    Look up a runner build version matching the local git checkout.

    The lookup only proceeds when this module lives inside a git repository
    that has a remote pointing at github.com `dstackai/dstack`. It then asks
    the GitHub API for successful `build.yml` workflow runs and picks the first
    listed run whose head commit is an ancestor of the local HEAD.

    Returns:
        The run number plus a fixed offset, as a string, or `None` if the
        module is not in a matching git checkout or no listed run qualifies.

    Raises:
        requests.HTTPError: if the GitHub API responds with an error status.
        git.GitCommandError: if the ancestry check fails for a reason other
            than an unknown commit.
    """
    owner_repo = "dstackai/dstack"
    workflow_id = "build.yml"
    version_offset = 150

    module_dir = os.path.abspath(os.path.dirname(__file__))
    try:
        repo = git.Repo(module_dir, search_parent_directories=True)
    except git.InvalidGitRepositoryError:
        return None

    # Matches both SSH (git@github.com:owner/repo.) and HTTPS (/github.com/owner/repo.) remotes
    remote_pattern = re.compile(rf"[@/]github\.com[:/]{owner_repo}\.")
    if not any(remote_pattern.search(remote.url) for remote in repo.remotes):
        return None

    runs_url = f"https://api.github.com/repos/{owner_repo}/actions/workflows/{workflow_id}/runs"
    request_headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    response = requests.get(
        runs_url,
        headers=request_headers,
        params={"status": "success"},
        timeout=10,
    )
    response.raise_for_status()

    head_commit = repo.head.commit
    for workflow_run in response.json()["workflow_runs"]:
        try:
            reachable = repo.is_ancestor(workflow_run["head_sha"], head_commit)
        except git.GitCommandError as e:
            # Commits unknown to the local repository are skipped; anything else is unexpected
            if "Not a valid commit name" not in e.stderr:
                raise
            continue
        if not reachable:
            continue
        build = str(workflow_run["run_number"] + version_offset)
        logger.debug("Found the latest runner build: %s", build)
        return build
    return None
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
def get_dstack_gateway_wheel(build: str, router: Optional[AnyRouterConfig] = None) -> str:
    """
    Build the pip requirement spec for the dstack-gateway wheel.

    Args:
        build: the gateway build to install. The special value `"latest"` is
            resolved by fetching `latest-version` from the download bucket;
            if that response is empty, `"latest"` is used as is.
        router: optional router config. When given, its `type` is added to the
            spec as an extra.

    Returns:
        A requirement string of the form `dstack-gateway[extra] @ <wheel url>`,
        or `dstack-gateway @ <wheel url>` when no router is given.
    """
    if settings.DSTACK_RELEASE:
        channel = "release"
    else:
        channel = "stgn"
    base_url = f"https://dstack-gateway-downloads.s3.amazonaws.com/{channel}"

    if build == "latest":
        build = _fetch_version(f"{base_url}/latest-version") or "latest"
        logger.debug("Found the latest gateway build: %s", build)

    wheel = f"{base_url}/dstack_gateway-{build}-py3-none-any.whl"
    if not router:
        return f"dstack-gateway @ {wheel}"
    # Build package spec with extras since a router is specified
    return f"dstack-gateway[{router.type}] @ {wheel}"
|
|
1050
|
+
|
|
1051
|
+
|
|
1052
|
+
def get_dstack_gateway_commands(router: Optional[AnyRouterConfig] = None) -> List[str]:
    """
    Return the shell commands that install and start the dstack gateway.

    The commands create two virtual environments (`blue` and `green`) under
    `/home/ubuntu/dstack`, install the gateway package into `blue`, and run
    the gateway's systemd installer from it.

    Args:
        router: optional router config forwarded to `get_dstack_gateway_wheel`.

    Returns:
        The list of commands, in the order they are meant to be executed.
    """
    # Use the runner version as the gateway build, falling back to "latest"
    gateway_build = get_dstack_runner_version() or "latest"
    package_spec = get_dstack_gateway_wheel(gateway_build, router)
    install_commands = [
        "mkdir -p /home/ubuntu/dstack",
        "python3 -m venv /home/ubuntu/dstack/blue",
        "python3 -m venv /home/ubuntu/dstack/green",
        f"/home/ubuntu/dstack/blue/bin/pip install '{package_spec}'",
        "sudo /home/ubuntu/dstack/blue/bin/python -m dstack.gateway.systemd install --run",
    ]
    return install_commands
|
|
1062
|
+
|
|
1063
|
+
|
|
1064
|
+
def merge_tags(
    base_tags: Dict[str, str],
    backend_tags: Optional[Dict[str, str]] = None,
    resource_tags: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """
    Combine tag dictionaries without overriding higher-priority tags.

    Priority, from highest to lowest: `base_tags`, `backend_tags`,
    `resource_tags`. A key already present is never overwritten.

    Args:
        base_tags: tags that always win. The dict itself is not modified.
        backend_tags: optional tags applied after the base tags.
        resource_tags: optional tags applied last.

    Returns:
        A new dict with the merged tags.
    """
    merged = base_tags.copy()
    # backend_tags have priority over resource_tags
    # so that regular users do not override the tags set by admins
    for extra_tags in (backend_tags, resource_tags):
        if extra_tags is None:
            continue
        for key, value in extra_tags.items():
            if key not in merged:
                merged[key] = value
    return merged
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def requires_nvidia_proprietary_kernel_modules(gpu_name: str) -> bool:
    """
    Check whether an NVIDIA GPU is listed as needing proprietary kernel modules.

    The name is lowercased before it is looked up in
    `NVIDIA_GPUS_REQUIRING_PROPRIETARY_KERNEL_MODULES`.

    Args:
        gpu_name: the GPU name, in any letter case.

    Returns:
        Whether this NVIDIA GPU requires NVIDIA proprietary kernel modules
        instead of open kernel modules.
    """
    normalized_name = gpu_name.lower()
    return normalized_name in NVIDIA_GPUS_REQUIRING_PROPRIETARY_KERNEL_MODULES
|
|
1088
|
+
|
|
1089
|
+
|
|
1090
|
+
def _fetch_version(url: str) -> Optional[str]:
    """
    Download a version string from the given URL.

    Args:
        url: the location to fetch with an HTTP GET (5 second timeout).

    Returns:
        The response body with surrounding whitespace stripped, or `None`
        (after logging a warning) if the stripped body is empty.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    response = requests.get(url, timeout=5)
    response.raise_for_status()
    version = response.text.strip()
    if version:
        return version
    logger.warning("Empty version response from URL: %s", url)
    return None
|
|
1098
|
+
|
|
1099
|
+
|
|
1100
|
+
def _format_download_url(template: str, version: str, arch: Optional[str]) -> str:
    """
    Fill the `version` and `arch` placeholders of a download URL template.

    Args:
        template: a `str.format` template using the `version` and `arch` fields.
        version: the value substituted for `version`.
        arch: the architecture name; it is passed through `normalize_arch`
            and the `.value` of the result is substituted for `arch`.

    Returns:
        The formatted URL.
    """
    arch_value = normalize_arch(arch).value
    return template.format(version=version, arch=arch_value)
|