skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
"""REST API for managed jobs."""
|
2
|
+
import os
|
3
|
+
|
4
|
+
import fastapi
|
5
|
+
import httpx
|
6
|
+
|
7
|
+
from sky import sky_logging
|
8
|
+
from sky.jobs.server import core
|
9
|
+
from sky.jobs.server import dashboard_utils
|
10
|
+
from sky.server import common as server_common
|
11
|
+
from sky.server import stream_utils
|
12
|
+
from sky.server.requests import executor
|
13
|
+
from sky.server.requests import payloads
|
14
|
+
from sky.server.requests import requests as api_requests
|
15
|
+
from sky.skylet import constants
|
16
|
+
from sky.utils import common
|
17
|
+
from sky.utils import common_utils
|
18
|
+
|
19
|
+
logger = sky_logging.init_logger(__name__)
|
20
|
+
|
21
|
+
router = fastapi.APIRouter()
|
22
|
+
|
23
|
+
|
24
|
+
@router.post('/launch')
async def launch(request: fastapi.Request,
                 jobs_launch_body: payloads.JobsLaunchBody) -> None:
    """Hand a managed-job launch over to the request executor.

    The posted body is scheduled under the name 'jobs.launch' as a LONG
    request that runs ``core.launch``, associated with the jobs controller
    cluster name.
    """
    launch_request = dict(
        request_id=request.state.request_id,
        request_name='jobs.launch',
        request_body=jobs_launch_body,
        func=core.launch,
        schedule_type=api_requests.ScheduleType.LONG,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
    executor.schedule_request(**launch_request)
|
35
|
+
|
36
|
+
|
37
|
+
@router.post('/queue')
async def queue(request: fastapi.Request,
                jobs_queue_body: payloads.JobsQueueBody) -> None:
    """Hand a managed-job queue query over to the request executor.

    The request runs ``core.queue`` under the name 'jobs.queue'. It is
    scheduled as LONG when the body asks for a refresh and as SHORT
    otherwise.
    """
    if jobs_queue_body.refresh:
        queue_schedule = api_requests.ScheduleType.LONG
    else:
        queue_schedule = api_requests.ScheduleType.SHORT

    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='jobs.queue',
        request_body=jobs_queue_body,
        func=core.queue,
        schedule_type=queue_schedule,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
|
49
|
+
|
50
|
+
|
51
|
+
@router.post('/cancel')
async def cancel(request: fastapi.Request,
                 jobs_cancel_body: payloads.JobsCancelBody) -> None:
    """Hand a managed-job cancellation over to the request executor.

    The posted body is scheduled under the name 'jobs.cancel' as a SHORT
    request that runs ``core.cancel``, associated with the jobs controller
    cluster name.
    """
    current_request_id = request.state.request_id
    executor.schedule_request(
        request_id=current_request_id,
        request_name='jobs.cancel',
        request_body=jobs_cancel_body,
        func=core.cancel,
        schedule_type=api_requests.ScheduleType.SHORT,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
|
62
|
+
|
63
|
+
|
64
|
+
@router.post('/logs')
async def logs(
    request: fastapi.Request, jobs_logs_body: payloads.JobsLogsBody,
    background_tasks: fastapi.BackgroundTasks
) -> fastapi.responses.StreamingResponse:
    """Schedule a managed-job log tail and stream the request's log file.

    The request runs ``core.tail_logs`` under the name 'jobs.logs'. The
    stored request record is then looked up by id and its log path is
    streamed back to the client.

    Args:
        request: Incoming request; ``request.state.request_id`` identifies
            the scheduled request.
        jobs_logs_body: Posted payload; its ``refresh`` flag selects the
            schedule type.
        background_tasks: Passed through to ``stream_utils.stream_response``.

    Returns:
        The response produced by ``stream_utils.stream_response``.
    """
    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='jobs.logs',
        request_body=jobs_logs_body,
        func=core.tail_logs,
        # Use the same refresh mapping as the /queue and /download_logs
        # routes: a refresh goes to LONG, everything else to SHORT. The
        # previous expression had the two branches swapped.
        schedule_type=(api_requests.ScheduleType.LONG if jobs_logs_body.refresh
                       else api_requests.ScheduleType.SHORT),
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
    request_task = api_requests.get_request(request.state.request_id)

    return stream_utils.stream_response(
        request_id=request_task.request_id,
        logs_path=request_task.log_path,
        background_tasks=background_tasks,
    )
|
85
|
+
|
86
|
+
|
87
|
+
@router.post('/download_logs')
async def download_logs(
        request: fastapi.Request,
        jobs_download_logs_body: payloads.JobsDownloadLogsBody) -> None:
    """Schedule a managed-job log download into a per-user server directory.

    The user hash is read from the body's env vars, the matching logs
    directory on the API server is created if missing, and the body's
    ``local_dir`` is pointed at it before the request is scheduled. The
    request runs ``core.download_logs`` as LONG when the body asks for a
    refresh and as SHORT otherwise.
    """
    requester_hash = jobs_download_logs_body.env_vars[
        constants.USER_ID_ENV_VAR]
    server_logs_dir = server_common.api_server_user_logs_dir_prefix(
        requester_hash)
    # Only the expanded path is created on disk; the unexpanded form is what
    # gets stored in the body below.
    expanded_logs_dir = server_logs_dir.expanduser()
    expanded_logs_dir.mkdir(parents=True, exist_ok=True)

    # We should reuse the original request body, so that the env vars, such as
    # user hash, are kept the same.
    jobs_download_logs_body.local_dir = str(server_logs_dir)

    if jobs_download_logs_body.refresh:
        download_schedule = api_requests.ScheduleType.LONG
    else:
        download_schedule = api_requests.ScheduleType.SHORT

    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='jobs.download_logs',
        request_body=jobs_download_logs_body,
        func=core.download_logs,
        schedule_type=download_schedule,
        request_cluster_name=common.JOB_CONTROLLER_NAME,
    )
|
107
|
+
|
108
|
+
|
109
|
+
@router.get('/dashboard')
async def dashboard(request: fastapi.Request,
                    user_hash: str) -> fastapi.Response:
    """Proxy a GET request to the jobs dashboard forwarded on localhost.

    Under the per-user dashboard lock, the recorded (port, pid) session is
    looked up. Forwarding is (re)started when no port is recorded or when a
    previous ping attempt failed, and the dashboard is pinged up to three
    times. Once a ping succeeds, the incoming request headers are forwarded
    in a GET to the dashboard and the fully-read reply is returned.

    Args:
        request: Incoming request; its raw headers are forwarded upstream.
        user_hash: User hash used for the environment, lock and session.

    Returns:
        A response carrying the dashboard's body and status code, plus its
        headers except those describing the upstream body framing.

    Raises:
        fastapi.HTTPException: 503 if forwarding cannot be started or every
            ping attempt fails; 502 if the forwarded request fails.
    """
    # TODO(cooperc): Support showing only jobs for a specific user.

    # FIX(zhwu/cooperc/eric): Fix log downloading (assumes global
    # /download_log/xx route)

    # Note: before #4717, each user had their own controller, and thus their own
    # dashboard. Now, all users share the same controller, so this isn't really
    # necessary. TODO(cooperc): clean up.

    # TODO: Put this in an executor to avoid blocking the main server thread.
    # It can take a long time if it needs to check the controller status.

    # Find the port for the dashboard of the user
    os.environ[constants.USER_ID_ENV_VAR] = user_hash
    server_common.reload_for_new_request(client_entrypoint=None,
                                         client_command=None,
                                         using_remote_api_server=False)
    logger.info(f'Starting dashboard for user hash: {user_hash}')

    with dashboard_utils.get_dashboard_lock_for_user(user_hash):
        max_retries = 3
        for attempt in range(max_retries):
            port, pid = dashboard_utils.get_dashboard_session(user_hash)
            if port == 0 or attempt > 0:
                # Let the client know that we are waiting for starting the
                # dashboard.
                try:
                    port, pid = core.start_dashboard_forwarding()
                except Exception as e:  # pylint: disable=broad-except
                    # We catch all exceptions to gracefully handle unknown
                    # errors and raise an HTTPException to the client.
                    msg = (
                        'Dashboard failed to start: '
                        f'{common_utils.format_exception(e, use_bracket=True)}')
                    logger.error(msg)
                    raise fastapi.HTTPException(status_code=503,
                                                detail=msg) from e
                dashboard_utils.add_dashboard_session(user_hash, port, pid)

            # Assuming the dashboard is forwarded to localhost on the API server
            dashboard_url = f'http://localhost:{port}'
            try:
                # Ping the dashboard to check if it's still running. Only
                # success or failure matters, so the reply is discarded.
                async with httpx.AsyncClient() as client:
                    await client.request('GET', dashboard_url, timeout=1)
                break  # Connection successful, proceed with the request
            except Exception as e:  # pylint: disable=broad-except
                # We catch all exceptions to gracefully handle unknown
                # errors and retry or raise an HTTPException to the client.
                msg = (
                    f'Dashboard connection attempt {attempt + 1} failed with '
                    f'{common_utils.format_exception(e, use_bracket=True)}')
                logger.info(msg)
                if attempt == max_retries - 1:
                    raise fastapi.HTTPException(status_code=503,
                                                detail=msg) from e

    # Headers that describe how the upstream body was framed or encoded. The
    # body below has already been read and decoded by httpx, so copying these
    # through could advertise a length or encoding that no longer matches.
    stale_body_headers = {
        'connection',
        'content-encoding',
        'content-length',
        'transfer-encoding',
    }

    # Create a client session to forward the request
    try:
        async with httpx.AsyncClient() as client:
            # Make the request and get the response
            response = await client.request(
                method='GET',
                url=f'{dashboard_url}',
                headers=request.headers.raw,
            )

            # Create a new response with the content already read
            content = await response.aread()
            forwarded_headers = {
                name: value
                for name, value in response.headers.items()
                if name.lower() not in stale_body_headers
            }
            return fastapi.Response(
                content=content,
                status_code=response.status_code,
                headers=forwarded_headers,
                media_type=response.headers.get('content-type'))
    except Exception as e:  # pylint: disable=broad-except
        msg = (f'Failed to forward request to dashboard: '
               f'{common_utils.format_exception(e, use_bracket=True)}')
        logger.error(msg)
        raise fastapi.HTTPException(status_code=502, detail=msg) from e
|