skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
sky/jobs/dashboard/dashboard.py
CHANGED
@@ -6,13 +6,18 @@ https://github.com/ray-project/ray/tree/master/dashboard/client/src) and/or get
|
|
6
6
|
rid of the SSH port-forwarding business (see cli.py's job_dashboard()
|
7
7
|
comment).
|
8
8
|
"""
|
9
|
+
import collections
|
9
10
|
import datetime
|
11
|
+
import enum
|
12
|
+
import os
|
10
13
|
import pathlib
|
11
14
|
|
12
15
|
import flask
|
13
16
|
import yaml
|
14
17
|
|
15
18
|
from sky import jobs as managed_jobs
|
19
|
+
from sky.client import sdk
|
20
|
+
from sky.jobs import constants as managed_job_constants
|
16
21
|
from sky.utils import common_utils
|
17
22
|
from sky.utils import controller_utils
|
18
23
|
|
@@ -26,25 +31,112 @@ def _is_running_on_jobs_controller() -> bool:
|
|
26
31
|
"""
|
27
32
|
if pathlib.Path('~/.sky/sky_ray.yml').expanduser().exists():
|
28
33
|
config = yaml.safe_load(
|
29
|
-
pathlib.Path('~/.sky/sky_ray.yml').expanduser().read_text(
|
34
|
+
pathlib.Path('~/.sky/sky_ray.yml').expanduser().read_text(
|
35
|
+
encoding='utf-8'))
|
30
36
|
cluster_name = config.get('cluster_name', '')
|
31
|
-
candidate_controller_names = (
|
32
|
-
controller_utils.Controllers.JOBS_CONTROLLER.value.
|
33
|
-
candidate_cluster_names)
|
34
37
|
# We use startswith instead of exact match because the cluster name in
|
35
38
|
# the yaml file is cluster_name_on_cloud which may have additional
|
36
39
|
# suffixes.
|
37
|
-
return
|
38
|
-
cluster_name
|
39
|
-
for name in candidate_controller_names)
|
40
|
+
return cluster_name.startswith(
|
41
|
+
controller_utils.Controllers.JOBS_CONTROLLER.value.cluster_name)
|
40
42
|
return False
|
41
43
|
|
42
44
|
|
45
|
+
# Column indices for job table
|
46
|
+
class JobTableColumns(enum.IntEnum):
|
47
|
+
"""Column indices for the jobs table in the dashboard.
|
48
|
+
|
49
|
+
- DROPDOWN (0): Column for expandable dropdown arrow
|
50
|
+
- ID (1): Job ID column
|
51
|
+
- TASK (2): Task name/number column
|
52
|
+
- NAME (3): Job name column
|
53
|
+
- RESOURCES (4): Resources used by job
|
54
|
+
- SUBMITTED (5): Job submission timestamp
|
55
|
+
- TOTAL_DURATION (6): Total time since job submission
|
56
|
+
- JOB_DURATION (7): Actual job runtime
|
57
|
+
- RECOVERIES (8): Number of job recoveries
|
58
|
+
- STATUS (9): Current job status
|
59
|
+
- STARTED (10): Job start timestamp
|
60
|
+
- CLUSTER (11): Cluster name
|
61
|
+
- REGION (12): Cloud region
|
62
|
+
- DETAILS (13): Job details
|
63
|
+
- FAILOVER (14): Job failover history
|
64
|
+
- ACTIONS (15): Available actions column
|
65
|
+
"""
|
66
|
+
DROPDOWN = 0
|
67
|
+
ID = 1
|
68
|
+
TASK = 2
|
69
|
+
NAME = 3
|
70
|
+
RESOURCES = 4
|
71
|
+
SUBMITTED = 5
|
72
|
+
TOTAL_DURATION = 6
|
73
|
+
JOB_DURATION = 7
|
74
|
+
RECOVERIES = 8
|
75
|
+
STATUS = 9
|
76
|
+
STARTED = 10
|
77
|
+
CLUSTER = 11
|
78
|
+
REGION = 12
|
79
|
+
DETAILS = 13
|
80
|
+
FAILOVER = 14
|
81
|
+
ACTIONS = 15
|
82
|
+
|
83
|
+
|
84
|
+
# Column headers matching the indices above
|
85
|
+
JOB_TABLE_COLUMNS = [
|
86
|
+
'', 'ID', 'Task', 'Name', 'Resources', 'Submitted', 'Total Duration',
|
87
|
+
'Job Duration', 'Status', 'Started', 'Cluster', 'Region', 'Failover',
|
88
|
+
'Recoveries', 'Details', 'Actions'
|
89
|
+
]
|
90
|
+
|
91
|
+
# This column is given by format_job_table but should be ignored.
|
92
|
+
SCHED_STATE_COLUMN = 12
|
93
|
+
|
94
|
+
|
95
|
+
def _extract_launch_history(log_content: str) -> str:
|
96
|
+
"""Extract launch history from log content.
|
97
|
+
|
98
|
+
Args:
|
99
|
+
log_content: Content of the log file.
|
100
|
+
Returns:
|
101
|
+
A formatted string containing the launch history.
|
102
|
+
"""
|
103
|
+
launches = []
|
104
|
+
current_launch = None
|
105
|
+
|
106
|
+
for line in log_content.splitlines():
|
107
|
+
if 'Launching on' in line:
|
108
|
+
try:
|
109
|
+
parts = line.split(']')
|
110
|
+
if len(parts) >= 2:
|
111
|
+
timestamp = parts[0].split()[1:3]
|
112
|
+
message = parts[1].replace('[0m⚙︎', '').strip()
|
113
|
+
formatted_line = f'{" ".join(timestamp)} {message}'
|
114
|
+
if current_launch:
|
115
|
+
prev_time, prev_target = current_launch.rsplit(
|
116
|
+
' Launching on ', 1)
|
117
|
+
launches.append(
|
118
|
+
f'{prev_time} Tried to launch on {prev_target}')
|
119
|
+
|
120
|
+
# Store the current launch
|
121
|
+
current_launch = formatted_line
|
122
|
+
except IndexError:
|
123
|
+
launches.append(line.strip())
|
124
|
+
|
125
|
+
# Add the final (successful) launch at the beginning
|
126
|
+
if current_launch:
|
127
|
+
result = [current_launch]
|
128
|
+
result.extend(launches)
|
129
|
+
return '\n'.join(result)
|
130
|
+
|
131
|
+
return 'No launch history found'
|
132
|
+
|
133
|
+
|
43
134
|
@app.route('/')
|
44
135
|
def home():
|
45
136
|
if not _is_running_on_jobs_controller():
|
46
137
|
# Experimental: run on laptop (refresh is very slow).
|
47
|
-
|
138
|
+
request_id = managed_jobs.queue(refresh=True, skip_finished=False)
|
139
|
+
all_managed_jobs = sdk.get(request_id)
|
48
140
|
else:
|
49
141
|
job_table = managed_jobs.dump_managed_job_queue()
|
50
142
|
all_managed_jobs = managed_jobs.load_managed_job_queue(job_table)
|
@@ -52,36 +144,90 @@ def home():
|
|
52
144
|
timestamp = datetime.datetime.now(datetime.timezone.utc)
|
53
145
|
rows = managed_jobs.format_job_table(all_managed_jobs,
|
54
146
|
show_all=True,
|
147
|
+
show_user=False,
|
55
148
|
return_rows=True)
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
'
|
149
|
+
|
150
|
+
status_counts = collections.defaultdict(int)
|
151
|
+
for task in all_managed_jobs:
|
152
|
+
if not task['status'].is_terminal():
|
153
|
+
status_counts[task['status'].value] += 1
|
154
|
+
|
155
|
+
# Add an empty column for the dropdown button and actions column
|
156
|
+
# Exclude SCHED. STATE column
|
157
|
+
rows = [
|
158
|
+
[''] + row[:SCHED_STATE_COLUMN] + row[SCHED_STATE_COLUMN + 1:] +
|
159
|
+
# Add empty cell for failover and actions column
|
160
|
+
[''] + [''] for row in rows
|
66
161
|
]
|
67
|
-
|
162
|
+
|
163
|
+
# Add log content as failover history for each job
|
164
|
+
for row in rows:
|
165
|
+
job_id = str(row[JobTableColumns.ID]).strip().replace(' ⤳', '')
|
166
|
+
if job_id and job_id != '-':
|
167
|
+
try:
|
168
|
+
log_path = os.path.join(
|
169
|
+
os.path.expanduser(
|
170
|
+
managed_job_constants.JOBS_CONTROLLER_LOGS_DIR),
|
171
|
+
f'{job_id}.log')
|
172
|
+
if os.path.exists(log_path):
|
173
|
+
with open(log_path, 'r', encoding='utf-8') as f:
|
174
|
+
log_content = f.read()
|
175
|
+
row[JobTableColumns.FAILOVER] = _extract_launch_history(
|
176
|
+
log_content)
|
177
|
+
else:
|
178
|
+
row[JobTableColumns.FAILOVER] = 'Log file not found'
|
179
|
+
except (IOError, OSError) as e:
|
180
|
+
row[JobTableColumns.FAILOVER] = f'Error reading log: {str(e)}'
|
181
|
+
app.logger.error('All managed jobs:')
|
182
|
+
|
183
|
+
# Validate column count
|
184
|
+
if rows and len(rows[0]) != len(JOB_TABLE_COLUMNS):
|
68
185
|
raise RuntimeError(
|
69
|
-
'Dashboard code and managed job queue code are out of sync.'
|
186
|
+
f'Dashboard code and managed job queue code are out of sync. '
|
187
|
+
f'Expected {(JOB_TABLE_COLUMNS)} columns, got {(rows[0])}')
|
70
188
|
|
71
|
-
# Fix STATUS color codes: '\x1b[33mCANCELLED\x1b[0m' -> 'CANCELLED'
|
189
|
+
# Fix STATUS color codes: '\x1b[33mCANCELLED\x1b[0m' -> 'CANCELLED'
|
72
190
|
for row in rows:
|
73
|
-
row[
|
74
|
-
|
75
|
-
|
191
|
+
row[JobTableColumns.STATUS] = common_utils.remove_color(
|
192
|
+
row[JobTableColumns.STATUS])
|
193
|
+
|
194
|
+
# Remove filler rows ([''], ..., ['-'])
|
195
|
+
rows = [
|
196
|
+
row for row in rows
|
197
|
+
if ''.join(map(str, row[:JobTableColumns.ACTIONS])) != ''
|
198
|
+
]
|
199
|
+
|
200
|
+
# Get all unique status values
|
201
|
+
status_values = sorted(
|
202
|
+
list(set(row[JobTableColumns.STATUS] for row in rows)))
|
76
203
|
|
77
204
|
rendered_html = flask.render_template(
|
78
205
|
'index.html',
|
79
|
-
columns=
|
206
|
+
columns=JOB_TABLE_COLUMNS,
|
80
207
|
rows=rows,
|
81
208
|
last_updated_timestamp=timestamp,
|
209
|
+
status_values=status_values,
|
210
|
+
status_counts=status_counts,
|
82
211
|
)
|
83
212
|
return rendered_html
|
84
213
|
|
85
214
|
|
215
|
+
@app.route('/download_log/<job_id>')
|
216
|
+
def download_log(job_id):
|
217
|
+
try:
|
218
|
+
log_path = os.path.join(
|
219
|
+
os.path.expanduser(managed_job_constants.JOBS_CONTROLLER_LOGS_DIR),
|
220
|
+
f'{job_id}.log')
|
221
|
+
if not os.path.exists(log_path):
|
222
|
+
flask.abort(404)
|
223
|
+
return flask.send_file(log_path,
|
224
|
+
mimetype='text/plain',
|
225
|
+
as_attachment=True,
|
226
|
+
download_name=f'job_{job_id}.log')
|
227
|
+
except (IOError, OSError) as e:
|
228
|
+
app.logger.error(f'Error downloading log for job {job_id}: {str(e)}')
|
229
|
+
flask.abort(500)
|
230
|
+
|
231
|
+
|
86
232
|
if __name__ == '__main__':
|
87
233
|
app.run()
|