nemo-evaluator-launcher 0.1.0rc6__py3-none-any.whl → 0.1.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/__init__.py +15 -1
- nemo_evaluator_launcher/api/functional.py +188 -27
- nemo_evaluator_launcher/api/types.py +9 -0
- nemo_evaluator_launcher/cli/export.py +131 -12
- nemo_evaluator_launcher/cli/info.py +477 -82
- nemo_evaluator_launcher/cli/kill.py +5 -3
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/ls_runs.py +31 -10
- nemo_evaluator_launcher/cli/ls_tasks.py +105 -3
- nemo_evaluator_launcher/cli/main.py +101 -5
- nemo_evaluator_launcher/cli/run.py +153 -30
- nemo_evaluator_launcher/cli/status.py +49 -5
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/execdb.py +121 -27
- nemo_evaluator_launcher/common/helpers.py +213 -33
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/printing_utils.py +100 -0
- nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +23 -0
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +2 -2
- nemo_evaluator_launcher/configs/execution/local.yaml +2 -0
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +19 -4
- nemo_evaluator_launcher/executors/base.py +54 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +60 -5
- nemo_evaluator_launcher/executors/lepton/executor.py +240 -101
- nemo_evaluator_launcher/executors/lepton/job_helpers.py +15 -11
- nemo_evaluator_launcher/executors/local/executor.py +492 -56
- nemo_evaluator_launcher/executors/local/run.template.sh +76 -9
- nemo_evaluator_launcher/executors/slurm/executor.py +571 -98
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/base.py +9 -0
- nemo_evaluator_launcher/exporters/gsheets.py +27 -9
- nemo_evaluator_launcher/exporters/local.py +30 -16
- nemo_evaluator_launcher/exporters/mlflow.py +245 -74
- nemo_evaluator_launcher/exporters/utils.py +139 -184
- nemo_evaluator_launcher/exporters/wandb.py +157 -43
- nemo_evaluator_launcher/package_info.py +6 -3
- nemo_evaluator_launcher/resources/mapping.toml +56 -15
- nemo_evaluator_launcher-0.1.41.dist-info/METADATA +494 -0
- nemo_evaluator_launcher-0.1.41.dist-info/RECORD +62 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/entry_points.txt +1 -0
- nemo_evaluator_launcher-0.1.0rc6.dist-info/METADATA +0 -35
- nemo_evaluator_launcher-0.1.0rc6.dist-info/RECORD +0 -57
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.0rc6.dist-info → nemo_evaluator_launcher-0.1.41.dist-info}/top_level.txt +0 -0
|
@@ -16,18 +16,32 @@
|
|
|
16
16
|
import pathlib
|
|
17
17
|
import time
|
|
18
18
|
from dataclasses import dataclass
|
|
19
|
+
from typing import Literal
|
|
19
20
|
|
|
20
|
-
import yaml
|
|
21
|
-
from omegaconf import OmegaConf
|
|
22
21
|
from simple_parsing import field
|
|
23
22
|
|
|
24
|
-
from nemo_evaluator_launcher.
|
|
23
|
+
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
24
|
+
from nemo_evaluator_launcher.common.printing_utils import (
|
|
25
|
+
bold,
|
|
26
|
+
cyan,
|
|
27
|
+
green,
|
|
28
|
+
magenta,
|
|
29
|
+
red,
|
|
30
|
+
yellow,
|
|
31
|
+
)
|
|
25
32
|
|
|
26
33
|
|
|
27
34
|
@dataclass
|
|
28
35
|
class Cmd:
|
|
29
36
|
"""Run command parameters"""
|
|
30
37
|
|
|
38
|
+
config: str | None = field(
|
|
39
|
+
default=None,
|
|
40
|
+
alias=["--config"],
|
|
41
|
+
metadata={
|
|
42
|
+
"help": "Full path to config file. Uses Hydra by default (--config-mode=hydra). Use --config-mode=raw to load directly (bypasses Hydra)."
|
|
43
|
+
},
|
|
44
|
+
)
|
|
31
45
|
config_name: str = field(
|
|
32
46
|
default="default",
|
|
33
47
|
alias=["-c", "--config-name"],
|
|
@@ -42,11 +56,11 @@ class Cmd:
|
|
|
42
56
|
"help": "Path to user config directory. If provided, searches here first, then falls back to internal configs."
|
|
43
57
|
},
|
|
44
58
|
)
|
|
45
|
-
|
|
46
|
-
default=
|
|
47
|
-
alias=["
|
|
59
|
+
config_mode: Literal["hydra", "raw"] = field(
|
|
60
|
+
default="hydra",
|
|
61
|
+
alias=["--config-mode"],
|
|
48
62
|
metadata={
|
|
49
|
-
"help": "
|
|
63
|
+
"help": "Config loading mode: 'hydra' (default) uses Hydra config system, 'raw' loads config file directly bypassing Hydra."
|
|
50
64
|
},
|
|
51
65
|
)
|
|
52
66
|
override: list[str] = field(
|
|
@@ -63,40 +77,96 @@ class Cmd:
|
|
|
63
77
|
alias=["-n", "--dry-run"],
|
|
64
78
|
metadata={"help": "Do not run the evaluation, just print the config."},
|
|
65
79
|
)
|
|
80
|
+
config_output: str | None = field(
|
|
81
|
+
default=None,
|
|
82
|
+
alias=["--config-output"],
|
|
83
|
+
metadata={
|
|
84
|
+
"help": "Directory to save the complete run config. Defaults to ~/.nemo-evaluator/run_configs/"
|
|
85
|
+
},
|
|
86
|
+
)
|
|
66
87
|
|
|
67
88
|
def execute(self) -> None:
|
|
68
|
-
#
|
|
69
|
-
|
|
70
|
-
|
|
89
|
+
# Import heavy dependencies only when needed
|
|
90
|
+
import yaml
|
|
91
|
+
from omegaconf import OmegaConf
|
|
92
|
+
|
|
93
|
+
from nemo_evaluator_launcher.api.functional import RunConfig, run_eval
|
|
94
|
+
|
|
95
|
+
# Validate config_mode value
|
|
96
|
+
if self.config_mode not in ["hydra", "raw"]:
|
|
97
|
+
raise ValueError(
|
|
98
|
+
f"Invalid --config-mode value: {self.config_mode}. Must be 'hydra' or 'raw'."
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# Validate that raw mode requires --config
|
|
102
|
+
if self.config_mode == "raw" and self.config is None:
|
|
103
|
+
raise ValueError(
|
|
104
|
+
"--config-mode=raw requires --config to be specified. Raw mode loads config files directly."
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# Load configuration either from Hydra or directly from a config file
|
|
108
|
+
if self.config_mode == "raw" and self.config:
|
|
109
|
+
# Validate that raw config loading is not used with other config options
|
|
71
110
|
if self.config_name != "default":
|
|
72
|
-
raise ValueError(
|
|
111
|
+
raise ValueError(
|
|
112
|
+
"Cannot use --config-mode=raw with --config-name. Raw mode only works with --config."
|
|
113
|
+
)
|
|
73
114
|
if self.config_dir is not None:
|
|
74
|
-
raise ValueError(
|
|
115
|
+
raise ValueError(
|
|
116
|
+
"Cannot use --config-mode=raw with --config-dir. Raw mode only works with --config."
|
|
117
|
+
)
|
|
75
118
|
if self.override:
|
|
76
|
-
raise ValueError(
|
|
119
|
+
raise ValueError(
|
|
120
|
+
"Cannot use --config-mode=raw with --override. Raw mode only works with --config."
|
|
121
|
+
)
|
|
77
122
|
|
|
78
|
-
# Load from
|
|
79
|
-
with open(self.
|
|
123
|
+
# Load from config file directly (bypass Hydra)
|
|
124
|
+
with open(self.config, "r") as f:
|
|
80
125
|
config_dict = yaml.safe_load(f)
|
|
81
126
|
|
|
82
127
|
# Create RunConfig from the loaded data
|
|
83
128
|
config = OmegaConf.create(config_dict)
|
|
84
129
|
else:
|
|
130
|
+
# Handle --config parameter: split path into config_dir and config_name for Hydra
|
|
131
|
+
if self.config:
|
|
132
|
+
if self.config_name != "default":
|
|
133
|
+
raise ValueError("Cannot use --config with --config-name")
|
|
134
|
+
if self.config_dir is not None:
|
|
135
|
+
raise ValueError("Cannot use --config with --config-dir")
|
|
136
|
+
config_path = pathlib.Path(self.config)
|
|
137
|
+
config_dir = str(config_path.parent)
|
|
138
|
+
config_name = str(config_path.stem)
|
|
139
|
+
else:
|
|
140
|
+
config_dir = self.config_dir
|
|
141
|
+
config_name = self.config_name
|
|
142
|
+
|
|
85
143
|
# Load the complete Hydra configuration
|
|
86
144
|
config = RunConfig.from_hydra(
|
|
87
|
-
|
|
145
|
+
config_dir=config_dir,
|
|
146
|
+
config_name=config_name,
|
|
88
147
|
hydra_overrides=self.override,
|
|
89
|
-
config_dir=self.config_dir,
|
|
90
148
|
)
|
|
91
149
|
|
|
92
|
-
|
|
150
|
+
try:
|
|
151
|
+
invocation_id = run_eval(config, self.dry_run)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
print(red(f"✗ Job submission failed, see logs | Error: {e}"))
|
|
154
|
+
logger.error("Job submission failed", error=e)
|
|
155
|
+
raise
|
|
93
156
|
|
|
94
|
-
# Save the complete configuration
|
|
157
|
+
# Save the complete configuration
|
|
95
158
|
if not self.dry_run and invocation_id is not None:
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
159
|
+
# Determine config output directory
|
|
160
|
+
if self.config_output:
|
|
161
|
+
# Use custom directory specified by --config-output
|
|
162
|
+
config_dir = pathlib.Path(self.config_output)
|
|
163
|
+
else:
|
|
164
|
+
# Default to original location: ~/.nemo-evaluator/run_configs
|
|
165
|
+
home_dir = pathlib.Path.home()
|
|
166
|
+
config_dir = home_dir / ".nemo-evaluator" / "run_configs"
|
|
167
|
+
|
|
168
|
+
# Ensure the directory exists
|
|
169
|
+
config_dir.mkdir(parents=True, exist_ok=True)
|
|
100
170
|
|
|
101
171
|
# Convert DictConfig to dict and save as YAML
|
|
102
172
|
config_dict = OmegaConf.to_container(config, resolve=True)
|
|
@@ -106,7 +176,7 @@ class Cmd:
|
|
|
106
176
|
|
|
107
177
|
# Create config filename with invocation ID
|
|
108
178
|
config_filename = f"{invocation_id}_config.yml"
|
|
109
|
-
config_path =
|
|
179
|
+
config_path = config_dir / config_filename
|
|
110
180
|
|
|
111
181
|
# Save the complete Hydra configuration
|
|
112
182
|
with open(config_path, "w") as f:
|
|
@@ -120,16 +190,69 @@ class Cmd:
|
|
|
120
190
|
f.write("#\n")
|
|
121
191
|
f.write("# To rerun this exact configuration:\n")
|
|
122
192
|
f.write(
|
|
123
|
-
f"# nemo-evaluator-launcher run --
|
|
193
|
+
f"# nemo-evaluator-launcher run --config {config_path} --config-mode=raw\n"
|
|
124
194
|
)
|
|
125
195
|
f.write("#\n")
|
|
126
196
|
f.write(config_yaml)
|
|
127
197
|
|
|
128
|
-
print(
|
|
198
|
+
print(bold(cyan("Complete run config saved to: ")) + f"\n {config_path}\n")
|
|
199
|
+
logger.info("Saved complete config", path=config_path)
|
|
129
200
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
print(f"to kill all jobs: nemo-evaluator-launcher kill {invocation_id}")
|
|
201
|
+
# Print general success message with invocation ID and helpful commands
|
|
202
|
+
if invocation_id is not None and not self.dry_run:
|
|
133
203
|
print(
|
|
134
|
-
|
|
204
|
+
bold(cyan("To check status: "))
|
|
205
|
+
+ f"nemo-evaluator-launcher status {invocation_id}"
|
|
206
|
+
)
|
|
207
|
+
print(
|
|
208
|
+
bold(cyan("To view job info: "))
|
|
209
|
+
+ f"nemo-evaluator-launcher info {invocation_id}"
|
|
210
|
+
)
|
|
211
|
+
print(
|
|
212
|
+
bold(cyan("To kill all jobs: "))
|
|
213
|
+
+ f"nemo-evaluator-launcher kill {invocation_id}"
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
# Show actual job IDs and task names
|
|
217
|
+
print(bold(cyan("To kill individual jobs:")))
|
|
218
|
+
# Access tasks - will work after normalization in run_eval
|
|
219
|
+
tasks = (
|
|
220
|
+
config.evaluation.tasks
|
|
221
|
+
if hasattr(config.evaluation, "tasks")
|
|
222
|
+
else config.evaluation
|
|
223
|
+
)
|
|
224
|
+
for idx, task in enumerate(tasks):
|
|
225
|
+
job_id = f"{invocation_id}.{idx}"
|
|
226
|
+
print(f" nemo-evaluator-launcher kill {job_id} # {task.name}")
|
|
227
|
+
|
|
228
|
+
print(
|
|
229
|
+
magenta(
|
|
230
|
+
"(all commands accept shortened IDs as long as there are no conflicts)"
|
|
231
|
+
)
|
|
232
|
+
)
|
|
233
|
+
print(
|
|
234
|
+
bold(cyan("To print all jobs: ")) + "nemo-evaluator-launcher ls runs"
|
|
235
|
+
"\n (--since 1d or --since 6h for time span, see --help)"
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
print(
|
|
239
|
+
green(
|
|
240
|
+
bold(
|
|
241
|
+
f"✓ Job submission successful | Invocation ID: {invocation_id}"
|
|
242
|
+
)
|
|
243
|
+
)
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Warn if both config_dir and config_name are provided (and config_name is not default)
|
|
247
|
+
if (
|
|
248
|
+
self.config is None
|
|
249
|
+
and self.config_dir is not None
|
|
250
|
+
and self.config_name != "default"
|
|
251
|
+
):
|
|
252
|
+
joint_path = pathlib.Path(self.config_dir) / f"{self.config_name}.yaml"
|
|
253
|
+
print(
|
|
254
|
+
yellow(
|
|
255
|
+
f"Warning: Using --config-dir and --config-name together is deprecated. "
|
|
256
|
+
f"Please use --config {joint_path} instead."
|
|
257
|
+
)
|
|
135
258
|
)
|
|
@@ -13,12 +13,12 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
#
|
|
16
|
-
import json
|
|
17
16
|
from dataclasses import dataclass
|
|
18
17
|
|
|
19
18
|
from simple_parsing import field
|
|
20
19
|
|
|
21
|
-
|
|
20
|
+
import nemo_evaluator_launcher.common.printing_utils as pu
|
|
21
|
+
from nemo_evaluator_launcher.executors.base import ExecutionState
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
@dataclass
|
|
@@ -36,6 +36,11 @@ class Cmd:
|
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
def execute(self) -> None:
|
|
39
|
+
# Import heavy dependencies only when needed
|
|
40
|
+
import json
|
|
41
|
+
|
|
42
|
+
from nemo_evaluator_launcher.api.functional import get_status
|
|
43
|
+
|
|
39
44
|
res = get_status(self.job_ids)
|
|
40
45
|
if self.json:
|
|
41
46
|
# Remove progress field from JSON output as it's a WIP feature
|
|
@@ -94,10 +99,16 @@ class Cmd:
|
|
|
94
99
|
else:
|
|
95
100
|
location = ""
|
|
96
101
|
|
|
102
|
+
# Format status with visual indicators and colors
|
|
103
|
+
status = job.get("status", "")
|
|
104
|
+
formatted_status = self._format_status_with_indicators(status)
|
|
105
|
+
|
|
106
|
+
# Extract task name
|
|
107
|
+
|
|
97
108
|
rows.append(
|
|
98
109
|
[
|
|
99
110
|
job.get("job_id", ""),
|
|
100
|
-
|
|
111
|
+
formatted_status,
|
|
101
112
|
# job.get("progress", ""), temporarily disabled as this is a WIP feature
|
|
102
113
|
executor_info,
|
|
103
114
|
location,
|
|
@@ -106,7 +117,10 @@ class Cmd:
|
|
|
106
117
|
|
|
107
118
|
# Calculate column widths and print
|
|
108
119
|
widths = [
|
|
109
|
-
max(
|
|
120
|
+
max(
|
|
121
|
+
len(str(headers[i])),
|
|
122
|
+
max(len(self._strip_ansi_codes(str(row[i]))) for row in rows),
|
|
123
|
+
)
|
|
110
124
|
for i in range(len(headers))
|
|
111
125
|
]
|
|
112
126
|
|
|
@@ -117,4 +131,34 @@ class Cmd:
|
|
|
117
131
|
print("-" * len(header_row))
|
|
118
132
|
|
|
119
133
|
for row in rows:
|
|
120
|
-
|
|
134
|
+
# Adjust padding for ANSI color codes
|
|
135
|
+
formatted_row = []
|
|
136
|
+
for i in range(len(row)):
|
|
137
|
+
content = str(row[i])
|
|
138
|
+
visible_length = len(self._strip_ansi_codes(content))
|
|
139
|
+
padding = widths[i] - visible_length
|
|
140
|
+
formatted_row.append(content + " " * padding)
|
|
141
|
+
print(" | ".join(formatted_row))
|
|
142
|
+
|
|
143
|
+
def _format_status_with_indicators(self, status: str) -> str:
|
|
144
|
+
"""Format status with Unicode visual indicators only."""
|
|
145
|
+
# Status mapping based on ExecutionState enum
|
|
146
|
+
status_formats = {
|
|
147
|
+
ExecutionState.SUCCESS.value: pu.green("✓ SUCCESS"),
|
|
148
|
+
ExecutionState.FAILED.value: pu.red("✗ FAILED"),
|
|
149
|
+
ExecutionState.RUNNING.value: pu.yellow("▶ RUNNING"),
|
|
150
|
+
ExecutionState.PENDING.value: pu.cyan("⧗ PENDING"),
|
|
151
|
+
ExecutionState.KILLED.value: pu.magenta("✗ KILLED"),
|
|
152
|
+
# Additional states for error handling
|
|
153
|
+
"not_found": pu.grey("? NOT FOUND"),
|
|
154
|
+
"error": pu.red("✗ ERROR"),
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return status_formats.get(status.lower(), pu.grey(status.upper()))
|
|
158
|
+
|
|
159
|
+
def _strip_ansi_codes(self, text: str) -> str:
|
|
160
|
+
"""Remove ANSI color codes from text for length calculation."""
|
|
161
|
+
import re
|
|
162
|
+
|
|
163
|
+
ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")
|
|
164
|
+
return ansi_escape.sub("", text)
|
|
@@ -19,6 +19,29 @@ import importlib
|
|
|
19
19
|
from dataclasses import dataclass
|
|
20
20
|
|
|
21
21
|
from nemo_evaluator_launcher import __package_name__, __version__
|
|
22
|
+
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_versions() -> dict:
|
|
26
|
+
internal_module_name = "nemo_evaluator_launcher_internal"
|
|
27
|
+
res = {__package_name__: __version__}
|
|
28
|
+
# Check for internal package
|
|
29
|
+
try:
|
|
30
|
+
internal_module = importlib.import_module(internal_module_name)
|
|
31
|
+
# Try to get version from internal package
|
|
32
|
+
internal_version = getattr(internal_module, "__version__", None)
|
|
33
|
+
if internal_version:
|
|
34
|
+
res[internal_module_name] = internal_version
|
|
35
|
+
else:
|
|
36
|
+
res[internal_module_name] = "available (version unknown)"
|
|
37
|
+
except ImportError:
|
|
38
|
+
# Internal package not available - this is expected in many cases
|
|
39
|
+
pass
|
|
40
|
+
except Exception as e:
|
|
41
|
+
logger.error(f"nemo_evaluator_launcher_internal: error loading ({e})")
|
|
42
|
+
raise
|
|
43
|
+
|
|
44
|
+
return res
|
|
22
45
|
|
|
23
46
|
|
|
24
47
|
@dataclass
|
|
@@ -27,26 +50,6 @@ class Cmd:
|
|
|
27
50
|
|
|
28
51
|
def execute(self) -> None:
|
|
29
52
|
"""Execute the version command."""
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
try:
|
|
34
|
-
internal_module = importlib.import_module(
|
|
35
|
-
"nemo_evaluator_launcher_internal"
|
|
36
|
-
)
|
|
37
|
-
# Try to get version from internal package
|
|
38
|
-
try:
|
|
39
|
-
internal_version = getattr(internal_module, "__version__", None)
|
|
40
|
-
if internal_version:
|
|
41
|
-
print(f"nemo-evaluator-launcher-internal: {internal_version}")
|
|
42
|
-
else:
|
|
43
|
-
print(
|
|
44
|
-
"nemo-evaluator-launcher-internal: available (version unknown)"
|
|
45
|
-
)
|
|
46
|
-
except Exception:
|
|
47
|
-
print("nemo-evaluator-launcher-internal: available (version unknown)")
|
|
48
|
-
except ImportError:
|
|
49
|
-
# Internal package not available - this is expected in many cases
|
|
50
|
-
pass
|
|
51
|
-
except Exception as e:
|
|
52
|
-
print(f"nemo-evaluator-launcher-internal: error loading ({e})")
|
|
53
|
+
res = get_versions()
|
|
54
|
+
for package, version in res.items():
|
|
55
|
+
print(f"{package}: {version}")
|
|
@@ -29,15 +29,15 @@ EXEC_DB_FILE = EXEC_DB_DIR / "exec.v1.jsonl"
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
def generate_invocation_id() -> str:
|
|
32
|
-
"""Generate a unique invocation ID as an
|
|
33
|
-
return secrets.token_hex(
|
|
32
|
+
"""Generate a unique invocation ID as a 16-digit hex string."""
|
|
33
|
+
return secrets.token_hex(8)
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def generate_job_id(invocation_id: str, index: int) -> str:
|
|
37
37
|
"""Generate a job ID as <invocation_id>.<n>.
|
|
38
38
|
|
|
39
39
|
Args:
|
|
40
|
-
invocation_id: The invocation group ID (
|
|
40
|
+
invocation_id: The invocation group ID (16-digit hex).
|
|
41
41
|
index: The job index (0-based integer).
|
|
42
42
|
Returns:
|
|
43
43
|
The job ID string.
|
|
@@ -50,7 +50,7 @@ class JobData:
|
|
|
50
50
|
"""Data structure for job execution information.
|
|
51
51
|
|
|
52
52
|
Attributes:
|
|
53
|
-
invocation_id:
|
|
53
|
+
invocation_id: 16-digit hex string.
|
|
54
54
|
job_id: <invocation_id>.<n> string.
|
|
55
55
|
timestamp: Unix timestamp when the job was created.
|
|
56
56
|
executor: Name of the executor that handled this job.
|
|
@@ -148,41 +148,135 @@ class ExecutionDB:
|
|
|
148
148
|
)
|
|
149
149
|
raise
|
|
150
150
|
|
|
151
|
+
def _resolve_invocation_id(self, short_id: str) -> Optional[str]:
|
|
152
|
+
"""Resolve a short invocation ID to the full one.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
short_id: Partial or full invocation ID.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
Full invocation ID if found uniquely, None if not found.
|
|
159
|
+
|
|
160
|
+
Raises:
|
|
161
|
+
ValueError: If the short_id matches multiple invocation IDs.
|
|
162
|
+
"""
|
|
163
|
+
if not short_id:
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
short_id = short_id.lower()
|
|
167
|
+
|
|
168
|
+
# NOTE(agronskiy): this is a non-optimized implementation that assumes a small number
|
|
169
|
+
# of jobs in ExecDB(), a typical scenario. Speeding up would involve building a
|
|
170
|
+
# prefix tree when loading invocations/jobs.
|
|
171
|
+
matches = [
|
|
172
|
+
inv_id
|
|
173
|
+
for inv_id in self._invocations.keys()
|
|
174
|
+
if inv_id.lower().startswith(short_id)
|
|
175
|
+
]
|
|
176
|
+
|
|
177
|
+
if len(matches) == 1:
|
|
178
|
+
return matches[0]
|
|
179
|
+
elif len(matches) > 1:
|
|
180
|
+
raise ValueError(f"Ambiguous invocation ID '{short_id}': matches {matches}")
|
|
181
|
+
else:
|
|
182
|
+
return None
|
|
183
|
+
|
|
184
|
+
def _resolve_job_id(self, short_job_id: str) -> Optional[str]:
|
|
185
|
+
"""Resolve a short job ID to the full one.
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
short_job_id: Partial or full job ID.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
Full job ID if found uniquely, None if not found.
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If the short_job_id matches multiple job IDs.
|
|
195
|
+
"""
|
|
196
|
+
if not short_job_id:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
# Normalize to lowercase for case-insensitive matching
|
|
200
|
+
short_job_id = short_job_id.lower()
|
|
201
|
+
|
|
202
|
+
if "." in short_job_id:
|
|
203
|
+
parts = short_job_id.split(".", 1)
|
|
204
|
+
short_inv_id, job_index = parts[0], parts[1]
|
|
205
|
+
|
|
206
|
+
# Resolve the invocation part
|
|
207
|
+
full_inv_id = self._resolve_invocation_id(short_inv_id)
|
|
208
|
+
if full_inv_id:
|
|
209
|
+
candidate_job_id = f"{full_inv_id}.{job_index}"
|
|
210
|
+
if candidate_job_id in self._jobs:
|
|
211
|
+
return candidate_job_id
|
|
212
|
+
|
|
213
|
+
# NOTE(agronskiy): unfortunately, due to legacy, there exist use cases where
|
|
214
|
+
# job_id is the same format as invocation_id
|
|
215
|
+
candidate_job_id = self._resolve_invocation_id(short_job_id)
|
|
216
|
+
if candidate_job_id and candidate_job_id in self._jobs:
|
|
217
|
+
return candidate_job_id
|
|
218
|
+
|
|
219
|
+
return None
|
|
220
|
+
|
|
151
221
|
def get_job(self, job_id: str) -> Optional[JobData]:
|
|
152
|
-
|
|
222
|
+
"""Get job by full or partial job ID.
|
|
153
223
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return {
|
|
157
|
-
job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
|
|
158
|
-
}
|
|
224
|
+
Args:
|
|
225
|
+
job_id: Full or partial job ID.
|
|
159
226
|
|
|
160
|
-
|
|
161
|
-
|
|
227
|
+
Returns:
|
|
228
|
+
JobData if found, None otherwise.
|
|
162
229
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
230
|
+
Raises:
|
|
231
|
+
ValueError: If the job_id matches multiple jobs.
|
|
232
|
+
"""
|
|
233
|
+
resolved_id = self._resolve_job_id(job_id)
|
|
234
|
+
if resolved_id:
|
|
235
|
+
return self._jobs.get(resolved_id)
|
|
166
236
|
|
|
237
|
+
return None
|
|
167
238
|
|
|
168
|
-
def
|
|
169
|
-
|
|
170
|
-
db.write_job(job)
|
|
239
|
+
def get_jobs(self, invocation_id: str) -> Dict[str, JobData]:
|
|
240
|
+
"""Get all jobs for a full or partial invocation ID.
|
|
171
241
|
|
|
242
|
+
Args:
|
|
243
|
+
invocation_id: Full or partial invocation ID.
|
|
172
244
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
return db.get_job(job_id)
|
|
245
|
+
Returns:
|
|
246
|
+
Dictionary mapping job_id to JobData for all jobs in the invocation.
|
|
176
247
|
|
|
248
|
+
Raises:
|
|
249
|
+
ValueError: If the invocation_id matches multiple invocations.
|
|
250
|
+
"""
|
|
251
|
+
resolved_inv_id = self._resolve_invocation_id(invocation_id)
|
|
252
|
+
if not resolved_inv_id:
|
|
253
|
+
return {}
|
|
177
254
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
255
|
+
job_ids = self._invocations.get(resolved_inv_id, [])
|
|
256
|
+
return {
|
|
257
|
+
job_id: self._jobs[job_id] for job_id in job_ids if job_id in self._jobs
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
def get_invocation_jobs(self, invocation_id: str) -> List[str]:
|
|
261
|
+
"""Get job IDs for a full or partial invocation ID.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
invocation_id: Full or partial invocation ID.
|
|
181
265
|
|
|
266
|
+
Returns:
|
|
267
|
+
List of job IDs for the invocation.
|
|
182
268
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
269
|
+
Raises:
|
|
270
|
+
ValueError: If the invocation_id matches multiple invocations.
|
|
271
|
+
"""
|
|
272
|
+
resolved_inv_id = self._resolve_invocation_id(invocation_id)
|
|
273
|
+
if not resolved_inv_id:
|
|
274
|
+
return []
|
|
275
|
+
return self._invocations.get(resolved_inv_id, [])
|
|
276
|
+
|
|
277
|
+
def get_all_jobs(self) -> Dict[str, JobData]:
|
|
278
|
+
"""Return a copy of all jobs in the execution DB."""
|
|
279
|
+
return dict(self._jobs)
|
|
186
280
|
|
|
187
281
|
|
|
188
282
|
# Ensure all the paths
|