nemo-evaluator-launcher 0.1.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nemo-evaluator-launcher might be problematic. Click here for more details.
- nemo_evaluator_launcher/__init__.py +65 -0
- nemo_evaluator_launcher/api/__init__.py +24 -0
- nemo_evaluator_launcher/api/functional.py +641 -0
- nemo_evaluator_launcher/api/types.py +89 -0
- nemo_evaluator_launcher/api/utils.py +19 -0
- nemo_evaluator_launcher/cli/__init__.py +15 -0
- nemo_evaluator_launcher/cli/export.py +148 -0
- nemo_evaluator_launcher/cli/info.py +117 -0
- nemo_evaluator_launcher/cli/kill.py +39 -0
- nemo_evaluator_launcher/cli/ls_runs.py +113 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +34 -0
- nemo_evaluator_launcher/cli/main.py +136 -0
- nemo_evaluator_launcher/cli/run.py +135 -0
- nemo_evaluator_launcher/cli/status.py +118 -0
- nemo_evaluator_launcher/cli/version.py +52 -0
- nemo_evaluator_launcher/common/__init__.py +16 -0
- nemo_evaluator_launcher/common/execdb.py +189 -0
- nemo_evaluator_launcher/common/helpers.py +157 -0
- nemo_evaluator_launcher/common/logging_utils.py +349 -0
- nemo_evaluator_launcher/common/mapping.py +310 -0
- nemo_evaluator_launcher/configs/__init__.py +15 -0
- nemo_evaluator_launcher/configs/default.yaml +28 -0
- nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
- nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +41 -0
- nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
- nemo_evaluator_launcher/configs/execution/local.yaml +17 -0
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +33 -0
- nemo_evaluator_launcher/executors/__init__.py +22 -0
- nemo_evaluator_launcher/executors/base.py +97 -0
- nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +589 -0
- nemo_evaluator_launcher/executors/lepton/executor.py +905 -0
- nemo_evaluator_launcher/executors/lepton/job_helpers.py +394 -0
- nemo_evaluator_launcher/executors/local/__init__.py +15 -0
- nemo_evaluator_launcher/executors/local/executor.py +491 -0
- nemo_evaluator_launcher/executors/local/run.template.sh +88 -0
- nemo_evaluator_launcher/executors/registry.py +38 -0
- nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
- nemo_evaluator_launcher/executors/slurm/executor.py +982 -0
- nemo_evaluator_launcher/exporters/__init__.py +36 -0
- nemo_evaluator_launcher/exporters/base.py +112 -0
- nemo_evaluator_launcher/exporters/gsheets.py +391 -0
- nemo_evaluator_launcher/exporters/local.py +488 -0
- nemo_evaluator_launcher/exporters/mlflow.py +448 -0
- nemo_evaluator_launcher/exporters/registry.py +40 -0
- nemo_evaluator_launcher/exporters/utils.py +669 -0
- nemo_evaluator_launcher/exporters/wandb.py +376 -0
- nemo_evaluator_launcher/package_info.py +35 -0
- nemo_evaluator_launcher/resources/mapping.toml +344 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/METADATA +35 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/RECORD +57 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/WHEEL +5 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/entry_points.txt +3 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/licenses/LICENSE +451 -0
- nemo_evaluator_launcher-0.1.0rc2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
"""Type definitions for the nemo-evaluator-launcher public API.
|
|
17
|
+
|
|
18
|
+
This module defines data structures and helpers for configuration and type safety in the API layer.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from typing import cast
|
|
24
|
+
|
|
25
|
+
import hydra
|
|
26
|
+
from hydra.core.global_hydra import GlobalHydra
|
|
27
|
+
from omegaconf import DictConfig, OmegaConf
|
|
28
|
+
|
|
29
|
+
from nemo_evaluator_launcher.common.logging_utils import logger
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class RunConfig(DictConfig):
    """Typed handle for the run configuration composed by Hydra.

    Instances are not constructed directly in this module: ``from_hydra``
    composes a config with Hydra and returns it ``cast`` to ``RunConfig``.
    """

    @staticmethod
    def from_hydra(
        config_name: str = "default",
        config_dir: str | None = None,
        hydra_overrides: list[str] | None = None,
        dict_overrides: dict | None = None,
    ) -> "RunConfig":
        """Load configuration from Hydra and merge with dictionary overrides.

        Args:
            config_name: Name of the Hydra configuration to load.
            config_dir: Optional path to user config directory. If provided, Hydra will
                search in this directory first, then fall back to internal configs.
                Relative paths are resolved against the current working directory.
            hydra_overrides: List of Hydra command-line style overrides. ``None``
                (the default) means no overrides. The list is never mutated.
            dict_overrides: Dictionary of configuration overrides to merge on top
                of the composed config. ``None`` (the default) means no overrides.

        Returns:
            RunConfig: Merged configuration object.
        """
        # ``None`` sentinels replace the former mutable ``[]`` / ``{}`` defaults,
        # which are shared between calls.
        user_overrides = list(hydra_overrides) if hydra_overrides else []
        if dict_overrides is None:
            dict_overrides = {}

        # Hydra keeps global state; a previous initialization must be cleared
        # before initializing again.
        global_hydra = GlobalHydra.instance()
        if global_hydra.is_initialized():
            global_hydra.clear()

        if config_dir:
            # Convert relative path to absolute path if needed
            if not os.path.isabs(config_dir):
                config_dir = os.path.abspath(config_dir)

            hydra.initialize_config_dir(
                config_dir=config_dir,
                version_base=None,
            )
        else:
            hydra.initialize_config_module(
                config_module="nemo_evaluator_launcher.configs",
                version_base=None,
            )

        # Always add the packaged configs to the search path so a user config
        # directory can fall back to them.
        overrides = user_overrides + [
            "hydra.searchpath=[pkg://nemo_evaluator_launcher.configs,pkg://nemo_evaluator_launcher_internal.configs]"
        ]
        cfg = hydra.compose(config_name=config_name, overrides=overrides)

        # Merge dict_overrides if provided
        if dict_overrides:
            cfg = OmegaConf.merge(cfg, dict_overrides)

        logger.debug(
            "Loaded run config from hydra",
            config_name=config_name,
            config_dir=config_dir,
            overrides=user_overrides,
            dict_overrides=dict_overrides,
            result=cfg,
        )
        return cast("RunConfig", cfg)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
"""Configuration validation utilities for nemo-evaluator-launcher.
|
|
17
|
+
|
|
18
|
+
This module provides helper functions to validate configuration objects.
|
|
19
|
+
"""
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
"""Export evaluation results to specified target."""
|
|
17
|
+
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Any, List, Optional
|
|
20
|
+
|
|
21
|
+
from simple_parsing import field
|
|
22
|
+
|
|
23
|
+
from nemo_evaluator_launcher.api.functional import export_results
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ExportCmd:
    """Export evaluation results."""

    # Short usage examples will show up in -h as the class docstring:
    # Examples:
    # nemo-evaluator-launcher export 8abcd123 --dest local --format json -o .
    # nemo-evaluator-launcher export 8abcd123.0 9ef01234 --dest local --format csv -o results/ -fname processed_results.csv
    # nemo-evaluator-launcher export 8abcd123 --dest jet

    invocation_ids: List[str] = field(
        positional=True,
        help="IDs to export (space-separated). Accepts invocation IDs (xxxxxxxx) and job IDs (xxxxxxxx.n); mixture of both allowed.",
    )
    dest: str = field(
        default="local",
        alias=["--dest"],
        choices=["local", "wandb", "mlflow", "gsheets", "jet"],
        help="Export destination.",
    )
    output_dir: Optional[str] = field(
        default=".",
        alias=["--output-dir", "-o"],
        help="Output directory (default: current directory).",
    )
    output_filename: Optional[str] = field(
        default=None,
        alias=["--output-filename", "-fname"],
        help="Summary filename (default: processed_results.json/csv based on --format).",
    )
    format: Optional[str] = field(
        default=None,
        alias=["--format"],
        choices=["json", "csv"],
        help="Summary format for --dest local. Omit to only copy artifacts.",
    )
    copy_logs: bool = field(
        default=False,
        alias=["--copy-logs"],
        help="Include logs when copying locally (default: False).",
    )
    log_metrics: List[str] = field(
        default_factory=list,
        alias=["--log-metrics"],
        help="Filter metrics by name (repeatable). Examples: score, f1, mmlu_score_micro.",
    )
    only_required: bool = field(
        default=True,
        alias=["--only-required"],
        help="Copy only required+optional artifacts (default: True). Set to False to copy all available artifacts.",
    )

    def execute(self) -> None:
        """Run the export and print a human-readable report to stdout.

        Builds the exporter config from the parsed options, calls
        ``export_results`` and prints either the failure reason or a
        per-job (single ID) / per-invocation (several IDs) summary.
        """
        ids = self.invocation_ids
        config = self._build_config()

        if self.format and self.dest != "local":
            print(
                "Note: --format is only used by --dest local. It will be ignored for other destinations."
            )

        noun = "invocations" if len(ids) > 1 else "invocation"
        print(f"Exporting {len(ids)} {noun} to {self.dest}...")

        result = export_results(ids, self.dest, config)

        if not result["success"]:
            print(f"Export failed: {result.get('error', 'Unknown error')}")
            return

        if len(ids) == 1:
            self._report_single(ids[0], result)
        else:
            self._report_multiple(result)

    def _build_config(self) -> dict[str, Any]:
        """Assemble the exporter config; optional keys are added only when truthy."""
        config: dict[str, Any] = {
            "copy_logs": self.copy_logs,
            "only_required": self.only_required,
        }
        optional_items = (
            ("output_dir", self.output_dir),
            ("output_filename", self.output_filename),
            ("format", self.format),
            ("log_metrics", self.log_metrics),
        )
        config.update({key: value for key, value in optional_items if value})
        return config

    @staticmethod
    def _report_single(invocation_id: str, result: dict[str, Any]) -> None:
        """Print the per-job outcome of a successful single-ID export."""
        print(f"Export completed for {invocation_id}")

        for job_id, job_result in result["jobs"].items():
            message = job_result.get("message", "")
            if not job_result.get("success"):
                print(f" {job_id} failed: {message}")
                continue

            print(f" {job_id}: {message}")
            metadata = job_result.get("metadata", {})
            if metadata.get("run_url"):
                print(f" URL: {metadata['run_url']}")
            if metadata.get("summary_path"):
                print(f" Summary: {metadata['summary_path']}")

    @staticmethod
    def _report_multiple(result: dict[str, Any]) -> None:
        """Print the aggregate and per-invocation outcome of a multi-ID export."""
        metadata = result.get("metadata", {})
        succeeded = metadata.get("successful_invocations", 0)
        total = metadata.get("total_invocations", 0)
        print(f"Export completed: {succeeded}/{total} successful")

        # Show summary path if available
        if metadata.get("summary_path"):
            print(f"Summary: {metadata['summary_path']}")

        # Show per-invocation status
        for invocation_id, inv_result in result["invocations"].items():
            if not inv_result.get("success"):
                print(
                    f" {invocation_id}: failed, {inv_result.get('error', 'Unknown error')}"
                )
                continue
            job_count = len(inv_result.get("jobs", {}))
            print(f" {invocation_id}: {job_count} jobs")
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
import datetime as _dt
|
|
17
|
+
import sys
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from simple_parsing import field
|
|
22
|
+
|
|
23
|
+
from nemo_evaluator_launcher.api.functional import (
|
|
24
|
+
get_invocation_benchmarks,
|
|
25
|
+
list_all_invocations_summary,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Cmd:
    """Show a concise invocations table from the exec DB."""

    limit: Optional[int] = field(default=None, alias=["--limit"], help="Max rows")
    executor: Optional[str] = field(
        default=None, alias=["--executor"], help="Filter by executor"
    )
    since: Optional[str] = field(
        default=None,
        alias=["--since"],
        help="Filter by ISO date/time (e.g., 2025-08-20 or 2025-08-20T12:00:00)",
    )

    def execute(self) -> None:
        """Print the invocation summary table, applying the requested filters.

        Rows come from ``list_all_invocations_summary()`` and are filtered by
        executor name (case-insensitive), by ``--since`` and truncated to
        ``--limit`` rows (a negative limit is ignored). An unparsable
        ``--since`` value prints a message to stderr and exits with status 2.
        """
        rows = list_all_invocations_summary()

        # Apply filters
        if self.executor:
            wanted = self.executor.lower()
            rows = [r for r in rows if (r.get("executor") or "").lower() == wanted]

        if self.since:
            # ``fromisoformat`` already maps a bare date to midnight, so no
            # "T00:00:00" suffix is needed. Appending one rejected otherwise
            # valid values such as "2025-08-20 12:00:00".
            # Only parsing is guarded, so errors from the row filter below are
            # not misreported as a bad --since value.
            try:
                since_ts = _dt.datetime.fromisoformat(self.since).timestamp()
            except (ValueError, OverflowError, OSError):
                print(
                    f"Invalid --since value: {self.since}. Use YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS",
                    file=sys.stderr,
                )
                sys.exit(2)
            rows = [r for r in rows if (r.get("earliest_job_ts") or 0) >= since_ts]

        if self.limit is not None and self.limit >= 0:
            rows = rows[: self.limit]

        # Print table (always include benchmarks column)
        header = [
            "invocation_id",
            "earliest_job_ts",
            "num_jobs",
            "executor",
            "benchmarks",
        ]
        table_rows = []
        for r in rows:
            # Format ts as ISO without microseconds; an unusable timestamp
            # leaves the cell empty.
            ts = r.get("earliest_job_ts", 0) or 0
            try:
                ts_iso = (
                    _dt.datetime.fromtimestamp(ts).replace(microsecond=0).isoformat()
                )
            except (TypeError, ValueError, OverflowError, OSError):
                ts_iso = ""

            inv = r.get("invocation_id", "")
            # Best-effort lookup: any failure is shown as "unknown" so one bad
            # invocation does not prevent the table from being printed.
            try:
                bmarks = get_invocation_benchmarks(inv)
                bmarks_cell = ",".join(bmarks) if bmarks else "unknown"
            except Exception:
                bmarks_cell = "unknown"

            table_rows.append(
                [
                    str(inv),
                    ts_iso,
                    str(r.get("num_jobs", 0)),
                    str(r.get("executor", "")),
                    bmarks_cell,
                ]
            )

        # Each column is as wide as its longest cell, header included.
        widths = [max(map(len, column)) for column in zip(header, *table_rows)]
        fmt = " ".join([f"{{:<{w}}}" for w in widths])
        print(fmt.format(*header))
        print(" ".join(["-" * w for w in widths]))
        for tr in table_rows:
            print(fmt.format(*tr))
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
import json
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
from simple_parsing import field
|
|
20
|
+
|
|
21
|
+
from nemo_evaluator_launcher.api.functional import kill_job_or_invocation
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class Cmd:
    """Kill command configuration."""

    id: str = field(
        positional=True,
        metadata={
            "help": "Job ID (e.g., aefc4819.0) or invocation ID (e.g., aefc4819) to kill"
        },
    )

    def execute(self) -> None:
        """Kill the job or invocation named by ``id`` and print the result as indented JSON."""
        outcome = kill_job_or_invocation(self.id)
        rendered = json.dumps(outcome, indent=2)
        print(rendered)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
import datetime as _dt
|
|
17
|
+
import sys
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from simple_parsing import field
|
|
22
|
+
|
|
23
|
+
from nemo_evaluator_launcher.api.functional import (
|
|
24
|
+
get_invocation_benchmarks,
|
|
25
|
+
list_all_invocations_summary,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Cmd:
    """List invocations (runs) from the exec DB as a table."""

    limit: Optional[int] = field(default=None, alias=["--limit"], help="Max rows")
    executor: Optional[str] = field(
        default=None, alias=["--executor"], help="Filter by executor"
    )
    since: Optional[str] = field(
        default=None,
        alias=["--since"],
        help="Filter by ISO date/time (e.g., 2025-08-20 or 2025-08-20T12:00:00)",
    )

    def execute(self) -> None:
        """Print the invocation summary table, applying the requested filters.

        Rows come from ``list_all_invocations_summary()`` and are filtered by
        executor name (case-insensitive), by ``--since`` and truncated to
        ``--limit`` rows (a negative limit is ignored). An unparsable
        ``--since`` value prints a message to stderr and exits with status 2.
        """
        rows = list_all_invocations_summary()

        if self.executor:
            wanted = self.executor.lower()
            rows = [r for r in rows if (r.get("executor") or "").lower() == wanted]

        if self.since:
            # ``fromisoformat`` already maps a bare date to midnight, so no
            # "T00:00:00" suffix is needed. Appending one rejected otherwise
            # valid values such as "2025-08-20 12:00:00".
            # Only parsing is guarded, so errors from the row filter below are
            # not misreported as a bad --since value.
            try:
                since_ts = _dt.datetime.fromisoformat(self.since).timestamp()
            except (ValueError, OverflowError, OSError):
                print(
                    f"Invalid --since value: {self.since}. Use YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS",
                    file=sys.stderr,
                )
                sys.exit(2)
            rows = [r for r in rows if (r.get("earliest_job_ts") or 0) >= since_ts]

        if self.limit is not None and self.limit >= 0:
            rows = rows[: self.limit]

        header = [
            "invocation_id",
            "earliest_job_ts",
            "num_jobs",
            "executor",
            "benchmarks",
        ]
        table_rows = []
        for r in rows:
            # Render the timestamp as ISO without microseconds; an unusable
            # timestamp leaves the cell empty.
            ts = r.get("earliest_job_ts", 0) or 0
            try:
                ts_iso = (
                    _dt.datetime.fromtimestamp(ts).replace(microsecond=0).isoformat()
                )
            except (TypeError, ValueError, OverflowError, OSError):
                ts_iso = ""

            inv = r.get("invocation_id", "")
            # Best-effort lookup: any failure is shown as "unknown" so one bad
            # invocation does not prevent the table from being printed.
            try:
                bmarks = get_invocation_benchmarks(inv)
                bmarks_cell = ",".join(bmarks) if bmarks else "unknown"
            except Exception:
                bmarks_cell = "unknown"

            table_rows.append(
                [
                    str(inv),
                    ts_iso,
                    str(r.get("num_jobs", 0)),
                    str(r.get("executor", "")),
                    bmarks_cell,
                ]
            )

        # Each column is as wide as its longest cell, header included.
        widths = [max(map(len, column)) for column in zip(header, *table_rows)]
        fmt = " ".join([f"{{:<{w}}}" for w in widths])
        print(fmt.format(*header))
        print(" ".join(["-" * w for w in widths]))
        for tr in table_rows:
            print(fmt.format(*tr))
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
import json
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
from nemo_evaluator_launcher.api.functional import get_tasks_list
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class Cmd:
    """List command configuration."""

    def execute(self) -> None:
        """Print the supported tasks as a JSON list of objects.

        Each row returned by ``get_tasks_list()`` is paired positionally with
        the keys ``task``, ``endpoint_type``, ``harness`` and ``container``.

        Raises:
            AssertionError: If a row does not have exactly one value per key.
        """
        # TODO(dfridman): modify `get_tasks_list` to return a list of dicts in the first place
        headers = ["task", "endpoint_type", "harness", "container"]
        supported_benchmarks = []
        for task_data in get_tasks_list():
            # Raised explicitly instead of using an ``assert`` statement so the
            # check still runs under ``python -O``. ``zip`` would otherwise
            # silently truncate a malformed row. The exception type is
            # unchanged.
            if len(task_data) != len(headers):
                raise AssertionError(
                    f"Expected {len(headers)} fields per task, got {len(task_data)}: {task_data!r}"
                )
            supported_benchmarks.append(dict(zip(headers, task_data)))
        print(json.dumps(supported_benchmarks, indent=2))
|