nemo-evaluator-launcher: 0.1.19-py3-none-any.whl → 0.1.56-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- nemo_evaluator_launcher/api/functional.py +159 -5
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/ls_task.py +280 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
- nemo_evaluator_launcher/cli/main.py +29 -2
- nemo_evaluator_launcher/cli/run.py +114 -16
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
- nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
- nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
- nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
- nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
- nemo_evaluator_launcher/common/helpers.py +200 -51
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/mapping.py +341 -155
- nemo_evaluator_launcher/common/printing_utils.py +25 -12
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
- nemo_evaluator_launcher/executors/base.py +31 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
- nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
- nemo_evaluator_launcher/executors/local/executor.py +383 -24
- nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
- nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/utils.py +32 -46
- nemo_evaluator_launcher/package_info.py +1 -1
- nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
- nemo_evaluator_launcher/resources/mapping.toml +64 -315
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
- nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
- nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/common/container_metadata/__init__.py (new file)
@@ -0,0 +1,61 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Container metadata management: registries, intermediate representations, and loading."""

from nemo_evaluator_launcher.common.container_metadata.intermediate_repr import (
    HarnessIntermediateRepresentation,
    TaskIntermediateRepresentation,
    load_harnesses_and_tasks_from_tasks_file,
    load_tasks_from_tasks_file,
)
from nemo_evaluator_launcher.common.container_metadata.registries import (
    DockerRegistryHandler,
    create_authenticator,
)
from nemo_evaluator_launcher.common.container_metadata.utils import (
    parse_container_image,
)

__all__ = [
    "DockerRegistryHandler",
    "create_authenticator",
    "HarnessIntermediateRepresentation",
    "TaskIntermediateRepresentation",
    "load_harnesses_and_tasks_from_tasks_file",
    "load_tasks_from_tasks_file",
    "parse_container_image",
]

# Optional imports:
# `loading` pulls in `nemo_evaluator` (and deps like `pydantic`). Keep IR-only
# workflows (e.g., docs autogen) usable without requiring the full stack.
try:
    from nemo_evaluator_launcher.common.container_metadata.loading import (  # noqa: F401
        extract_framework_yml,
        load_tasks_from_container,
        parse_framework_to_irs,
    )

    __all__.extend(
        [
            "extract_framework_yml",
            "load_tasks_from_container",
            "parse_framework_to_irs",
        ]
    )
except ModuleNotFoundError:
    # Allow importing this package for IR-only workflows (docs autogen, etc.)
    pass
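As a quick illustration of the optional-import behavior this new `__init__.py` sets up: a minimal consumer-side sketch, not part of the diff, relying only on the names exported above. The printed messages are invented for the example.

# Sketch: probing which container_metadata APIs are available at runtime.
# An IR-only install (without nemo_evaluator and its dependencies) still
# imports cleanly; the container-loading helpers are simply absent from
# __all__ because the try/except above swallowed the ModuleNotFoundError.
import nemo_evaluator_launcher.common.container_metadata as cm

print("exports:", sorted(cm.__all__))

if "load_tasks_from_container" in cm.__all__:
    print("full stack available: container loading enabled")
else:
    print("IR-only install: task-file loading and docs autogen still work")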
nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py (new file)
@@ -0,0 +1,530 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Intermediate representations for harnesses and tasks."""

import copy
import hashlib
import importlib
import importlib.resources
import pathlib
import sys
from dataclasses import dataclass
from typing import Any, Optional

import yaml

from nemo_evaluator_launcher.common.logging_utils import logger


def _deep_merge_dict(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Deep merge two dictionaries, with override taking precedence."""
    result = copy.deepcopy(base)
    for key, value in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = _deep_merge_dict(result[key], value)
        else:
            result[key] = copy.deepcopy(value)
    return result


@dataclass
class HarnessIntermediateRepresentation:
    """Intermediate representation of a harness with metadata."""

    name: str
    description: str
    full_name: Optional[str]
    url: Optional[str]
    container: str
    container_digest: Optional[str]
    # Architecture label for the container image:
    # - "amd": linux/amd64 only
    # - "arm": linux/arm64 only
    # - "multiarch": includes both amd64 and arm64
    arch: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "name": self.name,
            "description": self.description,
            "full_name": self.full_name,
            "url": self.url,
            "container": self.container,
            "container_digest": self.container_digest,
            "arch": self.arch,
        }


@dataclass
class TaskIntermediateRepresentation:
    """Intermediate representation of a task with merged defaults and metadata."""

    name: str
    description: str
    harness: str
    container: str
    container_digest: Optional[str]
    defaults: dict[str, Any]
    container_arch: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation."""
        return {
            "name": self.name,
            "description": self.description,
            "harness": self.harness,
            "container": self.container,
            "container_digest": self.container_digest,
            "container_arch": self.container_arch,
            "defaults": self.defaults,
        }


def _calculate_mapping_checksum(mapping_file: pathlib.Path) -> Optional[str]:
    """Calculate SHA256 checksum of mapping.toml file."""
    if not mapping_file.exists():
        return None

    try:
        with open(mapping_file, "rb") as f:
            file_content = f.read()

        checksum = hashlib.sha256(file_content).hexdigest()
        return f"sha256:{checksum}"
    except Exception as e:
        logger.debug(
            "Failed to calculate mapping.toml checksum",
            path=str(mapping_file),
            error=str(e),
        )
        return None


def _validate_checksum(
    stored_checksum: Optional[str],
    mapping_file: pathlib.Path,
    source: str,
) -> bool:
    """Validate checksum consistency between stored value and current mapping.toml.

    Args:
        stored_checksum: Checksum stored in metadata
        mapping_file: Path to mapping.toml file
        source: Source identifier for logging (e.g., "internal", "external")

    Returns:
        True if checksums match, False otherwise
    """
    if not stored_checksum:
        return False

    current_checksum = _calculate_mapping_checksum(mapping_file)
    if not current_checksum:
        return False

    verified = stored_checksum == current_checksum
    if not verified:
        logger.warning(
            f"{source.capitalize()} mapping.toml checksum mismatch detected",
            stored_checksum=stored_checksum,
            current_checksum=current_checksum,
        )
    else:
        logger.info(
            f"{source.capitalize()} mapping.toml checksum matches all_tasks_irs.yaml",
            checksum=stored_checksum,
        )

    return verified


def _parse_tasks_from_yaml_data(
    yaml_data: dict,
    harnesses_by_name: Optional[dict[str, "HarnessIntermediateRepresentation"]] = None,
) -> list[TaskIntermediateRepresentation]:
    """Parse TaskIntermediateRepresentation objects from YAML data.

    Args:
        yaml_data: Parsed YAML data dictionary

    Returns:
        List of TaskIntermediateRepresentation objects
    """
    tasks: list[TaskIntermediateRepresentation] = []
    tasks_data = yaml_data.get("tasks", [])

    for task_dict in tasks_data:
        harness_name = task_dict["harness"]
        harness_ir = harnesses_by_name.get(harness_name) if harnesses_by_name else None

        # New schema: container info is stored at harness level under `harnesses:`.
        # Backwards compat: older files store container/container_digest per task.
        container = task_dict.get("container") or (
            harness_ir.container if harness_ir else ""
        )
        container_digest = task_dict.get("container_digest") or (
            harness_ir.container_digest if harness_ir else None
        )
        container_arch = task_dict.get("container_arch") or (
            harness_ir.arch if harness_ir else None
        )

        task_ir = TaskIntermediateRepresentation(
            name=task_dict["name"],
            description=task_dict.get("description", ""),
            harness=harness_name,
            container=container,
            container_digest=container_digest,
            container_arch=container_arch,
            defaults=task_dict.get("defaults", {}),
        )
        tasks.append(task_ir)

    return tasks


def _parse_harnesses_from_yaml_data(
    yaml_data: dict,
) -> dict[str, HarnessIntermediateRepresentation]:
    """Parse HarnessIntermediateRepresentation objects from YAML data.

    Supports:
    - New schema: top-level `harnesses:` key containing a list of harness dicts.
    - Backwards compat: missing `harnesses:` (returns empty; caller may infer).
    """
    harnesses_data = yaml_data.get("harnesses", [])
    harnesses: dict[str, HarnessIntermediateRepresentation] = {}

    if not harnesses_data:
        return harnesses

    if not isinstance(harnesses_data, list):
        raise ValueError(
            f"Expected `harnesses` to be a list, got {type(harnesses_data).__name__}"
        )

    for harness_dict in harnesses_data:
        if not isinstance(harness_dict, dict):
            raise ValueError(
                f"Expected harness entry to be a dict, got {type(harness_dict).__name__}"
            )

        harness_ir = HarnessIntermediateRepresentation(
            name=harness_dict["name"],
            description=harness_dict.get("description", ""),
            full_name=harness_dict.get("full_name"),
            url=harness_dict.get("url"),
            container=harness_dict.get("container", ""),
            container_digest=harness_dict.get("container_digest"),
            arch=harness_dict.get("arch"),
        )

        # Keep first occurrence if duplicates exist
        if harness_ir.name not in harnesses:
            harnesses[harness_ir.name] = harness_ir

    return harnesses


def _infer_harnesses_from_tasks(
    tasks: list[TaskIntermediateRepresentation],
) -> dict[str, HarnessIntermediateRepresentation]:
    """Backwards compat: infer harness IRs from task IRs.

    Older `all_tasks_irs.yaml` files didn't have `harnesses:` so we build minimal
    harness IRs from task fields.
    """
    harnesses: dict[str, HarnessIntermediateRepresentation] = {}
    for task in tasks:
        if not task.harness:
            continue
        if task.harness in harnesses:
            continue
        harnesses[task.harness] = HarnessIntermediateRepresentation(
            name=task.harness,
            description="",
            full_name=None,
            url=None,
            container=str(task.container) if task.container else "",
            container_digest=str(task.container_digest)
            if task.container_digest
            else None,
            arch=str(task.container_arch) if task.container_arch else None,
        )
    return harnesses


def _find_internal_mapping_file() -> Optional[pathlib.Path]:
    """Find internal mapping.toml file in package paths.

    Returns:
        Path to internal mapping.toml or None if not found
    """
    for pkg_path in sys.path:
        potential_path = (
            pathlib.Path(pkg_path)
            / "nemo_evaluator_launcher_internal"
            / "resources"
            / "mapping.toml"
        )
        if potential_path.exists():
            return potential_path
    return None


def _load_irs_from_package(
    package_name: str, resource_name: str, source: str
) -> tuple[
    dict[str, HarnessIntermediateRepresentation],
    list[TaskIntermediateRepresentation],
    bool,
]:
    """Load task IRs from a package resource.

    Args:
        package_name: Package name (e.g., "nemo_evaluator_launcher_internal.resources")
        resource_name: Resource file name (e.g., "all_tasks_irs.yaml")
        source: Source identifier for logging (e.g., "internal", "external")

    Returns:
        Tuple of (harnesses dict, tasks list, checksum verified bool)
    """
    try:
        content = importlib.resources.read_text(
            package_name,
            resource_name,
            encoding="utf-8",
        )
        yaml_data = yaml.safe_load(content)

        logger.info(
            f"Loaded {source} task IRs from package resources",
            num_tasks=yaml_data.get("metadata", {}).get("num_tasks", 0),
        )

        metadata = yaml_data.get("metadata", {})
        stored_checksum = metadata.get("mapping_toml_checksum")

        # Find mapping file for checksum validation
        if source == "internal":
            mapping_file = _find_internal_mapping_file()
        else:
            mapping_file = (
                pathlib.Path(__file__).parent.parent.parent
                / "resources"
                / "mapping.toml"
            )

        verified = False
        if mapping_file:
            verified = _validate_checksum(stored_checksum, mapping_file, source)

        harnesses = _parse_harnesses_from_yaml_data(yaml_data)
        tasks = _parse_tasks_from_yaml_data(yaml_data, harnesses_by_name=harnesses)
        if not harnesses:
            harnesses = _infer_harnesses_from_tasks(tasks)
        logger.info(f"Loaded {source} tasks from IRs", total_tasks=len(tasks))

        return harnesses, tasks, verified

    except (ImportError, FileNotFoundError) as e:
        logger.debug(f"{source.capitalize()} IRs not available", error=str(e))
        return {}, [], False
    except Exception as e:
        logger.debug(f"Failed to load {source} IRs", error=str(e))
        return {}, [], False


def _load_tasks_from_file(
    tasks_file: pathlib.Path,
) -> tuple[
    dict[str, HarnessIntermediateRepresentation],
    list[TaskIntermediateRepresentation],
    bool,
]:
    """Load tasks from a file path.

    Args:
        tasks_file: Path to all_tasks_irs.yaml file

    Returns:
        Tuple of (harnesses dict, tasks list, checksum verified bool)
    """
    if not tasks_file.exists():
        logger.warning("Tasks file not found", path=str(tasks_file))
        return {}, [], False

    logger.info("Loading tasks from tasks file", path=str(tasks_file))

    try:
        with open(tasks_file, "r", encoding="utf-8") as f:
            content = f.read()

        yaml_data = yaml.safe_load(content)

        metadata = yaml_data.get("metadata", {})
        stored_checksum = metadata.get("mapping_toml_checksum")

        mapping_file = tasks_file.parent / "mapping.toml"
        verified = _validate_checksum(stored_checksum, mapping_file, "file")

        harnesses = _parse_harnesses_from_yaml_data(yaml_data)
        tasks = _parse_tasks_from_yaml_data(yaml_data, harnesses_by_name=harnesses)
        if not harnesses:
            harnesses = _infer_harnesses_from_tasks(tasks)

        logger.info(
            "Loaded tasks from tasks file", total_tasks=len(tasks), path=str(tasks_file)
        )

        return harnesses, tasks, verified

    except yaml.YAMLError as e:
        logger.error(
            "Failed to parse tasks YAML",
            error=str(e),
            path=str(tasks_file),
            exc_info=True,
        )
        return {}, [], False
    except Exception as e:
        logger.error(
            "Error loading tasks from tasks file",
            error=str(e),
            path=str(tasks_file),
            exc_info=True,
        )
        return {}, [], False


def load_harnesses_and_tasks_from_tasks_file(
    tasks_file: Optional[pathlib.Path] = None,
) -> tuple[
    dict[str, HarnessIntermediateRepresentation],
    list[TaskIntermediateRepresentation],
    bool,
]:
    """Load harness and task IRs from all_tasks_irs.yaml file.

    Public API function for loading Intermediate Representations (IRs).

    Uses a merge strategy:
    - Always load external IRs (baseline)
    - If internal package is available, load internal IRs and merge them on top
      of external IRs (internal overrides external on (harness, task) key).

    Validates checksum consistency with current mapping.toml.

    Args:
        tasks_file: Path to all_tasks_irs.yaml file. If None, uses default packaged path.

    Returns:
        Tuple of (harnesses dict, tasks list, mapping_verified: bool)
    """
    # If file path provided, load directly from file
    if tasks_file is not None:
        return _load_tasks_from_file(tasks_file)

    # Load external IRs (baseline)
    logger.debug("Loading external IRs")
    external_harnesses, external_tasks, external_verified = _load_irs_from_package(
        "nemo_evaluator_launcher.resources",
        "all_tasks_irs.yaml",
        "external",
    )

    # Load internal IRs (optional overlay)
    internal_harnesses: dict[str, HarnessIntermediateRepresentation] = {}
    internal_tasks: list[TaskIntermediateRepresentation] = []
    internal_verified = False
    try:
        importlib.import_module("nemo_evaluator_launcher_internal")
        logger.debug("Internal package available, loading internal IRs")
        internal_harnesses, internal_tasks, internal_verified = _load_irs_from_package(
            "nemo_evaluator_launcher_internal.resources",
            "all_tasks_irs.yaml",
            "internal",
        )
    except ImportError:
        logger.debug(
            "Internal package not available, proceeding with external IRs only"
        )
    except Exception as e:
        logger.debug(
            "Failed to load internal IRs, proceeding with external IRs only",
            error=str(e),
        )

    # Merge (internal overrides external)
    def _key(t: TaskIntermediateRepresentation) -> tuple[str, str]:
        return (t.harness, t.name)

    merged_tasks_by_key: dict[tuple[str, str], TaskIntermediateRepresentation] = {
        _key(t): t for t in external_tasks
    }
    merged_tasks_by_key.update({_key(t): t for t in internal_tasks})
    merged_tasks = list(merged_tasks_by_key.values())

    merged_harnesses = dict(external_harnesses)
    merged_harnesses.update(internal_harnesses)

    if merged_tasks:
        mapping_verified = (
            (external_verified and internal_verified)
            if internal_tasks
            else external_verified
        )
        logger.info(
            "Using merged IRs",
            total_tasks=len(merged_tasks),
            internal_tasks=len(internal_tasks),
            external_tasks=len(external_tasks),
            mapping_verified=mapping_verified,
        )
        return merged_harnesses, merged_tasks, mapping_verified

    # Final fallback: try default file path
    logger.debug("No IRs loaded from package resources, trying file path")
    default_tasks_file = (
        pathlib.Path(__file__).parent.parent.parent / "resources" / "all_tasks_irs.yaml"
    )
    return _load_tasks_from_file(default_tasks_file)


def load_tasks_from_tasks_file(
    tasks_file: Optional[pathlib.Path] = None,
) -> tuple[list[TaskIntermediateRepresentation], bool]:
    """Load tasks from all_tasks_irs.yaml file.

    Public API function for loading task Intermediate Representations (IRs).

    Uses a simple swap strategy:
    - If internal package is available, load internal IRs only
    - Otherwise, load external IRs only
    - No merging is performed

    Validates checksum consistency with current mapping.toml.

    Args:
        tasks_file: Path to all_tasks_irs.yaml file. If None, uses default path.

    Returns:
        Tuple of (list of TaskIntermediateRepresentation objects, mapping_verified: bool)
    """
    # Backwards-compatible wrapper: return tasks only.
    _, tasks, verified = load_harnesses_and_tasks_from_tasks_file(tasks_file)
    return tasks, verified
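To illustrate the new public loading API, here is a minimal sketch, not part of the diff. The YAML keys mirror what `_parse_harnesses_from_yaml_data` and `_parse_tasks_from_yaml_data` read above; the `demo` harness, `toy_task` name, and container string are invented for the example.

# Sketch: exercising load_harnesses_and_tasks_from_tasks_file against a
# hand-written new-schema tasks file. Harness/task names here are made up.
import pathlib
import tempfile

from nemo_evaluator_launcher.common.container_metadata import (
    load_harnesses_and_tasks_from_tasks_file,
)

YAML_DOC = """\
metadata:
  num_tasks: 1
harnesses:
  - name: demo
    description: Demo harness
    container: nvcr.io/example/demo
    arch: multiarch
tasks:
  - name: toy_task
    harness: demo
    defaults:
      config:
        params:
          limit_samples: 10
"""

with tempfile.TemporaryDirectory() as tmp:
    tasks_file = pathlib.Path(tmp) / "all_tasks_irs.yaml"
    tasks_file.write_text(YAML_DOC, encoding="utf-8")

    harnesses, tasks, verified = load_harnesses_and_tasks_from_tasks_file(tasks_file)

    # Container info propagates from the harness entry down to the task IR.
    assert tasks[0].container == harnesses["demo"].container
    # No mapping_toml_checksum in metadata and no sibling mapping.toml here,
    # so the checksum cannot verify.
    assert verified is False
    print(tasks[0].to_dict())

One design note: because an explicit `tasks_file` bypasses the external/internal package merge entirely (see the early return in `load_harnesses_and_tasks_from_tasks_file`), a file-based call like the one above reflects only the given YAML, which makes it convenient for testing schema changes in isolation.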