nemo-evaluator-launcher 0.1.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nemo-evaluator-launcher might be problematic. Click here for more details.
- nemo_evaluator_launcher/__init__.py +79 -0
- nemo_evaluator_launcher/api/__init__.py +24 -0
- nemo_evaluator_launcher/api/functional.py +698 -0
- nemo_evaluator_launcher/api/types.py +98 -0
- nemo_evaluator_launcher/api/utils.py +19 -0
- nemo_evaluator_launcher/cli/__init__.py +15 -0
- nemo_evaluator_launcher/cli/export.py +267 -0
- nemo_evaluator_launcher/cli/info.py +512 -0
- nemo_evaluator_launcher/cli/kill.py +41 -0
- nemo_evaluator_launcher/cli/ls_runs.py +134 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +136 -0
- nemo_evaluator_launcher/cli/main.py +226 -0
- nemo_evaluator_launcher/cli/run.py +200 -0
- nemo_evaluator_launcher/cli/status.py +164 -0
- nemo_evaluator_launcher/cli/version.py +55 -0
- nemo_evaluator_launcher/common/__init__.py +16 -0
- nemo_evaluator_launcher/common/execdb.py +283 -0
- nemo_evaluator_launcher/common/helpers.py +366 -0
- nemo_evaluator_launcher/common/logging_utils.py +357 -0
- nemo_evaluator_launcher/common/mapping.py +295 -0
- nemo_evaluator_launcher/common/printing_utils.py +93 -0
- nemo_evaluator_launcher/configs/__init__.py +15 -0
- nemo_evaluator_launcher/configs/default.yaml +28 -0
- nemo_evaluator_launcher/configs/deployment/generic.yaml +33 -0
- nemo_evaluator_launcher/configs/deployment/nim.yaml +32 -0
- nemo_evaluator_launcher/configs/deployment/none.yaml +16 -0
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +38 -0
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +24 -0
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +42 -0
- nemo_evaluator_launcher/configs/execution/lepton/default.yaml +92 -0
- nemo_evaluator_launcher/configs/execution/local.yaml +19 -0
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +34 -0
- nemo_evaluator_launcher/executors/__init__.py +22 -0
- nemo_evaluator_launcher/executors/base.py +120 -0
- nemo_evaluator_launcher/executors/lepton/__init__.py +16 -0
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +609 -0
- nemo_evaluator_launcher/executors/lepton/executor.py +1004 -0
- nemo_evaluator_launcher/executors/lepton/job_helpers.py +398 -0
- nemo_evaluator_launcher/executors/local/__init__.py +15 -0
- nemo_evaluator_launcher/executors/local/executor.py +605 -0
- nemo_evaluator_launcher/executors/local/run.template.sh +103 -0
- nemo_evaluator_launcher/executors/registry.py +38 -0
- nemo_evaluator_launcher/executors/slurm/__init__.py +15 -0
- nemo_evaluator_launcher/executors/slurm/executor.py +1147 -0
- nemo_evaluator_launcher/exporters/__init__.py +36 -0
- nemo_evaluator_launcher/exporters/base.py +121 -0
- nemo_evaluator_launcher/exporters/gsheets.py +409 -0
- nemo_evaluator_launcher/exporters/local.py +502 -0
- nemo_evaluator_launcher/exporters/mlflow.py +619 -0
- nemo_evaluator_launcher/exporters/registry.py +40 -0
- nemo_evaluator_launcher/exporters/utils.py +624 -0
- nemo_evaluator_launcher/exporters/wandb.py +490 -0
- nemo_evaluator_launcher/package_info.py +38 -0
- nemo_evaluator_launcher/resources/mapping.toml +380 -0
- nemo_evaluator_launcher-0.1.28.dist-info/METADATA +494 -0
- nemo_evaluator_launcher-0.1.28.dist-info/RECORD +60 -0
- nemo_evaluator_launcher-0.1.28.dist-info/WHEEL +5 -0
- nemo_evaluator_launcher-0.1.28.dist-info/entry_points.txt +3 -0
- nemo_evaluator_launcher-0.1.28.dist-info/licenses/LICENSE +451 -0
- nemo_evaluator_launcher-0.1.28.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# check if docker exists
|
|
18
|
+
command -v docker >/dev/null 2>&1 || { echo 'docker not found'; exit 1; }
|
|
19
|
+
|
|
20
|
+
# Initialize: remove killed jobs file from previous runs
|
|
21
|
+
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
22
|
+
killed_jobs_file="$script_dir/killed_jobs.txt"
|
|
23
|
+
rm -f "$killed_jobs_file"
|
|
24
|
+
|
|
25
|
+
{% for task in evaluation_tasks %}
|
|
26
|
+
# {{ task.job_id }} {{ task.name }}
|
|
27
|
+
|
|
28
|
+
task_dir="{{ task.output_dir }}"
|
|
29
|
+
artifacts_dir="$task_dir/artifacts"
|
|
30
|
+
logs_dir="$task_dir/logs"
|
|
31
|
+
|
|
32
|
+
mkdir -m 777 -p "$task_dir"
|
|
33
|
+
mkdir -m 777 -p "$artifacts_dir"
|
|
34
|
+
mkdir -m 777 -p "$logs_dir"
|
|
35
|
+
|
|
36
|
+
# Check if this job was killed
|
|
37
|
+
if [ -f "$killed_jobs_file" ] && grep -q "^{{ task.job_id }}$" "$killed_jobs_file"; then
|
|
38
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Job {{ task.job_id }} ({{ task.name }}) was killed, skipping execution" | tee -a "$logs_dir/stdout.log"
|
|
39
|
+
else
|
|
40
|
+
# Create pre-start stage file
|
|
41
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.pre-start"
|
|
42
|
+
|
|
43
|
+
# Debug contents of the eval factory command's config
|
|
44
|
+
{{ task.eval_factory_command_debug_comment | indent(4) }}
|
|
45
|
+
|
|
46
|
+
# Docker run with eval factory command
|
|
47
|
+
(
|
|
48
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)" > "$logs_dir/stage.running"
|
|
49
|
+
docker run --rm --shm-size=100g {{ extra_docker_args }} \
|
|
50
|
+
--name {{ task.container_name }} \
|
|
51
|
+
--volume "$artifacts_dir":/results \
|
|
52
|
+
{% for env_var in task.env_vars -%}
|
|
53
|
+
-e {{ env_var }} \
|
|
54
|
+
{% endfor -%}
|
|
55
|
+
{{ task.eval_image }} \
|
|
56
|
+
bash -c '
|
|
57
|
+
{{ task.eval_factory_command | indent(8) }} ;
|
|
58
|
+
exit_code=$?
|
|
59
|
+
chmod 777 -R /results;
|
|
60
|
+
if [ "$exit_code" -ne 0 ]; then
|
|
61
|
+
echo "The evaluation container failed with exit code $exit_code" >&2;
|
|
62
|
+
exit "$exit_code";
|
|
63
|
+
fi;
|
|
64
|
+
echo "Container completed successfully" >&2;
|
|
65
|
+
exit 0;
|
|
66
|
+
' > "$logs_dir/stdout.log" 2>&1
|
|
67
|
+
exit_code=$?
|
|
68
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) $exit_code" > "$logs_dir/stage.exit"
|
|
69
|
+
) >> "$logs_dir/stdout.log" 2>&1
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
{% if auto_export_destinations %}
|
|
73
|
+
# Monitor job completion and auto-export
|
|
74
|
+
(
|
|
75
|
+
# Give it a moment to ensure file is fully written
|
|
76
|
+
sleep 1
|
|
77
|
+
|
|
78
|
+
exit_code=$(tail -1 "$logs_dir/stage.exit" | cut -d' ' -f2)
|
|
79
|
+
if [ "$exit_code" = "0" ]; then
|
|
80
|
+
# Log auto-export activity to task logs
|
|
81
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Job {{ task.job_id }} completed successfully. Starting auto-export..." >> "$logs_dir/stdout.log"
|
|
82
|
+
|
|
83
|
+
{% for dest in auto_export_destinations %}
|
|
84
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Exporting job {{ task.job_id }} to {{ dest }}..." >> "$logs_dir/stdout.log"
|
|
85
|
+
nemo-evaluator-launcher export {{ task.job_id }} --dest {{ dest }} >> "$logs_dir/stdout.log" 2>&1
|
|
86
|
+
if [ $? -eq 0 ]; then
|
|
87
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Export to {{ dest }} completed successfully" >> "$logs_dir/stdout.log"
|
|
88
|
+
else
|
|
89
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Export to {{ dest }} failed" >> "$logs_dir/stdout.log"
|
|
90
|
+
fi
|
|
91
|
+
{% endfor %}
|
|
92
|
+
|
|
93
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Auto-export completed for job {{ task.job_id }}" >> "$logs_dir/stdout.log"
|
|
94
|
+
else
|
|
95
|
+
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) Job {{ task.job_id }} failed with exit code $exit_code. Skipping auto-export." >> "$logs_dir/stdout.log"
|
|
96
|
+
fi
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
{% endif %}
|
|
100
|
+
fi
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
{% endfor %}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|
|
16
|
+
from typing import Callable, Type
|
|
17
|
+
|
|
18
|
+
from nemo_evaluator_launcher.executors.base import BaseExecutor
|
|
19
|
+
|
|
20
|
+
_EXECUTOR_REGISTRY: dict[str, Type[BaseExecutor]] = {}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def register_executor(
    executor_name: str,
) -> Callable[[Type[BaseExecutor]], Type[BaseExecutor]]:
    """Build a class decorator that records an executor under ``executor_name``.

    Args:
        executor_name: Key under which the decorated class is stored in the
            module-level ``_EXECUTOR_REGISTRY``.

    Returns:
        A decorator that stores the class it receives in the registry and
        hands the same class back unchanged. An existing entry with the same
        name is overwritten.
    """

    def _decorate(executor_cls: Type[BaseExecutor]) -> Type[BaseExecutor]:
        # Plain item assignment: the most recent registration for a name wins.
        _EXECUTOR_REGISTRY[executor_name] = executor_cls
        return executor_cls

    return _decorate
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_executor(executor_name: str) -> Type[BaseExecutor]:
    """Return the executor class registered under ``executor_name``.

    Args:
        executor_name: Name to look up in ``_EXECUTOR_REGISTRY``.

    Returns:
        The class stored in the registry for that name.

    Raises:
        ValueError: If no executor is registered under ``executor_name``; the
            message lists the names that are currently registered.
    """
    if executor_name in _EXECUTOR_REGISTRY:
        return _EXECUTOR_REGISTRY[executor_name]

    # Unknown name: report the registered names to help the caller.
    available = list(_EXECUTOR_REGISTRY.keys())
    raise ValueError(
        f"Executor {executor_name} not found. Available executors: {available}"
    )
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
#
|