nemo-evaluator-launcher 0.1.19__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nemo_evaluator_launcher/api/functional.py +159 -5
- nemo_evaluator_launcher/cli/logs.py +102 -0
- nemo_evaluator_launcher/cli/ls_task.py +280 -0
- nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
- nemo_evaluator_launcher/cli/main.py +29 -2
- nemo_evaluator_launcher/cli/run.py +114 -16
- nemo_evaluator_launcher/cli/version.py +26 -23
- nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
- nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
- nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
- nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
- nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
- nemo_evaluator_launcher/common/helpers.py +200 -51
- nemo_evaluator_launcher/common/logging_utils.py +16 -5
- nemo_evaluator_launcher/common/mapping.py +341 -155
- nemo_evaluator_launcher/common/printing_utils.py +25 -12
- nemo_evaluator_launcher/configs/deployment/sglang.yaml +4 -2
- nemo_evaluator_launcher/configs/deployment/trtllm.yaml +2 -3
- nemo_evaluator_launcher/configs/deployment/vllm.yaml +0 -1
- nemo_evaluator_launcher/configs/execution/slurm/default.yaml +14 -0
- nemo_evaluator_launcher/executors/base.py +31 -1
- nemo_evaluator_launcher/executors/lepton/deployment_helpers.py +36 -1
- nemo_evaluator_launcher/executors/lepton/executor.py +107 -9
- nemo_evaluator_launcher/executors/local/executor.py +383 -24
- nemo_evaluator_launcher/executors/local/run.template.sh +54 -2
- nemo_evaluator_launcher/executors/slurm/executor.py +559 -64
- nemo_evaluator_launcher/executors/slurm/proxy.cfg.template +26 -0
- nemo_evaluator_launcher/exporters/utils.py +32 -46
- nemo_evaluator_launcher/package_info.py +1 -1
- nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
- nemo_evaluator_launcher/resources/mapping.toml +64 -315
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +4 -3
- nemo_evaluator_launcher-0.1.56.dist-info/RECORD +69 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +1 -0
- nemo_evaluator_launcher-0.1.19.dist-info/RECORD +0 -60
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
- {nemo_evaluator_launcher-0.1.19.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
nemo_evaluator_launcher/executors/slurm/proxy.cfg.template
@@ -0,0 +1,26 @@
+global
+    log stdout format raw local0
+    maxconn 4096
+
+defaults
+    log global
+    mode http
+    option httplog
+    timeout connect 10s
+    timeout client 100000s
+    timeout server 100000s
+
+frontend service_frontend
+    bind *:{{ haproxy_port }}
+    default_backend service_backend
+
+backend service_backend
+    mode http
+    option httpchk GET {{ health_check_path }}
+    http-check expect status {{ health_check_status }}
+    option http-server-close
+    balance leastconn
+{% for node in nodes %}
+    server node{{ loop.index }} {{ node.ip }}:{{ node.port }} check
+{% endfor %}
+
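The `{{ ... }}` placeholders and the `{% for %}` loop indicate this new template is rendered into an HAProxy config, presumably so the Slurm executor can front several model-serving nodes behind a single endpoint. A minimal rendering sketch, assuming Jinja2 (the syntax is Jinja2-compatible) and illustrative values for `haproxy_port`, `health_check_path`, `health_check_status`, and `nodes` (the real values come from the launcher's job configuration, not shown here):

```python
from pathlib import Path

from jinja2 import Template  # assumed renderer; the {{ }} / {% %} syntax is Jinja2-compatible

# Illustrative values only -- the launcher supplies the real ones at job-preparation time.
context = {
    "haproxy_port": 5000,
    "health_check_path": "/v1/health",
    "health_check_status": 200,
    "nodes": [
        {"ip": "10.0.0.1", "port": 8000},
        {"ip": "10.0.0.2", "port": 8000},
    ],
}

rendered = Template(Path("proxy.cfg.template").read_text()).render(**context)
print(rendered)  # emits one "server nodeN <ip>:<port> check" line per node
```

With `balance leastconn` and the `httpchk` health check, HAProxy routes each request to the healthy backend node with the fewest active connections.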
nemo_evaluator_launcher/exporters/utils.py
@@ -471,15 +471,12 @@ def _extract_metrics_from_results(results: dict) -> Dict[str, float]:
         section_data = results.get(section)
         if isinstance(section_data, dict):
             for task_name, task_data in section_data.items():
-
-
-
-
-
-
-                    source=task_metrics,
-                    context=f" while extracting results for task '{task_name}'",
-                )
+                task_metrics = _extract_task_metrics(task_name, task_data)
+                _safe_update_metrics(
+                    target=metrics,
+                    source=task_metrics,
+                    context=f" while extracting results for task '{task_name}'",
+                )
     return metrics
 
 
@@ -518,54 +515,43 @@ def _extract_from_json_files(artifacts_dir: Path) -> Dict[str, float]:
     return metrics
 
 
-def _extract_task_metrics(task_name: str,
+def _extract_task_metrics(task_name: str, task_data: dict) -> Dict[str, float]:
     """Extract metrics from a task's metrics data."""
     extracted = {}
-    score_patterns = [
-        "acc",
-        "accuracy",
-        "score",
-        "exact_match",
-        "f1",
-        "em",
-        "pass@1",
-        "pass@k",
-    ]
 
-
-
-
-
+    metrics_data = task_data.get("metrics", {})
+    if "groups" in task_data:
+        for group_name, group_data in task_data["groups"].items():
+            group_extracted = _extract_task_metrics(
+                f"{task_name}_{group_name}", group_data
+            )
+            _safe_update_metrics(
+                target=extracted,
+                source=group_extracted,
+                context=f" in task '{task_name}'",
+            )
 
+    for metric_name, metric_data in metrics_data.items():
         try:
-
-            if
-
-
-                if isinstance(score_data, dict) and "value" in score_data:
-                    key = f"{task_name}_{metric_name}_{score_type}"
-                    _safe_set_metric(
-                        container=extracted,
-                        key=key,
-                        new_value=score_data["value"],
-                        context=f" in task '{task_name}'",
-                    )
-                elif "value" in metric_data:
+            for score_type, score_data in metric_data["scores"].items():
+                if score_type != metric_name:
+                    key = f"{task_name}_{metric_name}_{score_type}"
+                else:
                     key = f"{task_name}_{metric_name}"
-                    _safe_set_metric(
-                        container=extracted,
-                        key=key,
-                        new_value=metric_data["value"],
-                        context=f" in task '{task_name}'",
-                    )
-                elif isinstance(metric_data, (int, float)):
-                    key = f"{task_name}_{metric_name}"
                 _safe_set_metric(
                     container=extracted,
                     key=key,
-                    new_value=
+                    new_value=score_data["value"],
                     context=f" in task '{task_name}'",
                 )
+                for stat_name, stat_value in metric_data.get("stats", {}).items():
+                    stats_key = f"{key}_{stat_name}"
+                    _safe_set_metric(
+                        container=extracted,
+                        key=stats_key,
+                        new_value=stat_value,
+                        context=f" in task '{task_name}'",
+                    )
         except (ValueError, TypeError) as e:
             logger.warning(
                 f"Failed to extract metric {metric_name} for task {task_name}: {e}"
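Taken together, the two hunks above change the exporter's metric flattening from matching a fixed list of score names (the removed `score_patterns`) to walking an explicit `groups`/`metrics`/`scores`/`stats` structure. The sketch below is a hypothetical `task_data` payload shaped the way the rewritten `_extract_task_metrics` appears to expect; the task name, metric names, and values are illustrative, inferred from the diff rather than from package documentation:

```python
# Hypothetical per-task result entry (illustrative only), matching the structure
# the rewritten _extract_task_metrics appears to walk.
task_data = {
    "groups": {
        "stem": {
            # Nested groups are flattened recursively with the group name
            # appended to the task name.
            "metrics": {"acc": {"scores": {"acc": {"value": 0.71}}}},
        },
    },
    "metrics": {
        "acc": {
            "scores": {"acc": {"value": 0.68}},
            "stats": {"stderr": 0.01},
        },
        "f1": {
            "scores": {"macro": {"value": 0.64}},
        },
    },
}

# Flattened keys for a task named "mmlu", under that reading of the diff:
#   mmlu_stem_acc    -> 0.71   (group result, task name prefixed with the group)
#   mmlu_acc         -> 0.68   (score_type == metric_name, so no suffix)
#   mmlu_acc_stderr  -> 0.01   (stats appended to the metric key)
#   mmlu_f1_macro    -> 0.64   (score_type != metric_name, so it is suffixed)
```

Under that reading, each score becomes one flat `<task>_<metric>[_<score_type>]` entry and any accompanying statistics reuse that key as a prefix, which is what downstream exporters would consume.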