nemo-evaluator-launcher 0.1.41__py3-none-any.whl → 0.1.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. nemo_evaluator_launcher/api/functional.py +55 -5
  2. nemo_evaluator_launcher/cli/ls_task.py +280 -0
  3. nemo_evaluator_launcher/cli/ls_tasks.py +208 -55
  4. nemo_evaluator_launcher/cli/main.py +17 -2
  5. nemo_evaluator_launcher/cli/run.py +41 -1
  6. nemo_evaluator_launcher/common/container_metadata/__init__.py +61 -0
  7. nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py +530 -0
  8. nemo_evaluator_launcher/common/container_metadata/loading.py +1126 -0
  9. nemo_evaluator_launcher/common/container_metadata/registries.py +824 -0
  10. nemo_evaluator_launcher/common/container_metadata/utils.py +63 -0
  11. nemo_evaluator_launcher/common/helpers.py +44 -28
  12. nemo_evaluator_launcher/common/mapping.py +341 -155
  13. nemo_evaluator_launcher/common/printing_utils.py +18 -12
  14. nemo_evaluator_launcher/executors/lepton/executor.py +26 -8
  15. nemo_evaluator_launcher/executors/local/executor.py +6 -2
  16. nemo_evaluator_launcher/executors/slurm/executor.py +141 -9
  17. nemo_evaluator_launcher/package_info.py +1 -1
  18. nemo_evaluator_launcher/resources/all_tasks_irs.yaml +17016 -0
  19. nemo_evaluator_launcher/resources/mapping.toml +62 -354
  20. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/METADATA +2 -1
  21. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/RECORD +25 -18
  22. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/WHEEL +0 -0
  23. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/entry_points.txt +0 -0
  24. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/licenses/LICENSE +0 -0
  25. {nemo_evaluator_launcher-0.1.41.dist-info → nemo_evaluator_launcher-0.1.56.dist-info}/top_level.txt +0 -0
@@ -1,385 +1,93 @@
1
- # NOTE(agronskiy): checked parity
2
- [lm-evaluation-harness]
3
- container = "nvcr.io/nvidia/eval-factory/lm-evaluation-harness:25.10"
4
-
5
- [lm-evaluation-harness.tasks.chat.ifeval]
6
- required_env_vars = []
7
-
8
- [lm-evaluation-harness.tasks.chat.mmlu_prox]
9
- required_env_vars = []
10
-
11
- [lm-evaluation-harness.tasks.completions.mmlu]
12
- required_env_vars = []
13
-
14
- [lm-evaluation-harness.tasks.completions.mmlu_pro]
15
-
16
- [lm-evaluation-harness.tasks.completions.global_mmlu]
17
- [lm-evaluation-harness.tasks.completions.global_mmlu_ar]
18
- [lm-evaluation-harness.tasks.completions.global_mmlu_bn]
19
- [lm-evaluation-harness.tasks.completions.global_mmlu_de]
20
- [lm-evaluation-harness.tasks.completions.global_mmlu_en]
21
- [lm-evaluation-harness.tasks.completions.global_mmlu_es]
22
- [lm-evaluation-harness.tasks.completions.global_mmlu_fr]
23
- [lm-evaluation-harness.tasks.completions.global_mmlu_hi]
24
- [lm-evaluation-harness.tasks.completions.global_mmlu_id]
25
- [lm-evaluation-harness.tasks.completions.global_mmlu_it]
26
- [lm-evaluation-harness.tasks.completions.global_mmlu_ja]
27
- [lm-evaluation-harness.tasks.completions.global_mmlu_ko]
28
- [lm-evaluation-harness.tasks.completions.global_mmlu_pt]
29
- [lm-evaluation-harness.tasks.completions.global_mmlu_sw]
30
- [lm-evaluation-harness.tasks.completions.global_mmlu_yo]
31
- [lm-evaluation-harness.tasks.completions.global_mmlu_zh]
32
-
33
- [lm-evaluation-harness.tasks.completions.global_mmlu_full]
34
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_am]
35
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ar]
36
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_bn]
37
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_cs]
38
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_de]
39
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_el]
40
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_en]
41
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_es]
42
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_fa]
43
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_fil]
44
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_fr]
45
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ha]
46
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_he]
47
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_hi]
48
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_id]
49
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ig]
50
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_it]
51
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ja]
52
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ko]
53
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ky]
54
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_lt]
55
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_mg]
56
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ms]
57
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ne]
58
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_nl]
59
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ny]
60
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_pl]
61
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_pt]
62
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ro]
63
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_ru]
64
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_si]
65
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_sn]
66
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_so]
67
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_sr]
68
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_sv]
69
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_sw]
70
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_te]
71
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_tr]
72
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_uk]
73
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_vi]
74
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_yo]
75
- [lm-evaluation-harness.tasks.completions.global_mmlu_full_zh]
76
- [lm-evaluation-harness.tasks.completions.mmlu_logits]
77
-
78
- [lm-evaluation-harness.tasks.chat.mmlu_instruct]
79
-
80
- [lm-evaluation-harness.tasks.chat.mmlu_redux_instruct]
81
-
82
- [lm-evaluation-harness.tasks.chat.mmlu_cot_0_shot_chat]
83
-
84
- [lm-evaluation-harness.tasks.completions.gsm8k]
85
- required_env_vars = []
86
-
87
- [lm-evaluation-harness.tasks.chat.gsm8k_cot_instruct]
88
- required_env_vars = []
89
-
90
- [lm-evaluation-harness.tasks.chat.gsm8k_cot_llama]
91
- required_env_vars = []
92
-
93
- [lm-evaluation-harness.tasks.chat.mgsm_cot]
94
-
95
- [lm-evaluation-harness.tasks.chat.gpqa_diamond_cot]
96
-
97
- [lm-evaluation-harness.tasks.completions.winogrande]
98
-
99
- [lm-evaluation-harness.tasks.completions.hellaswag]
100
- [lm-evaluation-harness.tasks.completions.hellaswag_multilingual]
101
-
102
- [lm-evaluation-harness.tasks.completions.commonsense_qa]
103
-
104
- [lm-evaluation-harness.tasks.completions.openbookqa]
105
-
106
- [lm-evaluation-harness.tasks.completions.piqa]
107
-
108
- [lm-evaluation-harness.tasks.completions.adlr_race]
109
-
110
- [lm-evaluation-harness.tasks.completions.social_iqa]
111
-
112
- [lm-evaluation-harness.tasks.completions.adlr_truthfulqa_mc2]
113
- [lm-evaluation-harness.tasks.completions.adlr_minerva_math_nemo]
114
- [lm-evaluation-harness.tasks.completions.adlr_arc_challenge_llama]
115
- [lm-evaluation-harness.tasks.completions.adlr_mmlu_pro_5_shot_base]
116
- [lm-evaluation-harness.tasks.completions.adlr_mbpp_sanitized_3shot_greedy]
117
- [lm-evaluation-harness.tasks.completions.adlr_mbppplus_greedy_sanitized]
118
- [lm-evaluation-harness.tasks.completions.adlr_humaneval_greedy]
119
- [lm-evaluation-harness.tasks.completions.adlr_humanevalplus_greedy]
120
- [lm-evaluation-harness.tasks.chat.adlr_gsm8k_fewshot_cot]
121
- required_env_vars = []
122
-
123
- [lm-evaluation-harness.tasks.completions.arc_multilingual]
1
+ # Workflow diagram:
2
+ # mapping.toml
3
+ # (container = "....")
4
+ # (# container-digest:sha256:....) <---- CI: check digests are relevant
5
+ #
6
+ # |
7
+ # |
8
+ # v
9
+ # scripts/container_metadata_controller.py
10
+ # (updates the toml
11
+ # AND
12
+ # creates the resources/all_tasks_irs,
13
+ # records TOML checksum) <---- pre-commit guard: checks TOML checksum
14
+ # | \
15
+ # | \
16
+ # | -------------------> make docs-build
17
+ # | (builds docs on the fly)
18
+ # v
19
+ # scripts/container_metadata_controller.py
20
+ # (updates README, records checksum) <----- pre-commit guard: checks TOML checksum
21
+ #
124
22
 
23
+ [lm-evaluation-harness]
24
+ container = "nvcr.io/nvidia/eval-factory/lm-evaluation-harness:25.11"
25
+ # container-digest:sha256:4405ea93535f0d5067313b4f78fabead639be31fa3b9b700d72150872e2b78c9
125
26
 
126
- ###############################################################################
127
- # NOTE(agronskiy): checked parity
128
27
  [mtbench]
129
- container = "nvcr.io/nvidia/eval-factory/mtbench:25.10"
28
+ container = "nvcr.io/nvidia/eval-factory/mtbench:25.11"
29
+ # container-digest:sha256:285561e1d52d61876ab9b4620f9ed033460bf44458ca692df4467bf2c325d236
130
30
 
131
- [mtbench.tasks.chat.mtbench]
132
-
133
- [mtbench.tasks.chat.mtbench-cor1]
134
-
135
-
136
- ###############################################################################
137
- # NOTE(agronskiy): checked parity
138
31
  [ifbench]
139
- container = "nvcr.io/nvidia/eval-factory/ifbench:25.10"
140
-
141
- [ifbench.tasks.chat.ifbench]
142
- required_env_vars = []
32
+ container = "nvcr.io/nvidia/eval-factory/ifbench:25.11"
33
+ # container-digest:sha256:6263f213cce6cbc7468260b53f308ee77a9aa8e39652d51903a96668ea4badf8
143
34
 
144
-
145
- ###############################################################################
146
35
  [simple_evals]
147
- container = "nvcr.io/nvidia/eval-factory/simple-evals:25.10"
148
-
149
- [simple_evals.tasks.chat.gpqa_diamond]
150
- required_env_vars = ["HF_TOKEN"]
151
-
152
- [simple_evals.tasks.chat.gpqa_diamond_aa_v2]
153
- required_env_vars = ["HF_TOKEN"]
154
-
155
- [simple_evals.tasks.chat.gpqa_diamond_aa_v2_llama_4]
156
- required_env_vars = ["HF_TOKEN"]
157
-
158
- [simple_evals.tasks.chat.gpqa_diamond_nemo]
159
- required_env_vars = ["HF_TOKEN"]
160
-
161
- [simple_evals.tasks.chat.AA_math_test_500]
162
- required_env_vars = ["JUDGE_API_KEY"]
163
-
164
- [simple_evals.tasks.chat.math_test_500_nemo]
165
- required_env_vars = []
166
-
167
- [simple_evals.tasks.chat.aime_2024_nemo]
168
- required_env_vars = []
169
-
170
- [simple_evals.tasks.chat.AA_AIME_2024]
171
- required_env_vars = ["JUDGE_API_KEY"]
36
+ container = "nvcr.io/nvidia/eval-factory/simple-evals:25.11"
37
+ # container-digest:sha256:6e0ffeaa93376852112fedc777800676fb4ae69260563e99b5a85dff7846d6f3
172
38
 
173
- [simple_evals.tasks.chat.aime_2025_nemo]
174
- required_env_vars = []
175
-
176
- [simple_evals.tasks.chat.AIME_2025]
177
- required_env_vars = ["JUDGE_API_KEY"]
178
-
179
- [simple_evals.tasks.chat.humaneval]
180
- required_env_vars = []
181
-
182
- [simple_evals.tasks.chat.mgsm]
183
- required_env_vars = []
184
-
185
- [simple_evals.tasks.chat.mmlu_pro]
186
- required_env_vars = []
187
-
188
- [simple_evals.tasks.chat.mmlu]
189
- required_env_vars = []
190
-
191
- [simple_evals.tasks.chat.mmlu_llama_4]
192
- required_env_vars = []
193
-
194
- [simple_evals.tasks.chat.mmlu_pro_llama_4]
195
- required_env_vars = []
196
-
197
- [simple_evals.tasks.chat.mmlu_ar-lite]
198
- [simple_evals.tasks.chat.mmlu_bn-lite]
199
- [simple_evals.tasks.chat.mmlu_de-lite]
200
- [simple_evals.tasks.chat.mmlu_en-lite]
201
- [simple_evals.tasks.chat.mmlu_es-lite]
202
- [simple_evals.tasks.chat.mmlu_fr-lite]
203
- [simple_evals.tasks.chat.mmlu_hi-lite]
204
- [simple_evals.tasks.chat.mmlu_id-lite]
205
- [simple_evals.tasks.chat.mmlu_it-lite]
206
- [simple_evals.tasks.chat.mmlu_ja-lite]
207
- [simple_evals.tasks.chat.mmlu_ko-lite]
208
- [simple_evals.tasks.chat.mmlu_my-lite]
209
- [simple_evals.tasks.chat.mmlu_pt-lite]
210
- [simple_evals.tasks.chat.mmlu_sw-lite]
211
- [simple_evals.tasks.chat.mmlu_yo-lite]
212
- [simple_evals.tasks.chat.mmlu_zh-lite]
213
-
214
-
215
- ###############################################################################
216
- # NOTE(agronskiy): checked parity
217
39
  [bigcode-evaluation-harness]
218
- container = "nvcr.io/nvidia/eval-factory/bigcode-evaluation-harness:25.10"
219
-
220
- [bigcode-evaluation-harness.tasks.chat.mbpp]
221
- required_env_vars = []
222
-
223
- [bigcode-evaluation-harness.tasks.chat.mbppplus]
40
+ container = "nvcr.io/nvidia/eval-factory/bigcode-evaluation-harness:25.11"
41
+ # container-digest:sha256:8263687c042abd46fb23dab8c3aa831bd048497cdb4bde196e1e9ecf9d06cf0b
224
42
 
225
- [bigcode-evaluation-harness.tasks.chat.mbppplus_nemo]
226
- required_env_vars = []
227
-
228
- [bigcode-evaluation-harness.tasks.completions.humaneval]
229
- required_env_vars = []
230
-
231
- [bigcode-evaluation-harness.tasks.chat.humaneval_instruct]
232
-
233
-
234
- ###############################################################################
235
43
  [livecodebench]
236
- container = "nvcr.io/nvidia/eval-factory/livecodebench:25.10"
237
-
238
- [livecodebench.tasks.chat.livecodebench_0724_0125]
239
- required_env_vars = []
240
-
241
- [livecodebench.tasks.chat.livecodebench_0824_0225]
242
- required_env_vars = []
243
-
44
+ container = "nvcr.io/nvidia/eval-factory/livecodebench:25.11"
45
+ # container-digest:sha256:0ce84b69b79fb2815dfe2d8487efbc098a9e131662cad0fd3d61040d0354d571
244
46
 
245
- ###############################################################################
246
47
  [scicode]
247
- container = "nvcr.io/nvidia/eval-factory/scicode:25.10"
48
+ container = "nvcr.io/nvidia/eval-factory/scicode:25.11"
49
+ # container-digest:sha256:b04e8250f28479b2ea0f2c2b19187e1fad8bca0ad326b1b09dc569289152411d
248
50
 
249
- [scicode.tasks.chat.aa_scicode]
250
- required_env_vars = []
251
-
252
-
253
- ###############################################################################
254
51
  [hle]
255
- container = "nvcr.io/nvidia/eval-factory/hle:25.10"
256
-
257
- [hle.tasks.chat.hle]
258
- required_env_vars = ["HF_TOKEN", "OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
52
+ container = "nvcr.io/nvidia/eval-factory/hle:25.11"
53
+ # container-digest:sha256:114e878ede1fd77d3e67bb046ae62a10e498cdc4a297f375d031880d220d63c4
259
54
 
260
-
261
- ###############################################################################
262
55
  [bfcl]
263
- container = "nvcr.io/nvidia/eval-factory/bfcl:25.10"
264
-
265
- [bfcl.tasks.chat.bfclv2_ast_prompting]
266
- required_env_vars = []
267
-
268
- [bfcl.tasks.chat.bfclv3_ast_prompting]
269
- required_env_vars = []
56
+ container = "nvcr.io/nvidia/eval-factory/bfcl:25.11"
57
+ # container-digest:sha256:3926e91f1b2a0a1a639d44c6a93c05c6808ad121b79c1cd1131812e8984c44f2
270
58
 
271
-
272
- ###############################################################################
273
59
  [profbench]
274
- container = "nvcr.io/nvidia/eval-factory/profbench:25.10"
275
-
276
- [profbench.tasks.chat.llm_judge]
277
- required_env_vars = []
278
-
279
- [profbench.tasks.chat.report_generation]
280
- required_env_vars = []
60
+ container = "nvcr.io/nvidia/eval-factory/profbench:25.11"
61
+ # container-digest:sha256:ce5455f06538b3408dbc7513e7c977a095521e35611020388252dee9527d83d5
281
62
 
282
-
283
- ###############################################################################
284
63
  [vlmevalkit]
285
- container = "nvcr.io/nvidia/eval-factory/vlmevalkit:25.10"
286
-
287
- [vlmevalkit.tasks.vlm.ocrbench]
288
- required_env_vars = []
289
-
290
- [vlmevalkit.tasks.vlm.slidevqa]
291
- required_env_vars = ["OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
64
+ container = "nvcr.io/nvidia/eval-factory/vlmevalkit:25.11"
65
+ # container-digest:sha256:dee182f7052e17fe90ef1303c4c9ab6f0f67d2a9e159400555c74a9cc2bf8dfb
292
66
 
293
- [vlmevalkit.tasks.vlm.chartqa]
294
- required_env_vars = []
295
-
296
- [vlmevalkit.tasks.vlm.ai2d_judge]
297
- required_env_vars = ["OPENAI_CLIENT_ID", "OPENAI_CLIENT_SECRET"]
298
-
299
-
300
- ###############################################################################
301
67
  [garak]
302
- container = "nvcr.io/nvidia/eval-factory/garak:25.10"
68
+ container = "nvcr.io/nvidia/eval-factory/garak:25.11"
69
+ # container-digest:sha256:a8d69f5582303921d0310a31ce8ed95d317a8d7750da32c35867f903315f0653
303
70
 
304
- [garak.tasks.chat.garak]
305
- required_env_vars = []
306
-
307
- ###############################################################################
308
- # NOTE(wprazuch): to verify if the tasks need any env var setting
309
71
  [nemo_skills]
310
- container = "nvcr.io/nvidia/eval-factory/nemo_skills:25.10"
311
-
312
- [nemo_skills.tasks.chat.ns_aime2024]
313
- required_env_vars = ["JUDGE_API_KEY"]
314
-
315
- [nemo_skills.tasks.chat.ns_aime2025]
316
- required_env_vars = []
72
+ container = "nvcr.io/nvidia/eval-factory/nemo_skills:25.11"
73
+ # container-digest:sha256:36de6ac02d56ccda90ca8fd2bbf7c2dfd8a3318377273fdd5c9089d4a31fcec1
317
74
 
318
- [nemo_skills.tasks.chat.ns_bfcl_v3]
319
- required_env_vars = []
75
+ [safety_eval]
76
+ container = "nvcr.io/nvidia/eval-factory/safety-harness:25.11"
77
+ # container-digest:sha256:08eeb3f5c3282522ca30da7d3ddc2cab1a48909be05ba561a0dae9a299c637f0
320
78
 
321
- [nemo_skills.tasks.chat.ns_gpqa]
322
- required_env_vars = ["HF_TOKEN"]
323
-
324
- [nemo_skills.tasks.chat.ns_hle]
325
- required_env_vars = []
326
-
327
- [nemo_skills.tasks.chat.ns_mmlu]
328
- required_env_vars = ["HF_TOKEN"]
329
-
330
- [nemo_skills.tasks.chat.ns_mmlu_pro]
331
- required_env_vars = ["HF_TOKEN"]
332
-
333
- [nemo_skills.tasks.chat.ns_aa_lcr]
334
- required_env_vars = ["JUDGE_API_KEY"]
335
-
336
- ###############################################################################
337
- [safety-harness]
338
- container = "nvcr.io/nvidia/eval-factory/safety-harness:25.10"
339
-
340
- [safety-harness.tasks.chat.aegis_v2]
341
- required_env_vars = ["HF_TOKEN"]
342
-
343
-
344
- ###############################################################################
345
- # NOTE(agronskiy): checked parity
346
79
  [helm]
347
- container = "nvcr.io/nvidia/eval-factory/helm:25.10"
348
-
349
- [helm.tasks.chat.medcalc_bench]
350
-
351
- [helm.tasks.chat.medec]
352
-
353
- [helm.tasks.chat.head_qa]
354
-
355
- [helm.tasks.chat.medbullets]
356
-
357
- [helm.tasks.chat.pubmed_qa]
80
+ container = "nvcr.io/nvidia/eval-factory/helm:25.11"
81
+ # container-digest:sha256:8147c9eaf4ef717b2de6390c264cb217e87206658fec4a1c730adb232b7a735c
358
82
 
359
- [helm.tasks.chat.ehr_sql]
360
-
361
- [helm.tasks.chat.race_based_med]
362
-
363
- [helm.tasks.chat.medhallu]
364
-
365
- [helm.tasks.chat.mtsamples_replicate]
366
-
367
- [helm.tasks.chat.aci_bench]
368
-
369
- [helm.tasks.chat.mtsamples_procedures]
370
-
371
- [helm.tasks.chat.medication_qa]
372
-
373
- [helm.tasks.chat.med_dialog_healthcaremagic]
374
-
375
- [helm.tasks.chat.med_dialog_icliniq]
376
-
377
- [helm.tasks.chat.medi_qa]
378
-
379
-
380
- ###############################################################################
381
- # NOTE(agronskiy): checked parity
382
83
  [tooltalk]
383
- container = "nvcr.io/nvidia/eval-factory/tooltalk:25.10"
84
+ container = "nvcr.io/nvidia/eval-factory/tooltalk:25.11"
85
+ # container-digest:sha256:d829280eb563106a29a45020e20a9859db9f7e5f9e7988ec7a7d99277fb135ba
86
+
87
+ [genai_perf_eval]
88
+ container = "nvcr.io/nvidia/eval-factory/genai-perf:25.11"
89
+ # container-digest:sha256:b6a3f60bc1762b2f2a216a5bdb73f84588e12c9914adcbdf0ecaa12d58acebe6
384
90
 
385
- [tooltalk.tasks.chat.tooltalk]
91
+ [mmath]
92
+ container = "nvcr.io/nvidia/eval-factory/mmath:25.11"
93
+ # container-digest:sha256:4e30f8c40a7ff8f200734cf1251072b49888841bf0513fe2d5725ae81ab8b428
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nemo-evaluator-launcher
3
- Version: 0.1.41
3
+ Version: 0.1.56
4
4
  Summary: Launcher for the evaluations provided by NeMo Evaluator containers with different runtime backends
5
5
  Author: NVIDIA
6
6
  Author-email: nemo-toolkit@nvidia.com
@@ -467,6 +467,7 @@ License-File: LICENSE
467
467
  Requires-Dist: hydra-core<2.0.0,>=1.3.2
468
468
  Requires-Dist: jinja2<4.0.0,>=3.1.6
469
469
  Requires-Dist: leptonai>=0.25.0
470
+ Requires-Dist: nemo-evaluator
470
471
  Requires-Dist: pyyaml>=6.0.0
471
472
  Requires-Dist: requests>=2.32.4
472
473
  Requires-Dist: simple-parsing<0.2.0,>=0.1.7
@@ -1,7 +1,7 @@
1
1
  nemo_evaluator_launcher/__init__.py,sha256=GT38zGwbvBOSeU52WCRx-n9N49LvLGEV1PItgKC8orA,2320
2
- nemo_evaluator_launcher/package_info.py,sha256=RGnruVj93NUhGw5fqI4DqujBesAV9Ixf05LquZO-JBU,1586
2
+ nemo_evaluator_launcher/package_info.py,sha256=zHKOOOIbhAWOrU7pN-acMaSYwI2zdqC5syv_zy4x21A,1586
3
3
  nemo_evaluator_launcher/api/__init__.py,sha256=U9q_MJK2vRsFaymanhyy0nD1SNAZQZC8oY45RXPX7ac,1024
4
- nemo_evaluator_launcher/api/functional.py,sha256=4Se7avg6Wde4EK7is3y-KjXV5qR9RGBpe8zjwjej4M8,31859
4
+ nemo_evaluator_launcher/api/functional.py,sha256=di5az6OkMSGBr5bWTT6JLgtqNBU8-fwAb_eZETyVulI,33641
5
5
  nemo_evaluator_launcher/api/types.py,sha256=W7ZQ9ZTPR6YxInxxsKE6NxuuQAg4pVYz6SRmFCFxY0A,3635
6
6
  nemo_evaluator_launcher/api/utils.py,sha256=q5HArRj7PKgBfeH3bOX8q1U97yMyQQp72yRRA5JP9PE,818
7
7
  nemo_evaluator_launcher/cli/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
@@ -10,17 +10,23 @@ nemo_evaluator_launcher/cli/info.py,sha256=2dZA2BqXpTG1wO_Wzt6Ol9ZNJzJJ0PibOB0hL
10
10
  nemo_evaluator_launcher/cli/kill.py,sha256=C-4PWmMu8mIITo92o5AHxtq_s-8Cckbp7wAlG0I_ylw,1323
11
11
  nemo_evaluator_launcher/cli/logs.py,sha256=eYgfDbHNZhw1xo56bwCIR1sGQYJIPPzzUJu-pqseDWk,3774
12
12
  nemo_evaluator_launcher/cli/ls_runs.py,sha256=vJTwRdhVKLolnJuP8AnnQdJBE-BKZfCcCypLKSz5gqs,4942
13
- nemo_evaluator_launcher/cli/ls_tasks.py,sha256=Pd2lBQOQBNHBWrjk4tZg0SQ9Ul9F2Ak-zOyh-G9x-DY,5293
14
- nemo_evaluator_launcher/cli/main.py,sha256=iNEAeE7_dfDia4oCjUYhYikJJJXg9cMlbGADwMfMtfM,7613
15
- nemo_evaluator_launcher/cli/run.py,sha256=XKF4nRdijhFH01JvMtWXav6tc6Xp7tAhbSnk32q8Ma4,9966
13
+ nemo_evaluator_launcher/cli/ls_task.py,sha256=sChh9UQJK0Zh1g3RitVtm8o_6qJQuS75JGrHXf3Y0y4,10954
14
+ nemo_evaluator_launcher/cli/ls_tasks.py,sha256=aMzuAiUUeTCeSN3XCtoneOm-BBSnOVfhgIzgxZ8Y8wI,11167
15
+ nemo_evaluator_launcher/cli/main.py,sha256=3BwJXuB9WHmiTKu4FR2dyrVonwpo8YWcPfw3MSmqOu8,8148
16
+ nemo_evaluator_launcher/cli/run.py,sha256=KGBIXiDfT5OUv09FaTl-GyOxDhjxWEIE1rGlUkccB2o,11336
16
17
  nemo_evaluator_launcher/cli/status.py,sha256=ANdu0JYnfKNvd1gXmdu_0FrbPG-g0A_R4leOuNXzenQ,5947
17
18
  nemo_evaluator_launcher/cli/version.py,sha256=GgMNTAd4S0bu3t-uVfVedAf7p6pymWDDwOaNm4WHOxQ,1998
18
19
  nemo_evaluator_launcher/common/__init__.py,sha256=6-xb4KpG8-lZbWBI42c_Gax-Sq0kMSW8UG0Vn8dOBlo,744
19
20
  nemo_evaluator_launcher/common/execdb.py,sha256=WPzg5Iu2ojvFpBuYahSt3voP_iEUpoO8NgqMLUBwFxA,9767
20
- nemo_evaluator_launcher/common/helpers.py,sha256=MrLHPlOSn8GGc7sxB0xlDmUN0uWluSgVmDLUBwuo7pU,13540
21
+ nemo_evaluator_launcher/common/helpers.py,sha256=pWYd1P5tbBSAK1JPv7dk_d_Sq92gJ4NIHXiiOEyNiZY,13218
21
22
  nemo_evaluator_launcher/common/logging_utils.py,sha256=7QkWlpA80QN5ipTUFJ198IiAsPRS36C6ISAAtNverbA,12338
22
- nemo_evaluator_launcher/common/mapping.py,sha256=tD3jWN7rm9-iJEFlENhYMt7adz8DKs67g3Xd43XIAMM,10731
23
- nemo_evaluator_launcher/common/printing_utils.py,sha256=QPjLqFlp0dd18RY__UYo5-yG8OqABaa78cpzMhZVCeo,2579
23
+ nemo_evaluator_launcher/common/mapping.py,sha256=O4Xg_0hgnQOOxAEGFlFJjHh_sTXvbIEUCgNXYxvfnDU,18035
24
+ nemo_evaluator_launcher/common/printing_utils.py,sha256=K9_-ENUIGHcL9i-fmGscfCJoV-uWJlaMKNToNx1Ypmg,2631
25
+ nemo_evaluator_launcher/common/container_metadata/__init__.py,sha256=QxPpIUTWprJybsfsaNoHnpLNIGxIVERW0mAtZ-u-F3c,2145
26
+ nemo_evaluator_launcher/common/container_metadata/intermediate_repr.py,sha256=aa9J0tdvdVYpGPu85vkdpBnFOpSReLdA0SNb45jAC0Y,17525
27
+ nemo_evaluator_launcher/common/container_metadata/loading.py,sha256=KY6mML3St4q-OgDQ1TeSyWQRIGSUx-CvLSC1sHjI1jc,41561
28
+ nemo_evaluator_launcher/common/container_metadata/registries.py,sha256=UNJdUBiXJyMeKKSIgCHZod4bDAHGhOFdXH9WjlQJbKw,28404
29
+ nemo_evaluator_launcher/common/container_metadata/utils.py,sha256=kaINUugS7Jhydm7wcYrOQujMe7SfY8dkaRzzr89Z5aE,2331
24
30
  nemo_evaluator_launcher/configs/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
25
31
  nemo_evaluator_launcher/configs/default.yaml,sha256=JHFjSl3KByhggRMTTo9nesQATVoz18PJIV6KM5Wng64,974
26
32
  nemo_evaluator_launcher/configs/deployment/generic.yaml,sha256=8_Z0fcjZuH6GfV9jJkY_8CS18Tbsn0gV-nK1LfGr_Vg,1262
@@ -37,13 +43,13 @@ nemo_evaluator_launcher/executors/base.py,sha256=QC1HDyTqXJFQMPMz48A-DLOFu3le4NF
37
43
  nemo_evaluator_launcher/executors/registry.py,sha256=8QXSrsJyHeNi8iSttJ8KWQLXmZve1vxnnCNw_CkeopI,1409
38
44
  nemo_evaluator_launcher/executors/lepton/__init__.py,sha256=F_7yuBaYQ6WWTcptADdkL3AIZ_jXJQHGgKag-Hm7BbQ,698
39
45
  nemo_evaluator_launcher/executors/lepton/deployment_helpers.py,sha256=vzAWaNmacRsloURwVIc6LsP-xpKwvr7vsPlFIFTFdLQ,23234
40
- nemo_evaluator_launcher/executors/lepton/executor.py,sha256=gKls0DNTVyFVOh3rjIPAVruaAeQ5qmVcNoZctlLnb5w,44347
46
+ nemo_evaluator_launcher/executors/lepton/executor.py,sha256=CG4bRnF3BNXujgztsX15WmMFsnmcfalqrW8K_z95phI,45049
41
47
  nemo_evaluator_launcher/executors/lepton/job_helpers.py,sha256=6baTxcygfP1oFgAJ7I9EL4xRlcJDWqbqzZoE1CRrwSk,13528
42
48
  nemo_evaluator_launcher/executors/local/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
43
- nemo_evaluator_launcher/executors/local/executor.py,sha256=ElRNKv8GVlcOHP23Uc67RiSleI6GfWpBK0e8w6VcXRk,36880
49
+ nemo_evaluator_launcher/executors/local/executor.py,sha256=lLBEv0UoB7PRKdVsYdcodW0klCn90t0ua3Sp41IZr4U,37012
44
50
  nemo_evaluator_launcher/executors/local/run.template.sh,sha256=S6qLqnqA3B4P_-ngklCU5dO9rKV0Qom5NWDwDHf5i0g,6103
45
51
  nemo_evaluator_launcher/executors/slurm/__init__.py,sha256=lNC_skFLYTOt-arnY3ZQnZMWzHlrtD2wAoHvDcHddwM,673
46
- nemo_evaluator_launcher/executors/slurm/executor.py,sha256=2miKvU5kHr5fs706MxgAjDPE8nuqmOuWjnzni2F97qo,55108
52
+ nemo_evaluator_launcher/executors/slurm/executor.py,sha256=zumFTbnKQzKNv5g7Y_PxsAXAsHIIvXKvIIOPLS8hoiA,59726
47
53
  nemo_evaluator_launcher/executors/slurm/proxy.cfg.template,sha256=nPg_JyBHPcjI1RSxDvl-lBgcbakrbZLl4IAXNBc8LM0,594
48
54
  nemo_evaluator_launcher/exporters/__init__.py,sha256=mBXG9FG48FeYrs8sF0zA2mgo1eqBmRgoml7zjJrqDso,1323
49
55
  nemo_evaluator_launcher/exporters/base.py,sha256=0BEqS-Zjez-KsrGE9yfo8S5w2uwMW3btBZve3SiiUp0,4307
@@ -53,10 +59,11 @@ nemo_evaluator_launcher/exporters/mlflow.py,sha256=qFEe9774s31Uybpah7kiz4BfZUDMy
53
59
  nemo_evaluator_launcher/exporters/registry.py,sha256=XsPTv_SBAFjcErO6BJ3OHqs3EvXQpLeyKRJuK9Ql4_M,1299
54
60
  nemo_evaluator_launcher/exporters/utils.py,sha256=5HGBKoFTSCng-GhAJpvfAa7N52_r9UDyrGlBLqRiiGo,22587
55
61
  nemo_evaluator_launcher/exporters/wandb.py,sha256=1qRUV_YE1Ury7rH7KH65AabR7gmEQ38kXBh2XrfiEpE,18082
56
- nemo_evaluator_launcher/resources/mapping.toml,sha256=oUeYpfOTyh5iiQs1rdc9zo-j_1P8ReWAzole2wYDFgg,12370
57
- nemo_evaluator_launcher-0.1.41.dist-info/licenses/LICENSE,sha256=DyGb0fqHPZAsd_uXHA0DGcOCqsvrNsImuLC0Ts4s1zI,23413
58
- nemo_evaluator_launcher-0.1.41.dist-info/METADATA,sha256=rCHDM0bplzE1q9gmi1GYzG1oQBpia3NfToSOEmN-JiU,28730
59
- nemo_evaluator_launcher-0.1.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
60
- nemo_evaluator_launcher-0.1.41.dist-info/entry_points.txt,sha256=HPmLybw-y1y4NxjK7slFS4mtTB7p4pnjRvCtJLvSiZs,174
61
- nemo_evaluator_launcher-0.1.41.dist-info/top_level.txt,sha256=5PvawNm9TXKqPRjZita1xPOtFiMOipcoRf50FI1iY3s,24
62
- nemo_evaluator_launcher-0.1.41.dist-info/RECORD,,
62
+ nemo_evaluator_launcher/resources/all_tasks_irs.yaml,sha256=g2T5Jb_2zkJFZd4gqHmOLKiKR52ioz9adjOYcaSEnyE,978316
63
+ nemo_evaluator_launcher/resources/mapping.toml,sha256=iB5tHeMYJMnnej4TwKmrmI1rq3zJ-rWymO8eWxvfGvk,3619
64
+ nemo_evaluator_launcher-0.1.56.dist-info/licenses/LICENSE,sha256=DyGb0fqHPZAsd_uXHA0DGcOCqsvrNsImuLC0Ts4s1zI,23413
65
+ nemo_evaluator_launcher-0.1.56.dist-info/METADATA,sha256=uKG_JgnSQcHFxj2Nxs-Qmq7jc39BCksOdQ-qMfevGUE,28760
66
+ nemo_evaluator_launcher-0.1.56.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
67
+ nemo_evaluator_launcher-0.1.56.dist-info/entry_points.txt,sha256=HPmLybw-y1y4NxjK7slFS4mtTB7p4pnjRvCtJLvSiZs,174
68
+ nemo_evaluator_launcher-0.1.56.dist-info/top_level.txt,sha256=5PvawNm9TXKqPRjZita1xPOtFiMOipcoRf50FI1iY3s,24
69
+ nemo_evaluator_launcher-0.1.56.dist-info/RECORD,,