EuroEval 15.8.1__py3-none-any.whl → 15.8.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

@@ -401,6 +401,12 @@ class LiteLLMModel(BenchmarkModule):
401
401
  model_responses=ordered_responses, model_id=self.model_config.model_id
402
402
  )
403
403
 
404
+ if len(messages) != len(model_output.sequences):
405
+ raise InvalidBenchmark(
406
+ f"Number of model inputs ({len(messages):,}) does not match the "
407
+ f"number of model outputs ({len(model_output.sequences):,})."
408
+ )
409
+
404
410
  return model_output
405
411
 
406
412
  def _handle_exception(
@@ -616,8 +622,7 @@ class LiteLLMModel(BenchmarkModule):
616
622
  scores = []
617
623
  for model_response in model_responses:
618
624
  if not model_response.choices:
619
- # This happens for reasoning models, when they don't finish thinking
620
- # and run out of tokens. Happens quite rarely, but we need to handle it.
625
+ sequences.append("")
621
626
  logger.warning(
622
627
  f"The model {model_id!r} did not end up "
623
628
  "generating any text. This is likely because the model ran "
euroeval/model_cache.py CHANGED
@@ -168,6 +168,15 @@ class ModelCache:
168
168
  input_column = "messages" if "messages" in model_inputs else "text"
169
169
  model_inputs = model_inputs[input_column]
170
170
 
171
+ # Double check that the number of inputs and outputs match
172
+ if not len(model_inputs) == len(model_output.sequences):
173
+ logger.warning(
174
+ f"Number of model inputs ({len(model_inputs)}) does not match the "
175
+ f"number of model outputs ({len(model_output.sequences)}). We will not "
176
+ f"cache the model outputs."
177
+ )
178
+ return
179
+
171
180
  # Store the generated sequences in the cache, one by one
172
181
  with tqdm(
173
182
  iterable=model_inputs,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 15.8.1
3
+ Version: 15.8.2
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -62,12 +62,12 @@ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == '
62
62
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
63
63
  Requires-Dist: gradio>=4.26.0; extra == 'all'
64
64
  Requires-Dist: outlines>=0.1.11; extra == 'all'
65
- Requires-Dist: vllm>=0.8.3; (platform_system == 'Linux') and extra == 'all'
65
+ Requires-Dist: vllm<0.8.5,>=0.8.3; (platform_system == 'Linux') and extra == 'all'
66
66
  Provides-Extra: generative
67
67
  Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
68
68
  Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
69
69
  Requires-Dist: outlines>=0.1.11; extra == 'generative'
70
- Requires-Dist: vllm>=0.8.3; (platform_system == 'Linux') and extra == 'generative'
70
+ Requires-Dist: vllm<0.8.5,>=0.8.3; (platform_system == 'Linux') and extra == 'generative'
71
71
  Provides-Extra: human-evaluation
72
72
  Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
73
73
  Provides-Extra: test
@@ -13,7 +13,7 @@ euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
13
13
  euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
14
14
  euroeval/human_evaluation.py,sha256=VGvw1X6Mkdf22r-THSNWXMIqyJP44yh4rW53vq-0huo,27681
15
15
  euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
16
- euroeval/model_cache.py,sha256=n39yFpZkudBCVwz1EQpZ-g5BQtlQemQ5nP3IiFKJZHg,8275
16
+ euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
17
17
  euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
18
18
  euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
19
19
  euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
@@ -26,7 +26,7 @@ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwR
26
26
  euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
27
27
  euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
28
28
  euroeval/benchmark_modules/hf.py,sha256=yFApLL4_ia5Kw2iat5RSI8h5RhI4OP04HlzYidlhBCs,44012
29
- euroeval/benchmark_modules/litellm.py,sha256=dd7OqBvWA75zNrsEHtC3cx3rNpNJ-1QOL2arV_CqYG0,48231
29
+ euroeval/benchmark_modules/litellm.py,sha256=SxSr_0C6b_jVavR3y9QyhfkCOP5-va4zijGfghFTArY,48362
30
30
  euroeval/benchmark_modules/vllm.py,sha256=DJyla0jr-DVMPPs4RBguxq1Xn5YguvyuAnIlgIOfFaw,39394
31
31
  euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
32
32
  euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
@@ -54,8 +54,8 @@ euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iY
54
54
  euroeval/task_group_utils/sequence_classification.py,sha256=Yqx0pUhuHYmSkv1ZUfOndSLTvpr0lWCk19oYITfSjV4,13555
55
55
  euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
56
56
  euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
57
- euroeval-15.8.1.dist-info/METADATA,sha256=Fdzj20PR6wWZUx_7f_bhPh8S4DF6ghZwMIDrJ4ozxFE,13669
58
- euroeval-15.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
59
- euroeval-15.8.1.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
60
- euroeval-15.8.1.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
61
- euroeval-15.8.1.dist-info/RECORD,,
57
+ euroeval-15.8.2.dist-info/METADATA,sha256=4L3u0qzbAjcZsog0LZXSurfKJO7ILdXk4h0ORMGepd0,13683
58
+ euroeval-15.8.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
59
+ euroeval-15.8.2.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
60
+ euroeval-15.8.2.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
61
+ euroeval-15.8.2.dist-info/RECORD,,