EuroEval 16.0.1__py3-none-any.whl → 16.1.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. See the release details on the originating registry page for more information.

Files changed (48)
  1. euroeval/benchmark_config_factory.py +6 -1
  2. euroeval/benchmark_modules/base.py +2 -0
  3. euroeval/benchmark_modules/fresh.py +7 -1
  4. euroeval/benchmark_modules/hf.py +26 -21
  5. euroeval/benchmark_modules/litellm.py +258 -131
  6. euroeval/benchmark_modules/vllm.py +79 -40
  7. euroeval/benchmarker.py +11 -2
  8. euroeval/cli.py +14 -1
  9. euroeval/constants.py +1 -1
  10. euroeval/data_models.py +77 -6
  11. euroeval/dataset_configs/__init__.py +1 -0
  12. euroeval/dataset_configs/danish.py +14 -0
  13. euroeval/dataset_configs/dutch.py +14 -0
  14. euroeval/dataset_configs/english.py +22 -0
  15. euroeval/dataset_configs/estonian.py +15 -7
  16. euroeval/dataset_configs/finnish.py +14 -0
  17. euroeval/dataset_configs/french.py +14 -0
  18. euroeval/dataset_configs/german.py +23 -0
  19. euroeval/dataset_configs/italian.py +14 -0
  20. euroeval/dataset_configs/latvian.py +14 -0
  21. euroeval/dataset_configs/norwegian.py +14 -0
  22. euroeval/dataset_configs/polish.py +126 -0
  23. euroeval/dataset_configs/portuguese.py +14 -0
  24. euroeval/dataset_configs/spanish.py +14 -0
  25. euroeval/dataset_configs/swedish.py +25 -0
  26. euroeval/enums.py +12 -0
  27. euroeval/generation.py +17 -8
  28. euroeval/generation_utils.py +65 -11
  29. euroeval/metrics/pipeline.py +1 -1
  30. euroeval/prompt_templates/linguistic_acceptability.py +9 -0
  31. euroeval/prompt_templates/multiple_choice.py +27 -1
  32. euroeval/prompt_templates/named_entity_recognition.py +20 -0
  33. euroeval/prompt_templates/reading_comprehension.py +11 -0
  34. euroeval/prompt_templates/sentiment_classification.py +15 -0
  35. euroeval/prompt_templates/summarization.py +27 -1
  36. euroeval/scores.py +5 -0
  37. euroeval/task_group_utils/question_answering.py +29 -29
  38. euroeval/task_group_utils/sequence_classification.py +11 -34
  39. euroeval/task_group_utils/token_classification.py +3 -3
  40. euroeval/tasks.py +4 -4
  41. euroeval/{tokenization_utils.py → tokenisation_utils.py} +50 -28
  42. euroeval/utils.py +36 -3
  43. {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/METADATA +1 -1
  44. euroeval-16.1.1.dist-info/RECORD +70 -0
  45. euroeval-16.0.1.dist-info/RECORD +0 -69
  46. {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/WHEEL +0 -0
  47. {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/entry_points.txt +0 -0
  48. {euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/licenses/LICENSE +0 -0
euroeval/{tokenization_utils.py → tokenisation_utils.py} CHANGED
@@ -1,4 +1,4 @@
1
- """Utility functions related to tokenization."""
1
+ """Utility functions related to tokenisation."""
2
2
 
3
3
  import logging
4
4
  import re
@@ -79,8 +79,8 @@ def should_prompts_be_stripped(
79
79
  """Determine if we should strip the prompts for few-shot evaluation.
80
80
 
81
81
  This is the case if the tokeniser needs to include the space as part of the label
82
- token. The strategy is thus to tokenize a label with a preceeding colon (as in the
83
- prompts), i.e., ": positive", and check if the tokenization starts with the tokens
82
+ token. The strategy is thus to tokenise a label with a preceeding colon (as in the
83
+ prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
84
84
  of ": ". If this is the case, then we should not strip the prompts, since the
85
85
  tokeniser produces the whitespace token separately.
86
86
 
@@ -88,7 +88,7 @@ def should_prompts_be_stripped(
88
88
  labels_to_be_generated:
89
89
  The labels that are to be generated.
90
90
  tokeniser:
91
- The tokeniser used to tokenize the labels.
91
+ The tokeniser used to tokenise the labels.
92
92
 
93
93
  Returns:
94
94
  Whether we should strip the prompts.
@@ -124,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
124
124
  labels_to_be_generated:
125
125
  The labels that are to be generated.
126
126
  tokeniser:
127
- The tokeniser used to tokenize the labels.
127
+ The tokeniser used to tokenise the labels.
128
128
 
129
129
  Returns:
130
130
  Whether we should add a prefix space to the labels.
@@ -318,7 +318,9 @@ def get_pad_token(
318
318
  return pad_token, pad_token_id
319
319
 
320
320
 
321
- def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | None:
321
+ def get_end_of_chat_token_ids(
322
+ tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
323
+ ) -> list[int] | None:
322
324
  """Get the end token ID for chat models.
323
325
 
324
326
  This is only relevant for tokenisers with a chat template.
@@ -326,21 +328,29 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
326
328
  Args:
327
329
  tokeniser:
328
330
  The tokeniser.
331
+ generative_type:
332
+ The generative type, or None if not available.
329
333
 
330
334
  Returns:
331
335
  The token IDs used to end chats, or None if the tokeniser does not have a chat
332
336
  template or if no end-of-chat token could be found.
333
337
  """
334
- if not has_chat_template(tokeniser=tokeniser):
338
+ if generative_type == GenerativeType.BASE:
335
339
  return None
336
340
 
337
341
  user_message: dict[str, str] = dict(role="user", content="X")
338
- token_ids = apply_chat_template(
339
- conversation=[user_message],
340
- tokeniser=tokeniser,
341
- tokenize=True,
342
- add_generation_prompt=False,
343
- )
342
+ try:
343
+ token_ids = apply_chat_template(
344
+ conversation=[user_message],
345
+ tokeniser=tokeniser,
346
+ tokenise=True,
347
+ add_generation_prompt=False,
348
+ enable_thinking=generative_type == GenerativeType.REASONING,
349
+ )
350
+ except InvalidModel as e:
351
+ if "does not have a chat template" in str(e):
352
+ return None
353
+ raise e
344
354
  assert isinstance(token_ids, list)
345
355
 
346
356
  for idx, token in enumerate(tokeniser.convert_ids_to_tokens(token_ids)):
@@ -420,7 +430,7 @@ def get_first_label_token_mapping(
420
430
  for label in dataset_config.labels
421
431
  ]
422
432
 
423
- # Tokenize some text containing each label, which we will use to extract the
433
+ # Tokenise some text containing each label, which we will use to extract the
424
434
  # first token of each label
425
435
  all_tokens: list[list[str]]
426
436
  if not has_chat_template(tokeniser=tokeniser):
@@ -439,11 +449,13 @@ def get_first_label_token_mapping(
439
449
  dict(role="user", content=""),
440
450
  dict(role="assistant", content=label),
441
451
  # Adding extra user message as Mistral tokenisers require
442
- # conversamtions to end with a user message
452
+ # conversations to end with a user message
443
453
  dict(role="user", content=""),
444
454
  ],
445
455
  tokeniser=tokeniser,
446
- tokenize=True,
456
+ tokenise=True,
457
+ add_generation_prompt=True,
458
+ enable_thinking=generative_type == GenerativeType.REASONING,
447
459
  )
448
460
  )
449
461
  for label in local_labels
@@ -537,9 +549,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
537
549
  def apply_chat_template(
538
550
  conversation: list[dict[str, str]],
539
551
  tokeniser: "PreTrainedTokenizer",
540
- tokenize: bool = False,
541
- add_generation_prompt: bool = True,
542
- **transformers_tokeniser_kwargs,
552
+ tokenise: bool,
553
+ add_generation_prompt: bool,
554
+ enable_thinking: bool,
555
+ **extra_kwargs,
543
556
  ) -> str | list[int]:
544
557
  """Apply the chat template to a prompt.
545
558
 
@@ -548,38 +561,47 @@ def apply_chat_template(
548
561
  The conversation to apply the chat template to.
549
562
  tokeniser:
550
563
  The tokeniser.
551
- tokenize:
552
- Whether to tokenize the resulting prompt, returning a list of token IDs
564
+ tokenise:
565
+ Whether to tokenise the resulting prompt, returning a list of token IDs
553
566
  instead of a string.
554
567
  add_generation_prompt:
555
568
  Whether to add a generation prompt at the end of the conversation. This is
556
569
  only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
557
570
  always add a generation prompt.
558
- **transformers_tokeniser_kwargs:
559
- Additional keyword arguments to pass to the tokeniser, in case the tokeniser
560
- is a regular Hugging Face tokeniser.
571
+ enable_thinking:
572
+ Whether to enable special handling for reasoning models, such as adding
573
+ special tokens for thinking. This is only relevant for regular Hugging
574
+ Face tokenisers, as Mistral tokenisers always handle reasoning models.
575
+ **extra_kwargs:
576
+ Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
577
+ method. Only relevant for regular Hugging Face tokenisers.
561
578
 
562
579
  Returns:
563
580
  The prompt with the chat template applied, either as a string or a list of
564
- token IDs, depending on the value of `tokenize`.
581
+ token IDs, depending on the value of `tokenise`.
565
582
 
566
583
  Raises:
567
584
  InvalidModel:
568
585
  If the tokeniser does not have a chat template.
569
586
  """
587
+ # Ensure that the first user message is not empty, as this can cause issues with
588
+ # Jinja2
589
+ conversation[0]["content"] = conversation[0]["content"] or " "
590
+
570
591
  if not has_chat_template(tokeniser=tokeniser):
571
592
  raise InvalidModel(
572
593
  "The tokeniser does not have a chat template, so cannot apply it."
573
594
  )
574
595
  elif isinstance(tokeniser, MistralCommonTokenizer):
575
596
  templated_prompt = tokeniser.apply_chat_template(
576
- conversation=conversation, tokenize=tokenize
597
+ conversation=conversation, tokenize=tokenise
577
598
  )
578
599
  else:
579
600
  templated_prompt = tokeniser.apply_chat_template(
580
601
  conversation=conversation,
581
602
  add_generation_prompt=add_generation_prompt,
582
- tokenize=tokenize,
583
- **transformers_tokeniser_kwargs,
603
+ tokenize=tokenise,
604
+ enable_thinking=enable_thinking,
605
+ **extra_kwargs,
584
606
  )
585
607
  return templated_prompt
euroeval/utils.py CHANGED
@@ -4,7 +4,6 @@ import asyncio
4
4
  import gc
5
5
  import importlib
6
6
  import importlib.metadata
7
- import importlib.util
8
7
  import logging
9
8
  import os
10
9
  import random
@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
25
24
  from requests.exceptions import RequestException
26
25
  from transformers import logging as tf_logging
27
26
 
28
- from .exceptions import InvalidBenchmark, NaNValueInModelOutput
27
+ from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
29
28
 
30
29
  if t.TYPE_CHECKING:
31
30
  from types import TracebackType
32
31
 
32
+ from .data_models import ModelIdComponents
33
33
  from .types import Predictions
34
34
 
35
35
 
@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
347
347
  loop = asyncio.new_event_loop()
348
348
  try:
349
349
  asyncio.set_event_loop(loop)
350
- return loop.run_until_complete(coroutine)
350
+ response = loop.run_until_complete(coroutine)
351
+ return response
351
352
  finally:
352
353
  loop.close()
353
354
  asyncio.set_event_loop(None)
@@ -488,3 +489,35 @@ def extract_multiple_choice_labels(
488
489
  f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
489
490
  )
490
491
  return sample_candidate_labels
492
+
493
+
494
+ def split_model_id(model_id: str) -> "ModelIdComponents":
495
+ """Split a model ID into its components.
496
+
497
+ Args:
498
+ model_id:
499
+ The model ID to split.
500
+
501
+ Returns:
502
+ The split model ID.
503
+
504
+ Raises:
505
+ If the model ID is not valid.
506
+ """
507
+ # Importing here to avoid circular imports
508
+ from .data_models import ModelIdComponents
509
+
510
+ # Attempt to extract the model ID, revision, and param using regex
511
+ model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
512
+ revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
513
+ param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
514
+
515
+ # If we cannot extract the model ID, raise an error
516
+ if model_id_match is None:
517
+ raise InvalidModel(f"The model ID {model_id!r} is not valid.")
518
+ model_id = model_id_match.group()
519
+
520
+ # Extract the revision and param and return the result
521
+ revision = revision_match.group(1) if revision_match is not None else "main"
522
+ param = param_match.group(1) if param_match is not None else None
523
+ return ModelIdComponents(model_id=model_id, revision=revision, param=param)
{euroeval-16.0.1.dist-info → euroeval-16.1.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 16.0.1
3
+ Version: 16.1.1
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
euroeval-16.1.1.dist-info/RECORD ADDED
@@ -0,0 +1,70 @@
1
+ euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
2
+ euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
3
+ euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
4
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
+ euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
6
+ euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
7
+ euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
+ euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
9
+ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
10
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
+ euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
+ euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
13
+ euroeval/generation_utils.py,sha256=d2_vylWXIeH4xIXgbsI5rN6dMt0zKp0zXExD6aOKWaA,18299
14
+ euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
+ euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
+ euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
19
+ euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
+ euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
21
+ euroeval/tokenisation_utils.py,sha256=e2H86vhSVfz5gx6GmzoBJwLZLG6sf3GEcoCGmvJBQLc,21505
22
+ euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
+ euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
24
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
+ euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
26
+ euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
27
+ euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
28
+ euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
29
+ euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
30
+ euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
31
+ euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
32
+ euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
33
+ euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
34
+ euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
35
+ euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
+ euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
37
+ euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
38
+ euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
39
+ euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
+ euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
41
+ euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
42
+ euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
43
+ euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
44
+ euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
45
+ euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
46
+ euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
47
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
48
+ euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
49
+ euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
50
+ euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
51
+ euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
52
+ euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
53
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
54
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
55
+ euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
56
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
57
+ euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
58
+ euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
59
+ euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
60
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
61
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
62
+ euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
63
+ euroeval/task_group_utils/sequence_classification.py,sha256=TAqZCoMQ9I-HFhMH35_J1mY2SQg95HUbXcgrBIyhgk0,16082
64
+ euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
65
+ euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
66
+ euroeval-16.1.1.dist-info/METADATA,sha256=gyqd2PPeT0vv_ye9nnfqv-0DlpejquzqcftBwpwnH7Y,13729
67
+ euroeval-16.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
+ euroeval-16.1.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
69
+ euroeval-16.1.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
70
+ euroeval-16.1.1.dist-info/RECORD,,
euroeval-16.0.1.dist-info/RECORD REMOVED
@@ -1,69 +0,0 @@
1
- euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
2
- euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
3
- euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
4
- euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
- euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
6
- euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
7
- euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
- euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
9
- euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
10
- euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
- euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
- euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
13
- euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
14
- euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
- euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
- euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
- euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
- euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
19
- euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
- euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
21
- euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
22
- euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
- euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
24
- euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
- euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
26
- euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
27
- euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
28
- euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
29
- euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
30
- euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
31
- euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
32
- euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
33
- euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
34
- euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
35
- euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
- euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
37
- euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
38
- euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
39
- euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
- euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
41
- euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
42
- euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
43
- euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
44
- euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
45
- euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
46
- euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
47
- euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
48
- euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
49
- euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
50
- euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
51
- euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
52
- euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
53
- euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
54
- euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
55
- euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
56
- euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
57
- euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
58
- euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
59
- euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
60
- euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
61
- euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
62
- euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
63
- euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
64
- euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
65
- euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
66
- euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
- euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
68
- euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
69
- euroeval-16.0.1.dist-info/RECORD,,