EuroEval 16.0.1__py3-none-any.whl → 16.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of EuroEval might be problematic. Click here for more details.

Files changed (48)
  1. euroeval/benchmark_config_factory.py +6 -1
  2. euroeval/benchmark_modules/base.py +2 -0
  3. euroeval/benchmark_modules/fresh.py +7 -1
  4. euroeval/benchmark_modules/hf.py +26 -21
  5. euroeval/benchmark_modules/litellm.py +258 -131
  6. euroeval/benchmark_modules/vllm.py +79 -40
  7. euroeval/benchmarker.py +11 -2
  8. euroeval/cli.py +14 -1
  9. euroeval/constants.py +1 -1
  10. euroeval/data_models.py +77 -6
  11. euroeval/dataset_configs/__init__.py +1 -0
  12. euroeval/dataset_configs/danish.py +14 -0
  13. euroeval/dataset_configs/dutch.py +14 -0
  14. euroeval/dataset_configs/english.py +22 -0
  15. euroeval/dataset_configs/estonian.py +15 -7
  16. euroeval/dataset_configs/finnish.py +14 -0
  17. euroeval/dataset_configs/french.py +14 -0
  18. euroeval/dataset_configs/german.py +23 -0
  19. euroeval/dataset_configs/italian.py +14 -0
  20. euroeval/dataset_configs/latvian.py +14 -0
  21. euroeval/dataset_configs/norwegian.py +14 -0
  22. euroeval/dataset_configs/polish.py +126 -0
  23. euroeval/dataset_configs/portuguese.py +14 -0
  24. euroeval/dataset_configs/spanish.py +14 -0
  25. euroeval/dataset_configs/swedish.py +25 -0
  26. euroeval/enums.py +12 -0
  27. euroeval/generation.py +17 -8
  28. euroeval/generation_utils.py +58 -10
  29. euroeval/metrics/pipeline.py +1 -1
  30. euroeval/prompt_templates/linguistic_acceptability.py +9 -0
  31. euroeval/prompt_templates/multiple_choice.py +27 -1
  32. euroeval/prompt_templates/named_entity_recognition.py +20 -0
  33. euroeval/prompt_templates/reading_comprehension.py +11 -0
  34. euroeval/prompt_templates/sentiment_classification.py +15 -0
  35. euroeval/prompt_templates/summarization.py +27 -1
  36. euroeval/scores.py +5 -0
  37. euroeval/task_group_utils/question_answering.py +29 -29
  38. euroeval/task_group_utils/sequence_classification.py +10 -33
  39. euroeval/task_group_utils/token_classification.py +3 -3
  40. euroeval/tasks.py +4 -4
  41. euroeval/{tokenization_utils.py → tokenisation_utils.py} +40 -23
  42. euroeval/utils.py +36 -3
  43. {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/METADATA +1 -1
  44. euroeval-16.1.0.dist-info/RECORD +70 -0
  45. euroeval-16.0.1.dist-info/RECORD +0 -69
  46. {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/WHEEL +0 -0
  47. {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/entry_points.txt +0 -0
  48. {euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/licenses/LICENSE +0 -0
euroeval/{tokenization_utils.py → tokenisation_utils.py} CHANGED
@@ -1,4 +1,4 @@
1
- """Utility functions related to tokenization."""
1
+ """Utility functions related to tokenisation."""
2
2
 
3
3
  import logging
4
4
  import re
@@ -79,8 +79,8 @@ def should_prompts_be_stripped(
79
79
  """Determine if we should strip the prompts for few-shot evaluation.
80
80
 
81
81
  This is the case if the tokeniser needs to include the space as part of the label
82
- token. The strategy is thus to tokenize a label with a preceeding colon (as in the
83
- prompts), i.e., ": positive", and check if the tokenization starts with the tokens
82
+ token. The strategy is thus to tokenise a label with a preceeding colon (as in the
83
+ prompts), i.e., ": positive", and check if the tokenisation starts with the tokens
84
84
  of ": ". If this is the case, then we should not strip the prompts, since the
85
85
  tokeniser produces the whitespace token separately.
86
86
 
@@ -88,7 +88,7 @@ def should_prompts_be_stripped(
88
88
  labels_to_be_generated:
89
89
  The labels that are to be generated.
90
90
  tokeniser:
91
- The tokeniser used to tokenize the labels.
91
+ The tokeniser used to tokenise the labels.
92
92
 
93
93
  Returns:
94
94
  Whether we should strip the prompts.
@@ -124,7 +124,7 @@ def should_prefix_space_be_added_to_labels(
124
124
  labels_to_be_generated:
125
125
  The labels that are to be generated.
126
126
  tokeniser:
127
- The tokeniser used to tokenize the labels.
127
+ The tokeniser used to tokenise the labels.
128
128
 
129
129
  Returns:
130
130
  Whether we should add a prefix space to the labels.
@@ -318,7 +318,9 @@ def get_pad_token(
318
318
  return pad_token, pad_token_id
319
319
 
320
320
 
321
- def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | None:
321
+ def get_end_of_chat_token_ids(
322
+ tokeniser: "PreTrainedTokenizer", generative_type: GenerativeType | None
323
+ ) -> list[int] | None:
322
324
  """Get the end token ID for chat models.
323
325
 
324
326
  This is only relevant for tokenisers with a chat template.
@@ -326,20 +328,23 @@ def get_end_of_chat_token_ids(tokeniser: "PreTrainedTokenizer") -> list[int] | N
326
328
  Args:
327
329
  tokeniser:
328
330
  The tokeniser.
331
+ generative_type:
332
+ The generative type, or None if not available.
329
333
 
330
334
  Returns:
331
335
  The token IDs used to end chats, or None if the tokeniser does not have a chat
332
336
  template or if no end-of-chat token could be found.
333
337
  """
334
- if not has_chat_template(tokeniser=tokeniser):
338
+ if generative_type == GenerativeType.BASE:
335
339
  return None
336
340
 
337
341
  user_message: dict[str, str] = dict(role="user", content="X")
338
342
  token_ids = apply_chat_template(
339
343
  conversation=[user_message],
340
344
  tokeniser=tokeniser,
341
- tokenize=True,
345
+ tokenise=True,
342
346
  add_generation_prompt=False,
347
+ enable_thinking=generative_type == GenerativeType.REASONING,
343
348
  )
344
349
  assert isinstance(token_ids, list)
345
350
 
@@ -420,7 +425,7 @@ def get_first_label_token_mapping(
420
425
  for label in dataset_config.labels
421
426
  ]
422
427
 
423
- # Tokenize some text containing each label, which we will use to extract the
428
+ # Tokenise some text containing each label, which we will use to extract the
424
429
  # first token of each label
425
430
  all_tokens: list[list[str]]
426
431
  if not has_chat_template(tokeniser=tokeniser):
@@ -439,11 +444,13 @@ def get_first_label_token_mapping(
439
444
  dict(role="user", content=""),
440
445
  dict(role="assistant", content=label),
441
446
  # Adding extra user message as Mistral tokenisers require
442
- # conversamtions to end with a user message
447
+ # conversations to end with a user message
443
448
  dict(role="user", content=""),
444
449
  ],
445
450
  tokeniser=tokeniser,
446
- tokenize=True,
451
+ tokenise=True,
452
+ add_generation_prompt=True,
453
+ enable_thinking=generative_type == GenerativeType.REASONING,
447
454
  )
448
455
  )
449
456
  for label in local_labels
@@ -537,9 +544,10 @@ def has_chat_template(tokeniser: "PreTrainedTokenizer") -> bool:
537
544
  def apply_chat_template(
538
545
  conversation: list[dict[str, str]],
539
546
  tokeniser: "PreTrainedTokenizer",
540
- tokenize: bool = False,
541
- add_generation_prompt: bool = True,
542
- **transformers_tokeniser_kwargs,
547
+ tokenise: bool,
548
+ add_generation_prompt: bool,
549
+ enable_thinking: bool,
550
+ **extra_kwargs,
543
551
  ) -> str | list[int]:
544
552
  """Apply the chat template to a prompt.
545
553
 
@@ -548,38 +556,47 @@ def apply_chat_template(
548
556
  The conversation to apply the chat template to.
549
557
  tokeniser:
550
558
  The tokeniser.
551
- tokenize:
552
- Whether to tokenize the resulting prompt, returning a list of token IDs
559
+ tokenise:
560
+ Whether to tokenise the resulting prompt, returning a list of token IDs
553
561
  instead of a string.
554
562
  add_generation_prompt:
555
563
  Whether to add a generation prompt at the end of the conversation. This is
556
564
  only relevant for regular Hugging Face tokenisers, as Mistral tokenisers
557
565
  always add a generation prompt.
558
- **transformers_tokeniser_kwargs:
559
- Additional keyword arguments to pass to the tokeniser, in case the tokeniser
560
- is a regular Hugging Face tokeniser.
566
+ enable_thinking:
567
+ Whether to enable special handling for reasoning models, such as adding
568
+ special tokens for thinking. This is only relevant for regular Hugging
569
+ Face tokenisers, as Mistral tokenisers always handle reasoning models.
570
+ **extra_kwargs:
571
+ Extra keyword arguments to pass to the tokeniser's `apply_chat_template`
572
+ method. Only relevant for regular Hugging Face tokenisers.
561
573
 
562
574
  Returns:
563
575
  The prompt with the chat template applied, either as a string or a list of
564
- token IDs, depending on the value of `tokenize`.
576
+ token IDs, depending on the value of `tokenise`.
565
577
 
566
578
  Raises:
567
579
  InvalidModel:
568
580
  If the tokeniser does not have a chat template.
569
581
  """
582
+ # Ensure that the first user message is not empty, as this can cause issues with
583
+ # Jinja2
584
+ conversation[0]["content"] = conversation[0]["content"] or " "
585
+
570
586
  if not has_chat_template(tokeniser=tokeniser):
571
587
  raise InvalidModel(
572
588
  "The tokeniser does not have a chat template, so cannot apply it."
573
589
  )
574
590
  elif isinstance(tokeniser, MistralCommonTokenizer):
575
591
  templated_prompt = tokeniser.apply_chat_template(
576
- conversation=conversation, tokenize=tokenize
592
+ conversation=conversation, tokenize=tokenise
577
593
  )
578
594
  else:
579
595
  templated_prompt = tokeniser.apply_chat_template(
580
596
  conversation=conversation,
581
597
  add_generation_prompt=add_generation_prompt,
582
- tokenize=tokenize,
583
- **transformers_tokeniser_kwargs,
598
+ tokenize=tokenise,
599
+ enable_thinking=enable_thinking,
600
+ **extra_kwargs,
584
601
  )
585
602
  return templated_prompt
euroeval/utils.py CHANGED
@@ -4,7 +4,6 @@ import asyncio
4
4
  import gc
5
5
  import importlib
6
6
  import importlib.metadata
7
- import importlib.util
8
7
  import logging
9
8
  import os
10
9
  import random
@@ -25,11 +24,12 @@ from datasets.utils import disable_progress_bar
25
24
  from requests.exceptions import RequestException
26
25
  from transformers import logging as tf_logging
27
26
 
28
- from .exceptions import InvalidBenchmark, NaNValueInModelOutput
27
+ from .exceptions import InvalidBenchmark, InvalidModel, NaNValueInModelOutput
29
28
 
30
29
  if t.TYPE_CHECKING:
31
30
  from types import TracebackType
32
31
 
32
+ from .data_models import ModelIdComponents
33
33
  from .types import Predictions
34
34
 
35
35
 
@@ -347,7 +347,8 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
347
347
  loop = asyncio.new_event_loop()
348
348
  try:
349
349
  asyncio.set_event_loop(loop)
350
- return loop.run_until_complete(coroutine)
350
+ response = loop.run_until_complete(coroutine)
351
+ return response
351
352
  finally:
352
353
  loop.close()
353
354
  asyncio.set_event_loop(None)
@@ -488,3 +489,35 @@ def extract_multiple_choice_labels(
488
489
  f"{', '.join(candidate_labels)}. Here is the prompt: {prompt!r}"
489
490
  )
490
491
  return sample_candidate_labels
492
+
493
+
494
+ def split_model_id(model_id: str) -> "ModelIdComponents":
495
+ """Split a model ID into its components.
496
+
497
+ Args:
498
+ model_id:
499
+ The model ID to split.
500
+
501
+ Returns:
502
+ The split model ID.
503
+
504
+ Raises:
505
+ If the model ID is not valid.
506
+ """
507
+ # Importing here to avoid circular imports
508
+ from .data_models import ModelIdComponents
509
+
510
+ # Attempt to extract the model ID, revision, and param using regex
511
+ model_id_match = re.match(pattern=r"^[^@#]+", string=model_id)
512
+ revision_match = re.search(pattern=r"@([^@#]+)", string=model_id)
513
+ param_match = re.search(pattern=r"#([^@#]+)", string=model_id)
514
+
515
+ # If we cannot extract the model ID, raise an error
516
+ if model_id_match is None:
517
+ raise InvalidModel(f"The model ID {model_id!r} is not valid.")
518
+ model_id = model_id_match.group()
519
+
520
+ # Extract the revision and param and return the result
521
+ revision = revision_match.group(1) if revision_match is not None else "main"
522
+ param = param_match.group(1) if param_match is not None else None
523
+ return ModelIdComponents(model_id=model_id, revision=revision, param=param)
{euroeval-16.0.1.dist-info → euroeval-16.1.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: EuroEval
3
- Version: 16.0.1
3
+ Version: 16.1.0
4
4
  Summary: The robust European language model benchmark.
5
5
  Project-URL: Repository, https://github.com/EuroEval/EuroEval
6
6
  Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
euroeval-16.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,70 @@
1
+ euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
2
+ euroeval/benchmark_config_factory.py,sha256=NzNSiqix4hlVXk3xnyzdg2WDxomkectf97UWdVS3POo,11667
3
+ euroeval/benchmarker.py,sha256=JkhvYxhVpQPcWmDLzwnB8Yy6tTqj3yfDWTefklbI7RM,50355
4
+ euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
+ euroeval/cli.py,sha256=wUGetj9Ld4wkS872ZOfYqHIJMh58o8L2MDi78wU5nxI,9099
6
+ euroeval/constants.py,sha256=NN7kcwQdlDyyGFSrLjsL_qKVRyoRqZ9sKO5SjlgtRwA,2741
7
+ euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
+ euroeval/data_models.py,sha256=S-PATp4F1wBwvra6wtjlJFXxZbZB_vEpJHXcdTTKA70,27593
9
+ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
10
+ euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
+ euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
+ euroeval/generation.py,sha256=MSrd0oIkoqwKsCOaIkY2CFF_urXLOfNR1OO5nMvcCpY,12476
13
+ euroeval/generation_utils.py,sha256=OtEXLhI6L1vlbC768dH3xzj0qkokz43m0vswGKrRmBA,18061
14
+ euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
+ euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
+ euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
+ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
+ euroeval/scores.py,sha256=HQQqyjdgm853FZ_ifIdnSltKfBhsY7pOITov6F3Et5o,3165
19
+ euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
+ euroeval/tasks.py,sha256=3qEOBAMmfeqgXqlGkCKzQ-s0Yw-0-jPRgFZ97EZCFng,4535
21
+ euroeval/tokenisation_utils.py,sha256=jRIi9m8XmGh3LeZna47AWmJI9U9m4ojXQynQTe7kzWc,21344
22
+ euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
+ euroeval/utils.py,sha256=c0tFw1IXZIqgLU4EfY_k28iJ1ZlCZ_oFoKZH2sGCKYg,16499
24
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
+ euroeval/benchmark_modules/base.py,sha256=mHF8XS6GGUXV-sJtxmI5WJBWPLMHuh-4Z4OWjC25x9Y,11566
26
+ euroeval/benchmark_modules/fresh.py,sha256=TveSQiFBi3xXgCEQBdHwkUQ685PDkKW0y3G5Yt5rkeM,10655
27
+ euroeval/benchmark_modules/hf.py,sha256=oBjVumnSM9PW7ZocQwCGLKpbeGFWLN_71DBotxZo1aY,44038
28
+ euroeval/benchmark_modules/litellm.py,sha256=6EKjHnUoPCpuupISZHXqZsXLG8tyiA1-G12a5C6L8MM,64629
29
+ euroeval/benchmark_modules/vllm.py,sha256=sYFdVzB9CZX6_sGI4xghDyXoVn6I95_nbeFUWeSMXcc,43132
30
+ euroeval/dataset_configs/__init__.py,sha256=uuIZmElpJV8iupo5oDj3TeQhBDRANdWpLKYFASLirHA,2046
31
+ euroeval/dataset_configs/danish.py,sha256=QABfgI7m-0-5AimDXegp5ssDSLcM2VrAI_RWsinSZP4,5631
32
+ euroeval/dataset_configs/dutch.py,sha256=63Ro2yFym5MuIDXf5953vUYenw9B0kZSCmZbXjdy4Rs,5517
33
+ euroeval/dataset_configs/english.py,sha256=7lS12Tj7FnMGkS4xj7UoZyymNX6PGXTVl5muPswIgAE,4737
34
+ euroeval/dataset_configs/estonian.py,sha256=tdnz0gmMR9yO5rm3SsIz-Wd0LmlCvi3UJ2M5r4VwkSE,3093
35
+ euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
+ euroeval/dataset_configs/finnish.py,sha256=esb5nu4HAEdqiP7F9klmME-tkjme01Qd89TOxTB1S20,4390
37
+ euroeval/dataset_configs/french.py,sha256=lZKhJcTpaG8n3y8u5KY61UfU9YzEHF9tIPKm8UakoBs,4720
38
+ euroeval/dataset_configs/german.py,sha256=gF0idcfDt5Iy89ozwgEXEYR_ukyYurdQSS1KITPz5aM,5130
39
+ euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
+ euroeval/dataset_configs/italian.py,sha256=tJ_-OYRJ8wJX7ZCwdE4KJIScn1ijYigAXK3lDTZTA3E,5004
41
+ euroeval/dataset_configs/latvian.py,sha256=-zVftcd7Zl6MbrqL-zqBSixsIiPsbt5ZAqldE2wFOEI,2713
42
+ euroeval/dataset_configs/norwegian.py,sha256=ccLM2Zkf5eaFH1K1KyzqoMwkVNcXgjMQTxIhPf4tl_E,7745
43
+ euroeval/dataset_configs/polish.py,sha256=Z-9PT9KaopQUmBgFk5F85ve3pjQwTJqouG8IFgg5iqw,3672
44
+ euroeval/dataset_configs/portuguese.py,sha256=gQ054SdLQ5fkm4IAP6Mdh5RcPDJPDITcuyaLKZit_9o,4089
45
+ euroeval/dataset_configs/spanish.py,sha256=DvJlMK6OQg4qmxKzQA2IficlBMB7BafvxqIVuTKiZyw,4902
46
+ euroeval/dataset_configs/swedish.py,sha256=YWHp7hbJ25o36csSg9uXaQCEJK1BPb7u2RQZiCe0lNs,5445
47
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
48
+ euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
49
+ euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
50
+ euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
51
+ euroeval/metrics/pipeline.py,sha256=Wcan3eDWV7t4WRXMPWCCe_JsA-fZnIfZU2ESinbbL2I,10284
52
+ euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
53
+ euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
54
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=pRR1QBnYt5DnfxQp6dw1OYFZfIct-1R9pfdgPGpjoco,8667
55
+ euroeval/prompt_templates/multiple_choice.py,sha256=Q-8-ETqG-RZeLzR8v8WUBIN7djiNSfNpmYnZRUWcd84,6905
56
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=LT7J6Y9rUCJFimpnwujBZq_V5buSmXHJteIXbTOoaCE,16442
57
+ euroeval/prompt_templates/reading_comprehension.py,sha256=ogzmhiSZO6egrdxxQiWz6a0XMdC0vws-lg5yRKQoYV0,8730
58
+ euroeval/prompt_templates/sentiment_classification.py,sha256=BwnTpSdsAN_rL693ImgtKIRc5T_2G6ptWW0jCdC02NQ,9454
59
+ euroeval/prompt_templates/summarization.py,sha256=4Sqwj6C7yNfqj4FFFCseJMLDoSZ13aIOgY0SjIzzsNo,6593
60
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
61
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
62
+ euroeval/task_group_utils/question_answering.py,sha256=eUczZntrC9lhCUQlwNQB49i-5Ei12cdRnrfq4pE-T7Y,27750
63
+ euroeval/task_group_utils/sequence_classification.py,sha256=qWUUrh4X4jK2XfUzP4aoPDoJhVJifrnDEaaw_F48hig,16080
64
+ euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
65
+ euroeval/task_group_utils/token_classification.py,sha256=Yjai937ia1nZBMOWySqCXr_dA6WiVLGvmb4Hm_TU0Bg,17118
66
+ euroeval-16.1.0.dist-info/METADATA,sha256=pYdW0IZwY8vatTA55EERxBK1kMaQuGhqzNys5xiSqsM,13729
67
+ euroeval-16.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
68
+ euroeval-16.1.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
69
+ euroeval-16.1.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
70
+ euroeval-16.1.0.dist-info/RECORD,,
euroeval-16.0.1.dist-info/RECORD REMOVED
@@ -1,69 +0,0 @@
1
- euroeval/__init__.py,sha256=8jqSCcDWvwwNb1guPi8cLAekPSOX9V8DpRx_v3-c19E,3730
2
- euroeval/benchmark_config_factory.py,sha256=ZKzGkWr-Mr4wEMYNXUHsYkd2R-dxnNyETZJJ-Fq-my0,11386
3
- euroeval/benchmarker.py,sha256=YNqhl2QchqzbGMGu8QoJAG_mnYbcJ46ksfaS0x78fiw,49847
4
- euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
5
- euroeval/cli.py,sha256=RR45NiHMI9hphqBJ7Xopde-C18Be9JgJxgg6eYPFVMM,8594
6
- euroeval/constants.py,sha256=imy-YwofbAwTbjk_vgynYf3zaK5kKV349oXZl99DVyM,2742
7
- euroeval/data_loading.py,sha256=F3fHyR7FoS_a1dx_DyqtcxdB-jxWwE3RCNRvWcp5z1c,4527
8
- euroeval/data_models.py,sha256=UGyqPAYFImrR1gi4ctQdCVb0rjVkEmyf4Lc1a7_6t6E,24663
9
- euroeval/enums.py,sha256=V73E8FTL1aRz74OKcxokTYLnO7Q8HGs2QI0JPZI4qQo,3032
10
- euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
11
- euroeval/finetuning.py,sha256=G86pxxjOAgtcEWpyYDwYOV9pM7WG2Uu9fu7GdDso8dI,11426
12
- euroeval/generation.py,sha256=wm2u8fDGDgtWxCReG3N6v4_lLvo0OHTpR88ThGSRH7A,12139
13
- euroeval/generation_utils.py,sha256=w3hfiJfUPDjf2xSKdDrhlpfuxZlztF0_0h2sFPB2hT0,16212
14
- euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
15
- euroeval/model_cache.py,sha256=h61cL_fy2Sd1sqYZis5lAWqvQIfQXXt_v8QZeftKNkg,9226
16
- euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
17
- euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
18
- euroeval/scores.py,sha256=gJ7DSQVyE2_8qZxJPuUJcFk7Byj2D7nevE23kd4XMbA,3004
19
- euroeval/speed_benchmark.py,sha256=3iz_bfJgAoJ9K2HNjufyrBMjHVT8PAjuY_NocBGwKe0,4044
20
- euroeval/tasks.py,sha256=fwmDKnIexmWbm8HueLUilYzqdNRfo0rFxX-tjZ53Nbg,4503
21
- euroeval/tokenization_utils.py,sha256=66nip9llPw3XBEzGY0TE1DrejLV2WvdSA1p1euXC6Bg,20556
22
- euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
23
- euroeval/utils.py,sha256=ITvT-JxXosrDuElNV7cbASfxzDWSBz9mJWAZHiTOiZY,15304
24
- euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
25
- euroeval/benchmark_modules/base.py,sha256=vYW97bnlzqxxcIq6lY-zd0o6zxyDRMhT85jOhdKnoYE,11482
26
- euroeval/benchmark_modules/fresh.py,sha256=_iRTHt9qUkq7jPOlgwx7IwZG48dK4mjMrh7KiEHeUjE,10462
27
- euroeval/benchmark_modules/hf.py,sha256=HDXuVwt0kZUyL9x3aG5pEjSdGCRfzegqT0xKZYprjU0,43843
28
- euroeval/benchmark_modules/litellm.py,sha256=M6ct5ppcYfO-Il5VMRm3PuyAeQ-rtS22UKyRStLnqfM,59210
29
- euroeval/benchmark_modules/vllm.py,sha256=ckWLA9maDP5TLAfLhEXzkOYJBngb5BQR7X7RLKPl64A,41824
30
- euroeval/dataset_configs/__init__.py,sha256=lEOr4kJzgtUymeNBVhd-VwdUK0YTUZ3GjUMlLz5fGWk,2010
31
- euroeval/dataset_configs/danish.py,sha256=Pb43E-xfgQk9uaxq8ooznvf8okdX8KAYFEPHt1CG_TQ,5192
32
- euroeval/dataset_configs/dutch.py,sha256=tY7FDw7BmhXxNfI1hqfasxQXP0QbYTqknokTZ7gqdRY,5079
33
- euroeval/dataset_configs/english.py,sha256=Y4yc3AQu8WojqENj0sy4-rIlx1LhPnsCQ0DeonqDsVs,4128
34
- euroeval/dataset_configs/estonian.py,sha256=o13P_XkrdhLFCz9l8LJy-TSY3JIN7XmByxesEDiagnc,2879
35
- euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
36
- euroeval/dataset_configs/finnish.py,sha256=7iXjjpJ23tupvtXwJF3TH1Tzwhxw0RFaoBv38HclsJc,3950
37
- euroeval/dataset_configs/french.py,sha256=9ofGQpnjw0j_lPB0SuWMvbuWVZXfOvROMqZ03d-EAHs,4281
38
- euroeval/dataset_configs/german.py,sha256=qsJO2YCND8Kuc_atSWXjkoD2itUQNbUsExiGk7P0OnE,4459
39
- euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
40
- euroeval/dataset_configs/italian.py,sha256=xoS_oIFXnTraiV9PX2dBsE1GyodlAbma5dEB7yM_Q8A,4564
41
- euroeval/dataset_configs/latvian.py,sha256=tibwTbe-atsRZEBbegJ6nbr1Oh4RthUYhZoHPVVawq0,2273
42
- euroeval/dataset_configs/norwegian.py,sha256=eTX0KpjH60FyLGrUTfspvNvYaL-Ytfw3DTFftlriVM0,7295
43
- euroeval/dataset_configs/portuguese.py,sha256=x-Idrdo_EtmB_xoabwKivKG091DvFEQEbO6MTcjZVqs,3646
44
- euroeval/dataset_configs/spanish.py,sha256=5m3Qh328YPhbN8jFPIy9Sa7ZWob02ToCWzlDoT8IsSw,4462
45
- euroeval/dataset_configs/swedish.py,sha256=j_I7ba9a0nXzEPvpnPTuNFEkS51pnUPrnRwcqGh7tu0,4715
46
- euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
47
- euroeval/metrics/base.py,sha256=4vnRIPfKUwTNe0ZVm5YC2jQNecwchGUpN6nAH5cX0PM,2288
48
- euroeval/metrics/huggingface.py,sha256=b_Z_FUELQcmK7HeJh0zlAZs3pim1uNHnFLu7nvlZ4_A,5824
49
- euroeval/metrics/llm_as_a_judge.py,sha256=YCUHWK3_bkMEYvL7Q79ZAK3V0M1m5rq5zJYdtMxa4fs,9686
50
- euroeval/metrics/pipeline.py,sha256=a09Um3tnNdyQhzyDa9k-seYQXriYiJRQ5vyHK2lrKcg,10276
51
- euroeval/metrics/speed.py,sha256=tLna031y0SVzAv6lvXBxf8IOSiw9dvLlonky2zM3MnE,1369
52
- euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
53
- euroeval/prompt_templates/linguistic_acceptability.py,sha256=9ZIyv_hfI2Aj20Uy9SY1izq5OBRV844PXPiZCNCOoEY,8207
54
- euroeval/prompt_templates/multiple_choice.py,sha256=TCMKB0xS5IEa8f4YEUjsoifcUpaIv4yOL4FisVvPwok,6423
55
- euroeval/prompt_templates/named_entity_recognition.py,sha256=_ZRVDcnbXvTs_C2NXy78oMbCLFDtW9SuxmvSVg51Umo,15554
56
- euroeval/prompt_templates/reading_comprehension.py,sha256=eRMN-kCT3wuImbuFXzZYfo5WiVhCFWJkCYwRUDtpeWo,8208
57
- euroeval/prompt_templates/sentiment_classification.py,sha256=eIXn-aAY7LKeXqxzMKoqdVbihA2f1RaNQk7DhceuQdQ,8887
58
- euroeval/prompt_templates/summarization.py,sha256=GvnKuYJKbJ_2QkdtSWp_h4RhfOXdq-7_yYeClJSPaTY,6137
59
- euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
60
- euroeval/task_group_utils/multiple_choice_classification.py,sha256=i5sidJGAXnENRoB6pOelyaUeGP1qoxwPSzD-F9RLwWk,7106
61
- euroeval/task_group_utils/question_answering.py,sha256=vdEbcZy7BE6ICA7kWkPYmPW4eVuIiZ_4uJRLUexDhwY,27750
62
- euroeval/task_group_utils/sequence_classification.py,sha256=ZIXcYo6ins9VUv8TT4aupWrfUQoWGBlgU8a1hYATOYM,17249
63
- euroeval/task_group_utils/text_to_text.py,sha256=7f4hGAs5WNJ9PmW1mLhjDMrPxrYAvw5axXsneiJop1w,4993
64
- euroeval/task_group_utils/token_classification.py,sha256=sNl0rhkXI9g5zKsJujrWX-9jWbYYK2iaKA1AcUg0xW4,17118
65
- euroeval-16.0.1.dist-info/METADATA,sha256=toyIiyjwyl4Oty2YsD-P6r95hN0Si3BkBNBMOfmiwBA,13729
66
- euroeval-16.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
- euroeval-16.0.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
68
- euroeval-16.0.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
69
- euroeval-16.0.1.dist-info/RECORD,,