EuroEval 15.7.1-py3-none-any.whl → 15.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval might be problematic.
- euroeval/benchmark_config_factory.py +1 -1
- euroeval/benchmark_modules/litellm.py +15 -5
- euroeval/benchmark_modules/vllm.py +1 -1
- euroeval/benchmarker.py +13 -11
- euroeval/dataset_configs/__init__.py +1 -0
- euroeval/dataset_configs/finnish.py +11 -9
- euroeval/languages.py +1 -1
- euroeval/task_group_utils/sequence_classification.py +46 -11
- euroeval/tokenization_utils.py +50 -14
- {euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/METADATA +1 -1
- {euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/RECORD +14 -14
- {euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/WHEEL +0 -0
- {euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/entry_points.txt +0 -0
- {euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/licenses/LICENSE +0 -0

euroeval/benchmark_config_factory.py
CHANGED

@@ -238,7 +238,7 @@ def prepare_languages(
             The default language codes of the languages to include.

    Returns:
-        The prepared
+        The prepared dataset languages.
    """
    # Create a dictionary that maps languages to their associated language objects
    language_mapping = get_all_languages()

euroeval/benchmark_modules/litellm.py
CHANGED

@@ -1007,6 +1007,10 @@ def try_download_ollama_model(model_id: str) -> bool:

    Returns:
        Whether the model was downloaded successfully.
+
+    Raises:
+        InvalidModel:
+            If Ollama is not running or the model cannot be downloaded.
    """
    if not (model_id.startswith("ollama/") or model_id.startswith("ollama_chat/")):
        return False
@@ -1021,11 +1025,17 @@ def try_download_ollama_model(model_id: str) -> bool:
            level=logging.WARNING,
        )

-
-
-
-
-
+    try:
+        downloaded_ollama_models: list[str] = [
+            model_obj.model
+            for model_obj in ollama.list().models
+            if model_obj.model is not None
+        ]
+    except ConnectionError:
+        raise InvalidModel(
+            "Ollama does not seem to be running, so we cannot evaluate the model "
+            f"{model_id!r}. Please make sure that Ollama is running and try again."
+        )

    ollama_model_id = "/".join(model_id.split("/")[1:])
    if ollama_model_id not in downloaded_ollama_models:
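The new guard around ollama.list() turns an unreachable Ollama server into an explicit InvalidModel error instead of an unhandled ConnectionError. Below is a minimal standalone sketch of the same pattern; the helper and exception names are hypothetical, not EuroEval's:

import ollama


class OllamaUnavailableError(RuntimeError):
    """Raised when the local Ollama server cannot be reached."""


def list_downloaded_ollama_models() -> list[str]:
    """Return the names of models already pulled into the local Ollama server."""
    try:
        # ollama.list() raises ConnectionError when no server is listening
        response = ollama.list()
    except ConnectionError as exc:
        raise OllamaUnavailableError(
            "Ollama does not seem to be running. Start the server and try again."
        ) from exc
    return [model.model for model in response.models if model.model is not None]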

euroeval/benchmark_modules/vllm.py
CHANGED

@@ -797,7 +797,7 @@ def load_model_and_tokenizer(
            enable_lora=model_config.adapter_base_model_id is not None,
            max_lora_rank=256,
        )
-    except (ValueError, OSError) as e:
+    except (RuntimeError, ValueError, OSError) as e:
        if "awaiting a review from the repo authors" in str(e):
            raise InvalidModel(
                f"The model {model_id!r} is awaiting a review from the repository "
euroeval/benchmarker.py
CHANGED
@@ -372,15 +372,7 @@ class Benchmarker:

        current_benchmark_results: list[BenchmarkResult] = list()
        for model_id in model_ids:
-            try:
-                model_config = get_model_config(
-                    model_id=model_id, benchmark_config=benchmark_config
-                )
-            except InvalidModel as e:
-                logger.info(e.message)
-                num_finished_benchmarks += len(dataset_configs)
-                continue
-
+            model_config: ModelConfig | None = None
            loaded_model: BenchmarkModule | None = None
            for dataset_config in dataset_configs:
                # Skip if we have already benchmarked this model on this dataset and
@@ -394,12 +386,22 @@ class Benchmarker:
                ):
                    logger.debug(
                        f"Skipping benchmarking {model_id} on "
-                        f"{dataset_config.pretty_name}, as it "
-                        "
+                        f"{dataset_config.pretty_name}, as it has already been "
+                        "benchmarked."
                    )
                    num_finished_benchmarks += 1
                    continue

+                if model_config is None:
+                    try:
+                        model_config = get_model_config(
+                            model_id=model_id, benchmark_config=benchmark_config
+                        )
+                    except InvalidModel as e:
+                        logger.info(e.message)
+                        num_finished_benchmarks += len(dataset_configs)
+                        continue
+
                # Skip if the model is an encoder model and the task is generative
                task_is_generative = (
                    dataset_config.task.task_group in GENERATIVE_DATASET_TASK_GROUPS
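The two benchmarker hunks above move get_model_config from the top of the model loop into the dataset loop, so the lookup only runs once a dataset actually needs benchmarking. A rough standalone sketch of that lazy-resolution pattern, using simplified names rather than the Benchmarker class itself:

from dataclasses import dataclass


@dataclass
class ModelConfig:
    model_id: str


class InvalidModel(Exception):
    pass


def resolve_model_config(model_id: str) -> ModelConfig:
    # Stand-in for a lookup that may hit the network and may fail
    if not model_id:
        raise InvalidModel("empty model ID")
    return ModelConfig(model_id=model_id)


def benchmark_all(model_ids: list[str], datasets: list[str], cached: set[tuple[str, str]]) -> None:
    for model_id in model_ids:
        model_config: ModelConfig | None = None
        for dataset in datasets:
            if (model_id, dataset) in cached:
                continue  # Fully cached combinations never trigger the lookup
            if model_config is None:
                try:
                    model_config = resolve_model_config(model_id)
                except InvalidModel as exc:
                    print(f"Skipping {model_id}: {exc}")
                    break
            print(f"Benchmarking {model_config.model_id} on {dataset}")


benchmark_all(["m1", ""], ["ds1", "ds2"], cached={("m1", "ds1")})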

euroeval/dataset_configs/__init__.py
CHANGED

@@ -7,6 +7,7 @@ from .danish import *  # noqa: F403
 from .dutch import *  # noqa: F403
 from .english import *  # noqa: F403
 from .faroese import *  # noqa: F403
+from .finnish import *  # noqa: F403
 from .french import *  # noqa: F403
 from .german import *  # noqa: F403
 from .icelandic import *  # noqa: F403

euroeval/dataset_configs/finnish.py
CHANGED

@@ -2,7 +2,7 @@

 from ..data_models import DatasetConfig
 from ..languages import FI
-from ..tasks import
+from ..tasks import LA, NER, RC, SENT, SUMM

 ### Official datasets ###

@@ -40,14 +40,16 @@ XLSUM_FI_CONFIG = DatasetConfig(
    languages=[FI],
 )

-
-
-
-
-
-
-
-
+# TODO: Include when this issue has been resolved:
+# https://github.com/EuroEval/EuroEval/issues/158#issuecomment-2846664885
+# HELLASWAG_FI_CONFIG = DatasetConfig(
+#     name="hellaswag-fi",
+#     pretty_name="the truncated version of the Finnish common-sense reasoning dataset "
+#     "HellaSwag-fi, translated from the English HellaSwag dataset",
+#     huggingface_id="EuroEval/hellaswag-fi-mini",
+#     task=COMMON_SENSE,
+#     languages=[FI],
+# )

 SCALA_FI_CONFIG = DatasetConfig(
    name="scala-fi",
euroeval/languages.py
CHANGED
@@ -21,6 +21,7 @@ def get_all_languages() -> dict[str, Language]:
 DA = Language(code="da", name="Danish", _and_separator="og", _or_separator="eller")
 NL = Language(code="nl", name="Dutch", _and_separator="en", _or_separator="of")
 EN = Language(code="en", name="English", _and_separator="and", _or_separator="or")
+FI = Language(code="fi", name="Finnish", _and_separator="ja", _or_separator="tai")
 FO = Language(code="fo", name="Faroese", _and_separator="og", _or_separator="ella")
 FR = Language(code="fr", name="French", _and_separator="et", _or_separator="ou")
 DE = Language(code="de", name="German", _and_separator="und", _or_separator="oder")
@@ -78,7 +79,6 @@ EO = Language(code="eo", name="Esperanto")
 ET = Language(code="et", name="Estonian")
 EE = Language(code="ee", name="Ewe")
 FJ = Language(code="fj", name="Fijian")
-FI = Language(code="fi", name="Finnish")
 FY = Language(code="fy", name="Western Frisian")
 FF = Language(code="ff", name="Fulah")
 GD = Language(code="gd", name="Gaelic")

euroeval/task_group_utils/sequence_classification.py
CHANGED

@@ -132,6 +132,11 @@ def extract_labels_from_generation(
        The predicted labels.
    """
    if model_output.scores is not None:
+        if first_label_token_mapping is False:
+            raise InvalidBenchmark(
+                "The model outputted logprobs, but the first label token mapping is "
+                "not provided. This means that the model should not output logprobs."
+            )
        labels = get_closest_logprobs_labels(
            generation_logprobs=model_output.scores,
            dataset_config=dataset_config,
@@ -147,7 +152,7 @@ def extract_labels_from_generation(
 def get_closest_logprobs_labels(
    generation_logprobs: list[list[list[tuple[str, float]]]],
    dataset_config: "DatasetConfig",
-    first_label_token_mapping: dict[str, str] |
+    first_label_token_mapping: dict[str, str] | t.Literal[True],
 ) -> list[str] | None:
    """Get the labels with the highest predicted logprob value.

@@ -164,8 +169,7 @@ def get_closest_logprobs_labels(
            The configuration of the dataset.
        first_label_token_mapping:
            A mapping from labels to the first token in each label, or alternatively a
-
-            mapping is outputted then the model will always output scores).
+            `True` value indicating that the model should output logprobs.

    Returns:
        The predicted labels, or None if labels could not be extracted.
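Taken together, the hunks above make first_label_token_mapping a three-way value: a dict from labels to their first tokens, True (request logprobs without a precomputed mapping), or False (logprobs should not be used at all, which the new InvalidBenchmark guard enforces). A small illustrative dispatch on those three cases, with assumed wording rather than EuroEval's own code:

def describe(first_label_token_mapping: dict[str, str] | bool) -> str:
    # False: the tokenizer gives no reliable first tokens, so skip logprob scoring
    if first_label_token_mapping is False:
        return "do not request logprobs from the model"
    # True: logprobs are usable, but labels must be matched without a mapping
    if first_label_token_mapping is True:
        return "request logprobs and match candidate labels directly"
    # dict: match the generated first token against the precomputed mapping
    return f"request logprobs and match via first tokens: {first_label_token_mapping}"


print(describe(False))
print(describe(True))
print(describe({"positive": "pos", "negative": "neg"}))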
@@ -195,7 +199,9 @@ get_closest_logprobs_labels(
        # label, as the output label
        output_label: str | None = None
        for generated_label in generated_labels:
-            # Get the candidate labels
+            # Get the candidate labels. If we have a first label token mapping, we
+            # use it to get the candidate labels. Otherwise, we check if any of the
+            # labels start with the generated label.
            if isinstance(first_label_token_mapping, dict):
                if any(
                    candidate_label not in first_label_token_mapping
@@ -239,14 +245,43 @@ get_closest_logprobs_labels(
                    )
                    return None

-            # If no candidate label is found, we
-            #
-            #
+            # If no candidate label is found, we first check if any of the labels
+            # start with the generated label. This could be the case if the labels
+            # in the first token mapping is inaccurate or incomplete, for instance
+            # if 'pos' is in the first label token mapping, but the model outputted
+            # 'posit'. If this is the case then we cannot trust the first label
+            # token mapping, and we fall back to using word edit distance.
+            # Otherwise, the generated label is just bad, and we skip to the next
+            # generated label.
            elif len(candidate_output_labels) == 0:
-
-
-
-
+                candidate_output_labels_starting_with_generated_label = [
+                    candidate_label
+                    for candidate_label in candidate_labels
+                    if candidate_label.startswith(generated_label)
+                ]
+                if candidate_output_labels_starting_with_generated_label:
+                    log_once(
+                        f"No candidate label found for the generated label "
+                        f"{generated_label!r}. This means that using logprobs to "
+                        "extract the labels is not reliable, and we will instead "
+                        "fall back to extracting the labels using word edit "
+                        "distance.",
+                        level=logging.DEBUG,
+                    )
+                    return None
+
+        # If we did not find any candidate label for any of the generated labels, we
+        # assume that something is wrong with the model output, and we fall back to
+        # using word edit distance to extract the labels
+        else:
+            log_once(
+                f"No candidate label found for any of the generated labels "
+                f"{generated_labels}. This means that using logprobs to extract "
+                "the labels is not reliable, and we will instead fall back to "
+                "extracting the labels using word edit distance.",
+                level=logging.DEBUG,
+            )
+            return None

        if output_label is not None:
            output_labels.append(output_label)
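The new comments and branches above describe the fallback policy: if a generated label is missing from the first-token mapping but is a prefix of a real label, the mapping is considered unreliable and extraction falls back to word edit distance; otherwise the generation is treated as noise. A self-contained sketch of that decision, for illustration only (including a small Levenshtein helper):

def edit_distance(a: str, b: str) -> int:
    """Plain Levenshtein distance between two strings."""
    previous = list(range(len(b) + 1))
    for i, char_a in enumerate(a, start=1):
        current = [i]
        for j, char_b in enumerate(b, start=1):
            current.append(
                min(previous[j] + 1, current[j - 1] + 1, previous[j - 1] + (char_a != char_b))
            )
        previous = current
    return previous[-1]


def pick_label(generated: str, labels: list[str], first_tokens: dict[str, str]) -> str | None:
    # Trust the first-token mapping when the generation matches a mapped token exactly
    for label, first_token in first_tokens.items():
        if generated == first_token:
            return label
    # The mapping failed. If the generation is a prefix of some label, the mapping is
    # probably incomplete, so fall back to edit distance over the full label names.
    if any(label.startswith(generated) for label in labels):
        return min(labels, key=lambda label: edit_distance(generated, label))
    # Otherwise the generation is just noise; let the caller skip it
    return None


print(pick_label("posit", ["positive", "negative"], {"positive": "pos", "negative": "neg"}))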
euroeval/tokenization_utils.py
CHANGED
@@ -311,24 +311,60 @@ def get_first_label_token_mapping(
        for label in dataset_config.labels
    ]

-    #
-
-
+    # Tokenize some text containing each label, which we will use to extract the
+    # first token of each label
+    all_tokens: list[list[str]]
+    if tokenizer.chat_template is None:
+        add_prefix_space = should_prefix_space_be_added_to_labels(
            labels_to_be_generated=local_labels, tokenizer=tokenizer
        )
-
-
-
-
-
-
-
-
-
-
-
+        all_tokens = [
+            tokenizer.tokenize(text=f" {label}" if add_prefix_space else label)
+            for label in local_labels
+        ]
+    else:
+        all_tokens = [
+            tokenizer.convert_ids_to_tokens(
+                ids=tokenizer.apply_chat_template(
+                    conversation=[
+                        dict(role="user", content=""),
+                        dict(role="assistant", content=label),
+                    ],
+                    add_generation_prompt=True,
+                    tokenize=True,
+                )
+            )
+            for label in local_labels
+        ]
+
+    # Remove any non-alphabetic characters from the tokens
+    all_tokens = [
+        [
+            re.sub(
+                pattern=r"^[^a-zæøåüöä]+|[^a-zæøåüöä]+$",
+                repl="",
+                string=token.lower(),
+            )
+            for token in token_list
+        ]
+        for token_list in all_tokens
    ]

+    # Extract the first token of each label
+    first_tokens: list[str] = list()
+    for token_list, label in zip(all_tokens, local_labels):
+        matching_tokens = [
+            tok for tok in token_list if tok and label.startswith(tok)
+        ]
+        if not matching_tokens:
+            log_once(
+                f"No matching token found in token_list for label '{label}', so "
+                "we will not output scores.",
+                level=logging.DEBUG,
+            )
+            return False
+        first_tokens.append(matching_tokens[0])
+
    # Build a mapping from labels to the first token in each label if the first
    # tokens are distinct
    if len(first_tokens) == len(set(first_tokens)):
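The rewritten block tokenizes each label (directly or via the chat template), strips non-alphabetic characters, keeps the first token that is a prefix of its label, and only builds a mapping when those first tokens are distinct. A compact sketch of the same idea against a Hugging Face tokenizer, assuming the transformers package and a downloadable gpt2 tokenizer; this is not EuroEval's own function, and the collision policy here is just one possible choice:

import re

from transformers import AutoTokenizer


def first_label_tokens(labels: list[str], model_id: str = "gpt2") -> dict[str, str] | bool:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    first_tokens: list[str] = []
    for label in labels:
        # A leading space matters for BPE tokenizers such as GPT-2's
        tokens = [
            re.sub(r"^[^a-zæøåüöä]+|[^a-zæøåüöä]+$", "", token.lower())
            for token in tokenizer.tokenize(f" {label}")
        ]
        matching = [token for token in tokens if token and label.startswith(token)]
        if not matching:
            return False  # No token anchors this label, so logprob scoring is unsafe
        first_tokens.append(matching[0])
    if len(first_tokens) != len(set(first_tokens)):
        return True  # First tokens collide: request logprobs, but without a mapping
    return dict(zip(labels, first_tokens))


print(first_label_tokens(["positive", "negative", "neutral"]))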

{euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 euroeval/__init__.py,sha256=NiT6S4II1YpnNl5KFHDNogE-rvVkOHQy5pR483eq_do,2581
-euroeval/benchmark_config_factory.py,sha256=
-euroeval/benchmarker.py,sha256=
+euroeval/benchmark_config_factory.py,sha256=RDYotoLcfNr3xU8Cw-G-Y8wLe6RSlJD1Ok9C97lWfOs,12553
+euroeval/benchmarker.py,sha256=4tCrs0CvKvQcMpJRtaonxELEDXkmY95stCGwht6wTGE,48649
 euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
 euroeval/cli.py,sha256=EMB6g6kRvxIqlfYLSoMzwLAtEd-fqXipo4A_HTkhjkA,8575
 euroeval/constants.py,sha256=p6kp_R6-Tq5LBvyXyT6Sa6N3SkjEElGS2LSZRBoQaYs,1985
@@ -12,28 +12,28 @@ euroeval/finetuning.py,sha256=IieAhgvxjeLHAHBief1Ay-STcCosQmrDHFTRTXFZX0Q,10743
 euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
 euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
 euroeval/human_evaluation.py,sha256=VGvw1X6Mkdf22r-THSNWXMIqyJP44yh4rW53vq-0huo,27681
-euroeval/languages.py,sha256=
+euroeval/languages.py,sha256=LerXuRBAUYkQL6qSV-F82itAE4EgBGFBtzaGnJJZvOE,8555
 euroeval/model_cache.py,sha256=n39yFpZkudBCVwz1EQpZ-g5BQtlQemQ5nP3IiFKJZHg,8275
 euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
 euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
 euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
 euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
 euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
-euroeval/tokenization_utils.py,sha256=
+euroeval/tokenization_utils.py,sha256=RYTYbzCM9cryZ_w-_CzyN9Sbt47DbaGU5ukm-H38sHI,13871
 euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
 euroeval/utils.py,sha256=DyWhtdFlAM1TZuiYXWNPN8KxNrZGNa-J3WfS6DGwkvM,10467
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
 euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
 euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
 euroeval/benchmark_modules/hf.py,sha256=yFApLL4_ia5Kw2iat5RSI8h5RhI4OP04HlzYidlhBCs,44012
-euroeval/benchmark_modules/litellm.py,sha256=
-euroeval/benchmark_modules/vllm.py,sha256=
-euroeval/dataset_configs/__init__.py,sha256=
+euroeval/benchmark_modules/litellm.py,sha256=_32H-M1L_TfW-opyaMLJFPxx0iOG8A8Zfq7uVGFKZdA,43005
+euroeval/benchmark_modules/vllm.py,sha256=DJyla0jr-DVMPPs4RBguxq1Xn5YguvyuAnIlgIOfFaw,39394
+euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
 euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
 euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
 euroeval/dataset_configs/english.py,sha256=yHw7D0zSNVbiSBAjR1mWX4V5FSkhqy4y-o-pnyWCLxE,2323
 euroeval/dataset_configs/faroese.py,sha256=QQgLe5gv0f3AtXe5rV65xZ98gFgyITQPDr3UwO4Bnv4,1350
-euroeval/dataset_configs/finnish.py,sha256=
+euroeval/dataset_configs/finnish.py,sha256=lZA2bY_ul9qh3uGFrTNe7q15WyZ04EL9OYmrkcNjygY,1857
 euroeval/dataset_configs/french.py,sha256=ATsj8_9_GxFTQgmfrniPQFZ1R9hoQCI1_ieWTnscFHU,2382
 euroeval/dataset_configs/german.py,sha256=QO6PrBQY6kyZeQMU1vg6KrC_sKyj9U2ukS9nbKO19is,2560
 euroeval/dataset_configs/icelandic.py,sha256=mncl7X4yO9gBmYqXMBfm7FKU1jcKryerSgd0dqlIA_4,4198
@@ -51,11 +51,11 @@ euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5w
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 euroeval/task_group_utils/multiple_choice_classification.py,sha256=nB78TzOgd0HBvTclmjOYJid9ZVAgu8IHZsqB_n1SAZU,6178
 euroeval/task_group_utils/question_answering.py,sha256=kZBABJ_WYNTH4Xgo2jIvfx7iYvfoGt0EUObSaXRCGmk,27700
-euroeval/task_group_utils/sequence_classification.py,sha256=
+euroeval/task_group_utils/sequence_classification.py,sha256=MCdO5h3v_LWTkrvKAeefPq7rl1H5mFed50nAL4uZq0E,13837
 euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
 euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
-euroeval-15.7.
-euroeval-15.7.
-euroeval-15.7.
-euroeval-15.7.
-euroeval-15.7.
+euroeval-15.7.2.dist-info/METADATA,sha256=nCF9GI8kOoKP3Up_KgPSxe4pnomawC1rQqRGlYoEsIA,13669
+euroeval-15.7.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.7.2.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.7.2.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
+euroeval-15.7.2.dist-info/RECORD,,
{euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/WHEEL: File without changes
{euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/entry_points.txt: File without changes
{euroeval-15.7.1.dist-info → euroeval-15.7.2.dist-info}/licenses/LICENSE: File without changes