EuroEval 15.9.1__py3-none-any.whl → 15.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- euroeval/benchmark_modules/hf.py +3 -3
- euroeval/benchmark_modules/litellm.py +158 -122
- euroeval/benchmark_modules/vllm.py +188 -235
- euroeval/constants.py +13 -0
- euroeval/data_loading.py +8 -2
- euroeval/finetuning.py +22 -0
- euroeval/task_group_utils/multiple_choice_classification.py +11 -1
- euroeval/task_group_utils/question_answering.py +14 -4
- euroeval/task_group_utils/sequence_classification.py +1 -1
- euroeval/tokenization_utils.py +121 -18
- euroeval/utils.py +13 -8
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/METADATA +7 -8
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/RECORD +16 -16
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/WHEEL +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.9.1.dist-info → euroeval-15.10.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,6 +12,8 @@ from transformers.tokenization_utils import PreTrainedTokenizer
|
|
|
12
12
|
from transformers.tokenization_utils_base import BatchEncoding
|
|
13
13
|
from transformers.trainer import Trainer
|
|
14
14
|
|
|
15
|
+
from ..exceptions import InvalidBenchmark
|
|
16
|
+
|
|
15
17
|
if t.TYPE_CHECKING:
|
|
16
18
|
from ..types import Labels, Predictions
|
|
17
19
|
|
|
@@ -19,7 +21,7 @@ logger = logging.getLogger("euroeval")
|
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
class MultipleChoiceClassificationTrainer(Trainer):
|
|
22
|
-
"""Trainer subclass for
|
|
24
|
+
"""Trainer subclass for multiple-choice classification tasks."""
|
|
23
25
|
|
|
24
26
|
def evaluate( # type: ignore[override]
|
|
25
27
|
self,
|
|
@@ -57,6 +59,8 @@ class MultipleChoiceClassificationTrainer(Trainer):
|
|
|
57
59
|
)
|
|
58
60
|
|
|
59
61
|
predictions = output.predictions
|
|
62
|
+
if isinstance(predictions, tuple):
|
|
63
|
+
predictions = predictions[0]
|
|
60
64
|
assert isinstance(predictions, np.ndarray)
|
|
61
65
|
|
|
62
66
|
metrics = output.metrics
|
|
@@ -150,6 +154,12 @@ def postprocess_predictions_and_labels(
|
|
|
150
154
|
Returns:
|
|
151
155
|
The postprocessed predictions and labels.
|
|
152
156
|
"""
|
|
157
|
+
if predictions.ndim != 2 or predictions.shape[1] != 2:
|
|
158
|
+
raise InvalidBenchmark(
|
|
159
|
+
"Predictions must be a 2D array with shape (num_examples, 2). Found "
|
|
160
|
+
f"shape {predictions.shape}."
|
|
161
|
+
)
|
|
162
|
+
|
|
153
163
|
mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}
|
|
154
164
|
|
|
155
165
|
all_predictions: list[str] = list()
|
|
@@ -8,11 +8,11 @@ from collections import defaultdict
|
|
|
8
8
|
import evaluate
|
|
9
9
|
import numpy as np
|
|
10
10
|
from evaluate import EvaluationModule
|
|
11
|
-
from transformers.tokenization_utils import PreTrainedTokenizer
|
|
12
11
|
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
|
|
13
12
|
from transformers.trainer import Trainer
|
|
14
13
|
|
|
15
14
|
from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
|
|
15
|
+
from ..exceptions import InvalidBenchmark
|
|
16
16
|
from ..tokenization_utils import get_special_token_metadata
|
|
17
17
|
from ..utils import raise_if_model_output_contains_nan_values
|
|
18
18
|
|
|
@@ -20,6 +20,7 @@ if t.TYPE_CHECKING:
|
|
|
20
20
|
import torch.nn as nn
|
|
21
21
|
from datasets.arrow_dataset import Dataset
|
|
22
22
|
from transformers.modeling_utils import PreTrainedModel
|
|
23
|
+
from transformers.tokenization_utils import PreTrainedTokenizer
|
|
23
24
|
from transformers.tokenization_utils_base import BatchEncoding
|
|
24
25
|
from transformers.trainer_callback import TrainerCallback
|
|
25
26
|
from transformers.trainer_utils import EvalPrediction
|
|
@@ -43,6 +44,7 @@ class QuestionAnsweringTrainer(Trainer):
|
|
|
43
44
|
compute_metrics: "c.Callable[[EvalPrediction], dict[str, float]]",
|
|
44
45
|
callbacks: "list[TrainerCallback]",
|
|
45
46
|
data_collator: "c.Callable",
|
|
47
|
+
**kwargs,
|
|
46
48
|
) -> None:
|
|
47
49
|
"""Initialise the trainer."""
|
|
48
50
|
super().__init__(
|
|
@@ -54,6 +56,7 @@ class QuestionAnsweringTrainer(Trainer):
|
|
|
54
56
|
compute_metrics=compute_metrics,
|
|
55
57
|
callbacks=callbacks,
|
|
56
58
|
data_collator=data_collator,
|
|
59
|
+
**kwargs,
|
|
57
60
|
)
|
|
58
61
|
|
|
59
62
|
# Get the CLS token id for the tokenizer
|
|
@@ -475,7 +478,7 @@ def prepare_test_examples(
|
|
|
475
478
|
|
|
476
479
|
|
|
477
480
|
def postprocess_predictions_and_labels(
|
|
478
|
-
predictions: tuple[np.ndarray,
|
|
481
|
+
predictions: tuple[np.ndarray, ...],
|
|
479
482
|
dataset: "Dataset",
|
|
480
483
|
prepared_dataset: "Dataset",
|
|
481
484
|
cls_token_index: int,
|
|
@@ -484,7 +487,7 @@ def postprocess_predictions_and_labels(
|
|
|
484
487
|
|
|
485
488
|
Args:
|
|
486
489
|
predictions:
|
|
487
|
-
A
|
|
490
|
+
A tuple whose first two elements are (start_logits, end_logits).
|
|
488
491
|
dataset:
|
|
489
492
|
The dataset containing the examples.
|
|
490
493
|
prepared_dataset:
|
|
@@ -495,7 +498,14 @@ def postprocess_predictions_and_labels(
|
|
|
495
498
|
Returns:
|
|
496
499
|
The postprocessed predictions and labels.
|
|
497
500
|
"""
|
|
498
|
-
|
|
501
|
+
if len(predictions) < 2:
|
|
502
|
+
raise InvalidBenchmark(
|
|
503
|
+
"The predictions should be a tuple with the first two elements being "
|
|
504
|
+
"(start_logits, end_logits), but got {len(predictions)} elements instead: "
|
|
505
|
+
f"{predictions}."
|
|
506
|
+
)
|
|
507
|
+
|
|
508
|
+
all_start_logits, all_end_logits = predictions[:2]
|
|
499
509
|
|
|
500
510
|
# Build a map from an example to its corresponding features, being the blocks of
|
|
501
511
|
# text from the context that we're feeding into the model. An example can have
|
|
@@ -135,7 +135,7 @@ def extract_labels_from_generation(
|
|
|
135
135
|
if first_label_token_mapping is False:
|
|
136
136
|
raise InvalidBenchmark(
|
|
137
137
|
"The model outputted logprobs, but the first label token mapping is "
|
|
138
|
-
"not provided
|
|
138
|
+
"not provided, which is not supported."
|
|
139
139
|
)
|
|
140
140
|
labels = get_closest_logprobs_labels(
|
|
141
141
|
generation_logprobs=model_output.scores,
|
euroeval/tokenization_utils.py
CHANGED
|
@@ -8,7 +8,6 @@ import torch
|
|
|
8
8
|
|
|
9
9
|
from .constants import TASK_GROUPS_USING_LOGPROBS
|
|
10
10
|
from .enums import GenerativeType
|
|
11
|
-
from .exceptions import InvalidModel
|
|
12
11
|
from .utils import log_once
|
|
13
12
|
|
|
14
13
|
if t.TYPE_CHECKING:
|
|
@@ -153,7 +152,9 @@ def should_prefix_space_be_added_to_labels(
|
|
|
153
152
|
return add_prefix_space
|
|
154
153
|
|
|
155
154
|
|
|
156
|
-
def get_bos_token(
|
|
155
|
+
def get_bos_token(
|
|
156
|
+
tokenizer: "PreTrainedTokenizer",
|
|
157
|
+
) -> tuple[str, int] | tuple[None, None]:
|
|
157
158
|
"""Get the beginning-of-sequence token from a tokenizer.
|
|
158
159
|
|
|
159
160
|
Args:
|
|
@@ -162,7 +163,7 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
|
|
|
162
163
|
|
|
163
164
|
Returns:
|
|
164
165
|
A pair (token, token_id) representing the beginning-of-sequence token and its
|
|
165
|
-
token ID.
|
|
166
|
+
token ID, or (None, None) if no BOS token is found.
|
|
166
167
|
"""
|
|
167
168
|
if isinstance(tokenizer.bos_token, str) and isinstance(tokenizer.bos_token_id, int):
|
|
168
169
|
return tokenizer.bos_token, tokenizer.bos_token_id
|
|
@@ -176,15 +177,25 @@ def get_bos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
|
|
|
176
177
|
bos_token_id = vocab[bos_token]
|
|
177
178
|
break
|
|
178
179
|
else:
|
|
179
|
-
|
|
180
|
+
log_once(
|
|
180
181
|
"The model does not have a beginning-of-sequence token. Please ensure that "
|
|
181
|
-
"this has been set in the tokenizer's configuration."
|
|
182
|
+
"this has been set in the tokenizer's configuration. Using no BOS token."
|
|
183
|
+
" This may lead to unexpected behavior in the model.",
|
|
184
|
+
level=logging.INFO,
|
|
182
185
|
)
|
|
186
|
+
return None, None
|
|
183
187
|
|
|
188
|
+
log_once(
|
|
189
|
+
f"Beginning-of-sequence token was not set, but detected it as {bos_token!r} "
|
|
190
|
+
f"with ID {bos_token_id}.",
|
|
191
|
+
level=logging.DEBUG,
|
|
192
|
+
)
|
|
184
193
|
return bos_token, bos_token_id
|
|
185
194
|
|
|
186
195
|
|
|
187
|
-
def get_eos_token(
|
|
196
|
+
def get_eos_token(
|
|
197
|
+
tokenizer: "PreTrainedTokenizer",
|
|
198
|
+
) -> tuple[str, int] | tuple[None, None]:
|
|
188
199
|
"""Get the end-of-sequence token from a tokenizer.
|
|
189
200
|
|
|
190
201
|
Args:
|
|
@@ -193,7 +204,7 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
|
|
|
193
204
|
|
|
194
205
|
Returns:
|
|
195
206
|
A pair (token, token_id) representing the end-of-sequence token and its token
|
|
196
|
-
ID.
|
|
207
|
+
ID, or (None, None) if no EOS token is found.
|
|
197
208
|
"""
|
|
198
209
|
if isinstance(tokenizer.eos_token, str) and isinstance(tokenizer.eos_token_id, int):
|
|
199
210
|
return tokenizer.eos_token, tokenizer.eos_token_id
|
|
@@ -207,14 +218,105 @@ def get_eos_token(tokenizer: "PreTrainedTokenizer") -> tuple[str, int]:
|
|
|
207
218
|
eos_token_id = vocab[eos_token]
|
|
208
219
|
break
|
|
209
220
|
else:
|
|
210
|
-
|
|
221
|
+
log_once(
|
|
211
222
|
"The model does not have an end-of-sequence token. Please ensure that this "
|
|
212
|
-
"has been set in the tokenizer's configuration."
|
|
223
|
+
"has been set in the tokenizer's configuration. Using no EOS token. This "
|
|
224
|
+
"may lead to unexpected behavior in the model.",
|
|
225
|
+
level=logging.INFO,
|
|
213
226
|
)
|
|
227
|
+
return None, None
|
|
214
228
|
|
|
229
|
+
log_once(
|
|
230
|
+
f"End-of-sequence token was not set, but detected it as {eos_token!r} with "
|
|
231
|
+
f"ID {eos_token_id}.",
|
|
232
|
+
level=logging.DEBUG,
|
|
233
|
+
)
|
|
215
234
|
return eos_token, eos_token_id
|
|
216
235
|
|
|
217
236
|
|
|
237
|
+
def get_pad_token(
|
|
238
|
+
tokenizer: "PreTrainedTokenizer",
|
|
239
|
+
) -> tuple[str, int] | tuple[None, None]:
|
|
240
|
+
"""Get the padding token from a tokenizer.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
tokenizer:
|
|
244
|
+
The tokenizer.
|
|
245
|
+
|
|
246
|
+
Returns:
|
|
247
|
+
A pair (token, token_id) representing the padding token and its token ID, or
|
|
248
|
+
(None, None) if no padding token is found.
|
|
249
|
+
"""
|
|
250
|
+
# If the tokenizer already has a padding token, return it
|
|
251
|
+
if tokenizer.pad_token is not None and tokenizer.pad_token_id is not None:
|
|
252
|
+
assert isinstance(tokenizer.pad_token, str), (
|
|
253
|
+
"Expected tokenizer.pad_token to be a string, but got "
|
|
254
|
+
f"{type(tokenizer.pad_token)}."
|
|
255
|
+
)
|
|
256
|
+
assert isinstance(tokenizer.pad_token_id, int), (
|
|
257
|
+
"Expected tokenizer.pad_token_id to be an integer, but got "
|
|
258
|
+
f"{type(tokenizer.pad_token_id)}."
|
|
259
|
+
)
|
|
260
|
+
return (tokenizer.pad_token, tokenizer.pad_token_id)
|
|
261
|
+
|
|
262
|
+
# If the tokenizer has a BOS token, use it as the padding token
|
|
263
|
+
if tokenizer.bos_token is not None and tokenizer.bos_token_id is not None:
|
|
264
|
+
assert isinstance(tokenizer.bos_token, str), (
|
|
265
|
+
"Expected tokenizer.bos_token to be a string, but got "
|
|
266
|
+
f"{type(tokenizer.bos_token)}."
|
|
267
|
+
)
|
|
268
|
+
assert isinstance(tokenizer.bos_token_id, int), (
|
|
269
|
+
"Expected tokenizer.bos_token_id to be an integer, but got "
|
|
270
|
+
f"{type(tokenizer.bos_token_id)}."
|
|
271
|
+
)
|
|
272
|
+
pad_token = tokenizer.bos_token
|
|
273
|
+
pad_token_id = tokenizer.bos_token_id
|
|
274
|
+
|
|
275
|
+
# If the tokenizer has an EOS token, use it as the padding token
|
|
276
|
+
elif tokenizer.eos_token is not None and tokenizer.eos_token_id is not None:
|
|
277
|
+
assert isinstance(tokenizer.eos_token, str), (
|
|
278
|
+
"Expected tokenizer.eos_token to be a string, but got "
|
|
279
|
+
f"{type(tokenizer.eos_token)}."
|
|
280
|
+
)
|
|
281
|
+
assert isinstance(tokenizer.eos_token_id, int), (
|
|
282
|
+
"Expected tokenizer.eos_token_id to be an integer, but got "
|
|
283
|
+
f"{type(tokenizer.eos_token_id)}."
|
|
284
|
+
)
|
|
285
|
+
pad_token = tokenizer.eos_token
|
|
286
|
+
pad_token_id = tokenizer.eos_token_id
|
|
287
|
+
|
|
288
|
+
# Otherwise, try to find a candidate padding token in the vocabulary
|
|
289
|
+
else:
|
|
290
|
+
pad_token_candidates = [
|
|
291
|
+
"<pad>",
|
|
292
|
+
"[pad]",
|
|
293
|
+
"<|endoftext|>",
|
|
294
|
+
"<|end▁of▁sentence|>",
|
|
295
|
+
"<|im_end|>",
|
|
296
|
+
]
|
|
297
|
+
pad_token_candidates.extend([c.upper() for c in pad_token_candidates])
|
|
298
|
+
for candidate in pad_token_candidates:
|
|
299
|
+
if candidate in tokenizer.get_vocab():
|
|
300
|
+
pad_token = candidate
|
|
301
|
+
pad_token_id = tokenizer.get_vocab()[candidate]
|
|
302
|
+
break
|
|
303
|
+
else:
|
|
304
|
+
log_once(
|
|
305
|
+
"Could not identify a padding token for the model. Please ensure that "
|
|
306
|
+
"this has been set in the tokenizer's configuration. Using no padding "
|
|
307
|
+
"token. This may lead to unexpected behavior in the model.",
|
|
308
|
+
level=logging.INFO,
|
|
309
|
+
)
|
|
310
|
+
return None, None
|
|
311
|
+
|
|
312
|
+
log_once(
|
|
313
|
+
f"Padding token was not set, but detected it as {pad_token!r} with ID "
|
|
314
|
+
f"{pad_token_id}.",
|
|
315
|
+
level=logging.DEBUG,
|
|
316
|
+
)
|
|
317
|
+
return pad_token, pad_token_id
|
|
318
|
+
|
|
319
|
+
|
|
218
320
|
def get_end_of_chat_token_ids(tokenizer: "PreTrainedTokenizer") -> list[int] | None:
|
|
219
321
|
"""Get the end token ID for chat models.
|
|
220
322
|
|
|
@@ -291,14 +393,14 @@ def get_first_label_token_mapping(
|
|
|
291
393
|
if tokenizer is None:
|
|
292
394
|
if output_scores:
|
|
293
395
|
log_once(
|
|
294
|
-
f"
|
|
295
|
-
"dataset supports it and no tokenizer is available.",
|
|
396
|
+
f"We will use logprobs with the model {model_config.model_id!r} "
|
|
397
|
+
"since the dataset supports it and no tokenizer is available.",
|
|
296
398
|
level=logging.DEBUG,
|
|
297
399
|
)
|
|
298
400
|
else:
|
|
299
401
|
log_once(
|
|
300
|
-
f"
|
|
301
|
-
"the dataset does not support it and no tokenizer is available.",
|
|
402
|
+
f"We will not use logprobs with the model {model_config.model_id!r} "
|
|
403
|
+
"since the dataset does not support it and no tokenizer is available.",
|
|
302
404
|
level=logging.DEBUG,
|
|
303
405
|
)
|
|
304
406
|
return output_scores
|
|
@@ -359,7 +461,7 @@ def get_first_label_token_mapping(
|
|
|
359
461
|
if not matching_tokens:
|
|
360
462
|
log_once(
|
|
361
463
|
f"No matching token found in token_list for label '{label}', so "
|
|
362
|
-
"we will not
|
|
464
|
+
"we will not use logprobs with the model.",
|
|
363
465
|
level=logging.DEBUG,
|
|
364
466
|
)
|
|
365
467
|
return False
|
|
@@ -369,8 +471,8 @@ def get_first_label_token_mapping(
|
|
|
369
471
|
# tokens are distinct
|
|
370
472
|
if len(first_tokens) == len(set(first_tokens)):
|
|
371
473
|
log_once(
|
|
372
|
-
"
|
|
373
|
-
"are distinct.",
|
|
474
|
+
"We will use logprobs with the model since the first tokens of the "
|
|
475
|
+
"labels are distinct.",
|
|
374
476
|
level=logging.DEBUG,
|
|
375
477
|
)
|
|
376
478
|
return {
|
|
@@ -379,7 +481,7 @@ def get_first_label_token_mapping(
|
|
|
379
481
|
}
|
|
380
482
|
else:
|
|
381
483
|
log_once(
|
|
382
|
-
"
|
|
484
|
+
"We will not use logprobs with the model since the first tokens of the "
|
|
383
485
|
"labels are not distinct. The first tokens for the labels "
|
|
384
486
|
f"{local_labels} are {first_tokens}"
|
|
385
487
|
)
|
|
@@ -389,7 +491,8 @@ def get_first_label_token_mapping(
|
|
|
389
491
|
# evaluation errors. This will force the label extraction to rely on word edit
|
|
390
492
|
# distance instead of logprobs.
|
|
391
493
|
log_once(
|
|
392
|
-
"
|
|
494
|
+
"We will not use logprobs with the model, since the dataset does not have "
|
|
495
|
+
"labels.",
|
|
393
496
|
level=logging.DEBUG,
|
|
394
497
|
)
|
|
395
498
|
return False
|
euroeval/utils.py
CHANGED
|
@@ -121,6 +121,8 @@ def block_terminal_output() -> None:
|
|
|
121
121
|
logging.getLogger("matplotlib.font_manager").setLevel(logging.CRITICAL)
|
|
122
122
|
logging.getLogger("accelerate").setLevel(logging.CRITICAL)
|
|
123
123
|
logging.getLogger("LiteLLM").setLevel(logging.CRITICAL)
|
|
124
|
+
logging.getLogger("LiteLLM Router").setLevel(logging.CRITICAL)
|
|
125
|
+
logging.getLogger("LiteLLM Proxy").setLevel(logging.CRITICAL)
|
|
124
126
|
logging.getLogger("huggingface_hub").setLevel(logging.CRITICAL)
|
|
125
127
|
|
|
126
128
|
# This suppresses vLLM logging
|
|
@@ -352,19 +354,22 @@ def safe_run(coroutine: t.Coroutine[t.Any, t.Any, T]) -> T:
|
|
|
352
354
|
asyncio.set_event_loop(None)
|
|
353
355
|
|
|
354
356
|
|
|
355
|
-
async def
|
|
356
|
-
coroutine: t.Coroutine[t.Any, t.Any, T],
|
|
357
|
+
async def add_semaphore_and_catch_exception(
|
|
358
|
+
coroutine: t.Coroutine[t.Any, t.Any, T], semaphore: asyncio.Semaphore
|
|
357
359
|
) -> T | Exception:
|
|
358
|
-
"""Run a coroutine
|
|
360
|
+
"""Run a coroutine with a semaphore.
|
|
359
361
|
|
|
360
362
|
Args:
|
|
361
363
|
coroutine:
|
|
362
364
|
The coroutine to run.
|
|
365
|
+
semaphore:
|
|
366
|
+
The semaphore to use.
|
|
363
367
|
|
|
364
368
|
Returns:
|
|
365
|
-
The result of the coroutine
|
|
369
|
+
The result of the coroutine.
|
|
366
370
|
"""
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
+
async with semaphore:
|
|
372
|
+
try:
|
|
373
|
+
return await coroutine
|
|
374
|
+
except Exception as exc:
|
|
375
|
+
return exc
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: EuroEval
|
|
3
|
-
Version: 15.9.1
|
|
3
|
+
Version: 15.10.0
|
|
4
4
|
Summary: The robust European language model benchmark.
|
|
5
5
|
Project-URL: Repository, https://github.com/EuroEval/EuroEval
|
|
6
6
|
Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
|
|
7
7
|
Author-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
|
|
8
|
-
Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk
|
|
8
|
+
Maintainer-email: Dan Saattrup Nielsen <dan.nielsen@alexandra.dk>
|
|
9
9
|
License: MIT License
|
|
10
10
|
|
|
11
11
|
Copyright (c) 2022-2024 Dan Saattrup Nielsen
|
|
@@ -37,13 +37,12 @@ Requires-Dist: demjson3>=3.0.6
|
|
|
37
37
|
Requires-Dist: evaluate>=0.4.1
|
|
38
38
|
Requires-Dist: huggingface-hub>=0.30.1
|
|
39
39
|
Requires-Dist: levenshtein>=0.24.0
|
|
40
|
-
Requires-Dist: litellm>=1.
|
|
40
|
+
Requires-Dist: litellm>=1.72.2
|
|
41
41
|
Requires-Dist: more-itertools>=10.5.0
|
|
42
42
|
Requires-Dist: numpy<2.0.0,>=1.23.0
|
|
43
|
-
Requires-Dist: ollama>=0.
|
|
43
|
+
Requires-Dist: ollama>=0.5.1
|
|
44
44
|
Requires-Dist: pandas>=2.2.0
|
|
45
45
|
Requires-Dist: peft>=0.15.0
|
|
46
|
-
Requires-Dist: protobuf~=3.20.0
|
|
47
46
|
Requires-Dist: pydantic>=2.6.0
|
|
48
47
|
Requires-Dist: pyinfer>=0.0.3
|
|
49
48
|
Requires-Dist: python-dotenv>=1.0.1
|
|
@@ -62,12 +61,12 @@ Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == '
|
|
|
62
61
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
|
|
63
62
|
Requires-Dist: gradio>=4.26.0; extra == 'all'
|
|
64
63
|
Requires-Dist: outlines>=0.1.11; extra == 'all'
|
|
65
|
-
Requires-Dist: vllm>=0.9.
|
|
64
|
+
Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'all'
|
|
66
65
|
Provides-Extra: generative
|
|
67
66
|
Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
|
|
68
67
|
Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
|
|
69
68
|
Requires-Dist: outlines>=0.1.11; extra == 'generative'
|
|
70
|
-
Requires-Dist: vllm>=0.9.
|
|
69
|
+
Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'generative'
|
|
71
70
|
Provides-Extra: human-evaluation
|
|
72
71
|
Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
|
|
73
72
|
Provides-Extra: test
|
|
@@ -93,7 +92,7 @@ ______________________________________________________________________
|
|
|
93
92
|
[Code of Conduct](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
|
|
94
93
|
|
|
95
94
|
|
|
96
|
-
##
|
|
95
|
+
## Maintainer
|
|
97
96
|
|
|
98
97
|
- Dan Saattrup Nielsen ([@saattrupdan](https://github.com/saattrupdan),
|
|
99
98
|
dan.nielsen@alexandra.dk)
|
|
@@ -3,12 +3,12 @@ euroeval/benchmark_config_factory.py,sha256=icTeT5C-bNCJmvSWFlxKdEpRboZN8OjwaHGu
|
|
|
3
3
|
euroeval/benchmarker.py,sha256=wmgrYVS31PMhhrVienjaVHHyfnZAy51kUvC6OjooiOw,48047
|
|
4
4
|
euroeval/callbacks.py,sha256=F1AJCLB8FJpxqYprwLi_PsH4Bc0x4lyR8UiTG-GlFLY,2452
|
|
5
5
|
euroeval/cli.py,sha256=d8JztMi_RbpUlEBXidd6DQ-xeC-xhozf_qU6Vkzye20,8161
|
|
6
|
-
euroeval/constants.py,sha256=
|
|
7
|
-
euroeval/data_loading.py,sha256=
|
|
6
|
+
euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
|
|
7
|
+
euroeval/data_loading.py,sha256=2rMLSy8pbntlwmImizMtkTiUzj93mcv5kzYjZELWWfU,4081
|
|
8
8
|
euroeval/data_models.py,sha256=7nAGDpN58Y35Lt9JZE_y0y5iOYesw2htcwHc68MkBZU,22953
|
|
9
9
|
euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
|
|
10
10
|
euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
|
|
11
|
-
euroeval/finetuning.py,sha256=
|
|
11
|
+
euroeval/finetuning.py,sha256=cx5SVgEsveMDNfoMxwLfAFsjZeKmYyHftaOZWZ-L9hA,11285
|
|
12
12
|
euroeval/generation.py,sha256=LSsskfLjIJ-c3gQxmr7eiAobPOm-5bU9vnR7uHQ7XmU,10745
|
|
13
13
|
euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
|
|
14
14
|
euroeval/human_evaluation.py,sha256=zqbbJkqm2Uymf-88PxM3R9vVRR8SZJlq3QrqWEoiVeE,27643
|
|
@@ -19,15 +19,15 @@ euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,223
|
|
|
19
19
|
euroeval/scores.py,sha256=TovjCZD8wmGrIjA4v5oAQp18P5KVcHvakkByDh0Hstk,3059
|
|
20
20
|
euroeval/speed_benchmark.py,sha256=J7VKWMf7GU_l0lRR8f0QeUr_vAaBQqTbgQ_yToHhp_0,3980
|
|
21
21
|
euroeval/tasks.py,sha256=87gbe__K5KNIb1aBSuwGnMPmZgamJFecNNYmNgMxaVo,7069
|
|
22
|
-
euroeval/tokenization_utils.py,sha256=
|
|
22
|
+
euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
|
|
23
23
|
euroeval/types.py,sha256=E0JhLfg-ek5pdFcYJbnGRUSodHxkuR3o8XGuIrBcuRM,2485
|
|
24
|
-
euroeval/utils.py,sha256=
|
|
24
|
+
euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
|
|
25
25
|
euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
|
|
26
26
|
euroeval/benchmark_modules/base.py,sha256=LcG46I2O5wcvu_3T_irBY6VkUhWVPKifBhcP-ln93TA,10798
|
|
27
27
|
euroeval/benchmark_modules/fresh.py,sha256=_LWmpqiNGGTA-NoVC0v3-fS1sraDS9n-pgKUzz89jVk,9919
|
|
28
|
-
euroeval/benchmark_modules/hf.py,sha256=
|
|
29
|
-
euroeval/benchmark_modules/litellm.py,sha256=
|
|
30
|
-
euroeval/benchmark_modules/vllm.py,sha256=
|
|
28
|
+
euroeval/benchmark_modules/hf.py,sha256=Nbtn5eZ4axbmL09M8dGZCBr07pn9-btbqGgQ6q7KbHg,44620
|
|
29
|
+
euroeval/benchmark_modules/litellm.py,sha256=LS4mBXXG6h4uJwySPc6SI6f0y_HuiKE7IprprqWpoCI,50601
|
|
30
|
+
euroeval/benchmark_modules/vllm.py,sha256=sgeltOVfZA9bu0AmXV7PtZvuRst0I8s6VOIp0CI6DO8,38880
|
|
31
31
|
euroeval/dataset_configs/__init__.py,sha256=kWKtlSAOY-olOQL3UtFqL6I3Tki3G3waMZSd2YChjCg,1895
|
|
32
32
|
euroeval/dataset_configs/danish.py,sha256=MTt9EcriSer0QaFQ7_6evYxh-g9OPjroWegYdFpiKag,3395
|
|
33
33
|
euroeval/dataset_configs/dutch.py,sha256=r21nxEvMmBkKqPXVW082batPsxJ9d0RB4DzngOTMJSk,3185
|
|
@@ -49,13 +49,13 @@ euroeval/prompt_templates/reading_comprehension.py,sha256=yLqryWQAW04GULz_EyNDLO
|
|
|
49
49
|
euroeval/prompt_templates/sentiment_classification.py,sha256=LDOwjGQ2kqhwgNyphPywQeolwNB09o-xYWc9RUbzc84,7136
|
|
50
50
|
euroeval/prompt_templates/summarization.py,sha256=mcWeKNhGWmp7IG_iY64T-VOSabQg5wKddjSbJNYFDp8,4984
|
|
51
51
|
euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
|
|
52
|
-
euroeval/task_group_utils/multiple_choice_classification.py,sha256=
|
|
53
|
-
euroeval/task_group_utils/question_answering.py,sha256=
|
|
54
|
-
euroeval/task_group_utils/sequence_classification.py,sha256=
|
|
52
|
+
euroeval/task_group_utils/multiple_choice_classification.py,sha256=LQ6zD1UGi-jGCKI2xUJiQdAXoqb5QMpIJu41B2U0HPw,6543
|
|
53
|
+
euroeval/task_group_utils/question_answering.py,sha256=D4oJL2vQEjHghyxiiiq_vj1IQC6eryqNoLXuTiQEPmw,28071
|
|
54
|
+
euroeval/task_group_utils/sequence_classification.py,sha256=zwRUgVHqLlREILwyg-yuDPkrIQOfqGVPsFBai-2D9a8,13525
|
|
55
55
|
euroeval/task_group_utils/text_to_text.py,sha256=Nu1_qRPLbboCd9Q5rxqY4fQFJ_aGXu80aWQqoTG1cYc,5047
|
|
56
56
|
euroeval/task_group_utils/token_classification.py,sha256=3idWB81Fcx9UhTuk-gxMfXENrCBmiWBDUWdULXoIhpw,17863
|
|
57
|
-
euroeval-15.
|
|
58
|
-
euroeval-15.
|
|
59
|
-
euroeval-15.
|
|
60
|
-
euroeval-15.
|
|
61
|
-
euroeval-15.
|
|
57
|
+
euroeval-15.10.0.dist-info/METADATA,sha256=WUXtSfS6qvrlA25lazql3DvyS5chyMnBPKyu-l65A_I,13472
|
|
58
|
+
euroeval-15.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
59
|
+
euroeval-15.10.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
|
|
60
|
+
euroeval-15.10.0.dist-info/licenses/LICENSE,sha256=oZp5fpOSQ7w-vFui8QNwrBIosrO7cnpArItdbvn52Ao,1082
|
|
61
|
+
euroeval-15.10.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|