EuroEval 15.13.0__py3-none-any.whl → 15.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +7 -0
- euroeval/benchmark_modules/litellm.py +31 -4
- euroeval/benchmark_modules/vllm.py +11 -12
- euroeval/data_models.py +1 -1
- euroeval/dataset_configs/danish.py +10 -0
- euroeval/dataset_configs/dutch.py +10 -0
- euroeval/dataset_configs/finnish.py +10 -0
- euroeval/dataset_configs/french.py +10 -0
- euroeval/dataset_configs/german.py +10 -0
- euroeval/dataset_configs/italian.py +10 -0
- euroeval/dataset_configs/spanish.py +10 -0
- euroeval/dataset_configs/swedish.py +10 -0
- euroeval/generation.py +1 -1
- euroeval/human_evaluation.py +2 -1
- euroeval/metrics.py +20 -4
- euroeval/prompt_templates/multiple_choice.py +1 -1
- euroeval/task_group_utils/question_answering.py +7 -1
- euroeval/task_group_utils/sequence_classification.py +8 -1
- euroeval/task_group_utils/text_to_text.py +8 -1
- euroeval/task_group_utils/token_classification.py +9 -2
- euroeval/types.py +5 -0
- {euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/METADATA +3 -5
- {euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/RECORD +26 -26
- {euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/WHEEL +0 -0
- {euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/entry_points.txt +0 -0
- {euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/licenses/LICENSE +0 -0
euroeval/__init__.py
CHANGED
@@ -86,6 +86,13 @@ os.environ["RAY_DISABLE_DOCKER_CPU_WARNING"] = "1"
 os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


+# Allow long max model length in vLLM. This happens when vLLM registers that the model
+# has a shorter context length than the value we are inserting. But since we do a
+# thorough check of the model's config before setting the context length, we trust our
+# own checks and ignore the internal vLLM check.
+os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"
+
+
 # Avoid the "Unclosed client session" error when evaluating Ollama models with LiteLLM.
 # The error comes from the `aiohttp` package, and this environment variable forces the
 # use of `httpx` instead.
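The new environment variable only takes effect if it is set before the vLLM engine is built. A minimal sketch of the ordering, using a hypothetical model id and context length that are not taken from the diff:

import os

# Must be set before vLLM reads it, i.e. before the engine is constructed.
os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"

# from vllm import LLM  # requires a GPU and model weights, so left commented
# llm = LLM(model="<model-id>", max_model_len=32_768)  # a value above the model's
# # registered context length would normally be rejected without the flag above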
euroeval/benchmark_modules/litellm.py
CHANGED
@@ -31,6 +31,7 @@ from litellm.exceptions import (
 from litellm.llms.vertex_ai.common_utils import VertexAIError
 from litellm.router import Router
 from litellm.types.utils import ChoiceLogprobs
+from litellm.utils import supports_reasoning, supports_response_schema
 from pydantic import conlist, create_model
 from requests.exceptions import RequestException
 from tqdm.asyncio import tqdm as tqdm_async
@@ -234,6 +235,8 @@ class LiteLLMModel(BenchmarkModule):
             pattern="|".join(REASONING_MODELS), string=self.model_config.model_id
         ):
             type_ = GenerativeType.REASONING
+        elif supports_reasoning(model=self.model_config.model_id):
+            type_ = GenerativeType.REASONING
         else:
             type_ = GenerativeType.INSTRUCTION_TUNED

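The two helpers imported from litellm.utils are what the module now consults when a model id is not matched by the hard-coded REASONING_MODELS patterns. A small standalone sketch of the same capability checks (the model id is an example, not taken from the diff):

from litellm.utils import supports_reasoning, supports_response_schema

model_id = "anthropic/claude-3-7-sonnet-20250219"  # illustrative model id
if supports_reasoning(model=model_id):
    print("treated as a reasoning model")
if supports_response_schema(model=model_id):
    print("structured JSON output can be requested via a response schema")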
@@ -314,9 +317,7 @@ class LiteLLMModel(BenchmarkModule):
                 "enable it.",
                 level=logging.DEBUG,
             )
-        elif
-            model=self.model_config.model_id
-        ):
+        elif supports_response_schema(model=self.model_config.model_id):
             ner_tag_names = list(self.dataset_config.prompt_label_mapping.values())
             keys_and_their_types: dict[str, t.Any] = {
                 tag_name: (conlist(str, max_length=5), ...)
@@ -361,7 +362,7 @@ class LiteLLMModel(BenchmarkModule):
                 level=logging.DEBUG,
             )
         elif self.model_config.revision == "no-thinking":
-            generation_kwargs["thinking"] = dict(
+            generation_kwargs["thinking"] = dict(budget_tokens=0)
             log_once(
                 f"Disabling thinking mode for model {self.model_config.model_id!r}",
                 level=logging.DEBUG,
@@ -377,6 +378,19 @@ class LiteLLMModel(BenchmarkModule):
         # Drop generation kwargs that are not supported by the model
         litellm.drop_params = True

+        # First attempt is a test run with a single conversation to handle errors
+        # quickly
+        test_conversation = conversations[0]
+        _, failures = safe_run(
+            self._generate_async(
+                model_id=self.model_config.model_id,
+                conversations=[test_conversation],
+                **generation_kwargs,
+            )
+        )
+        for _, error in failures:
+            self._handle_exception(error=error, generation_kwargs=generation_kwargs)
+
         all_responses: dict[int, "ModelResponse"] = {}
         conversations_to_run: list[tuple[int, list[litellm.AllMessageValues]]] = list(
             enumerate(conversations)
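The added block is a fail-fast probe: one conversation is generated first so that configuration errors surface before the full batch is scheduled. A generic, self-contained sketch of the same pattern (the generate() coroutine is a stand-in, not EuroEval's _generate_async):

import asyncio


async def generate(conversation: list[dict]) -> str:
    # Placeholder for a real API call.
    return f"response to {conversation[-1]['content']}"


async def run_all(conversations: list[list[dict]]) -> list[str]:
    # Probe with the first conversation; any exception is raised immediately
    # instead of after the whole batch has been started.
    await generate(conversations[0])
    return await asyncio.gather(*(generate(conv) for conv in conversations))


if __name__ == "__main__":
    convs = [[{"role": "user", "content": f"question {i}"}] for i in range(3)]
    print(asyncio.run(run_all(convs)))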
@@ -477,6 +491,7 @@ class LiteLLMModel(BenchmarkModule):
            r"the thinking budget [0-9]+ is invalid. please choose a value between "
            r"[0-9]+ and ([0-9]+)\."
        )
+        requires_thinking_disabled_messages = ["thinking.type: Field required"]

        if any(msg.lower() in error_msg for msg in stop_messages):
            log_once(
@@ -557,6 +572,18 @@ class LiteLLMModel(BenchmarkModule):
                 type="enabled", budget_tokens=thinking_budget - 1
             )
             return
+        elif (
+            any(msg.lower() in error_msg for msg in requires_thinking_disabled_messages)
+            and self.generative_type != GenerativeType.REASONING
+        ):
+            log_once(
+                f"The model {model_id!r} requires the `thinking.type` field to be "
+                f"set to `disabled` rather than just setting `budget_tokens` to 0. "
+                "Setting `thinking.type` to `disabled`.",
+                level=logging.DEBUG,
+            )
+            generation_kwargs["thinking"] = dict(type="disabled")
+            return
         elif isinstance(
             error, (Timeout, ServiceUnavailableError, InternalServerError, SystemError)
         ):
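Taken together with the earlier hunk, non-reasoning runs now start with thinking=dict(budget_tokens=0) and fall back to thinking=dict(type="disabled") when the provider demands an explicit type field. A hedged sketch of what that amounts to at the LiteLLM call site (the model id and prompt are illustrative, this is not EuroEval code, and running it requires provider credentials):

import litellm

response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # illustrative model id
    messages=[{"role": "user", "content": "Hello"}],
    # Passed through to the provider; disables extended thinking entirely.
    thinking={"type": "disabled"},
)
print(response.choices[0].message.content)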
euroeval/benchmark_modules/vllm.py
CHANGED
@@ -77,10 +77,7 @@ if t.TYPE_CHECKING or importlib.util.find_spec("vllm") is not None:
         destroy_model_parallel,
     )
     from vllm.lora.request import LoRARequest
-
-if t.TYPE_CHECKING or importlib.util.find_spec("outlines") is not None:
-    from outlines.models.vllm import adapt_tokenizer
-    from outlines.processors.structured import JSONLogitsProcessor
+    from vllm.sampling_params import GuidedDecodingParams

 if t.TYPE_CHECKING or importlib.util.find_spec("ray") is not None:
     import ray
@@ -327,7 +324,7 @@ class VLLMModel(HuggingFaceEncoderModel):
         if end_of_chat_token:
             stop_tokens.append(end_of_chat_token)

-
+        structured_generation_schema = None
         if self.dataset_config.task in TASKS_USING_JSON:
             if self.generative_type == GenerativeType.REASONING:
                 log_once(
@@ -342,15 +339,13 @@ class VLLMModel(HuggingFaceEncoderModel):
                 tag_name: (conlist(str, max_length=5), ...)
                 for tag_name in ner_tag_names
             }
-
-
-                schema=pydantic_class,
-                tokenizer=adapt_tokenizer(tokenizer=self._tokenizer), # type: ignore
-                whitespace_pattern=r" ?",
+            answer_format_class = create_model(
+                "AnswerFormat", **keys_and_their_types
             )
+            structured_generation_schema = answer_format_class.model_json_schema()
             log_once(
                 "Using structured generation with the JSON schema "
-                f"{
+                f"{structured_generation_schema}",
                 level=logging.DEBUG,
             )

@@ -374,7 +369,11 @@ class VLLMModel(HuggingFaceEncoderModel):
             logprobs=MAX_LOGPROBS if self.buffer["first_label_token_mapping"] else None,
             temperature=0.0,
             stop=[stop_token for stop_token in stop_tokens if stop_token],
-
+            guided_decoding=(
+                GuidedDecodingParams(json=structured_generation_schema)
+                if structured_generation_schema
+                else None
+            ),
         )

         # If any of the prompts are empty then we need to replace them with a BOS token
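These vLLM hunks replace the outlines-based JSONLogitsProcessor with vLLM's built-in guided decoding: a pydantic model is built for the NER tags, its JSON schema is extracted, and the schema is handed to GuidedDecodingParams on the sampling parameters. A minimal standalone sketch of that flow (the tag names and the commented-out model call are illustrative, not from the diff):

from pydantic import conlist, create_model
from vllm.sampling_params import GuidedDecodingParams, SamplingParams

ner_tag_names = ["person", "location"]  # illustrative tag names
keys_and_their_types = {
    tag_name: (conlist(str, max_length=5), ...) for tag_name in ner_tag_names
}
answer_format_class = create_model("AnswerFormat", **keys_and_their_types)
schema = answer_format_class.model_json_schema()

sampling_params = SamplingParams(
    temperature=0.0,
    max_tokens=256,
    guided_decoding=GuidedDecodingParams(json=schema),
)
# from vllm import LLM  # requires a GPU and model weights, so left commented
# outputs = LLM(model="<model-id>").generate(["Tag the entities: ..."], sampling_params)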
euroeval/data_models.py
CHANGED
@@ -259,7 +259,7 @@ class BenchmarkResult(pydantic.BaseModel):
     transformers_version: str | None = get_package_version("transformers")
     torch_version: str | None = get_package_version("torch")
     vllm_version: str | None = get_package_version("vllm")
-
+    xgrammar_version: str | None = get_package_version("xgrammar")

     @classmethod
     def from_dict(cls, config: dict) -> "BenchmarkResult":
euroeval/dataset_configs/danish.py
CHANGED
@@ -128,3 +128,13 @@ MULTI_WIKI_QA_DA_CONFIG = DatasetConfig(
     languages=[DA],
     unofficial=True,
 )
+
+GOLDENSWAG_DA_CONFIG = DatasetConfig(
+    name="goldenswag-da",
+    pretty_name="the truncated version of the Danish common-sense reasoning "
+    "dataset GoldenSwag-da, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-da-mini",
+    task=COMMON_SENSE,
+    languages=[DA],
+    unofficial=True,
+)
euroeval/dataset_configs/dutch.py
CHANGED
@@ -120,3 +120,13 @@ MULTI_WIKI_QA_NL_CONFIG = DatasetConfig(
     languages=[NL],
     unofficial=True,
 )
+
+GOLDENSWAG_NL_CONFIG = DatasetConfig(
+    name="goldenswag-nl",
+    pretty_name="the truncated version of the Dutch common-sense reasoning "
+    "dataset GoldenSwag-nl, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-nl-mini",
+    task=COMMON_SENSE,
+    languages=[NL],
+    unofficial=True,
+)
euroeval/dataset_configs/finnish.py
CHANGED
@@ -78,3 +78,13 @@ MULTI_WIKI_QA_FI_CONFIG = DatasetConfig(
     languages=[FI],
     unofficial=True,
 )
+
+GOLDENSWAG_FI_CONFIG = DatasetConfig(
+    name="goldenswag-fi",
+    pretty_name="the truncated version of the Finnish common-sense reasoning "
+    "dataset GoldenSwag-fi, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-fi-mini",
+    task=COMMON_SENSE,
+    languages=[FI],
+    unofficial=True,
+)
euroeval/dataset_configs/french.py
CHANGED
@@ -91,3 +91,13 @@ MULTI_WIKI_QA_FR_CONFIG = DatasetConfig(
     languages=[FR],
     unofficial=True,
 )
+
+GOLDENSWAG_FR_CONFIG = DatasetConfig(
+    name="goldenswag-fr",
+    pretty_name="the truncated version of the French common-sense reasoning "
+    "dataset GoldenSwag-fr, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-fr-mini",
+    task=COMMON_SENSE,
+    languages=[FR],
+    unofficial=True,
+)
euroeval/dataset_configs/german.py
CHANGED
@@ -99,3 +99,13 @@ MULTI_WIKI_QA_DE_CONFIG = DatasetConfig(
     languages=[DE],
     unofficial=True,
 )
+
+GOLDENSWAG_DE_CONFIG = DatasetConfig(
+    name="goldenswag-de",
+    pretty_name="the truncated version of the German common-sense reasoning "
+    "dataset GoldenSwag-de, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-de-mini",
+    task=COMMON_SENSE,
+    languages=[DE],
+    unofficial=True,
+)
euroeval/dataset_configs/italian.py
CHANGED
@@ -99,3 +99,13 @@ MULTI_WIKI_QA_IT_CONFIG = DatasetConfig(
     languages=[IT],
     unofficial=True,
 )
+
+GOLDENSWAG_IT_CONFIG = DatasetConfig(
+    name="goldenswag-it",
+    pretty_name="the truncated version of the Italian common-sense reasoning "
+    "dataset GoldenSwag-it, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-it-mini",
+    task=COMMON_SENSE,
+    languages=[IT],
+    unofficial=True,
+)
euroeval/dataset_configs/spanish.py
CHANGED
@@ -97,3 +97,13 @@ MULTI_WIKI_QA_ES_CONFIG = DatasetConfig(
     languages=[ES],
     unofficial=True,
 )
+
+GOLDENSWAG_ES_CONFIG = DatasetConfig(
+    name="goldenswag-es",
+    pretty_name="the truncated version of the Spanish common-sense reasoning "
+    "dataset GoldenSwag-es, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-es-mini",
+    task=COMMON_SENSE,
+    languages=[ES],
+    unofficial=True,
+)
euroeval/dataset_configs/swedish.py
CHANGED
@@ -108,3 +108,13 @@ MULTI_WIKI_QA_SV_CONFIG = DatasetConfig(
     languages=[SV],
     unofficial=True,
 )
+
+GOLDENSWAG_SV_CONFIG = DatasetConfig(
+    name="goldenswag-sv",
+    pretty_name="the truncated version of the Swedish common-sense reasoning "
+    "dataset GoldenSwag-sv, translated from the English GoldenSwag dataset",
+    huggingface_id="EuroEval/goldenswag-sv-mini",
+    task=COMMON_SENSE,
+    languages=[SV],
+    unofficial=True,
+)
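Each of these eight language files gains an unofficial GoldenSwag common-sense reasoning config pointing at an EuroEval/goldenswag-<lang>-mini dataset on the Hugging Face Hub. A hypothetical usage sketch, assuming the usual EuroEval Python API (the Benchmarker call below is not part of this diff and is shown only as an illustration):

from euroeval import Benchmarker

benchmarker = Benchmarker()
# Unofficial datasets are selected explicitly by the `name` field above, e.g.:
benchmarker.benchmark(model="<huggingface-model-id>", dataset="goldenswag-da")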
euroeval/generation.py
CHANGED
@@ -235,7 +235,7 @@ def generate_single_iteration(
     )

     itr_scores: dict[str, float] = model.compute_metrics(
-        model_outputs_and_labels=(all_preds, ground_truth)
+        model_outputs_and_labels=(all_preds, ground_truth), dataset=dataset
     )

     return itr_scores
euroeval/human_evaluation.py
CHANGED
@@ -620,7 +620,8 @@ class HumanEvaluator:
         )
         ground_truth = self.active_dataset["label"]
         itr_scores: dict[str, float] = self.compute_metrics(
-            model_outputs_and_labels=(all_preds, ground_truth)
+            model_outputs_and_labels=(all_preds, ground_truth),
+            dataset=self.active_dataset,
         )

         # We reverse the order, as the Info messages are printed in reverse order
euroeval/metrics.py
CHANGED
@@ -14,6 +14,7 @@ from .exceptions import InvalidBenchmark
 from .utils import HiddenPrints

 if t.TYPE_CHECKING:
+    from datasets.arrow_dataset import Dataset
     from evaluate import EvaluationModule

 logger = logging.getLogger(__name__)
@@ -49,7 +50,9 @@ class Metric(abc.ABC):
         )

     @abc.abstractmethod
-    def __call__(
+    def __call__(
+        self, predictions: t.Sequence, references: t.Sequence, dataset: "Dataset"
+    ) -> float | None:
         """Calculate the metric score.

         Args:
@@ -57,6 +60,9 @@ class Metric(abc.ABC):
                 The model predictions.
             references:
                 The ground truth references.
+            dataset:
+                The dataset used for evaluation. This is only used in case any
+                additional metadata is used to compute the metrics.

         Returns:
             The calculated metric score, or None if the score should be ignored.
@@ -125,7 +131,9 @@ class HuggingFaceMetric(Metric):
         )
         self.metric: "EvaluationModule | None" = None

-    def __call__(
+    def __call__(
+        self, predictions: t.Sequence, references: t.Sequence, dataset: "Dataset"
+    ) -> float | None:
         """Calculate the metric score.

         Args:
@@ -133,6 +141,9 @@ class HuggingFaceMetric(Metric):
                 The model predictions.
             references:
                 The ground truth references.
+            dataset:
+                The dataset used for evaluation. This is only used in case any
+                additional metadata is used to compute the metrics.

         Returns:
             The calculated metric score, or None if the score should be ignored.
@@ -213,7 +224,9 @@ class LLMAsAJudgeMetric(Metric):
         self.condition_formatting_fn = condition_formatting_fn
         self.system_prompt = system_prompt

-    def __call__(
+    def __call__(
+        self, predictions: t.Sequence, references: t.Sequence, dataset: "Dataset"
+    ) -> float | None:
         """Calculate the metric score using the judge model.

         Args:
@@ -221,6 +234,9 @@ class LLMAsAJudgeMetric(Metric):
                 The model predictions.
             references:
                 The ground truth references.
+            dataset:
+                The dataset used for evaluation. This is only used in case any
+                additional metadata is used to compute the metrics.

         Returns:
             The calculated metric score, or None if the score should be ignored.
@@ -343,7 +359,7 @@ class SpeedMetric(Metric):
             postprocessing_fn=lambda raw_score: (raw_score, f"{raw_score:,.0f}"),
         )

-    def __call__(self, _: t.Sequence, __: t.Sequence) -> float | None:
+    def __call__(self, _: t.Sequence, __: t.Sequence, ___: "Dataset") -> float | None:
         """Not used with the speed metric, but required for consistency."""
         raise NotImplementedError

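The practical upshot for anyone defining their own metric is that __call__ now receives the evaluation dataset as a third argument, which may simply be ignored. A sketch of a custom metric under that assumption (the class name is illustrative and the base-class constructor arguments are omitted here):

import typing as t

from euroeval.metrics import Metric

if t.TYPE_CHECKING:
    from datasets.arrow_dataset import Dataset


class ExactMatchMetric(Metric):
    """Toy metric: the fraction of predictions that exactly match the reference."""

    def __call__(
        self, predictions: t.Sequence, references: t.Sequence, dataset: "Dataset"
    ) -> float | None:
        # `dataset` is accepted to satisfy the new signature but is not needed here.
        if not references:
            return None
        matches = sum(pred == ref for pred, ref in zip(predictions, references))
        return matches / len(references)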
euroeval/prompt_templates/multiple_choice.py
CHANGED
@@ -69,7 +69,7 @@ MULTIPLE_CHOICE_TEMPLATES = {
     IT: PromptConfig(
         default_prompt_prefix="Le seguenti sono domande a scelta multipla "
        "(con relative risposte).",
-        default_prompt_template="Domanda: {text}\
+        default_prompt_template="Domanda: {text}\nRisposta: {label}",
         default_instruction_prompt="Domanda: {text}\n\nRispondete alla domanda "
        "precedente con {labels_str}, e nient'altro.",
         default_prompt_label_mapping="auto",
euroeval/task_group_utils/question_answering.py
CHANGED
@@ -149,6 +149,7 @@ class QuestionAnsweringTrainer(Trainer):
 def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     dataset_config: "DatasetConfig",
+    dataset: "Dataset",
 ) -> dict[str, float]:
     """Compute the metrics needed for evaluation.

@@ -158,6 +159,9 @@ def compute_metrics(
             contains the true labels.
         dataset_config:
             The configuration of the dataset.
+        dataset:
+            The dataset used for evaluation. This is only used in case any additional
+            metadata is used to compute the metrics.

     Returns:
         A dictionary with the names of the metrics as keys and the metric values as
@@ -181,7 +185,9 @@ def compute_metrics(

     results: dict[str, float] = dict()
     for metric in dataset_config.task.metrics:
-        score: float | None = metric(
+        score: float | None = metric(
+            predictions=predictions, references=labels, dataset=dataset
+        )

         # The metric returns None if we are running on multi-GPU and the current
         # process is not the main process
euroeval/task_group_utils/sequence_classification.py
CHANGED
@@ -11,6 +11,7 @@ from ..exceptions import InvalidBenchmark
 from ..utils import log_once, raise_if_model_output_contains_nan_values

 if t.TYPE_CHECKING:
+    from datasets.arrow_dataset import Dataset
     from transformers.trainer_utils import EvalPrediction

     from ..data_models import DatasetConfig, GenerativeModelOutput
@@ -23,6 +24,7 @@ logger = logging.getLogger("euroeval")
 def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     dataset_config: "DatasetConfig",
+    dataset: "Dataset",
 ) -> dict[str, float]:
     """Compute the metrics needed for evaluation.

@@ -32,6 +34,9 @@ def compute_metrics(
             contains the true labels.
         dataset_config:
             The configuration of the dataset.
+        dataset:
+            The dataset used for evaluation. This is only used in case any additional
+            metadata is used to compute the metrics.

     Returns:
         A dictionary with the names of the metrics as keys and the metric values as
@@ -73,7 +78,9 @@ def compute_metrics(

     results: dict[str, float] = dict()
     for metric in dataset_config.task.metrics:
-        score: float | None = metric(
+        score: float | None = metric(
+            predictions=predictions, references=label_ids, dataset=dataset
+        )

         # The metric returns None if we are running on multi-GPU and the current
         # process is not the main process
euroeval/task_group_utils/text_to_text.py
CHANGED
@@ -11,6 +11,7 @@ from ..metrics import HuggingFaceMetric
 from ..utils import raise_if_model_output_contains_nan_values

 if t.TYPE_CHECKING:
+    from datasets.arrow_dataset import Dataset
     from transformers.trainer_utils import EvalPrediction

     from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
@@ -24,6 +25,7 @@ def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     dataset_config: "DatasetConfig",
     benchmark_config: "BenchmarkConfig",
+    dataset: "Dataset",
 ) -> dict[str, float]:
     """Compute the metrics needed for evaluation.

@@ -35,6 +37,9 @@ def compute_metrics(
             The configuration of the dataset.
         benchmark_config:
             The configuration of the benchmark.
+        dataset:
+            The dataset used for evaluation. This is only used in case any additional
+            metadata is used to compute the metrics.

     Returns:
         A dictionary with the names of the metrics as keys and the metric values as
@@ -69,7 +74,9 @@ def compute_metrics(

     while True:
         try:
-            score: float | None = metric(
+            score: float | None = metric(
+                predictions=predictions, references=labels, dataset=dataset
+            )
             break
         except Exception as e:
             oom_error = [
euroeval/task_group_utils/token_classification.py
CHANGED
@@ -12,6 +12,7 @@ from ..exceptions import InvalidBenchmark
 from ..utils import raise_if_model_output_contains_nan_values

 if t.TYPE_CHECKING:
+    from datasets.arrow_dataset import Dataset
     from transformers.tokenization_utils import PreTrainedTokenizer
     from transformers.tokenization_utils_base import BatchEncoding
     from transformers.trainer_utils import EvalPrediction
@@ -27,6 +28,7 @@ def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     has_misc_tags: bool,
     dataset_config: "DatasetConfig",
+    dataset: "Dataset",
 ) -> dict[str, float]:
     """Compute the metrics needed for evaluation.

@@ -38,6 +40,9 @@ def compute_metrics(
             Whether the dataset has MISC tags.
         dataset_config:
             The configuration of the dataset.
+        dataset:
+            The dataset used for evaluation. This is only used in case any additional
+            metadata is used to compute the metrics.

     Returns:
         A dictionary with the names of the metrics as keys and the metric values as
@@ -136,7 +141,9 @@ def compute_metrics(
         for metric in dataset_config.task.metrics
         if metric.name == "micro_f1"
     )
-    micro_f1_score = metric(
+    micro_f1_score = metric(
+        predictions=predictions, references=list(labels), dataset=dataset
+    )

     # Compute the metrics without MISC tags
     # We manually set the F1 metric to be 100% if both the labels and the models
@@ -158,7 +165,7 @@ def compute_metrics(
         if metric.name == "micro_f1_no_misc"
     )
     micro_f1_no_misc_score = metric(
-        predictions=predictions_no_misc, references=labels_no_misc
+        predictions=predictions_no_misc, references=labels_no_misc, dataset=dataset
     )

     # Raise error if the metrics are invalid
euroeval/types.py
CHANGED
@@ -5,6 +5,7 @@ import typing as t
 from transformers.trainer_utils import EvalPrediction

 if t.TYPE_CHECKING:
+    from datasets.arrow_dataset import Dataset
     from numpy.typing import NDArray

     from .data_models import GenerativeModelOutput
@@ -25,12 +26,16 @@ class ComputeMetricsFunction(t.Protocol):
             "NDArray | list[str] | list[list[str]]",
             "NDArray | list[str] | list[list[str]]",
         ],
+        dataset: "Dataset",
     ) -> dict[str, float]:
         """Compute the metrics.

         Args:
             model_outputs_and_labels:
                 The model outputs and labels.
+            dataset:
+                The dataset used for evaluation. This is only used in case any
+                additional metadata is used to compute the metrics.

         Returns:
             The computed metrics.
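A function satisfying the updated protocol therefore has to accept the dataset keyword even if it never uses it. A small sketch under that assumption (the function and its metric are illustrative, not part of EuroEval):

import typing as t

if t.TYPE_CHECKING:
    from datasets.arrow_dataset import Dataset


def dummy_compute_metrics(
    model_outputs_and_labels: tuple[list[str], list[str]],
    dataset: "Dataset",
) -> dict[str, float]:
    predictions, labels = model_outputs_and_labels
    if not labels:
        return {"accuracy": 0.0}
    accuracy = sum(p == r for p, r in zip(predictions, labels)) / len(labels)
    return {"accuracy": accuracy}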
{euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 15.13.0
+Version: 15.15.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -61,13 +61,11 @@ Provides-Extra: all
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'all'
 Requires-Dist: gradio>=4.26.0; extra == 'all'
-Requires-Dist:
-Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'all'
+Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'all'
 Provides-Extra: generative
 Requires-Dist: bitsandbytes>=0.43.1; (platform_system == 'Linux') and extra == 'generative'
 Requires-Dist: fbgemm-gpu>=1.0.0; (platform_system == 'Linux') and extra == 'generative'
-Requires-Dist:
-Requires-Dist: vllm>=0.9.1; (platform_system == 'Linux') and extra == 'generative'
+Requires-Dist: vllm>=0.10.0; (platform_system == 'Linux') and extra == 'generative'
 Provides-Extra: human-evaluation
 Requires-Dist: gradio>=4.26.0; extra == 'human-evaluation'
 Provides-Extra: test
{euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/RECORD
CHANGED
@@ -1,19 +1,19 @@
-euroeval/__init__.py,sha256=
+euroeval/__init__.py,sha256=ZZoVc6tKWz_h8Pw2n26PV-q_Gd4TM_02O235ZBRUNJw,3756
 euroeval/benchmark_config_factory.py,sha256=jKC8bEzJSGGCcG8aWsPxiyHX6fjOQYQWvkp1MIUuHYM,11564
 euroeval/benchmarker.py,sha256=SDBzdCa4I8u1XDeN_1mKTFzfaaQbbY_oWcHt3niADxk,48497
 euroeval/callbacks.py,sha256=5BTlDvBJ60xRvj01EpXZSZu3MFdKa3LgVuhxoLb3i3E,2565
 euroeval/cli.py,sha256=h81Lswm_q9htkYz-GQQQVIsdsUPnfe3LDH8AZdBcpKs,8602
 euroeval/constants.py,sha256=0KHrH74zGM8vNF4uZG_a5qFJRZH5YgyQULYZtCKlo68,2452
 euroeval/data_loading.py,sha256=DP-cqwN_d0Y-KaN8P8c3fDr6PX80UYROHgRwX82ix4w,4156
-euroeval/data_models.py,sha256=
+euroeval/data_models.py,sha256=qSCNq3PV7qo--gibqEvvu4cXkEkhGGAb6UiZW8U_KiU,22031
 euroeval/enums.py,sha256=L9LcNeruuhHvze9vKRogXY9vonRzoBqDzWSP6hxKQ7A,3195
 euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
 euroeval/finetuning.py,sha256=BrPZ-6qFY8K-dwfaRwNetVYfYburoQwLQty6pn6iP_s,11340
-euroeval/generation.py,sha256=
+euroeval/generation.py,sha256=lmvu__6w3cLxi0zBtXSlyZvV8CJpV3BdajUoIEA9ElA,11639
 euroeval/generation_utils.py,sha256=zRsaOHcbhysbMa983BZXxfd-qMe4NYts-ZbQxfvNTK4,13310
-euroeval/human_evaluation.py,sha256=
+euroeval/human_evaluation.py,sha256=FLuTl1DHxCiWB_laVVQHIH86yXvA_ZeNNSrUmyExZXI,27579
 euroeval/languages.py,sha256=cr_Z5jtaHb2XY0zeOhuk3ATHX74PODzt6gMPC2zMD7c,8594
-euroeval/metrics.py,sha256=
+euroeval/metrics.py,sha256=d59VRsjGFA2h2s4J8zRgdGxCu_pA3YhfvKxkK6pN6GI,16185
 euroeval/model_cache.py,sha256=HgXTgn4RMBqIjKaTmYzxu0f4NIwbXx1XJFbvbITqy4E,8686
 euroeval/model_config.py,sha256=64KKHPTrpsFhFAANtBnAKkOs7PWZ50GXkXeDl4jICgs,2748
 euroeval/model_loading.py,sha256=B6dyjYO0Dg7NOcUXls8Sjwe6W0c2UqJ1OGw-RkzoSSQ,2239
@@ -21,43 +21,43 @@ euroeval/scores.py,sha256=TatSbjia7Zwj71gQFyV_gCHyppMbOgeaZgNCib8G86k,2849
 euroeval/speed_benchmark.py,sha256=6bFGeMmtdl_6owkxNQ3ZKiyQQS58k0NApzlsbDgBW5s,4037
 euroeval/tasks.py,sha256=btxf29M5rUP7JjBl6u9aQlHQAxrJNP4bRbdEQtDnmDA,3376
 euroeval/tokenization_utils.py,sha256=LxgGs7juS5PuMYt5LL2X6eVXdtnpi-A2jFxqcWpF6NA,17931
-euroeval/types.py,sha256=
+euroeval/types.py,sha256=SCKOALV_-F1PAIwQ7qHNdSF1Uy29TSu9nIc1NYJGUUs,2754
 euroeval/utils.py,sha256=5R7y67xe0ODaje7k8nOu2AFS3Ph2gcsiWpIq5rjSSuA,11613
 euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
 euroeval/benchmark_modules/base.py,sha256=D1oKD16KBvxEoBUfqwvzvcDc1hx6letdD3v1PnBmF4A,10669
 euroeval/benchmark_modules/fresh.py,sha256=sg_AXNPApFObCzCRWhCgKxfr-eqQsT6Ri0xx0_Yy5JM,10293
 euroeval/benchmark_modules/hf.py,sha256=-W_bWEdm0zePkn4nDz4l0T4hhJJnlfwHrtIO3m5BrUs,44725
-euroeval/benchmark_modules/litellm.py,sha256=
-euroeval/benchmark_modules/vllm.py,sha256=
+euroeval/benchmark_modules/litellm.py,sha256=qv-k2ntk48OF4ikevQ95k4zLbBkZYOZ2z-GAisA-tFY,53374
+euroeval/benchmark_modules/vllm.py,sha256=Uq81tgNSkajuawdJ1lH1s9Te9wubYd-CyBbM-B5YZcA,38693
 euroeval/dataset_configs/__init__.py,sha256=EbjEyHwBtSztASl8_xblD8hessruDdV4Eg1vXrmGOuY,1935
-euroeval/dataset_configs/danish.py,sha256
-euroeval/dataset_configs/dutch.py,sha256=
+euroeval/dataset_configs/danish.py,sha256=0lDtvpgszXY1XaPjTU8yA3oNCU8W2OllvrBWgn6pkhk,4027
+euroeval/dataset_configs/dutch.py,sha256=ekZxLL9d09BUMijCxy9EFa2heNQVvySPySOjhWdtJc8,3815
 euroeval/dataset_configs/english.py,sha256=uQAaGWpHk8xqFCeIhmmPXYTb1cZomeEdRaRe9qIZQrg,2858
 euroeval/dataset_configs/faroese.py,sha256=gkgxQTWGFbfg9Eo1z-NSLROgKDcaij9tAN2mfgtrt0M,1647
-euroeval/dataset_configs/finnish.py,sha256=
-euroeval/dataset_configs/french.py,sha256=
-euroeval/dataset_configs/german.py,sha256=
+euroeval/dataset_configs/finnish.py,sha256=UZwy0_d17O2L-v2AKOu3OlDwFPcLGTZNAOt7ZKlr4K8,2679
+euroeval/dataset_configs/french.py,sha256=Hei2M4bGIz8hVtaPKQlQATcmK-0bFBNEocEszR3gia0,3014
+euroeval/dataset_configs/german.py,sha256=sRYtOl6CYf4kZkeINfff6xoKBG4OsDxb2b72lKwELGc,3192
 euroeval/dataset_configs/icelandic.py,sha256=g21IHjcwEZvf_yJ9PobeuBOqRiLOk0oCdEjY34g-UMk,4497
-euroeval/dataset_configs/italian.py,sha256=
+euroeval/dataset_configs/italian.py,sha256=4SEmdUyfGbbwMPhv_9nL3JNJtoDKHLAlWuvr7Ihmi9o,3294
 euroeval/dataset_configs/norwegian.py,sha256=-WvQM44xCwjrqBzlAy4rjf6v87fGera2JmZV_069TeQ,6003
 euroeval/dataset_configs/portuguese.py,sha256=3SqbwD0PNTILGALzh50pVoEwC-spRD75ZeE2NEj151E,2367
-euroeval/dataset_configs/spanish.py,sha256=
-euroeval/dataset_configs/swedish.py,sha256=
+euroeval/dataset_configs/spanish.py,sha256=Bm0Z19Mh2qYXR0RIRlqEkzfVb5KiqJRectfuY7JLql4,3192
+euroeval/dataset_configs/swedish.py,sha256=js4paNsuC0nQzPpf6_BzHBf7MT60XUpP1-qM2uxRtQs,3445
 euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
 euroeval/prompt_templates/linguistic_acceptability.py,sha256=ZN71BEt4HAhSYY-GWjh-S-iVvq5AODQJThkrjDhy4oM,7138
-euroeval/prompt_templates/multiple_choice.py,sha256=
+euroeval/prompt_templates/multiple_choice.py,sha256=wHnQCE5bv947L6hSK5zJitE37V-PbuNYAp156mWaIYA,5494
 euroeval/prompt_templates/named_entity_recognition.py,sha256=ga21s9T4_Hhbf88boWm7gnL7OgD7txuS_EeDgXaxEoE,13602
 euroeval/prompt_templates/reading_comprehension.py,sha256=3Nch-9zHfUDIwy-k5mP-TRhHQRQ9nad8HdhpJ1S8nGc,7072
 euroeval/prompt_templates/sentiment_classification.py,sha256=2Xsmj8lbaAXACHhwbbR4dWhoKyKB87TqpMO-ssQ-Djo,7649
 euroeval/prompt_templates/summarization.py,sha256=I98LlUOBVa_xo02npq7BWKKZOXGqm-_15i64QzbEsb0,5334
 euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
 euroeval/task_group_utils/multiple_choice_classification.py,sha256=yfy8lczpZ_MY-Y4FQx3Et9vEUpuD3YMFjF3wQGCfMNw,6632
-euroeval/task_group_utils/question_answering.py,sha256=
-euroeval/task_group_utils/sequence_classification.py,sha256=
-euroeval/task_group_utils/text_to_text.py,sha256=
-euroeval/task_group_utils/token_classification.py,sha256=
-euroeval-15.
-euroeval-15.
-euroeval-15.
-euroeval-15.
-euroeval-15.
+euroeval/task_group_utils/question_answering.py,sha256=6jpiHukzA7IrJh4vVYyZDDyvD5Xc2GsxoXzpm_PHpXw,27503
+euroeval/task_group_utils/sequence_classification.py,sha256=ihJO55f3Dy565d3ByYGMuSINasnjAADaTrM59LwZzA0,12977
+euroeval/task_group_utils/text_to_text.py,sha256=go0y6X9QAv5iywlLAclb8cqFX_3QlAT-1-VNZ9zMWFA,4832
+euroeval/task_group_utils/token_classification.py,sha256=BDqOfopdH5Bbj67HTEbZd9KZtNCDNket8NrCTfxZFzQ,17773
+euroeval-15.15.0.dist-info/METADATA,sha256=ldIaYcwIlgDbuHPz_uHKrcYbmh-GLB9T239BjqYRalk,13377
+euroeval-15.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-15.15.0.dist-info/entry_points.txt,sha256=tKQRxN0HX2mGtbZbZQdCRFUDZIecA_z4mZduueor3Ug,135
+euroeval-15.15.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-15.15.0.dist-info/RECORD,,
{euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/WHEEL
File without changes

{euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/entry_points.txt
File without changes

{euroeval-15.13.0.dist-info → euroeval-15.15.0.dist-info}/licenses/LICENSE
File without changes