EuroEval-16.4.0-py3-none-any.whl → EuroEval-16.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of EuroEval might be problematic.
- euroeval/__init__.py +6 -0
- euroeval/benchmark_config_factory.py +51 -46
- euroeval/benchmark_modules/base.py +6 -5
- euroeval/benchmark_modules/hf.py +2 -9
- euroeval/benchmark_modules/litellm.py +14 -12
- euroeval/benchmark_modules/vllm.py +17 -10
- euroeval/benchmarker.py +61 -44
- euroeval/caching_utils.py +1 -1
- euroeval/cli.py +86 -8
- euroeval/constants.py +3 -0
- euroeval/data_loading.py +78 -30
- euroeval/data_models.py +326 -326
- euroeval/dataset_configs/__init__.py +10 -3
- euroeval/dataset_configs/bulgarian.py +56 -0
- euroeval/dataset_configs/czech.py +25 -29
- euroeval/dataset_configs/danish.py +51 -88
- euroeval/dataset_configs/dutch.py +48 -86
- euroeval/dataset_configs/english.py +45 -76
- euroeval/dataset_configs/estonian.py +36 -38
- euroeval/dataset_configs/faroese.py +19 -60
- euroeval/dataset_configs/finnish.py +36 -68
- euroeval/dataset_configs/french.py +39 -74
- euroeval/dataset_configs/german.py +45 -81
- euroeval/dataset_configs/greek.py +64 -0
- euroeval/dataset_configs/icelandic.py +54 -91
- euroeval/dataset_configs/italian.py +42 -78
- euroeval/dataset_configs/latvian.py +28 -34
- euroeval/dataset_configs/lithuanian.py +22 -26
- euroeval/dataset_configs/norwegian.py +72 -114
- euroeval/dataset_configs/polish.py +33 -60
- euroeval/dataset_configs/portuguese.py +33 -65
- euroeval/dataset_configs/serbian.py +64 -0
- euroeval/dataset_configs/slovak.py +19 -24
- euroeval/dataset_configs/spanish.py +42 -76
- euroeval/dataset_configs/swedish.py +48 -84
- euroeval/dataset_configs/ukrainian.py +64 -0
- euroeval/exceptions.py +1 -1
- euroeval/finetuning.py +3 -2
- euroeval/generation.py +5 -4
- euroeval/generation_utils.py +6 -5
- euroeval/languages.py +395 -323
- euroeval/metrics/huggingface.py +14 -3
- euroeval/metrics/llm_as_a_judge.py +1 -1
- euroeval/model_cache.py +6 -5
- euroeval/model_loading.py +1 -1
- euroeval/prompt_templates/__init__.py +2 -0
- euroeval/prompt_templates/classification.py +206 -0
- euroeval/prompt_templates/linguistic_acceptability.py +82 -43
- euroeval/prompt_templates/multiple_choice.py +81 -41
- euroeval/prompt_templates/named_entity_recognition.py +125 -44
- euroeval/prompt_templates/reading_comprehension.py +92 -43
- euroeval/prompt_templates/sentiment_classification.py +91 -43
- euroeval/prompt_templates/summarization.py +64 -39
- euroeval/prompt_templates/token_classification.py +279 -0
- euroeval/scores.py +4 -3
- euroeval/speed_benchmark.py +2 -1
- euroeval/task_group_utils/multiple_choice_classification.py +2 -1
- euroeval/task_group_utils/question_answering.py +24 -13
- euroeval/task_group_utils/sequence_classification.py +5 -4
- euroeval/task_group_utils/text_to_text.py +2 -1
- euroeval/task_group_utils/token_classification.py +11 -8
- euroeval/tasks.py +44 -1
- euroeval/tokenisation_utils.py +19 -10
- euroeval/types.py +10 -9
- euroeval/utils.py +6 -3
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA +194 -37
- euroeval-16.5.0.dist-info/RECORD +81 -0
- euroeval-16.4.0.dist-info/RECORD +0 -75
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt +0 -0
- {euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE +0 -0
{euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: EuroEval
-Version: 16.4.0
+Version: 16.5.0
 Summary: The robust European language model benchmark.
 Project-URL: Repository, https://github.com/EuroEval/EuroEval
 Project-URL: Issues, https://github.com/EuroEval/EuroEval/issues
@@ -92,7 +92,7 @@ ______________________________________________________________________
 [](https://arxiv.org/abs/2406.13469)
 [](https://github.com/EuroEval/EuroEval/blob/main/LICENSE)
 [](https://github.com/EuroEval/EuroEval/commits/main)
-[](https://github.com/EuroEval/EuroEval/tree/main/tests)
 [](https://github.com/EuroEval/EuroEval/blob/main/CODE_OF_CONDUCT.md)
 
 ## Maintainer
@@ -113,7 +113,7 @@ when an evaluation requires a certain extra dependency, and how you install it.
 
 ## Quickstart
 
-### Benchmarking from the
+### Benchmarking from the command line
 
 The easiest way to benchmark pretrained models is via the command line interface. After
 having installed the package, you can benchmark your favorite model like so:
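The command itself sits just outside this hunk; a minimal sketch, using only the flags that appear verbatim elsewhere in this diff (the elided README line may differ):

```bash
euroeval --model <model-id> --task sentiment-classification --language da
```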
@@ -160,7 +160,7 @@ See all the arguments and options available for the `euroeval` command by typing
 euroeval --help
 ```
 
-### Benchmarking from a
+### Benchmarking from a script
 
 In a script, the syntax is similar to the command line interface. You simply initialise
 an object of the `Benchmarker` class, and call this benchmark object with your favorite
@@ -168,15 +168,19 @@ model:
 
 ```python
 >>> from euroeval import Benchmarker
->>> 
->>> benchmark(model="<model-id>")
+>>> benchmarker = Benchmarker()
+>>> benchmarker.benchmark(model="<model-id>")
 ```
 
 To benchmark on a specific task and/or language, you simply specify the `task` or
 `language` arguments, shown here with the same example as above:
 
 ```python
->>> benchmark(
+>>> benchmarker.benchmark(
+...     model="<model-id>",
+...     task="sentiment-classification",
+...     language="da",
+... )
 ```
 
 If you want to benchmark a subset of all the models on the Hugging Face Hub, you can
@@ -184,10 +188,61 @@ simply leave out the `model` argument. In this example, we're benchmarking all D
 models on the Danish sentiment classification task:
 
 ```python
->>> benchmark(task="sentiment-classification", language="da")
+>>> benchmarker.benchmark(task="sentiment-classification", language="da")
 ```
 
-### Benchmarking
+### Benchmarking from Docker
+
+A Dockerfile is provided in the repo, which can be downloaded and run without needing
+to clone the repo and install from source. It can be fetched programmatically by
+running the following:
+
+```bash
+wget https://raw.githubusercontent.com/EuroEval/EuroEval/main/Dockerfile.cuda
+```
+
+Next, to be able to build the Docker image, first ensure that the NVIDIA Container
+Toolkit is
+[installed](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
+and
+[configured](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-docker).
+Ensure that the CUDA version stated at the top of the Dockerfile matches the CUDA
+version installed (which you can check using `nvidia-smi`). After that, we build the
+image as follows:
+
+```bash
+docker build --pull -t euroeval -f Dockerfile.cuda .
+```
+
+With the Docker image built, we can now evaluate any model as follows:
+
+```bash
+docker run -e args="<euroeval-arguments>" --gpus 1 --name euroeval --rm euroeval
+```
+
+Here `<euroeval-arguments>` consists of the arguments passed to the `euroeval`
+CLI. This could for instance be `--model <model-id> --task
+sentiment-classification`.
+
+## Benchmarking custom inference APIs
+
+If the model you want to benchmark is hosted by a custom inference provider, such as a
+[vLLM server](https://docs.vllm.ai/en/stable/), then this is also supported in EuroEval.
+When benchmarking, you simply have to set the `--api-base` argument (`api_base` when
+using the `Benchmarker` API) to the URL of the inference API, and optionally the
+`--api-key` argument (`api_key`) to the API key, if authentication is required.
+
+When benchmarking models hosted on a custom inference API, the model ID
+(`--model`/`model`) should be the model name as registered on the inference server,
+potentially with a required prefix, depending on the type of inference server used. For
+instance, if the model is hosted on a vLLM server, the model ID should be prefixed with
+`hosted_vllm/`, and if the model is hosted on an Ollama server, the model ID should be
+prefixed with `ollama_chat/`. See the full list of possible inference providers as well
+as their corresponding prefixes in the [LiteLLM
+documentation](https://docs.litellm.ai/docs/providers/), as EuroEval uses LiteLLM to
+handle evaluation of inference APIs in general.
+
+## Benchmarking in an offline environment
 
 If you need to benchmark in an offline environment, you need to download the models,
 datasets and metrics beforehand. This can be done by adding the `--download-only`
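To make the Docker and custom-API passages above concrete, a combined sketch; the server URL, port, and the model name `my-model` are hypothetical placeholders, not taken from this diff:

```bash
# Run the prebuilt image, passing the CLI arguments via the `args` variable,
# exactly as described in the Docker section above.
docker run -e args="--model <model-id> --task sentiment-classification" \
    --gpus 1 --name euroeval --rm euroeval

# Evaluate a model served by a local vLLM server: prefix the model ID with
# `hosted_vllm/` and point --api-base at the server (URL and port assumed).
euroeval --model hosted_vllm/my-model \
    --api-base http://localhost:8000/v1 \
    --api-key "$API_KEY" \
    --task sentiment-classification \
    --language da
```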
@@ -202,7 +257,7 @@ euroeval --model <model-id> --task sentiment-classification --language da --down
 Or from a script:
 
 ```python
->>> benchmark(
+>>> benchmarker.benchmark(
 ...     model="<model-id>",
 ...     task="sentiment-classification",
 ...     language="da",
@@ -210,44 +265,139 @@ Or from a script:
 ...     )
 ```
 
-Please note: Offline benchmarking of adapter models is not currently supported
-internet connection
-to you, please consider [opening an
+Please note: Offline benchmarking of adapter models is not currently supported, meaning
+that we still require an internet connection during the evaluation of these. If offline
+support of adapters is important to you, please consider [opening an
+issue](https://github.com/EuroEval/EuroEval/issues).
 
-
+## Benchmarking custom datasets
 
-
-
-
+If you want to benchmark models on your own custom dataset, this is also possible.
+First, you need to set up your dataset to be compatible with EuroEval. This means
+splitting up your dataset into a training, validation and test split, and ensuring that
+the column names are correct. We use `text` as the column name for the input text, and
+the output column name depends on the type of task:
 
-
-
+- **Text or multiple-choice classification**: `label`
+- **Token classification**: `labels`
+- **Reading comprehension**: `answers`
+- **Free-form text generation**: `target_text`
+
+Text and multiple-choice classification tasks are by far the most common. Next, you
+store your three dataset splits as three different CSV files with the desired two
+columns. Finally, you create a file called `custom_datasets.py` in which you
+define the associated `DatasetConfig` objects for your dataset. Here is an example of a
+simple text classification dataset with two classes:
+
+```python
+from euroeval import DatasetConfig, TEXT_CLASSIFICATION
+from euroeval.languages import ENGLISH
+
+MY_CONFIG = DatasetConfig(
+    name="my-dataset",
+    source=dict(train="train.csv", val="val.csv", test="test.csv"),
+    task=TEXT_CLASSIFICATION,
+    languages=[ENGLISH],
+    _labels=["positive", "negative"],
+)
 ```
 
-
-Toolkit is
-[installed](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
-and
-[configured](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-docker).
-Ensure that the the CUDA version stated at the top of the Dockerfile matches the CUDA
-version installed (which you can check using `nvidia-smi`). After that, we build the
-image as follows:
+You can then benchmark your custom dataset by simply running
 
 ```bash
-
+euroeval --dataset my-dataset --model <model-id>
 ```
 
-
+You can also run the benchmark from a Python script, by simply providing your custom
+dataset configuration directly to the `benchmark` method:
 
-```
-
+```python
+from euroeval import Benchmarker
+
+benchmarker = Benchmarker()
+benchmarker.benchmark(model="<model-id>", dataset=MY_CONFIG)
 ```
 
-
-
-
+We have included three convenience tasks to make it easier to set up custom datasets:
+
+- `TEXT_CLASSIFICATION`, which is used for text classification tasks. This requires you
+  to set the `_labels` argument in the `DatasetConfig`, and requires the columns `text`
+  and `label` to be present in the dataset.
+- `MULTIPLE_CHOICE`, which is used for multiple-choice classification tasks. This
+  also requires you to set the `_labels` argument in the `DatasetConfig`. Note that for
+  multiple choice tasks, you need to set up your `text` column to also list all the
+  choices, and all the samples should have the same number of choices. This requires the
+  columns `text` and `label` to be present in the dataset.
+- `TOKEN_CLASSIFICATION`, which is used when classifying individual tokens in a text.
+  This also requires you to set the `_labels` argument in the `DatasetConfig`. This
+  requires the columns `tokens` and `labels` to be present in the dataset, where
+  `tokens` is a list of tokens/words in the text, and `labels` is a list of the
+  corresponding labels for each token (so the two lists have the same length).
+
+On top of these three convenience tasks, there are of course also the tasks that we use
+in the official benchmark, which you can also use with your own bespoke dataset:
+
+- `LA`, for linguistic acceptability datasets.
+- `NER`, for named entity recognition datasets with the standard BIO tagging scheme.
+- `RC`, for reading comprehension datasets in the SQuAD format.
+- `SENT`, for sentiment classification datasets.
+- `SUMM`, for text summarisation datasets.
+- `KNOW`, for multiple-choice knowledge datasets (e.g., MMLU).
+- `MCRC`, for multiple-choice reading comprehension datasets (e.g., Belebele).
+- `COMMON_SENSE`, for multiple-choice common-sense reasoning datasets (e.g., HellaSwag).
+
+These can all be imported from the `euroeval.tasks` module.
+
+### Creating your own custom task
+
+You are of course also free to define your own task from scratch, which allows you to
+customise the prompts used when evaluating generative models, for instance. Here is an
+example of a custom free-form text generation task, where the goal for the model is to
+generate a SQL query based on a natural language input:
+
+```python
+from euroeval import DatasetConfig
+from euroeval.data_models import Task, PromptConfig
+from euroeval.enums import TaskGroup, ModelType
+from euroeval.languages import ENGLISH
+from euroeval.metrics import rouge_l_metric
+
+sql_generation_task = Task(
+    name="sql-generation",
+    task_group=TaskGroup.TEXT_TO_TEXT,
+    template_dict={
+        ENGLISH: PromptConfig(
+            default_prompt_prefix="The following are natural language texts and their "
+            "corresponding SQL queries.",
+            default_prompt_template="Natural language query: {text}\nSQL query: "
+            "{target_text}",
+            default_instruction_prompt="Generate the SQL query for the following "
+            "natural language query:\n{text!r}",
+            default_prompt_label_mapping=dict(),
+        ),
+    },
+    metrics=[rouge_l_metric],
+    default_num_few_shot_examples=3,
+    default_max_generated_tokens=256,
+    default_allowed_model_types=[ModelType.GENERATIVE],
+)
+
+MY_SQL_DATASET = DatasetConfig(
+    name="my-sql-dataset",
+    source=dict(train="train.csv", val="val.csv", test="test.csv"),
+    task=sql_generation_task,
+    languages=[ENGLISH],
+)
+```
+
+Again, with this you can benchmark your custom dataset by simply running
+
+```bash
+euroeval --dataset my-sql-dataset --model <model-id>
+```
 
-
+## Reproducing the evaluation datasets
 
 All datasets used in this project are generated using the scripts located in the
 [src/scripts](src/scripts) folder. To reproduce a dataset, run the corresponding script
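Mirroring the `TEXT_CLASSIFICATION` example in the hunk above, a minimal sketch of a config for the `TOKEN_CLASSIFICATION` convenience task; the file names and BIO label set are hypothetical, and the import path for the task is assumed to follow the `euroeval.tasks` module mentioned above:

```python
from euroeval import DatasetConfig
from euroeval.languages import ENGLISH
from euroeval.tasks import TOKEN_CLASSIFICATION  # import path assumed

# Hypothetical NER-style dataset: each CSV must contain `tokens` and `labels`
# columns, holding equal-length lists of words and their per-token labels.
MY_NER_CONFIG = DatasetConfig(
    name="my-ner-dataset",
    source=dict(train="train.csv", val="val.csv", test="test.csv"),
    task=TOKEN_CLASSIFICATION,
    languages=[ENGLISH],
    _labels=["O", "B-PER", "I-PER"],  # hypothetical BIO tag set
)
```

It would then run like the other custom datasets above: `euroeval --dataset my-ner-dataset --model <model-id>`.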
@@ -379,6 +529,13 @@ A huge thank you to all the contributors who have helped make this project a suc
     alt="Contributor avatar for slowwavesleep"
   />
 </a>
+<a href="https://github.com/mrkowalski">
+  <img
+    src="https://avatars.githubusercontent.com/u/6357044"
+    width=50
+    alt="Contributor avatar for mrkowalski"
+  />
+</a>
 
 ### Contribute to EuroEval
 
@@ -390,7 +547,7 @@ contributing new datasets, your help makes this project better for everyone.
 - **Adding datasets**: If you're interested in adding a new dataset to EuroEval, we have
   a [dedicated guide](NEW_DATASET_GUIDE.md) with step-by-step instructions.
 
-### Special
+### Special thanks
 
 - Thanks to [Google](https://google.com/) for sponsoring Gemini credits as part of their
   [Google Cloud for Researchers Program](https://cloud.google.com/edu/researchers).
@@ -401,7 +558,7 @@ contributing new datasets, your help makes this project better for everyone.
 - Thanks to [UWV](https://www.uwv.nl/) and [KU
   Leuven](https://www.arts.kuleuven.be/ling/ccl) for sponsoring the Azure OpenAI
   credits used to evaluate GPT-4-turbo in Dutch.
-- Thanks to [Miðeind](https://mideind.is/
+- Thanks to [Miðeind](https://mideind.is/en) for sponsoring the OpenAI
   credits used to evaluate GPT-4-turbo in Icelandic and Faroese.
 - Thanks to [CHC](https://chc.au.dk/) for sponsoring the OpenAI credits used to
   evaluate GPT-4-turbo in German.
euroeval-16.5.0.dist-info/RECORD
ADDED

@@ -0,0 +1,81 @@
+euroeval/__init__.py,sha256=MuVVOnGU3IJIROBmNkrFSCfnLaeqAFaI-gBaf78_Zr0,4118
+euroeval/benchmark_config_factory.py,sha256=evOZWuK5tBZkiVx5l7hxO5XPBbD-tmxMUZdcEsmBX4o,8689
+euroeval/benchmarker.py,sha256=ejvAhL9XNHYwIYhotIZMmvvYURCnsrMP5W2YhA2RI38,50615
+euroeval/caching_utils.py,sha256=lLUbkpDdJZy4xodIpwIz5d-WNKGuszbr_d9dyiJ5kZc,2591
+euroeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
+euroeval/cli.py,sha256=4g0tOOO9ItEAVEBVqDWvkKk12hys1rmpYk9zMRIH7Xw,12638
+euroeval/constants.py,sha256=XeEQ6nyvgVwhPu9ASe2BN2uO9URlcK03A76VbHyFit0,2959
+euroeval/data_loading.py,sha256=2CYnvjMVvJOUaDhwKueh3gYIdIwyAsCBonD9uciv_RU,6896
+euroeval/data_models.py,sha256=kHZ_90jtAq8ea2LQwhTc_UTW0ACcznxmLaJF10ZN0Wg,29127
+euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
+euroeval/exceptions.py,sha256=wE5jg1ReR_ULtdFj5_E96QXzgi1AEnR2tYjyMLXF1R0,5115
+euroeval/finetuning.py,sha256=ggaoREQb7GA7LRpa1dORVLF170ubLZnX3j7RKm2vG5s,11804
+euroeval/generation.py,sha256=GiXsWlPhF19d85kA4bS1RZXEO0y-z7ENu5dH6fiQ_aQ,12637
+euroeval/generation_utils.py,sha256=6TepIaKjnt-7ViajDwCvAZmkmO3rSj3IvRx_tuje5lM,18285
+euroeval/languages.py,sha256=ABZu-en6eyrpbKsJK7pCYyhZIT_We4raLAFxTuiZVzg,37299
+euroeval/logging_utils.py,sha256=iq9K2-7URgeHOUJCYCC9k8vRAz4YmBf4f44GpCVEGcc,8802
+euroeval/model_cache.py,sha256=Jz7pIgzWav-29jqajxBkA-Nm0jDN6npmBDx5JgR_CgE,9304
+euroeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
+euroeval/model_loading.py,sha256=Ru44ONwCMEquM48T-pDWCnZq2V7mFKhu7L80OXtX4co,2340
+euroeval/scores.py,sha256=9a1XtppFbp8GJFc9JdThGxqBY0YUE7-92oyrlxScjNk,3281
+euroeval/speed_benchmark.py,sha256=VUOvauc9tuAegThNT2g1a-Z1l7DEmKq57dHI4t16o5A,4068
+euroeval/tasks.py,sha256=rEzL8Tixux6kHdeMQxOGCHSi-HK2B1BDFeYVuOq7QCU,5955
+euroeval/tokenisation_utils.py,sha256=KccepQZaAtwUI_K1LLHjz8R47O4Q7HUthkSShFl-m40,21551
+euroeval/types.py,sha256=dJFhHEPpTaDWWNUx1bayG7w9dVOspBdM1JT1Pr9EzhI,2951
+euroeval/utils.py,sha256=_l8vSXzXytA-BXvVREBC2sfgRB_xzfkfmPX-q7JnF_U,15287
+euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
+euroeval/benchmark_modules/base.py,sha256=PKRbPhh7mh-s9IPxn893G9v6MUCmMMJJZkhVD1rrwr0,11286
+euroeval/benchmark_modules/fresh.py,sha256=h4TPJlJK6xxxyhAXURr0T9gk4Khm3WyujnKBDFc3sCE,10806
+euroeval/benchmark_modules/hf.py,sha256=4dFXgLuSi48NPftd8mLd5a4tcF8r28w9jnntKKXHBGw,45935
+euroeval/benchmark_modules/litellm.py,sha256=LoGfj9Zp-S8YhKjufbDy3FvFdrqlLFj_eBKtPhvVjpM,66685
+euroeval/benchmark_modules/vllm.py,sha256=BjVcotncijq2YcDIZL-JSgtap6uiE4k4PTda187NlGw,47352
+euroeval/dataset_configs/__init__.py,sha256=8RXDhAowzRcKFYQwoDTumzpsCp5ab2n0_09_mlt0mdI,2361
+euroeval/dataset_configs/bulgarian.py,sha256=OVoDPTRdU-lVq-xUka7-Ct20h2jbs8HV43KBxRQenIE,1284
+euroeval/dataset_configs/czech.py,sha256=ghv2yNw839G-utll8PQRSjyKYbM5gfoQhFKy664GTCI,1562
+euroeval/dataset_configs/danish.py,sha256=LEKs04vK2KnV0CYheT7FeS-g3iHBvf2bQxyl0D_LbTg,3293
+euroeval/dataset_configs/dutch.py,sha256=HB1O7IxQUyOxLg7g0tqcCci1MHaKtZJiFlRJZo2jPr4,3107
+euroeval/dataset_configs/english.py,sha256=nc9nGwxf1tHVMUhQeND61yJbpTO4rJaAusPZlstqtq0,2817
+euroeval/dataset_configs/estonian.py,sha256=bWiKA_dJ7WUE8Z_1YZnSewhi4ZdCQBGJZ7pQxkCwMcU,2757
+euroeval/dataset_configs/faroese.py,sha256=13qYwXonDPWG9Av5MY_NBNTRDglPVKz5_mbz7ZCJ_mo,1247
+euroeval/dataset_configs/finnish.py,sha256=eyAMoQE43R-jKD3IZS0iqD2SZOPxWLqjmXzmm00tqPs,2444
+euroeval/dataset_configs/french.py,sha256=z6cGY0J7TgXjqmkOLOxQE6ADO1EEPoMF1sdk2n2USe0,2611
+euroeval/dataset_configs/german.py,sha256=CXW8_6CMMtrqrq85tDB6gY_fja_FATOqN5glNJI6efU,2858
+euroeval/dataset_configs/greek.py,sha256=BLdhfBIG7ABzoZWvtI_VlInpb0SYLK36nhHN33LPVo0,1475
+euroeval/dataset_configs/icelandic.py,sha256=G2Ibe6oF1NknkQmHqLpoHlysW_8f-0G53DJAGAlLkzQ,3552
+euroeval/dataset_configs/italian.py,sha256=qhjAQChnQanzs7EyN1DSAJ4OOU41HAlWqWntQOtbWCw,2761
+euroeval/dataset_configs/latvian.py,sha256=wbwIDieq5Lplng5Jzx9LEqq4d8b5LnNOyCUmT64b4bA,1928
+euroeval/dataset_configs/lithuanian.py,sha256=NaU5uTeHFaXgWHHXsxOzG5LO7KuTT5yXHqdjhIJIVKo,1498
+euroeval/dataset_configs/norwegian.py,sha256=skKKs4V4-zbd-1lpVUaxKXAjTMpBM6SAU5HZ8kcQ2mI,5454
+euroeval/dataset_configs/polish.py,sha256=nN_NT8cUK2iv1L_zO_aCYOk2R7ACSDZgvI7e0hIaFAM,2074
+euroeval/dataset_configs/portuguese.py,sha256=m9lEeVtI_yNvIdTIEOn3HFK_ilY2tn3-acC981hjZFM,2401
+euroeval/dataset_configs/serbian.py,sha256=yE_aoQRU8qiJqSD-iEt6o_M4rSkGahLX8qz2RRYDvbo,1411
+euroeval/dataset_configs/slovak.py,sha256=7JQj2GlRj1giACKqVZ6QJxiFBzP9byoowm0DjPaZ5u8,1252
+euroeval/dataset_configs/spanish.py,sha256=FhHSsFemau8mMkqF5VCSViRBZEcnSGcDeUUO9318XDM,2745
+euroeval/dataset_configs/swedish.py,sha256=QAN-cy818OojTQZdDRCPbMB_HZO_xZGoTb3CR7j9JmU,3071
+euroeval/dataset_configs/ukrainian.py,sha256=spbCmCOU27jOfz6FZxqCIfVmDN5l8H-7VCl-k-8eAIo,1527
+euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
+euroeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
+euroeval/metrics/huggingface.py,sha256=0j8AITealZiRNQU1K1apvRMvz8YFH0AL_xHyrvXX-qo,7159
+euroeval/metrics/llm_as_a_judge.py,sha256=ohEQfkevuixt_vjjb0oMfKus2SfewIm9FRl8TpNlLXs,9729
+euroeval/metrics/pipeline.py,sha256=xGCA7N1F4cLKOIeXP9SGAZvrWToREwAVb_gR5iBMQIU,10825
+euroeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
+euroeval/prompt_templates/__init__.py,sha256=HN6Qspqm10ik6RKoPBJsvM-Nng9sywQojZbtbCqj4Z8,475
+euroeval/prompt_templates/classification.py,sha256=QuZh6hTMaqMYTsoruAhwjVP9381zzlQmDIwSeyGnav0,10121
+euroeval/prompt_templates/linguistic_acceptability.py,sha256=QOP0X0mpuPGmUUp9FkjsdMGfkK9FWSvh-Mx2gE7Xju0,12753
+euroeval/prompt_templates/multiple_choice.py,sha256=pzdWzGye0cV1bVWl_GdOupm1cdwT_BCKPNd3Ltx_FM4,10547
+euroeval/prompt_templates/named_entity_recognition.py,sha256=cUrKwDTJ3ztIvlZBo18Xccst7gqsN8guEdnKPFMYpOI,23942
+euroeval/prompt_templates/reading_comprehension.py,sha256=kjGh6rOlqTaUiTln9hco5BFjOeZpWcFP0kywOP6Y1Os,13240
+euroeval/prompt_templates/sentiment_classification.py,sha256=t17NxR2KQ2f1VWjRybHQ4LetwTawtMV_-5VobXrjSHM,13877
+euroeval/prompt_templates/summarization.py,sha256=6wYnUe10UT-sP3pFYJxfXXgAN1_SYIHyZi0uR6fis5A,8916
+euroeval/prompt_templates/token_classification.py,sha256=8Uw34mN2xQ_5es-nz7vCK-GgDg_oE-zsAzPJPzAxFrQ,15531
+euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
+euroeval/task_group_utils/multiple_choice_classification.py,sha256=m0jRF8ifm4iJsBwvLsPN7kIS_5HpnOy6XMAOaX09AcY,7085
+euroeval/task_group_utils/question_answering.py,sha256=kAceiNVcRwASjhlvOCPXj0uX_MjX_TMsAr7fkVgTFJo,28130
+euroeval/task_group_utils/sequence_classification.py,sha256=vEV7b7DhiLaj2qz67u3k_rGdteC1yknC2RC0lnLzBKY,16520
+euroeval/task_group_utils/text_to_text.py,sha256=Y9f27-I_ie0zRBmOwleObZ5u8B0lzvYmbJ0zH9DqI1U,5415
+euroeval/task_group_utils/token_classification.py,sha256=_u8Ks4FK1oKB8Ifyu_I2NMW-z1GyJ7Mml5Z_edEaV_Q,17312
+euroeval-16.5.0.dist-info/METADATA,sha256=bBhULf3_umIesaQrluir1G4cbhYNm_0qQAOxwG7L7d8,22112
+euroeval-16.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+euroeval-16.5.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
+euroeval-16.5.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
+euroeval-16.5.0.dist-info/RECORD,,
euroeval-16.4.0.dist-info/RECORD
DELETED

@@ -1,75 +0,0 @@
-euroeval/__init__.py,sha256=Ci1Sta9hl-v-ZPwJ1qqAVpzvj-vVgZZbQQuP5Qopc4o,3956
-euroeval/benchmark_config_factory.py,sha256=x1HfK8kDVxN14PPHxonsDv0vhkdrexsMJfKaXhO9WQQ,8540
-euroeval/benchmarker.py,sha256=M_2KV0f41RmCiRLcQLEIACt1TcL7QqvH48ds0ebJCG8,49705
-euroeval/caching_utils.py,sha256=AkR0TLY9EHbqv3TrhtCmpEGsm0DWZSLEfR2fRHq1S3E,2587
-euroeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
-euroeval/cli.py,sha256=yb_Gw3TrouBiUmeQIZF0705zio8UPFACUDOzSB3CCfo,9316
-euroeval/constants.py,sha256=XAdsdSE4bAOUeW2o5qmMlfqRmsXZUNIKlEZrbxBPdLk,2845
-euroeval/data_loading.py,sha256=r2GtvH2fAPapE9Idyu8W27n3YXD2Bgw8Qt88vdDn0DQ,4751
-euroeval/data_models.py,sha256=j3gdzLSxgr3FakBIOqvVGZ5K5cXb4RrCMOkJc8J8Zmc,28007
-euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
-euroeval/exceptions.py,sha256=5kQ-YvHyFO3aaA-zfOTaS07LRFH8xlSqlOiATvnIObY,5116
-euroeval/finetuning.py,sha256=t3VqkuRVqRxcpHhSzU4nF4npvLDnjNzPJqGqG-L6ifk,11764
-euroeval/generation.py,sha256=epv2QPHTxzoBmq5OFQtolvuvJ6ce4FkdD03NTYdKFZk,12579
-euroeval/generation_utils.py,sha256=3mI-T9imk433VsvbwCy71Zzv2XOdm-l1SH-IiFfSd9M,18285
-euroeval/languages.py,sha256=G2cJI8lDT7eOFHxNR9opJ6zWjdxFDwm8P8HY_4WKFI4,33815
-euroeval/logging_utils.py,sha256=iq9K2-7URgeHOUJCYCC9k8vRAz4YmBf4f44GpCVEGcc,8802
-euroeval/model_cache.py,sha256=S_8ZtLaliTiUEvQAVw_DJ1qk5PWUO5-eE04hGScCj_o,9246
-euroeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
-euroeval/model_loading.py,sha256=mVh05sPENBBOIUkd_rwXqbBd13YvF_tOVZ8XGtguNzw,2338
-euroeval/scores.py,sha256=tlLfmI6Pgm1d_odubfyFcGLoB6Mxgfw3Yl7POzFv9l8,3235
-euroeval/speed_benchmark.py,sha256=k9xEF7jPAMrEBcZdykilQ6eJMGhFW1eUGuhQco9470M,4034
-euroeval/tasks.py,sha256=EzEWFDo_0ffabBFiRu-mw80jENUioE8D_VEn_Dsv-F8,4703
-euroeval/tokenisation_utils.py,sha256=rytsJy4mNEqeSdGzXsvVU4OShveeHOOlbaQOJDsX4S0,21275
-euroeval/types.py,sha256=_iVy-RwiCGu9TNX2sfyJTdCvXy1akNGTCywAo-YpBqU,2815
-euroeval/utils.py,sha256=VJrbEFXr6ZCJIHiMT7M7Y84ZYl0LHe2uhIz4eePciAw,15235
-euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
-euroeval/benchmark_modules/base.py,sha256=PeOqhfrc9iqyRz1aDHFBiTpWcwU5zDXo5pB_CD8W4VI,11199
-euroeval/benchmark_modules/fresh.py,sha256=h4TPJlJK6xxxyhAXURr0T9gk4Khm3WyujnKBDFc3sCE,10806
-euroeval/benchmark_modules/hf.py,sha256=enj88OY2XELdNgLnqeRPXvX2ATgcm6fjQpSYpBhmgzI,46274
-euroeval/benchmark_modules/litellm.py,sha256=VNFIOJU8TJNrifHtfBILh1MeS7tehqztxH5WoPLr5fc,66581
-euroeval/benchmark_modules/vllm.py,sha256=dm19gYG-MR63V8YpZBM1iOQ1c7xbFRzo9NuDWHG3q-Y,46952
-euroeval/dataset_configs/__init__.py,sha256=zvyH0onXIDtm8lHDVRSzk7J0_mJFU0H6WnLueaxM7WE,2157
-euroeval/dataset_configs/czech.py,sha256=9IDYKg1aoikMXIqQo2tYTQHf2WmQEujkNTyF0a8c9c8,2134
-euroeval/dataset_configs/danish.py,sha256=nkw1poFOJGpQJFB9HYC6bdlNzUR5pXxYacvZs4GrK4Y,5522
-euroeval/dataset_configs/dutch.py,sha256=CDr0oQnmDxeNloZ6iTGYPcNqPM5R9N8Z4aTKEE0C2MU,5408
-euroeval/dataset_configs/english.py,sha256=2sJQPM4rZSYlwE5v4AiDm95Boq-_53AzdOt5cL_drJs,4628
-euroeval/dataset_configs/estonian.py,sha256=fC5TUGpd6u22DUxoETBLA7EThwqsPDU54gXTzWtFCHk,3369
-euroeval/dataset_configs/faroese.py,sha256=sFC25nwlPtnl6hwkPOxPkwVggPGTjw167YhSBnLl1EA,3039
-euroeval/dataset_configs/finnish.py,sha256=DwrhwluoV4rmW8m2E5gWTfvHZ1XKRQG_3KU7wSOqM40,4281
-euroeval/dataset_configs/french.py,sha256=MIZUAn2rNwasb41DC92q6vMwRBem1Fw4D7Hj1cLFlfs,4611
-euroeval/dataset_configs/german.py,sha256=D-Yuz0pGf8pOEfMmTibXfk0k0QGjA4nEyAmea4TnCh0,5021
-euroeval/dataset_configs/icelandic.py,sha256=qX-szARxqzJ9l-h0k5iXirC5StpW_B3BOakZQ14zmpM,5797
-euroeval/dataset_configs/italian.py,sha256=GaN7u2NgAOrpe7n--CsmpQm_n-PCVsTN-wV78UKtQco,4895
-euroeval/dataset_configs/latvian.py,sha256=8hb32_YD_nQHn4kRVfwiGRDoJHF8M00ZdcwuD5ozJwU,2647
-euroeval/dataset_configs/lithuanian.py,sha256=Gv3ta3Gs7xknZ_h_dVWY7YN43UfQzLkJPnYnQcuBguU,1998
-euroeval/dataset_configs/norwegian.py,sha256=VcNftTvOJMCQEJvDFe3iixKbr8cjE3C6oHG4Jp4HET4,7636
-euroeval/dataset_configs/polish.py,sha256=wiorGf4Z06WLPYAa5blD8F2qDaEWUr4MgVShkkVfVo4,3563
-euroeval/dataset_configs/portuguese.py,sha256=TsjJMGJc_wExE_9TMJiQuxhN9BylXcHTXRFaCmkE4Gg,3980
-euroeval/dataset_configs/slovak.py,sha256=Dc9ai2VW-ckQk7trglL2w1Ki0NECsr1RMXQPYBAN6OU,1759
-euroeval/dataset_configs/spanish.py,sha256=VQHQiRsTLlen1zBKgbmRiXSB--b89WofXgFxeIgMR1o,4793
-euroeval/dataset_configs/swedish.py,sha256=pNd-O9cU-4_9gkQU-EFVzsjri6Jg-0taVkzQYdFT6Lw,5257
-euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
-euroeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
-euroeval/metrics/huggingface.py,sha256=eCbL-jIj5WYAVRSYdbBWDzDoakIPl6_rSvBqLZhXO-E,6736
-euroeval/metrics/llm_as_a_judge.py,sha256=br-pIyzhgrfDXZb6K0GuSUAyczLnrc7pFugW1DYwK6w,9721
-euroeval/metrics/pipeline.py,sha256=xGCA7N1F4cLKOIeXP9SGAZvrWToREwAVb_gR5iBMQIU,10825
-euroeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
-euroeval/prompt_templates/__init__.py,sha256=HWMZpybxs2xHPnVeJ43893conARahIVLWNXeRhXEGZw,357
-euroeval/prompt_templates/linguistic_acceptability.py,sha256=Q-GyoS_c_iM-wQ8aXTywRTdxl1kUF0WEzHWh40hsk3s,10098
-euroeval/prompt_templates/multiple_choice.py,sha256=p6Dt3EMaASyqFHOjxdisFnh7OOVi-roCyKalSPwp5Dc,8220
-euroeval/prompt_templates/named_entity_recognition.py,sha256=3yEr1GHk0UbubsTwDSK928QssgYO0mnMfOgVmlDT2HI,19066
-euroeval/prompt_templates/reading_comprehension.py,sha256=0eYnJOfk8u9Zv_Xj6VtDLoQwvfe5_jjzAWGAksRMO6Y,10338
-euroeval/prompt_templates/sentiment_classification.py,sha256=Xg90BzCHQEmgTImn9zqI9Z48nW1paGQ-4AWYCxoUJxk,11027
-euroeval/prompt_templates/summarization.py,sha256=ypyJRX2R5CyCFjJnM3iE5J4OrvLZBwXr7WdBLv8qMKQ,7391
-euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
-euroeval/task_group_utils/multiple_choice_classification.py,sha256=tAFQOM_iZwyknbOcZfw6_71lUSbcB5OlY0gOkNfUBAY,7051
-euroeval/task_group_utils/question_answering.py,sha256=vr5gjIJxqqvbj0DYNSEdN0Ek9fkZ5maCAt7FKqzo-Xs,27695
-euroeval/task_group_utils/sequence_classification.py,sha256=_kYgAIF2LABZ-nate3O6s7vlfI2RGHVtpNPjaMIHLDk,16450
-euroeval/task_group_utils/text_to_text.py,sha256=ibSOiP_wpEyGYQh7uEeTjOp-ojLJsEcJT1W7IWOBfk8,5381
-euroeval/task_group_utils/token_classification.py,sha256=hFiO29eSX_KtqbjJM4jy37jmyhfhfnWj3WTpNvh_vQk,17208
-euroeval-16.4.0.dist-info/METADATA,sha256=ot4RNMLDwwJR2UIk20k59E7MsBOXlIqJPYI9xc_XUP8,15365
-euroeval-16.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-euroeval-16.4.0.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
-euroeval-16.4.0.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
-euroeval-16.4.0.dist-info/RECORD,,
{euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/WHEEL
File without changes

{euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/entry_points.txt
File without changes

{euroeval-16.4.0.dist-info → euroeval-16.5.0.dist-info}/licenses/LICENSE
File without changes