EuroEval 15.3.0__tar.gz → 15.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of EuroEval might be problematic. Click here for more details.
- {euroeval-15.3.0 → euroeval-15.3.1}/CHANGELOG.md +5 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/PKG-INFO +1 -1
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/danish.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/dutch.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/english.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/faroese.md +1 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/french.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/german.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/icelandic.md +2 -2
- euroeval-15.3.1/docs/leaderboards/Monolingual/italian.md +15 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/norwegian.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Monolingual/swedish.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Multilingual/european.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Multilingual/germanic.md +2 -2
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/Multilingual/mainland-scandinavian.md +2 -2
- euroeval-15.3.1/docs/leaderboards/Multilingual/romance.md +15 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/pyproject.toml +1 -1
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmarker.py +10 -12
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/data_loading.py +9 -3
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/versioning.py +1 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/uv.lock +35 -35
- {euroeval-15.3.0 → euroeval-15.3.1}/.github/ISSUE_TEMPLATE/benchmark_dataset_request.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.github/ISSUE_TEMPLATE/bug.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.github/ISSUE_TEMPLATE/model_evaluation_request.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.github/workflows/ci.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.gitignore +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/.pre-commit-config.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/CITATION.cff +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/CODE_OF_CONDUCT.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/CONTRIBUTING.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/Dockerfile.cuda +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/LICENSE +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/README.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/CNAME +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/README.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/README.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/danish.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/dutch.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/english.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/faroese.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/french.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/german.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/icelandic.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/italian.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/norwegian.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/datasets/swedish.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/extras/radial_plotter.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/faq.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/gfx/favicon.png +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/leaderboards/README.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/methodology.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/python-package.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/README.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/common-sense-reasoning.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/knowledge.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/linguistic-acceptability.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/named-entity-recognition.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/reading-comprehension.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/sentiment-classification.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/speed.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/docs/tasks/summarization.md +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/gfx/euroeval.png +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/gfx/euroeval.xcf +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/gfx/scandeval.png +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/makefile +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/mkdocs.yaml +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_config_factory.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/base.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/fresh.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/hf.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/litellm.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/benchmark_modules/vllm.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/callbacks.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/cli.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/constants.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/data_models.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/dataset_configs.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/enums.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/exceptions.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/finetuning.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/generation.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/human_evaluation.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/languages.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/model_cache.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/model_config.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/model_loading.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/scores.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/speed_benchmark.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/multiple_choice_classification.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/question_answering.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/sequence_classification.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/text_to_text.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/task_utils/token_classification.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/tasks.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/types.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/euroeval/utils.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/constants.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_allocine.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_angry_tweets.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_arc.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_arc_is.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_belebele.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_cnn_dailymail.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_conll_en.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_conll_nl.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_dane.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_danish_citizen_tests.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_dansk.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_danske_talemaader.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_danske_talemaader_old.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_dbrd.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_dutch_cola.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_dutch_social.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_eltec.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_fone.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_foqa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_fosent.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_fquad.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_germanquad.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_germeval.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_hellaswag.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_hotter_and_colder_sentiment.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_ice_linguistic.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_icelandic_error_corpus.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_icelandic_knowledge.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_icelandic_qa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_icesum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_ilpost_sum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_jentoft.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_mim_gold_ner.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_mlsum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_mmlu.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_multinerd-it.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_no_cola.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_no_sammendrag.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_nor_common_sense_qa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_nordjylland_news.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_norec.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_norglm_multiqa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_norglm_multisum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_norne.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_norquad.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_nqii.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_nrk_quiz_qa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_orange_sum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_personal_sum.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_rrn.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_sb10k.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_scala.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_scandiqa.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_schibsted.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_sentipolc16.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_squad.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_squad_it.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_squad_nl.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_squad_nl_old.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_sst5.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_suc3.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_swedn.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_swerec.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_wiki_lingua_nl.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_wikiann_fo.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_wikineural-it.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/create_winogrande_is.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/fix_dot_env_file.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/src/scripts/load_ud_pos.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/conftest.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_config_factory.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/test_base.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/test_fresh.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/test_hf.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/test_litellm.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmark_modules/test_vllm.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_benchmarker.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_callbacks.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_cli.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_constants.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_data_loading.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_data_models.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_dataset_configs.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_enums.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_exceptions.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_finetuning.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_generation.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_human_evaluation.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_languages.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_model_cache.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_model_config.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_model_loading.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_scores.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_speed_benchmark.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_task_utils/__init__.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_task_utils/test_question_answering.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_task_utils/test_sequence_classification.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_task_utils/test_text_to_text.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_task_utils/test_token_classification.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_tasks.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_types.py +0 -0
- {euroeval-15.3.0 → euroeval-15.3.1}/tests/test_utils.py +0 -0
|
@@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
## [v15.3.1] - 2025-03-13
|
|
14
|
+
### Fixed
|
|
15
|
+
- Now handles `ConnectionError`s when loading datasets, rather than aborting evaluations.
|
|
16
|
+
|
|
17
|
+
|
|
13
18
|
## [v15.3.0] - 2025-03-12
|
|
14
19
|
### Added
|
|
15
20
|
- Added support for evaluating Italian 🇮🇹! This includes the reading comprehension
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-jBKLq" src="https://datawrapper.dwcdn.net/jBKLq" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}(); </script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-V6Ga5" src="https://datawrapper.dwcdn.net/V6Ga5" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}(); </script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-KTdYX" src="https://datawrapper.dwcdn.net/KTdYX" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-3AxBD" src="https://datawrapper.dwcdn.net/3AxBD" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="789" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-CIvTT" src="https://datawrapper.dwcdn.net/CIvTT" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-8RXPq" src="https://datawrapper.dwcdn.net/8RXPq" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="775" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -15,6 +15,5 @@ See the [leaderboard page](/leaderboards) for more information about all the col
|
|
|
15
15
|
|
|
16
16
|
/// tab | NLU Leaderboard
|
|
17
17
|
select: True
|
|
18
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
19
|
-
</script>
|
|
18
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-lRosR" src="https://datawrapper.dwcdn.net/lRosR" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="849" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
20
19
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-JLrdH" src="https://datawrapper.dwcdn.net/JLrdH" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="799" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-4YpPx" src="https://datawrapper.dwcdn.net/4YpPx" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="775" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-y5gIe" src="https://datawrapper.dwcdn.net/y5gIe" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="813" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-ZIFFx" src="https://datawrapper.dwcdn.net/ZIFFx" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="746" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-kmfGl" src="https://datawrapper.dwcdn.net/kmfGl" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="847" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-8QIRm" src="https://datawrapper.dwcdn.net/8QIRm" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="717" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
hide:
|
|
3
|
+
- toc
|
|
4
|
+
---
|
|
5
|
+
# 🇮🇹 Italian
|
|
6
|
+
|
|
7
|
+
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
|
+
|
|
9
|
+
/// tab | Generative Leaderboard
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-3kmuU" src="https://datawrapper.dwcdn.net/3kmuU" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="806" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
|
+
///
|
|
12
|
+
|
|
13
|
+
/// tab | NLU Leaderboard
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-Ymz2m" src="https://datawrapper.dwcdn.net/Ymz2m" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="806" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
|
+
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-S7Yex" src="https://datawrapper.dwcdn.net/S7Yex" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="847" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-TluPK" src="https://datawrapper.dwcdn.net/TluPK" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="804" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-2Q2s2" src="https://datawrapper.dwcdn.net/2Q2s2" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-7ULxP" src="https://datawrapper.dwcdn.net/7ULxP" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-SZIle" src="https://datawrapper.dwcdn.net/SZIle" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="833" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-IaQfD" src="https://datawrapper.dwcdn.net/IaQfD" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="775" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-Hc95G" src="https://datawrapper.dwcdn.net/Hc95G" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="847" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-COmuG" src="https://datawrapper.dwcdn.net/COmuG" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="818" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -7,9 +7,9 @@ hide:
|
|
|
7
7
|
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
8
|
|
|
9
9
|
/// tab | Generative Leaderboard
|
|
10
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-Ng3Sr" src="https://datawrapper.dwcdn.net/Ng3Sr" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="847" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
11
|
///
|
|
12
12
|
|
|
13
13
|
/// tab | NLU Leaderboard
|
|
14
|
-
<iframe title="" aria-label="Table" id="datawrapper-chart-
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-0ZzPN" src="https://datawrapper.dwcdn.net/0ZzPN" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="818" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
15
|
///
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
hide:
|
|
3
|
+
- toc
|
|
4
|
+
---
|
|
5
|
+
# 🇫🇷🇮🇹Romance
|
|
6
|
+
|
|
7
|
+
See the [leaderboard page](/leaderboards) for more information about all the columns.
|
|
8
|
+
|
|
9
|
+
/// tab | Generative Leaderboard
|
|
10
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-DBqty" src="https://datawrapper.dwcdn.net/DBqty" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="806" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
11
|
+
///
|
|
12
|
+
|
|
13
|
+
/// tab | NLU Leaderboard
|
|
14
|
+
<iframe title="" aria-label="Table" id="datawrapper-chart-NQwsF" src="https://datawrapper.dwcdn.net/NQwsF" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="806" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r,i=0;r=e[i];i++)if(r.contentWindow===a.source){var d=a.data["datawrapper-height"][t]+"px";r.style.height=d}}}))}();</script>
|
|
15
|
+
///
|
|
@@ -18,7 +18,7 @@ from .data_loading import load_data
|
|
|
18
18
|
from .data_models import BenchmarkConfigParams, BenchmarkResult
|
|
19
19
|
from .dataset_configs import get_all_dataset_configs
|
|
20
20
|
from .enums import Device, ModelType
|
|
21
|
-
from .exceptions import InvalidBenchmark, InvalidModel
|
|
21
|
+
from .exceptions import HuggingFaceHubDown, InvalidBenchmark, InvalidModel
|
|
22
22
|
from .finetuning import finetune
|
|
23
23
|
from .generation import generate
|
|
24
24
|
from .model_config import get_model_config
|
|
@@ -769,23 +769,21 @@ class Benchmarker:
|
|
|
769
769
|
logger.debug(f"Results:\n{results}")
|
|
770
770
|
return record
|
|
771
771
|
|
|
772
|
+
except HuggingFaceHubDown:
|
|
773
|
+
wait_time = 30
|
|
774
|
+
logger.debug(
|
|
775
|
+
f"The Hugging Face Hub seems to be down. Retrying in {wait_time} "
|
|
776
|
+
"seconds."
|
|
777
|
+
)
|
|
778
|
+
sleep(wait_time)
|
|
779
|
+
continue
|
|
780
|
+
|
|
772
781
|
except (InvalidBenchmark, InvalidModel) as e:
|
|
773
782
|
# If the model ID is not valid then raise an error
|
|
774
783
|
model_err_msg = "does not exist on the Hugging Face Hub"
|
|
775
784
|
if benchmark_config.raise_errors and model_err_msg in str(e):
|
|
776
785
|
raise e
|
|
777
786
|
|
|
778
|
-
# Otherwise, if the error is due to Hugging Face Hub being down, then
|
|
779
|
-
# wait a bit and try again
|
|
780
|
-
elif "The Hugging Face Hub seems to be down." in str(e):
|
|
781
|
-
wait_time = 30
|
|
782
|
-
logger.debug(
|
|
783
|
-
"The Hugging Face Hub seems to be down. Retrying in "
|
|
784
|
-
f"{wait_time} seconds."
|
|
785
|
-
)
|
|
786
|
-
sleep(wait_time)
|
|
787
|
-
continue
|
|
788
|
-
|
|
789
787
|
# Otherwise, if the error is due to the MPS fallback not being enabled,
|
|
790
788
|
# then raise an error asking the user to enable it
|
|
791
789
|
elif "PYTORCH_ENABLE_MPS_FALLBACK" in str(e):
|
|
@@ -10,7 +10,7 @@ from huggingface_hub.errors import HfHubHTTPError
|
|
|
10
10
|
from numpy.random import Generator
|
|
11
11
|
|
|
12
12
|
from .data_models import BenchmarkConfig, DatasetConfig
|
|
13
|
-
from .exceptions import InvalidBenchmark
|
|
13
|
+
from .exceptions import HuggingFaceHubDown, InvalidBenchmark
|
|
14
14
|
from .utils import unscramble
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger("euroeval")
|
|
@@ -31,6 +31,12 @@ def load_data(
|
|
|
31
31
|
|
|
32
32
|
Returns:
|
|
33
33
|
A list of bootstrapped datasets, one for each iteration.
|
|
34
|
+
|
|
35
|
+
Raises:
|
|
36
|
+
InvalidBenchmark:
|
|
37
|
+
If the dataset cannot be loaded.
|
|
38
|
+
HuggingFaceHubDown:
|
|
39
|
+
If the Hugging Face Hub is down.
|
|
34
40
|
"""
|
|
35
41
|
num_attempts = 5
|
|
36
42
|
for _ in range(num_attempts):
|
|
@@ -41,14 +47,14 @@ def load_data(
|
|
|
41
47
|
token=unscramble("HjccJFhIozVymqXDVqTUTXKvYhZMTbfIjMxG_"),
|
|
42
48
|
)
|
|
43
49
|
break
|
|
44
|
-
except (FileNotFoundError, DatasetsError):
|
|
50
|
+
except (FileNotFoundError, DatasetsError, ConnectionError):
|
|
45
51
|
logger.warning(
|
|
46
52
|
f"Failed to load dataset {dataset_config.huggingface_id!r}. Retrying..."
|
|
47
53
|
)
|
|
48
54
|
time.sleep(1)
|
|
49
55
|
continue
|
|
50
56
|
except HfHubHTTPError:
|
|
51
|
-
raise
|
|
57
|
+
raise HuggingFaceHubDown()
|
|
52
58
|
else:
|
|
53
59
|
raise InvalidBenchmark(
|
|
54
60
|
f"Failed to load dataset {dataset_config.huggingface_id!r} after "
|
|
@@ -65,6 +65,7 @@ def set_new_version(major: int, minor: int, patch: int) -> None:
|
|
|
65
65
|
subprocess.run(["make", "install"])
|
|
66
66
|
|
|
67
67
|
# Add to version control
|
|
68
|
+
subprocess.run(["git", "add", ".pre-commit-config.yaml"])
|
|
68
69
|
subprocess.run(["git", "add", "CHANGELOG.md"])
|
|
69
70
|
subprocess.run(["git", "add", "pyproject.toml"])
|
|
70
71
|
subprocess.run(["git", "add", "uv.lock"])
|
|
@@ -27,7 +27,7 @@ wheels = [
|
|
|
27
27
|
|
|
28
28
|
[[package]]
|
|
29
29
|
name = "accelerate"
|
|
30
|
-
version = "1.5.
|
|
30
|
+
version = "1.5.1"
|
|
31
31
|
source = { registry = "https://pypi.org/simple" }
|
|
32
32
|
dependencies = [
|
|
33
33
|
{ name = "huggingface-hub" },
|
|
@@ -38,9 +38,9 @@ dependencies = [
|
|
|
38
38
|
{ name = "safetensors" },
|
|
39
39
|
{ name = "torch" },
|
|
40
40
|
]
|
|
41
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
41
|
+
sdist = { url = "https://files.pythonhosted.org/packages/64/fb/10daafb0efbb1af95d782c9907004bd50fcfd74d6e11e6a91945df37768e/accelerate-1.5.1.tar.gz", hash = "sha256:5d936faf3a31894c6160f2f2a984a38aecbba760ef919ae298b2ecd57ea9bf87", size = 353342 }
|
|
42
42
|
wheels = [
|
|
43
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
43
|
+
{ url = "https://files.pythonhosted.org/packages/4b/ef/2723a3c53d06619dac38c1630bac3d9b7aec91e1a18a82a08b93696b8baf/accelerate-1.5.1-py3-none-any.whl", hash = "sha256:4838cff9ed1bb0ddc9d967530ced62a1d74ea21cdb57688400359ab32682f03e", size = 345134 },
|
|
44
44
|
]
|
|
45
45
|
|
|
46
46
|
[[package]]
|
|
@@ -202,11 +202,11 @@ wheels = [
|
|
|
202
202
|
|
|
203
203
|
[[package]]
|
|
204
204
|
name = "attrs"
|
|
205
|
-
version = "25.
|
|
205
|
+
version = "25.3.0"
|
|
206
206
|
source = { registry = "https://pypi.org/simple" }
|
|
207
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
207
|
+
sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 }
|
|
208
208
|
wheels = [
|
|
209
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
209
|
+
{ url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 },
|
|
210
210
|
]
|
|
211
211
|
|
|
212
212
|
[[package]]
|
|
@@ -752,7 +752,7 @@ wheels = [
|
|
|
752
752
|
|
|
753
753
|
[[package]]
|
|
754
754
|
name = "euroeval"
|
|
755
|
-
version = "15.3.
|
|
755
|
+
version = "15.3.1"
|
|
756
756
|
source = { editable = "." }
|
|
757
757
|
dependencies = [
|
|
758
758
|
{ name = "accelerate" },
|
|
@@ -1642,7 +1642,7 @@ wheels = [
|
|
|
1642
1642
|
|
|
1643
1643
|
[[package]]
|
|
1644
1644
|
name = "litellm"
|
|
1645
|
-
version = "1.63.
|
|
1645
|
+
version = "1.63.7"
|
|
1646
1646
|
source = { registry = "https://pypi.org/simple" }
|
|
1647
1647
|
dependencies = [
|
|
1648
1648
|
{ name = "aiohttp" },
|
|
@@ -1657,9 +1657,9 @@ dependencies = [
|
|
|
1657
1657
|
{ name = "tiktoken" },
|
|
1658
1658
|
{ name = "tokenizers" },
|
|
1659
1659
|
]
|
|
1660
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
1660
|
+
sdist = { url = "https://files.pythonhosted.org/packages/5c/7a/6c1994a239abd1b335001a46ae47fa055a24c493b6de19a9fa1872187fe9/litellm-1.63.7.tar.gz", hash = "sha256:2fbd7236d5e5379eee18556857ed62a5ed49f4f09e03ff33cf15932306b984f1", size = 6598034 }
|
|
1661
1661
|
wheels = [
|
|
1662
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
1662
|
+
{ url = "https://files.pythonhosted.org/packages/1e/44/255c7ecb8b6f3f730a37422736509c21cb1bf4da66cc060d872005bda9f5/litellm-1.63.7-py3-none-any.whl", hash = "sha256:fbdee39a894506c68f158c6b4e0079f9e9c023441fff7215e7b8e42162dba0a7", size = 6909807 },
|
|
1663
1663
|
]
|
|
1664
1664
|
|
|
1665
1665
|
[[package]]
|
|
@@ -2010,7 +2010,7 @@ wheels = [
|
|
|
2010
2010
|
|
|
2011
2011
|
[[package]]
|
|
2012
2012
|
name = "mkdocs-material"
|
|
2013
|
-
version = "9.6.
|
|
2013
|
+
version = "9.6.8"
|
|
2014
2014
|
source = { registry = "https://pypi.org/simple" }
|
|
2015
2015
|
dependencies = [
|
|
2016
2016
|
{ name = "babel" },
|
|
@@ -2025,9 +2025,9 @@ dependencies = [
|
|
|
2025
2025
|
{ name = "pymdown-extensions" },
|
|
2026
2026
|
{ name = "requests" },
|
|
2027
2027
|
]
|
|
2028
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
2028
|
+
sdist = { url = "https://files.pythonhosted.org/packages/10/0a/17557708cfc6a11a1a941199b6b54a8990b297d910db81a43f1082b11e1b/mkdocs_material-9.6.8.tar.gz", hash = "sha256:8de31bb7566379802532b248bd56d9c4bc834afc4625884bf5769f9412c6a354", size = 3948078 }
|
|
2029
2029
|
wheels = [
|
|
2030
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
2030
|
+
{ url = "https://files.pythonhosted.org/packages/18/fd/0e6aa44f5b4fb5a386f19c398222a6c75a313d52567ba992bad691cf0d80/mkdocs_material-9.6.8-py3-none-any.whl", hash = "sha256:0a51532dd8aa80b232546c073fe3ef60dfaef1b1b12196ac7191ee01702d1cf8", size = 8697857 },
|
|
2031
2031
|
]
|
|
2032
2032
|
|
|
2033
2033
|
[[package]]
|
|
@@ -2492,7 +2492,7 @@ wheels = [
|
|
|
2492
2492
|
|
|
2493
2493
|
[[package]]
|
|
2494
2494
|
name = "openai"
|
|
2495
|
-
version = "1.66.
|
|
2495
|
+
version = "1.66.3"
|
|
2496
2496
|
source = { registry = "https://pypi.org/simple" }
|
|
2497
2497
|
dependencies = [
|
|
2498
2498
|
{ name = "anyio" },
|
|
@@ -2504,9 +2504,9 @@ dependencies = [
|
|
|
2504
2504
|
{ name = "tqdm" },
|
|
2505
2505
|
{ name = "typing-extensions" },
|
|
2506
2506
|
]
|
|
2507
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
2507
|
+
sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 }
|
|
2508
2508
|
wheels = [
|
|
2509
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
2509
|
+
{ url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 },
|
|
2510
2510
|
]
|
|
2511
2511
|
|
|
2512
2512
|
[[package]]
|
|
@@ -4053,27 +4053,27 @@ wheels = [
|
|
|
4053
4053
|
|
|
4054
4054
|
[[package]]
|
|
4055
4055
|
name = "tokenizers"
|
|
4056
|
-
version = "0.21.
|
|
4056
|
+
version = "0.21.1"
|
|
4057
4057
|
source = { registry = "https://pypi.org/simple" }
|
|
4058
4058
|
dependencies = [
|
|
4059
4059
|
{ name = "huggingface-hub" },
|
|
4060
4060
|
]
|
|
4061
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
4061
|
+
sdist = { url = "https://files.pythonhosted.org/packages/92/76/5ac0c97f1117b91b7eb7323dcd61af80d72f790b4df71249a7850c195f30/tokenizers-0.21.1.tar.gz", hash = "sha256:a1bb04dc5b448985f86ecd4b05407f5a8d97cb2c0532199b2a302a604a0165ab", size = 343256 }
|
|
4062
4062
|
wheels = [
|
|
4063
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4064
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4065
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4066
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4067
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4068
|
-
{ url = "https://files.pythonhosted.org/packages/4d/
|
|
4069
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4070
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4071
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4072
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4073
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4074
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4075
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4076
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4063
|
+
{ url = "https://files.pythonhosted.org/packages/a5/1f/328aee25f9115bf04262e8b4e5a2050b7b7cf44b59c74e982db7270c7f30/tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41", size = 2780767 },
|
|
4064
|
+
{ url = "https://files.pythonhosted.org/packages/ae/1a/4526797f3719b0287853f12c5ad563a9be09d446c44ac784cdd7c50f76ab/tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3", size = 2650555 },
|
|
4065
|
+
{ url = "https://files.pythonhosted.org/packages/4d/7a/a209b29f971a9fdc1da86f917fe4524564924db50d13f0724feed37b2a4d/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28da6b72d4fb14ee200a1bd386ff74ade8992d7f725f2bde2c495a9a98cf4d9f", size = 2937541 },
|
|
4066
|
+
{ url = "https://files.pythonhosted.org/packages/3c/1e/b788b50ffc6191e0b1fc2b0d49df8cff16fe415302e5ceb89f619d12c5bc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34d8cfde551c9916cb92014e040806122295a6800914bab5865deb85623931cf", size = 2819058 },
|
|
4067
|
+
{ url = "https://files.pythonhosted.org/packages/36/aa/3626dfa09a0ecc5b57a8c58eeaeb7dd7ca9a37ad9dd681edab5acd55764c/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aaa852d23e125b73d283c98f007e06d4595732104b65402f46e8ef24b588d9f8", size = 3133278 },
|
|
4068
|
+
{ url = "https://files.pythonhosted.org/packages/a4/4d/8fbc203838b3d26269f944a89459d94c858f5b3f9a9b6ee9728cdcf69161/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a21a15d5c8e603331b8a59548bbe113564136dc0f5ad8306dd5033459a226da0", size = 3144253 },
|
|
4069
|
+
{ url = "https://files.pythonhosted.org/packages/d8/1b/2bd062adeb7c7511b847b32e356024980c0ffcf35f28947792c2d8ad2288/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fdbd4c067c60a0ac7eca14b6bd18a5bebace54eb757c706b47ea93204f7a37c", size = 3398225 },
|
|
4070
|
+
{ url = "https://files.pythonhosted.org/packages/8a/63/38be071b0c8e06840bc6046991636bcb30c27f6bb1e670f4f4bc87cf49cc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd9a0061e403546f7377df940e866c3e678d7d4e9643d0461ea442b4f89e61a", size = 3038874 },
|
|
4071
|
+
{ url = "https://files.pythonhosted.org/packages/ec/83/afa94193c09246417c23a3c75a8a0a96bf44ab5630a3015538d0c316dd4b/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:db9484aeb2e200c43b915a1a0150ea885e35f357a5a8fabf7373af333dcc8dbf", size = 9014448 },
|
|
4072
|
+
{ url = "https://files.pythonhosted.org/packages/ae/b3/0e1a37d4f84c0f014d43701c11eb8072704f6efe8d8fc2dcdb79c47d76de/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed248ab5279e601a30a4d67bdb897ecbe955a50f1e7bb62bd99f07dd11c2f5b6", size = 8937877 },
|
|
4073
|
+
{ url = "https://files.pythonhosted.org/packages/ac/33/ff08f50e6d615eb180a4a328c65907feb6ded0b8f990ec923969759dc379/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:9ac78b12e541d4ce67b4dfd970e44c060a2147b9b2a21f509566d556a509c67d", size = 9186645 },
|
|
4074
|
+
{ url = "https://files.pythonhosted.org/packages/5f/aa/8ae85f69a9f6012c6f8011c6f4aa1c96154c816e9eea2e1b758601157833/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e5a69c1a4496b81a5ee5d2c1f3f7fbdf95e90a0196101b0ee89ed9956b8a168f", size = 9384380 },
|
|
4075
|
+
{ url = "https://files.pythonhosted.org/packages/e8/5b/a5d98c89f747455e8b7a9504910c865d5e51da55e825a7ae641fb5ff0a58/tokenizers-0.21.1-cp39-abi3-win32.whl", hash = "sha256:1039a3a5734944e09de1d48761ade94e00d0fa760c0e0551151d4dd851ba63e3", size = 2239506 },
|
|
4076
|
+
{ url = "https://files.pythonhosted.org/packages/e6/b6/072a8e053ae600dcc2ac0da81a23548e3b523301a442a6ca900e92ac35be/tokenizers-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:0f0dcbcc9f6e13e675a66d7a5f2f225a736745ce484c1a4e07476a89ccdad382", size = 2435481 },
|
|
4077
4077
|
]
|
|
4078
4078
|
|
|
4079
4079
|
[[package]]
|
|
@@ -4333,14 +4333,14 @@ wheels = [
|
|
|
4333
4333
|
|
|
4334
4334
|
[[package]]
|
|
4335
4335
|
name = "types-setuptools"
|
|
4336
|
-
version = "
|
|
4336
|
+
version = "76.0.0.20250313"
|
|
4337
4337
|
source = { registry = "https://pypi.org/simple" }
|
|
4338
4338
|
dependencies = [
|
|
4339
4339
|
{ name = "setuptools" },
|
|
4340
4340
|
]
|
|
4341
|
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
|
4341
|
+
sdist = { url = "https://files.pythonhosted.org/packages/b8/0f/2d1d000c2be3919bcdea15e5da48456bf1e55c18d02c5509ea59dade1408/types_setuptools-76.0.0.20250313.tar.gz", hash = "sha256:b2be66f550f95f3cad2a7d46177b273c7e9c80df7d257fa57addbbcfc8126a9e", size = 43627 }
|
|
4342
4342
|
wheels = [
|
|
4343
|
-
{ url = "https://files.pythonhosted.org/packages/
|
|
4343
|
+
{ url = "https://files.pythonhosted.org/packages/ca/89/ea9669a0a76b160ffb312d0b02b15bad053c1bc81d2a54e42e3a402ca754/types_setuptools-76.0.0.20250313-py3-none-any.whl", hash = "sha256:bf454b2a49b8cfd7ebcf5844d4dd5fe4c8666782df1e3663c5866fd51a47460e", size = 65845 },
|
|
4344
4344
|
]
|
|
4345
4345
|
|
|
4346
4346
|
[[package]]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|