mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +28 -17
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +22 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +16 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +22 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +25 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +15 -9
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 414074,
|
|
4
|
+
"number_of_characters": 438348000,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 438140779,
|
|
7
|
+
"min_text_length": 75,
|
|
8
|
+
"average_text_length": 1058.4849178125876,
|
|
9
|
+
"max_text_length": 103665,
|
|
10
|
+
"unique_texts": 413932
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 207221,
|
|
15
|
+
"min_text_length": 422,
|
|
16
|
+
"average_text_length": 1459.3028169014085,
|
|
17
|
+
"max_text_length": 3964,
|
|
18
|
+
"unique_texts": 142
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 262,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.8450704225352113,
|
|
25
|
+
"max_relevant_docs_per_query": 5,
|
|
26
|
+
"unique_relevant_docs": 216
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 58744859,
|
|
30
|
+
"min_top_ranked_per_query": 412813,
|
|
31
|
+
"average_top_ranked_per_query": 413696.1901408451,
|
|
32
|
+
"max_top_ranked_per_query": 413923
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 689,
|
|
4
|
+
"number_of_characters": 2093720,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 2050155,
|
|
7
|
+
"min_text_length": 28,
|
|
8
|
+
"average_text_length": 3553.1282495667247,
|
|
9
|
+
"max_text_length": 108885,
|
|
10
|
+
"unique_texts": 577
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 43565,
|
|
15
|
+
"min_text_length": 182,
|
|
16
|
+
"average_text_length": 388.9732142857143,
|
|
17
|
+
"max_text_length": 946,
|
|
18
|
+
"unique_texts": 112
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 769,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 6.866071428571429,
|
|
25
|
+
"max_relevant_docs_per_query": 12,
|
|
26
|
+
"unique_relevant_docs": 17
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 64624,
|
|
30
|
+
"min_top_ranked_per_query": 577,
|
|
31
|
+
"average_top_ranked_per_query": 577.0,
|
|
32
|
+
"max_top_ranked_per_query": 577
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 8006,
|
|
4
|
+
"number_of_characters": 2082980,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 2039415,
|
|
7
|
+
"min_text_length": 5,
|
|
8
|
+
"average_text_length": 258.350012667849,
|
|
9
|
+
"max_text_length": 2583,
|
|
10
|
+
"unique_texts": 6183
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 43565,
|
|
15
|
+
"min_text_length": 182,
|
|
16
|
+
"average_text_length": 388.9732142857143,
|
|
17
|
+
"max_text_length": 946,
|
|
18
|
+
"unique_texts": 112
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 2519,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 22.491071428571427,
|
|
25
|
+
"max_relevant_docs_per_query": 32,
|
|
26
|
+
"unique_relevant_docs": 47
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 884128,
|
|
30
|
+
"min_top_ranked_per_query": 7894,
|
|
31
|
+
"average_top_ranked_per_query": 7894.0,
|
|
32
|
+
"max_top_ranked_per_query": 7894
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 613,
|
|
4
|
+
"number_of_characters": 20489389,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20419376,
|
|
7
|
+
"min_text_length": 23,
|
|
8
|
+
"average_text_length": 39881.59375,
|
|
9
|
+
"max_text_length": 669575,
|
|
10
|
+
"unique_texts": 509
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 70013,
|
|
15
|
+
"min_text_length": 166,
|
|
16
|
+
"average_text_length": 693.1980198019802,
|
|
17
|
+
"max_text_length": 2334,
|
|
18
|
+
"unique_texts": 101
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 116,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.1485148514851484,
|
|
25
|
+
"max_relevant_docs_per_query": 5,
|
|
26
|
+
"unique_relevant_docs": 113
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 51712,
|
|
30
|
+
"min_top_ranked_per_query": 512,
|
|
31
|
+
"average_top_ranked_per_query": 512.0,
|
|
32
|
+
"max_top_ranked_per_query": 512
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 52936,
|
|
4
|
+
"number_of_characters": 20372421,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20302408,
|
|
7
|
+
"min_text_length": 3,
|
|
8
|
+
"average_text_length": 384.26058483959497,
|
|
9
|
+
"max_text_length": 226941,
|
|
10
|
+
"unique_texts": 43756
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 70013,
|
|
15
|
+
"min_text_length": 166,
|
|
16
|
+
"average_text_length": 693.1980198019802,
|
|
17
|
+
"max_text_length": 2334,
|
|
18
|
+
"unique_texts": 101
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 742,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 7.346534653465347,
|
|
25
|
+
"max_relevant_docs_per_query": 59,
|
|
26
|
+
"unique_relevant_docs": 738
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 5336335,
|
|
30
|
+
"min_top_ranked_per_query": 52835,
|
|
31
|
+
"average_top_ranked_per_query": 52835.0,
|
|
32
|
+
"max_top_ranked_per_query": 52835
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 609,
|
|
4
|
+
"number_of_characters": 18386897,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 18166762,
|
|
7
|
+
"min_text_length": 117,
|
|
8
|
+
"average_text_length": 35761.34251968504,
|
|
9
|
+
"max_text_length": 3589928,
|
|
10
|
+
"unique_texts": 505
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 220135,
|
|
15
|
+
"min_text_length": 165,
|
|
16
|
+
"average_text_length": 2179.5544554455446,
|
|
17
|
+
"max_text_length": 19341,
|
|
18
|
+
"unique_texts": 101
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 106,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0495049504950495,
|
|
25
|
+
"max_relevant_docs_per_query": 2,
|
|
26
|
+
"unique_relevant_docs": 106
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 51308,
|
|
30
|
+
"min_top_ranked_per_query": 508,
|
|
31
|
+
"average_top_ranked_per_query": 508.0,
|
|
32
|
+
"max_top_ranked_per_query": 508
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 62062,
|
|
4
|
+
"number_of_characters": 18167360,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 17947225,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 289.6535724084505,
|
|
9
|
+
"max_text_length": 28637,
|
|
10
|
+
"unique_texts": 40431
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 220135,
|
|
15
|
+
"min_text_length": 165,
|
|
16
|
+
"average_text_length": 2179.5544554455446,
|
|
17
|
+
"max_text_length": 19341,
|
|
18
|
+
"unique_texts": 101
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 553,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 5.475247524752476,
|
|
25
|
+
"max_relevant_docs_per_query": 36,
|
|
26
|
+
"unique_relevant_docs": 553
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 6258061,
|
|
30
|
+
"min_top_ranked_per_query": 61961,
|
|
31
|
+
"average_top_ranked_per_query": 61961.0,
|
|
32
|
+
"max_top_ranked_per_query": 61961
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 1975,
|
|
4
|
+
"number_of_characters": 184326754,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 184175475,
|
|
7
|
+
"min_text_length": 41,
|
|
8
|
+
"average_text_length": 99125.65931108719,
|
|
9
|
+
"max_text_length": 9182738,
|
|
10
|
+
"unique_texts": 1846
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 151279,
|
|
15
|
+
"min_text_length": 185,
|
|
16
|
+
"average_text_length": 1292.982905982906,
|
|
17
|
+
"max_text_length": 12432,
|
|
18
|
+
"unique_texts": 117
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 129,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.1025641025641026,
|
|
25
|
+
"max_relevant_docs_per_query": 2,
|
|
26
|
+
"unique_relevant_docs": 125
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 217386,
|
|
30
|
+
"min_top_ranked_per_query": 1858,
|
|
31
|
+
"average_top_ranked_per_query": 1858.0,
|
|
32
|
+
"max_top_ranked_per_query": 1858
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 107198,
|
|
4
|
+
"number_of_characters": 183652816,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 183501537,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 1713.6703710275399,
|
|
9
|
+
"max_text_length": 4000,
|
|
10
|
+
"unique_texts": 66270
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 151279,
|
|
15
|
+
"min_text_length": 185,
|
|
16
|
+
"average_text_length": 1292.982905982906,
|
|
17
|
+
"max_text_length": 12432,
|
|
18
|
+
"unique_texts": 117
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 819,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 7.0,
|
|
25
|
+
"max_relevant_docs_per_query": 59,
|
|
26
|
+
"unique_relevant_docs": 816
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 12528477,
|
|
30
|
+
"min_top_ranked_per_query": 107081,
|
|
31
|
+
"average_top_ranked_per_query": 107081.0,
|
|
32
|
+
"max_top_ranked_per_query": 107081
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"long": {
|
|
3
|
+
"num_samples": 662,
|
|
4
|
+
"number_of_characters": 21154322,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 21080575,
|
|
7
|
+
"min_text_length": 30,
|
|
8
|
+
"average_text_length": 38051.579422382674,
|
|
9
|
+
"max_text_length": 5732344,
|
|
10
|
+
"unique_texts": 551
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 73747,
|
|
15
|
+
"min_text_length": 158,
|
|
16
|
+
"average_text_length": 682.8425925925926,
|
|
17
|
+
"max_text_length": 2843,
|
|
18
|
+
"unique_texts": 108
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 129,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.1944444444444444,
|
|
25
|
+
"max_relevant_docs_per_query": 5,
|
|
26
|
+
"unique_relevant_docs": 129
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 59832,
|
|
30
|
+
"min_top_ranked_per_query": 554,
|
|
31
|
+
"average_top_ranked_per_query": 554.0,
|
|
32
|
+
"max_top_ranked_per_query": 554
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 60900,
|
|
4
|
+
"number_of_characters": 20971763,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20898016,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 343.7626003421503,
|
|
9
|
+
"max_text_length": 158296,
|
|
10
|
+
"unique_texts": 50142
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 73747,
|
|
15
|
+
"min_text_length": 158,
|
|
16
|
+
"average_text_length": 682.8425925925926,
|
|
17
|
+
"max_text_length": 2843,
|
|
18
|
+
"unique_texts": 108
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 604,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 5.592592592592593,
|
|
25
|
+
"max_relevant_docs_per_query": 59,
|
|
26
|
+
"unique_relevant_docs": 604
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 6565536,
|
|
30
|
+
"min_top_ranked_per_query": 60792,
|
|
31
|
+
"average_top_ranked_per_query": 60792.0,
|
|
32
|
+
"max_top_ranked_per_query": 60792
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 188207,
|
|
4
|
+
"number_of_characters": 141817604,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 141734227,
|
|
7
|
+
"min_text_length": 58,
|
|
8
|
+
"average_text_length": 753.8974425803981,
|
|
9
|
+
"max_text_length": 7334,
|
|
10
|
+
"unique_texts": 176508
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 83377,
|
|
15
|
+
"min_text_length": 12,
|
|
16
|
+
"average_text_length": 406.7170731707317,
|
|
17
|
+
"max_text_length": 1255,
|
|
18
|
+
"unique_texts": 201
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 469,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 2.299019607843137,
|
|
25
|
+
"max_relevant_docs_per_query": 7,
|
|
26
|
+
"unique_relevant_docs": 234
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 37946536,
|
|
30
|
+
"min_top_ranked_per_query": 176970,
|
|
31
|
+
"average_top_ranked_per_query": 185105.05365853658,
|
|
32
|
+
"max_top_ranked_per_query": 188176
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"standard": {
|
|
3
|
+
"num_samples": 23904,
|
|
4
|
+
"number_of_characters": 20825122,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20797224,
|
|
7
|
+
"min_text_length": 74,
|
|
8
|
+
"average_text_length": 872.4033726246906,
|
|
9
|
+
"max_text_length": 19104,
|
|
10
|
+
"unique_texts": 23839
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 27898,
|
|
15
|
+
"min_text_length": 13,
|
|
16
|
+
"average_text_length": 429.2,
|
|
17
|
+
"max_text_length": 1255,
|
|
18
|
+
"unique_texts": 65
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 126,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.9384615384615385,
|
|
25
|
+
"max_relevant_docs_per_query": 6,
|
|
26
|
+
"unique_relevant_docs": 95
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 1549535,
|
|
30
|
+
"min_top_ranked_per_query": 23839,
|
|
31
|
+
"average_top_ranked_per_query": 23839.0,
|
|
32
|
+
"max_top_ranked_per_query": 23839
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
mteb/evaluate.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
|
-
from collections.abc import Iterable
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from time import time
|
|
8
7
|
from typing import TYPE_CHECKING, cast
|
|
@@ -17,22 +16,25 @@ from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
|
17
16
|
from mteb.benchmarks.benchmark import Benchmark
|
|
18
17
|
from mteb.cache import ResultCache
|
|
19
18
|
from mteb.models.model_meta import ModelMeta
|
|
20
|
-
from mteb.models.models_protocols import (
|
|
21
|
-
MTEBModels,
|
|
22
|
-
)
|
|
23
19
|
from mteb.models.sentence_transformer_wrapper import (
|
|
24
20
|
CrossEncoderWrapper,
|
|
25
21
|
SentenceTransformerEncoderWrapper,
|
|
26
22
|
)
|
|
27
23
|
from mteb.results import ModelResult, TaskResult
|
|
28
24
|
from mteb.results.task_result import TaskError
|
|
29
|
-
from mteb.types import
|
|
30
|
-
from mteb.types._encoder_io import EncodeKwargs
|
|
31
|
-
from mteb.types._metadata import ModelName, Revision
|
|
25
|
+
from mteb.types import PromptType
|
|
32
26
|
|
|
33
27
|
if TYPE_CHECKING:
|
|
28
|
+
from collections.abc import Iterable
|
|
29
|
+
|
|
34
30
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
35
31
|
|
|
32
|
+
from mteb.models.models_protocols import (
|
|
33
|
+
MTEBModels,
|
|
34
|
+
)
|
|
35
|
+
from mteb.types import EncodeKwargs, HFSubset, SplitName
|
|
36
|
+
from mteb.types._metadata import ModelName, Revision
|
|
37
|
+
|
|
36
38
|
logger = logging.getLogger(__name__)
|
|
37
39
|
|
|
38
40
|
|
|
@@ -69,13 +71,13 @@ def _sanitize_model(
|
|
|
69
71
|
meta = getattr(model, "mteb_model_meta")
|
|
70
72
|
if not isinstance(meta, ModelMeta):
|
|
71
73
|
meta = ModelMeta._from_hub(None)
|
|
72
|
-
wrapped_model = cast(MTEBModels | ModelMeta, model)
|
|
74
|
+
wrapped_model = cast("MTEBModels | ModelMeta", model)
|
|
73
75
|
else:
|
|
74
76
|
meta = ModelMeta._from_hub(None) if not isinstance(model, ModelMeta) else model
|
|
75
77
|
wrapped_model = meta
|
|
76
78
|
|
|
77
|
-
model_name = cast(str, meta.name)
|
|
78
|
-
model_revision = cast(str, meta.revision)
|
|
79
|
+
model_name = cast("str", meta.name)
|
|
80
|
+
model_revision = cast("str", meta.revision)
|
|
79
81
|
|
|
80
82
|
return wrapped_model, meta, model_name, model_revision
|
|
81
83
|
|
|
@@ -132,8 +134,8 @@ def _evaluate_task(
|
|
|
132
134
|
|
|
133
135
|
task.check_if_dataset_is_superseded()
|
|
134
136
|
|
|
135
|
-
|
|
136
|
-
if not
|
|
137
|
+
data_preloaded = task.data_loaded
|
|
138
|
+
if not data_preloaded:
|
|
137
139
|
try:
|
|
138
140
|
task.load_data()
|
|
139
141
|
except DatasetNotFoundError as e:
|
|
@@ -176,7 +178,7 @@ def _evaluate_task(
|
|
|
176
178
|
kg_co2_emissions=None,
|
|
177
179
|
)
|
|
178
180
|
|
|
179
|
-
if
|
|
181
|
+
if not data_preloaded: # only unload if we loaded the data
|
|
180
182
|
task.unload_data()
|
|
181
183
|
|
|
182
184
|
return result
|
|
@@ -202,10 +204,10 @@ def _check_model_modalities(
|
|
|
202
204
|
if isinstance(tasks, AbsTask):
|
|
203
205
|
check_tasks = [tasks]
|
|
204
206
|
elif isinstance(tasks, Benchmark):
|
|
205
|
-
benchmark = cast(Benchmark, tasks)
|
|
207
|
+
benchmark = cast("Benchmark", tasks)
|
|
206
208
|
check_tasks = benchmark.tasks
|
|
207
209
|
else:
|
|
208
|
-
check_tasks = cast(Iterable[AbsTask], tasks)
|
|
210
|
+
check_tasks = cast("Iterable[AbsTask]", tasks)
|
|
209
211
|
|
|
210
212
|
warnings, errors = [], []
|
|
211
213
|
|
|
@@ -298,7 +300,7 @@ def evaluate(
|
|
|
298
300
|
changed.
|
|
299
301
|
- "only-cache": Only load the results from the cache folder and do not run the task. Useful if you just want to load the results from the
|
|
300
302
|
cache.
|
|
301
|
-
prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be
|
|
303
|
+
prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be saved in `prediction_folder/{task_name}_predictions.json`
|
|
302
304
|
show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
|
|
303
305
|
`encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
|
|
304
306
|
public_only: Run only public tasks. If None, it will attempt to run the private task.
|
|
@@ -342,7 +344,7 @@ def evaluate(
|
|
|
342
344
|
|
|
343
345
|
# AbsTaskAggregate is a special case where we have to run multiple tasks and combine the results
|
|
344
346
|
if isinstance(tasks, AbsTaskAggregate):
|
|
345
|
-
aggregated_task = cast(AbsTaskAggregate, tasks)
|
|
347
|
+
aggregated_task = cast("AbsTaskAggregate", tasks)
|
|
346
348
|
results = evaluate(
|
|
347
349
|
model,
|
|
348
350
|
aggregated_task.metadata.tasks,
|
|
@@ -365,7 +367,7 @@ def evaluate(
|
|
|
365
367
|
if isinstance(tasks, AbsTask):
|
|
366
368
|
task = tasks
|
|
367
369
|
else:
|
|
368
|
-
tasks = cast(Iterable[AbsTask], tasks)
|
|
370
|
+
tasks = cast("Iterable[AbsTask]", tasks)
|
|
369
371
|
evaluate_results = []
|
|
370
372
|
exceptions = []
|
|
371
373
|
tasks_tqdm = tqdm(
|
mteb/filter_tasks.py
CHANGED
|
@@ -1,19 +1,24 @@
|
|
|
1
1
|
"""This script contains functions that are used to get an overview of the MTEB benchmark."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
4
|
-
from
|
|
5
|
-
from typing import overload
|
|
6
|
+
from typing import TYPE_CHECKING, overload
|
|
6
7
|
|
|
7
|
-
from mteb.abstasks import (
|
|
8
|
-
AbsTask,
|
|
9
|
-
)
|
|
10
8
|
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
|
|
12
9
|
from mteb.languages import (
|
|
13
10
|
ISO_TO_LANGUAGE,
|
|
14
11
|
ISO_TO_SCRIPT,
|
|
15
12
|
)
|
|
16
|
-
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Iterable, Sequence
|
|
16
|
+
|
|
17
|
+
from mteb.abstasks import (
|
|
18
|
+
AbsTask,
|
|
19
|
+
)
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
|
|
21
|
+
from mteb.types import Modalities
|
|
17
22
|
|
|
18
23
|
logger = logging.getLogger(__name__)
|
|
19
24
|
|
mteb/get_tasks.py
CHANGED
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
"""This script contains functions that are used to get an overview of the MTEB benchmark."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import difflib
|
|
4
6
|
import logging
|
|
5
7
|
import warnings
|
|
6
8
|
from collections import Counter, defaultdict
|
|
7
|
-
from
|
|
8
|
-
from typing import Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
9
10
|
|
|
10
11
|
import pandas as pd
|
|
11
12
|
|
|
12
13
|
from mteb.abstasks import (
|
|
13
14
|
AbsTask,
|
|
14
15
|
)
|
|
15
|
-
from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
|
|
16
16
|
from mteb.filter_tasks import filter_tasks
|
|
17
|
-
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Iterable, Sequence
|
|
20
|
+
|
|
21
|
+
from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
|
|
22
|
+
from mteb.types import Modalities
|
|
18
23
|
|
|
19
24
|
logger = logging.getLogger(__name__)
|
|
20
25
|
|
|
@@ -1,10 +1,15 @@
|
|
|
1
|
-
from
|
|
2
|
-
from dataclasses import dataclass
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
from
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
from mteb.languages.check_language_code import check_language_code
|
|
7
7
|
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Iterable, Sequence
|
|
10
|
+
|
|
11
|
+
from typing_extensions import Self
|
|
12
|
+
|
|
8
13
|
|
|
9
14
|
@dataclass
|
|
10
15
|
class LanguageScripts:
|