mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +28 -17
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/__init__.py +2 -0
  37. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  38. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  39. mteb/cache.py +10 -5
  40. mteb/cli/_display_tasks.py +9 -3
  41. mteb/cli/build_cli.py +5 -2
  42. mteb/cli/generate_model_card.py +9 -2
  43. mteb/deprecated_evaluator.py +16 -12
  44. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  64. mteb/evaluate.py +20 -18
  65. mteb/filter_tasks.py +12 -7
  66. mteb/get_tasks.py +9 -4
  67. mteb/languages/language_scripts.py +8 -3
  68. mteb/leaderboard/app.py +7 -3
  69. mteb/leaderboard/table.py +7 -2
  70. mteb/load_results.py +9 -3
  71. mteb/models/abs_encoder.py +22 -12
  72. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  73. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  74. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  75. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  76. mteb/models/get_model_meta.py +11 -4
  77. mteb/models/instruct_wrapper.py +13 -5
  78. mteb/models/model_implementations/align_models.py +10 -4
  79. mteb/models/model_implementations/amazon_models.py +1 -0
  80. mteb/models/model_implementations/andersborges.py +2 -0
  81. mteb/models/model_implementations/ara_models.py +1 -0
  82. mteb/models/model_implementations/arctic_models.py +8 -0
  83. mteb/models/model_implementations/b1ade_models.py +1 -0
  84. mteb/models/model_implementations/bedrock_models.py +20 -6
  85. mteb/models/model_implementations/bge_models.py +40 -1
  86. mteb/models/model_implementations/bica_model.py +1 -0
  87. mteb/models/model_implementations/blip2_models.py +11 -4
  88. mteb/models/model_implementations/blip_models.py +17 -4
  89. mteb/models/model_implementations/bm25.py +22 -14
  90. mteb/models/model_implementations/bmretriever_models.py +10 -2
  91. mteb/models/model_implementations/cadet_models.py +1 -0
  92. mteb/models/model_implementations/cde_models.py +11 -5
  93. mteb/models/model_implementations/clip_models.py +12 -4
  94. mteb/models/model_implementations/clips_models.py +3 -0
  95. mteb/models/model_implementations/codefuse_models.py +5 -0
  96. mteb/models/model_implementations/codesage_models.py +3 -0
  97. mteb/models/model_implementations/cohere_models.py +14 -4
  98. mteb/models/model_implementations/cohere_v.py +14 -4
  99. mteb/models/model_implementations/colpali_models.py +7 -3
  100. mteb/models/model_implementations/colqwen_models.py +17 -31
  101. mteb/models/model_implementations/colsmol_models.py +3 -1
  102. mteb/models/model_implementations/conan_models.py +11 -4
  103. mteb/models/model_implementations/dino_models.py +28 -4
  104. mteb/models/model_implementations/e5_instruct.py +4 -0
  105. mteb/models/model_implementations/e5_models.py +9 -0
  106. mteb/models/model_implementations/e5_v.py +10 -4
  107. mteb/models/model_implementations/eagerworks_models.py +11 -4
  108. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  109. mteb/models/model_implementations/en_code_retriever.py +1 -0
  110. mteb/models/model_implementations/euler_models.py +1 -0
  111. mteb/models/model_implementations/evaclip_models.py +13 -4
  112. mteb/models/model_implementations/fa_models.py +9 -0
  113. mteb/models/model_implementations/facebookai.py +2 -0
  114. mteb/models/model_implementations/geogpt_models.py +1 -0
  115. mteb/models/model_implementations/gme_v_models.py +7 -3
  116. mteb/models/model_implementations/google_models.py +15 -4
  117. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  118. mteb/models/model_implementations/gritlm_models.py +2 -0
  119. mteb/models/model_implementations/gte_models.py +9 -0
  120. mteb/models/model_implementations/hinvec_models.py +6 -1
  121. mteb/models/model_implementations/human.py +1 -0
  122. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  123. mteb/models/model_implementations/inf_models.py +2 -0
  124. mteb/models/model_implementations/jasper_models.py +14 -5
  125. mteb/models/model_implementations/jina_clip.py +10 -4
  126. mteb/models/model_implementations/jina_models.py +17 -5
  127. mteb/models/model_implementations/kalm_models.py +24 -12
  128. mteb/models/model_implementations/kblab.py +1 -0
  129. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  130. mteb/models/model_implementations/kfst.py +1 -0
  131. mteb/models/model_implementations/kowshik24_models.py +1 -0
  132. mteb/models/model_implementations/lens_models.py +2 -0
  133. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  134. mteb/models/model_implementations/linq_models.py +7 -1
  135. mteb/models/model_implementations/listconranker.py +10 -4
  136. mteb/models/model_implementations/llm2clip_models.py +12 -4
  137. mteb/models/model_implementations/llm2vec_models.py +20 -6
  138. mteb/models/model_implementations/mcinext_models.py +8 -2
  139. mteb/models/model_implementations/mdbr_models.py +2 -0
  140. mteb/models/model_implementations/misc_models.py +63 -0
  141. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  142. mteb/models/model_implementations/mme5_models.py +2 -1
  143. mteb/models/model_implementations/moco_models.py +11 -4
  144. mteb/models/model_implementations/mod_models.py +2 -1
  145. mteb/models/model_implementations/model2vec_models.py +23 -4
  146. mteb/models/model_implementations/moka_models.py +3 -0
  147. mteb/models/model_implementations/nbailab.py +3 -0
  148. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  149. mteb/models/model_implementations/nomic_models.py +16 -4
  150. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  151. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  152. mteb/models/model_implementations/nvidia_models.py +15 -4
  153. mteb/models/model_implementations/octen_models.py +3 -1
  154. mteb/models/model_implementations/openai_models.py +14 -4
  155. mteb/models/model_implementations/openclip_models.py +17 -4
  156. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  157. mteb/models/model_implementations/ops_moa_models.py +9 -2
  158. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  159. mteb/models/model_implementations/pawan_models.py +1 -0
  160. mteb/models/model_implementations/piccolo_models.py +2 -0
  161. mteb/models/model_implementations/promptriever_models.py +16 -6
  162. mteb/models/model_implementations/pylate_models.py +22 -13
  163. mteb/models/model_implementations/qodo_models.py +2 -0
  164. mteb/models/model_implementations/qtack_models.py +1 -0
  165. mteb/models/model_implementations/qwen3_models.py +11 -1
  166. mteb/models/model_implementations/qzhou_models.py +2 -0
  167. mteb/models/model_implementations/random_baseline.py +4 -3
  168. mteb/models/model_implementations/rasgaard_models.py +1 -0
  169. mteb/models/model_implementations/reasonir_model.py +65 -0
  170. mteb/models/model_implementations/repllama_models.py +15 -6
  171. mteb/models/model_implementations/rerankers_custom.py +13 -4
  172. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  173. mteb/models/model_implementations/richinfoai_models.py +1 -0
  174. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  175. mteb/models/model_implementations/ruri_models.py +10 -0
  176. mteb/models/model_implementations/salesforce_models.py +10 -1
  177. mteb/models/model_implementations/samilpwc_models.py +1 -0
  178. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  179. mteb/models/model_implementations/searchmap_models.py +1 -0
  180. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  181. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  182. mteb/models/model_implementations/seed_models.py +2 -1
  183. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  184. mteb/models/model_implementations/shuu_model.py +1 -0
  185. mteb/models/model_implementations/siglip_models.py +19 -4
  186. mteb/models/model_implementations/slm_models.py +7 -4
  187. mteb/models/model_implementations/sonar_models.py +2 -1
  188. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  189. mteb/models/model_implementations/stella_models.py +6 -0
  190. mteb/models/model_implementations/tarka_models.py +2 -0
  191. mteb/models/model_implementations/text2vec_models.py +3 -0
  192. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  193. mteb/models/model_implementations/uae_models.py +10 -4
  194. mteb/models/model_implementations/vdr_models.py +8 -1
  195. mteb/models/model_implementations/vi_vn_models.py +6 -0
  196. mteb/models/model_implementations/vista_models.py +11 -4
  197. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  198. mteb/models/model_implementations/voyage_models.py +25 -4
  199. mteb/models/model_implementations/voyage_v.py +11 -6
  200. mteb/models/model_implementations/xyz_models.py +1 -0
  201. mteb/models/model_implementations/youtu_models.py +1 -0
  202. mteb/models/model_implementations/yuan_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models_en.py +2 -1
  204. mteb/models/model_meta.py +47 -9
  205. mteb/models/models_protocols.py +19 -18
  206. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  207. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  208. mteb/models/search_wrappers.py +19 -12
  209. mteb/models/sentence_transformer_wrapper.py +4 -3
  210. mteb/models/vllm_wrapper.py +8 -6
  211. mteb/results/benchmark_results.py +22 -17
  212. mteb/results/model_result.py +21 -15
  213. mteb/results/task_result.py +15 -9
  214. mteb/similarity_functions.py +8 -2
  215. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  216. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  220. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  223. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  224. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  225. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  226. mteb/tasks/retrieval/eng/__init__.py +42 -0
  227. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  228. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  229. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  230. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  231. mteb/types/_encoder_io.py +1 -1
  232. mteb/types/statistics.py +9 -2
  233. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  234. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
  235. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  236. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  237. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  238. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 414074,
4
+ "number_of_characters": 438348000,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 438140779,
7
+ "min_text_length": 75,
8
+ "average_text_length": 1058.4849178125876,
9
+ "max_text_length": 103665,
10
+ "unique_texts": 413932
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 207221,
15
+ "min_text_length": 422,
16
+ "average_text_length": 1459.3028169014085,
17
+ "max_text_length": 3964,
18
+ "unique_texts": 142
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 262,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.8450704225352113,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 216
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 58744859,
30
+ "min_top_ranked_per_query": 412813,
31
+ "average_top_ranked_per_query": 413696.1901408451,
32
+ "max_top_ranked_per_query": 413923
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 689,
4
+ "number_of_characters": 2093720,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 2050155,
7
+ "min_text_length": 28,
8
+ "average_text_length": 3553.1282495667247,
9
+ "max_text_length": 108885,
10
+ "unique_texts": 577
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 43565,
15
+ "min_text_length": 182,
16
+ "average_text_length": 388.9732142857143,
17
+ "max_text_length": 946,
18
+ "unique_texts": 112
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 769,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 6.866071428571429,
25
+ "max_relevant_docs_per_query": 12,
26
+ "unique_relevant_docs": 17
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 64624,
30
+ "min_top_ranked_per_query": 577,
31
+ "average_top_ranked_per_query": 577.0,
32
+ "max_top_ranked_per_query": 577
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 8006,
4
+ "number_of_characters": 2082980,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 2039415,
7
+ "min_text_length": 5,
8
+ "average_text_length": 258.350012667849,
9
+ "max_text_length": 2583,
10
+ "unique_texts": 6183
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 43565,
15
+ "min_text_length": 182,
16
+ "average_text_length": 388.9732142857143,
17
+ "max_text_length": 946,
18
+ "unique_texts": 112
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2519,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 22.491071428571427,
25
+ "max_relevant_docs_per_query": 32,
26
+ "unique_relevant_docs": 47
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 884128,
30
+ "min_top_ranked_per_query": 7894,
31
+ "average_top_ranked_per_query": 7894.0,
32
+ "max_top_ranked_per_query": 7894
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 613,
4
+ "number_of_characters": 20489389,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20419376,
7
+ "min_text_length": 23,
8
+ "average_text_length": 39881.59375,
9
+ "max_text_length": 669575,
10
+ "unique_texts": 509
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 70013,
15
+ "min_text_length": 166,
16
+ "average_text_length": 693.1980198019802,
17
+ "max_text_length": 2334,
18
+ "unique_texts": 101
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 116,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1485148514851484,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 113
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 51712,
30
+ "min_top_ranked_per_query": 512,
31
+ "average_top_ranked_per_query": 512.0,
32
+ "max_top_ranked_per_query": 512
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 52936,
4
+ "number_of_characters": 20372421,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20302408,
7
+ "min_text_length": 3,
8
+ "average_text_length": 384.26058483959497,
9
+ "max_text_length": 226941,
10
+ "unique_texts": 43756
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 70013,
15
+ "min_text_length": 166,
16
+ "average_text_length": 693.1980198019802,
17
+ "max_text_length": 2334,
18
+ "unique_texts": 101
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 742,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 7.346534653465347,
25
+ "max_relevant_docs_per_query": 59,
26
+ "unique_relevant_docs": 738
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 5336335,
30
+ "min_top_ranked_per_query": 52835,
31
+ "average_top_ranked_per_query": 52835.0,
32
+ "max_top_ranked_per_query": 52835
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 609,
4
+ "number_of_characters": 18386897,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 18166762,
7
+ "min_text_length": 117,
8
+ "average_text_length": 35761.34251968504,
9
+ "max_text_length": 3589928,
10
+ "unique_texts": 505
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 220135,
15
+ "min_text_length": 165,
16
+ "average_text_length": 2179.5544554455446,
17
+ "max_text_length": 19341,
18
+ "unique_texts": 101
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 106,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0495049504950495,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 106
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 51308,
30
+ "min_top_ranked_per_query": 508,
31
+ "average_top_ranked_per_query": 508.0,
32
+ "max_top_ranked_per_query": 508
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 62062,
4
+ "number_of_characters": 18167360,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 17947225,
7
+ "min_text_length": 1,
8
+ "average_text_length": 289.6535724084505,
9
+ "max_text_length": 28637,
10
+ "unique_texts": 40431
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 220135,
15
+ "min_text_length": 165,
16
+ "average_text_length": 2179.5544554455446,
17
+ "max_text_length": 19341,
18
+ "unique_texts": 101
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 553,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 5.475247524752476,
25
+ "max_relevant_docs_per_query": 36,
26
+ "unique_relevant_docs": 553
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 6258061,
30
+ "min_top_ranked_per_query": 61961,
31
+ "average_top_ranked_per_query": 61961.0,
32
+ "max_top_ranked_per_query": 61961
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 1975,
4
+ "number_of_characters": 184326754,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 184175475,
7
+ "min_text_length": 41,
8
+ "average_text_length": 99125.65931108719,
9
+ "max_text_length": 9182738,
10
+ "unique_texts": 1846
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 151279,
15
+ "min_text_length": 185,
16
+ "average_text_length": 1292.982905982906,
17
+ "max_text_length": 12432,
18
+ "unique_texts": 117
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 129,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1025641025641026,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 125
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 217386,
30
+ "min_top_ranked_per_query": 1858,
31
+ "average_top_ranked_per_query": 1858.0,
32
+ "max_top_ranked_per_query": 1858
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 107198,
4
+ "number_of_characters": 183652816,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 183501537,
7
+ "min_text_length": 1,
8
+ "average_text_length": 1713.6703710275399,
9
+ "max_text_length": 4000,
10
+ "unique_texts": 66270
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 151279,
15
+ "min_text_length": 185,
16
+ "average_text_length": 1292.982905982906,
17
+ "max_text_length": 12432,
18
+ "unique_texts": 117
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 819,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 7.0,
25
+ "max_relevant_docs_per_query": 59,
26
+ "unique_relevant_docs": 816
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 12528477,
30
+ "min_top_ranked_per_query": 107081,
31
+ "average_top_ranked_per_query": 107081.0,
32
+ "max_top_ranked_per_query": 107081
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "long": {
3
+ "num_samples": 662,
4
+ "number_of_characters": 21154322,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 21080575,
7
+ "min_text_length": 30,
8
+ "average_text_length": 38051.579422382674,
9
+ "max_text_length": 5732344,
10
+ "unique_texts": 551
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 73747,
15
+ "min_text_length": 158,
16
+ "average_text_length": 682.8425925925926,
17
+ "max_text_length": 2843,
18
+ "unique_texts": 108
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 129,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1944444444444444,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 129
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 59832,
30
+ "min_top_ranked_per_query": 554,
31
+ "average_top_ranked_per_query": 554.0,
32
+ "max_top_ranked_per_query": 554
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 60900,
4
+ "number_of_characters": 20971763,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20898016,
7
+ "min_text_length": 1,
8
+ "average_text_length": 343.7626003421503,
9
+ "max_text_length": 158296,
10
+ "unique_texts": 50142
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 73747,
15
+ "min_text_length": 158,
16
+ "average_text_length": 682.8425925925926,
17
+ "max_text_length": 2843,
18
+ "unique_texts": 108
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 604,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 5.592592592592593,
25
+ "max_relevant_docs_per_query": 59,
26
+ "unique_relevant_docs": 604
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 6565536,
30
+ "min_top_ranked_per_query": 60792,
31
+ "average_top_ranked_per_query": 60792.0,
32
+ "max_top_ranked_per_query": 60792
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 188207,
4
+ "number_of_characters": 141817604,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 141734227,
7
+ "min_text_length": 58,
8
+ "average_text_length": 753.8974425803981,
9
+ "max_text_length": 7334,
10
+ "unique_texts": 176508
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 83377,
15
+ "min_text_length": 12,
16
+ "average_text_length": 406.7170731707317,
17
+ "max_text_length": 1255,
18
+ "unique_texts": 201
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 469,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 2.299019607843137,
25
+ "max_relevant_docs_per_query": 7,
26
+ "unique_relevant_docs": 234
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 37946536,
30
+ "min_top_ranked_per_query": 176970,
31
+ "average_top_ranked_per_query": 185105.05365853658,
32
+ "max_top_ranked_per_query": 188176
33
+ }
34
+ }
35
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 23904,
4
+ "number_of_characters": 20825122,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20797224,
7
+ "min_text_length": 74,
8
+ "average_text_length": 872.4033726246906,
9
+ "max_text_length": 19104,
10
+ "unique_texts": 23839
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 27898,
15
+ "min_text_length": 13,
16
+ "average_text_length": 429.2,
17
+ "max_text_length": 1255,
18
+ "unique_texts": 65
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 126,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.9384615384615385,
25
+ "max_relevant_docs_per_query": 6,
26
+ "unique_relevant_docs": 95
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 1549535,
30
+ "min_top_ranked_per_query": 23839,
31
+ "average_top_ranked_per_query": 23839.0,
32
+ "max_top_ranked_per_query": 23839
33
+ }
34
+ }
35
+ }
mteb/evaluate.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import warnings
5
- from collections.abc import Iterable
6
5
  from pathlib import Path
7
6
  from time import time
8
7
  from typing import TYPE_CHECKING, cast
@@ -17,22 +16,25 @@ from mteb.abstasks.aggregated_task import AbsTaskAggregate
17
16
  from mteb.benchmarks.benchmark import Benchmark
18
17
  from mteb.cache import ResultCache
19
18
  from mteb.models.model_meta import ModelMeta
20
- from mteb.models.models_protocols import (
21
- MTEBModels,
22
- )
23
19
  from mteb.models.sentence_transformer_wrapper import (
24
20
  CrossEncoderWrapper,
25
21
  SentenceTransformerEncoderWrapper,
26
22
  )
27
23
  from mteb.results import ModelResult, TaskResult
28
24
  from mteb.results.task_result import TaskError
29
- from mteb.types import HFSubset, PromptType, SplitName
30
- from mteb.types._encoder_io import EncodeKwargs
31
- from mteb.types._metadata import ModelName, Revision
25
+ from mteb.types import PromptType
32
26
 
33
27
  if TYPE_CHECKING:
28
+ from collections.abc import Iterable
29
+
34
30
  from sentence_transformers import CrossEncoder, SentenceTransformer
35
31
 
32
+ from mteb.models.models_protocols import (
33
+ MTEBModels,
34
+ )
35
+ from mteb.types import EncodeKwargs, HFSubset, SplitName
36
+ from mteb.types._metadata import ModelName, Revision
37
+
36
38
  logger = logging.getLogger(__name__)
37
39
 
38
40
 
@@ -69,13 +71,13 @@ def _sanitize_model(
69
71
  meta = getattr(model, "mteb_model_meta")
70
72
  if not isinstance(meta, ModelMeta):
71
73
  meta = ModelMeta._from_hub(None)
72
- wrapped_model = cast(MTEBModels | ModelMeta, model)
74
+ wrapped_model = cast("MTEBModels | ModelMeta", model)
73
75
  else:
74
76
  meta = ModelMeta._from_hub(None) if not isinstance(model, ModelMeta) else model
75
77
  wrapped_model = meta
76
78
 
77
- model_name = cast(str, meta.name)
78
- model_revision = cast(str, meta.revision)
79
+ model_name = cast("str", meta.name)
80
+ model_revision = cast("str", meta.revision)
79
81
 
80
82
  return wrapped_model, meta, model_name, model_revision
81
83
 
@@ -132,8 +134,8 @@ def _evaluate_task(
132
134
 
133
135
  task.check_if_dataset_is_superseded()
134
136
 
135
- data_loaded = task.data_loaded
136
- if not data_loaded:
137
+ data_preloaded = task.data_loaded
138
+ if not data_preloaded:
137
139
  try:
138
140
  task.load_data()
139
141
  except DatasetNotFoundError as e:
@@ -176,7 +178,7 @@ def _evaluate_task(
176
178
  kg_co2_emissions=None,
177
179
  )
178
180
 
179
- if data_loaded: # only unload if we loaded the data
181
+ if not data_preloaded: # only unload if we loaded the data
180
182
  task.unload_data()
181
183
 
182
184
  return result
@@ -202,10 +204,10 @@ def _check_model_modalities(
202
204
  if isinstance(tasks, AbsTask):
203
205
  check_tasks = [tasks]
204
206
  elif isinstance(tasks, Benchmark):
205
- benchmark = cast(Benchmark, tasks)
207
+ benchmark = cast("Benchmark", tasks)
206
208
  check_tasks = benchmark.tasks
207
209
  else:
208
- check_tasks = cast(Iterable[AbsTask], tasks)
210
+ check_tasks = cast("Iterable[AbsTask]", tasks)
209
211
 
210
212
  warnings, errors = [], []
211
213
 
@@ -298,7 +300,7 @@ def evaluate(
298
300
  changed.
299
301
  - "only-cache": Only load the results from the cache folder and do not run the task. Useful if you just want to load the results from the
300
302
  cache.
301
- prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be sabed in `prediction_folder/{task_name}_predictions.json`
303
+ prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be saved in `prediction_folder/{task_name}_predictions.json`
302
304
  show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
303
305
  `encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
304
306
  public_only: Run only public tasks. If None, it will attempt to run the private task.
@@ -342,7 +344,7 @@ def evaluate(
342
344
 
343
345
  # AbsTaskAggregate is a special case where we have to run multiple tasks and combine the results
344
346
  if isinstance(tasks, AbsTaskAggregate):
345
- aggregated_task = cast(AbsTaskAggregate, tasks)
347
+ aggregated_task = cast("AbsTaskAggregate", tasks)
346
348
  results = evaluate(
347
349
  model,
348
350
  aggregated_task.metadata.tasks,
@@ -365,7 +367,7 @@ def evaluate(
365
367
  if isinstance(tasks, AbsTask):
366
368
  task = tasks
367
369
  else:
368
- tasks = cast(Iterable[AbsTask], tasks)
370
+ tasks = cast("Iterable[AbsTask]", tasks)
369
371
  evaluate_results = []
370
372
  exceptions = []
371
373
  tasks_tqdm = tqdm(
mteb/filter_tasks.py CHANGED
@@ -1,19 +1,24 @@
1
1
  """This script contains functions that are used to get an overview of the MTEB benchmark."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
4
- from collections.abc import Iterable, Sequence
5
- from typing import overload
6
+ from typing import TYPE_CHECKING, overload
6
7
 
7
- from mteb.abstasks import (
8
- AbsTask,
9
- )
10
8
  from mteb.abstasks.aggregated_task import AbsTaskAggregate
11
- from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
12
9
  from mteb.languages import (
13
10
  ISO_TO_LANGUAGE,
14
11
  ISO_TO_SCRIPT,
15
12
  )
16
- from mteb.types import Modalities
13
+
14
+ if TYPE_CHECKING:
15
+ from collections.abc import Iterable, Sequence
16
+
17
+ from mteb.abstasks import (
18
+ AbsTask,
19
+ )
20
+ from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
21
+ from mteb.types import Modalities
17
22
 
18
23
  logger = logging.getLogger(__name__)
19
24
 
mteb/get_tasks.py CHANGED
@@ -1,20 +1,25 @@
1
1
  """This script contains functions that are used to get an overview of the MTEB benchmark."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import difflib
4
6
  import logging
5
7
  import warnings
6
8
  from collections import Counter, defaultdict
7
- from collections.abc import Iterable, Sequence
8
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
9
10
 
10
11
  import pandas as pd
11
12
 
12
13
  from mteb.abstasks import (
13
14
  AbsTask,
14
15
  )
15
- from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
16
16
  from mteb.filter_tasks import filter_tasks
17
- from mteb.types import Modalities
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Iterable, Sequence
20
+
21
+ from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
22
+ from mteb.types import Modalities
18
23
 
19
24
  logger = logging.getLogger(__name__)
20
25
 
@@ -1,10 +1,15 @@
1
- from collections.abc import Iterable, Sequence
2
- from dataclasses import dataclass
1
+ from __future__ import annotations
3
2
 
4
- from typing_extensions import Self
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  from mteb.languages.check_language_code import check_language_code
7
7
 
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Iterable, Sequence
10
+
11
+ from typing_extensions import Self
12
+
8
13
 
9
14
  @dataclass
10
15
  class LanguageScripts: