mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 74457,
4
+ "number_of_characters": 76109543,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 75549698,
7
+ "min_text_length": 121,
8
+ "average_text_length": 1087.7189916063176,
9
+ "max_text_length": 25438,
10
+ "unique_texts": 69150
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 559845,
15
+ "min_text_length": 57,
16
+ "average_text_length": 111.969,
17
+ "max_text_length": 224,
18
+ "unique_texts": 5000
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 5000,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 5000
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,116 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 30300,
4
+ "number_of_characters": 17320243,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 17276572,
7
+ "min_text_length": 316,
8
+ "average_text_length": 575.8857333333333,
9
+ "max_text_length": 1008,
10
+ "unique_texts": 28361
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 43671,
15
+ "min_text_length": 67,
16
+ "average_text_length": 145.57,
17
+ "max_text_length": 345,
18
+ "unique_texts": 300
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 300,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 300
27
+ },
28
+ "top_ranked_statistics": null,
29
+ "hf_subset_descriptive_stats": {
30
+ "en": {
31
+ "num_samples": 10100,
32
+ "number_of_characters": 5517678,
33
+ "documents_text_statistics": {
34
+ "total_text_length": 5503635,
35
+ "min_text_length": 316,
36
+ "average_text_length": 550.3635,
37
+ "max_text_length": 726,
38
+ "unique_texts": 9422
39
+ },
40
+ "documents_image_statistics": null,
41
+ "queries_text_statistics": {
42
+ "total_text_length": 14043,
43
+ "min_text_length": 68,
44
+ "average_text_length": 140.43,
45
+ "max_text_length": 305,
46
+ "unique_texts": 100
47
+ },
48
+ "queries_image_statistics": null,
49
+ "relevant_docs_statistics": {
50
+ "num_relevant_docs": 100,
51
+ "min_relevant_docs_per_query": 1,
52
+ "average_relevant_docs_per_query": 1.0,
53
+ "max_relevant_docs_per_query": 1,
54
+ "unique_relevant_docs": 100
55
+ },
56
+ "top_ranked_statistics": null
57
+ },
58
+ "fi": {
59
+ "num_samples": 10100,
60
+ "number_of_characters": 5953462,
61
+ "documents_text_statistics": {
62
+ "total_text_length": 5938809,
63
+ "min_text_length": 326,
64
+ "average_text_length": 593.8809,
65
+ "max_text_length": 1008,
66
+ "unique_texts": 9422
67
+ },
68
+ "documents_image_statistics": null,
69
+ "queries_text_statistics": {
70
+ "total_text_length": 14653,
71
+ "min_text_length": 67,
72
+ "average_text_length": 146.53,
73
+ "max_text_length": 345,
74
+ "unique_texts": 100
75
+ },
76
+ "queries_image_statistics": null,
77
+ "relevant_docs_statistics": {
78
+ "num_relevant_docs": 100,
79
+ "min_relevant_docs_per_query": 1,
80
+ "average_relevant_docs_per_query": 1.0,
81
+ "max_relevant_docs_per_query": 1,
82
+ "unique_relevant_docs": 100
83
+ },
84
+ "top_ranked_statistics": null
85
+ },
86
+ "pt": {
87
+ "num_samples": 10100,
88
+ "number_of_characters": 5849103,
89
+ "documents_text_statistics": {
90
+ "total_text_length": 5834128,
91
+ "min_text_length": 325,
92
+ "average_text_length": 583.4128,
93
+ "max_text_length": 774,
94
+ "unique_texts": 9517
95
+ },
96
+ "documents_image_statistics": null,
97
+ "queries_text_statistics": {
98
+ "total_text_length": 14975,
99
+ "min_text_length": 69,
100
+ "average_text_length": 149.75,
101
+ "max_text_length": 320,
102
+ "unique_texts": 100
103
+ },
104
+ "queries_image_statistics": null,
105
+ "relevant_docs_statistics": {
106
+ "num_relevant_docs": 100,
107
+ "min_relevant_docs_per_query": 1,
108
+ "average_relevant_docs_per_query": 1.0,
109
+ "max_relevant_docs_per_query": 1,
110
+ "unique_relevant_docs": 100
111
+ },
112
+ "top_ranked_statistics": null
113
+ }
114
+ }
115
+ }
116
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 102198,
4
+ "number_of_characters": 47870352,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 47719757,
7
+ "min_text_length": 9,
8
+ "average_text_length": 472.01951591046225,
9
+ "max_text_length": 8686,
10
+ "unique_texts": 101097
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 150595,
15
+ "min_text_length": 30,
16
+ "average_text_length": 136.78019981834694,
17
+ "max_text_length": 404,
18
+ "unique_texts": 1099
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3401,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 3.089009990917348,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 1123
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 132137,
4
+ "number_of_characters": 43323279,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 43311486,
7
+ "min_text_length": 11,
8
+ "average_text_length": 328.5778249819823,
9
+ "max_text_length": 8576,
10
+ "unique_texts": 131814
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 11793,
15
+ "min_text_length": 6,
16
+ "average_text_length": 36.62422360248447,
17
+ "max_text_length": 100,
18
+ "unique_texts": 321
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 11620,
23
+ "min_relevant_docs_per_query": 31,
24
+ "average_relevant_docs_per_query": 36.08695652173913,
25
+ "max_relevant_docs_per_query": 1288,
26
+ "unique_relevant_docs": 32537
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 106558,
4
+ "number_of_characters": 48164581,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 47886101,
7
+ "min_text_length": 9,
8
+ "average_text_length": 472.6783768310499,
9
+ "max_text_length": 8689,
10
+ "unique_texts": 101308
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 278480,
15
+ "min_text_length": 11,
16
+ "average_text_length": 53.04380952380952,
17
+ "max_text_length": 196,
18
+ "unique_texts": 5124
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 6254,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1912380952380952,
25
+ "max_relevant_docs_per_query": 15,
26
+ "unique_relevant_docs": 1324
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 117974,
4
+ "number_of_characters": 35927363,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 35335613,
7
+ "min_text_length": 22,
8
+ "average_text_length": 316.47705838625023,
9
+ "max_text_length": 4105,
10
+ "unique_texts": 111651
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 591750,
15
+ "min_text_length": 21,
16
+ "average_text_length": 93.61651637399146,
17
+ "max_text_length": 280,
18
+ "unique_texts": 6321
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 12642,
23
+ "min_relevant_docs_per_query": 2,
24
+ "average_relevant_docs_per_query": 2.0,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 11874
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "dev": {
3
+ "num_samples": 107153,
4
+ "number_of_characters": 33316879,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 33200903,
7
+ "min_text_length": 2,
8
+ "average_text_length": 320.30199218561575,
9
+ "max_text_length": 1712,
10
+ "unique_texts": 103641
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 115976,
15
+ "min_text_length": 8,
16
+ "average_text_length": 33.15494568324757,
17
+ "max_text_length": 190,
18
+ "unique_texts": 3498
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3700,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0577472841623785,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 3698
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 104095,
4
+ "number_of_characters": 52312680,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 52220289,
7
+ "min_text_length": 10,
8
+ "average_text_length": 510.98673124908265,
9
+ "max_text_length": 10245,
10
+ "unique_texts": 102181
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 92391,
15
+ "min_text_length": 22,
16
+ "average_text_length": 48.62684210526316,
17
+ "max_text_length": 113,
18
+ "unique_texts": 1900
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2283,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.201578947368421,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 2283
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 20561,
4
+ "number_of_characters": 10832770,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 9929303,
7
+ "min_text_length": 9,
8
+ "average_text_length": 938.8524016641452,
9
+ "max_text_length": 6319,
10
+ "unique_texts": 10573
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 903467,
15
+ "min_text_length": 13,
16
+ "average_text_length": 90.48242363545317,
17
+ "max_text_length": 228,
18
+ "unique_texts": 9985
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 11158,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1174762143214823,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 10576
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
mteb/evaluate.py CHANGED
@@ -1,10 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from collections.abc import Iterable
4
+ import warnings
5
5
  from pathlib import Path
6
6
  from time import time
7
- from typing import TYPE_CHECKING, Any, cast
7
+ from typing import TYPE_CHECKING, cast
8
8
 
9
9
  from datasets.exceptions import DatasetNotFoundError
10
10
  from tqdm.auto import tqdm
@@ -13,25 +13,28 @@ from mteb._helpful_enum import HelpfulStrEnum
13
13
  from mteb.abstasks import AbsTaskRetrieval
14
14
  from mteb.abstasks.abstask import AbsTask
15
15
  from mteb.abstasks.aggregated_task import AbsTaskAggregate
16
+ from mteb.benchmarks.benchmark import Benchmark
16
17
  from mteb.cache import ResultCache
17
18
  from mteb.models.model_meta import ModelMeta
18
- from mteb.models.models_protocols import (
19
- CrossEncoderProtocol,
20
- EncoderProtocol,
21
- MTEBModels,
22
- )
23
19
  from mteb.models.sentence_transformer_wrapper import (
24
20
  CrossEncoderWrapper,
25
21
  SentenceTransformerEncoderWrapper,
26
22
  )
27
23
  from mteb.results import ModelResult, TaskResult
28
24
  from mteb.results.task_result import TaskError
29
- from mteb.types import HFSubset, PromptType, SplitName
30
- from mteb.types._metadata import ModelName, Revision
25
+ from mteb.types import PromptType
31
26
 
32
27
  if TYPE_CHECKING:
28
+ from collections.abc import Iterable
29
+
33
30
  from sentence_transformers import CrossEncoder, SentenceTransformer
34
31
 
32
+ from mteb.models.models_protocols import (
33
+ MTEBModels,
34
+ )
35
+ from mteb.types import EncodeKwargs, HFSubset, SplitName
36
+ from mteb.types._metadata import ModelName, Revision
37
+
35
38
  logger = logging.getLogger(__name__)
36
39
 
37
40
 
@@ -57,27 +60,26 @@ def _sanitize_model(
57
60
  ) -> tuple[MTEBModels | ModelMeta, ModelMeta, ModelName, Revision]:
58
61
  from sentence_transformers import CrossEncoder, SentenceTransformer
59
62
 
63
+ wrapped_model: MTEBModels | ModelMeta
60
64
  if isinstance(model, SentenceTransformer):
61
- _mdl = SentenceTransformerEncoderWrapper(model)
62
- meta = _mdl.mteb_model_meta
63
- _mdl = cast(EncoderProtocol, _mdl)
64
- model = _mdl
65
+ wrapped_model = SentenceTransformerEncoderWrapper(model)
66
+ meta = wrapped_model.mteb_model_meta
65
67
  elif isinstance(model, CrossEncoder):
66
- _mdl = CrossEncoderWrapper(model)
67
- _mdl = cast(CrossEncoderProtocol, _mdl)
68
- meta = _mdl.mteb_model_meta
69
- model = _mdl
68
+ wrapped_model = CrossEncoderWrapper(model)
69
+ meta = wrapped_model.mteb_model_meta
70
70
  elif hasattr(model, "mteb_model_meta"):
71
- meta = model.mteb_model_meta # type: ignore[attr-defined]
71
+ meta = getattr(model, "mteb_model_meta")
72
72
  if not isinstance(meta, ModelMeta):
73
- meta = ModelMeta.from_hub(None)
73
+ meta = ModelMeta._from_hub(None)
74
+ wrapped_model = cast("MTEBModels | ModelMeta", model)
74
75
  else:
75
- meta = ModelMeta.from_hub(None) if not isinstance(model, ModelMeta) else model
76
+ meta = ModelMeta._from_hub(None) if not isinstance(model, ModelMeta) else model
77
+ wrapped_model = meta
76
78
 
77
- model_name = cast(str, meta.name)
78
- model_revision = cast(str, meta.revision)
79
+ model_name = cast("str", meta.name)
80
+ model_revision = cast("str", meta.revision)
79
81
 
80
- return model, meta, model_name, model_revision
82
+ return wrapped_model, meta, model_name, model_revision
81
83
 
82
84
 
83
85
  def _evaluate_task(
@@ -86,9 +88,10 @@ def _evaluate_task(
86
88
  *,
87
89
  splits: dict[SplitName, list[HFSubset]],
88
90
  co2_tracker: bool | None,
89
- encode_kwargs: dict[str, Any],
91
+ encode_kwargs: EncodeKwargs,
90
92
  prediction_folder: Path | None,
91
93
  public_only: bool | None,
94
+ num_proc: int = 1,
92
95
  ) -> TaskResult | TaskError:
93
96
  """The core logic to run a model on a given task. See `evaluate` for more details.
94
97
 
@@ -122,24 +125,28 @@ def _evaluate_task(
122
125
  co2_tracker=False,
123
126
  prediction_folder=prediction_folder,
124
127
  public_only=public_only,
128
+ num_proc=num_proc,
125
129
  )
126
- result.kg_co2_emissions = tracker.final_emissions
130
+ if isinstance(result, TaskResult):
131
+ result.kg_co2_emissions = tracker.final_emissions
127
132
  return result
128
133
 
129
134
  task_results = {}
130
135
 
131
136
  task.check_if_dataset_is_superseded()
132
137
 
133
- data_loaded = task.data_loaded
134
- if not data_loaded:
138
+ data_preloaded = task.data_loaded
139
+ if not data_preloaded:
135
140
  try:
136
- task.load_data()
141
+ task.load_data(num_proc=num_proc)
137
142
  except DatasetNotFoundError as e:
138
143
  if not task.metadata.is_public and public_only is None:
139
- logger.warning(
144
+ msg = (
140
145
  f"Dataset for private task '{task.metadata.name}' not found. "
141
146
  "Make sure you have access to the dataset and that you have set up the authentication correctly. To disable this warning set `public_only=False`"
142
147
  )
148
+ logger.warning(msg)
149
+ warnings.warn(msg)
143
150
  return TaskError(
144
151
  task_name=task.metadata.name,
145
152
  exception=str(e),
@@ -147,7 +154,7 @@ def _evaluate_task(
147
154
  if public_only is False:
148
155
  raise e
149
156
 
150
- evaluation_time = 0
157
+ evaluation_time = 0.0
151
158
 
152
159
  for split, hf_subsets in splits.items():
153
160
  tick = time()
@@ -157,6 +164,7 @@ def _evaluate_task(
157
164
  subsets_to_run=hf_subsets,
158
165
  encode_kwargs=encode_kwargs,
159
166
  prediction_folder=prediction_folder,
167
+ num_proc=num_proc,
160
168
  )
161
169
  tock = time()
162
170
 
@@ -172,7 +180,7 @@ def _evaluate_task(
172
180
  kg_co2_emissions=None,
173
181
  )
174
182
 
175
- if data_loaded: # only unload if we loaded the data
183
+ if not data_preloaded: # only unload if we loaded the data
176
184
  task.unload_data()
177
185
 
178
186
  return result
@@ -194,12 +202,18 @@ def _check_model_modalities(
194
202
  return
195
203
 
196
204
  model_modalities = set(model.modalities)
205
+ check_tasks: Iterable[AbsTask] = []
197
206
  if isinstance(tasks, AbsTask):
198
- tasks = [tasks]
207
+ check_tasks = [tasks]
208
+ elif isinstance(tasks, Benchmark):
209
+ benchmark = cast("Benchmark", tasks)
210
+ check_tasks = benchmark.tasks
211
+ else:
212
+ check_tasks = cast("Iterable[AbsTask]", tasks)
199
213
 
200
214
  warnings, errors = [], []
201
215
 
202
- for task in tasks:
216
+ for task in check_tasks:
203
217
  # only retrieval tasks have different modalities for query and document and can be run with partial overlaps
204
218
  if isinstance(task, AbsTaskRetrieval):
205
219
  query_mods = set(task.metadata.get_modalities(PromptType.query))
@@ -262,12 +276,13 @@ def evaluate(
262
276
  *,
263
277
  co2_tracker: bool | None = None,
264
278
  raise_error: bool = True,
265
- encode_kwargs: dict[str, Any] | None = None,
279
+ encode_kwargs: EncodeKwargs | None = None,
266
280
  cache: ResultCache | None = ResultCache(),
267
281
  overwrite_strategy: str | OverwriteStrategy = "only-missing",
268
282
  prediction_folder: Path | str | None = None,
269
283
  show_progress_bar: bool = True,
270
284
  public_only: bool | None = None,
285
+ num_proc: int = 1,
271
286
  ) -> ModelResult:
272
287
  """This function runs a model on a given task and returns the results.
273
288
 
@@ -276,7 +291,7 @@ def evaluate(
276
291
  tasks: A task to run.
277
292
  co2_tracker: If True, track the CO₂ emissions of the evaluation, required codecarbon to be installed, which can be installed using
278
293
  `pip install mteb[codecarbon]`. If none is passed co2 tracking will only be run if codecarbon is installed.
279
- encode_kwargs: Additional keyword arguments passed to the models `encode` method.
294
+ encode_kwargs: Additional keyword arguments passed to the models `encode` and `load_data` methods;
280
295
  raise_error: If True, raise an error if the task fails. If False, return an empty list.
281
296
  cache: The cache to use for loading the results. If None, then no cache will be used. The default cache saved the cache in the
282
297
  `~/.cache/mteb` directory. It can be overridden by setting the `MTEB_CACHE` environment variable to a different directory or by directly
@@ -288,10 +303,11 @@ def evaluate(
288
303
  changed.
289
304
  - "only-cache": Only load the results from the cache folder and do not run the task. Useful if you just want to load the results from the
290
305
  cache.
291
- prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be sabed in `prediction_folder/{task_name}_predictions.json`
306
+ prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be saved in `prediction_folder/{task_name}_predictions.json`
292
307
  show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
293
308
  `encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
294
309
  public_only: Run only public tasks. If None, it will attempt to run the private task.
310
+ num_proc: Number of processes to use during data loading and transformation. Defaults to 1.
295
311
 
296
312
  Returns:
297
313
  The results of the evaluation.
@@ -332,10 +348,10 @@ def evaluate(
332
348
 
333
349
  # AbsTaskAggregate is a special case where we have to run multiple tasks and combine the results
334
350
  if isinstance(tasks, AbsTaskAggregate):
335
- task = cast(AbsTaskAggregate, tasks)
351
+ aggregated_task = cast("AbsTaskAggregate", tasks)
336
352
  results = evaluate(
337
353
  model,
338
- task.metadata.tasks,
354
+ aggregated_task.metadata.tasks,
339
355
  co2_tracker=co2_tracker,
340
356
  raise_error=raise_error,
341
357
  encode_kwargs=encode_kwargs,
@@ -344,18 +360,23 @@ def evaluate(
344
360
  prediction_folder=prediction_folder,
345
361
  show_progress_bar=show_progress_bar,
346
362
  public_only=public_only,
363
+ num_proc=num_proc,
347
364
  )
348
- result = task.combine_task_results(results.task_results)
365
+ combined_results = aggregated_task.combine_task_results(results.task_results)
366
+ if cache:
367
+ cache.save_to_cache(combined_results, meta)
368
+
349
369
  return ModelResult(
350
370
  model_name=results.model_name,
351
371
  model_revision=results.model_revision,
352
- task_results=[result],
372
+ task_results=[combined_results],
353
373
  )
354
374
 
355
375
  if isinstance(tasks, AbsTask):
356
376
  task = tasks
357
377
  else:
358
- results = []
378
+ tasks = cast("Iterable[AbsTask]", tasks)
379
+ evaluate_results = []
359
380
  exceptions = []
360
381
  tasks_tqdm = tqdm(
361
382
  tasks,
@@ -375,24 +396,25 @@ def evaluate(
375
396
  prediction_folder=prediction_folder,
376
397
  show_progress_bar=False,
377
398
  public_only=public_only,
399
+ num_proc=num_proc,
378
400
  )
379
- results.extend(_res.task_results)
401
+ evaluate_results.extend(_res.task_results)
380
402
  if _res.exceptions:
381
403
  exceptions.extend(_res.exceptions)
382
404
  return ModelResult(
383
405
  model_name=_res.model_name,
384
406
  model_revision=_res.model_revision,
385
- task_results=results,
407
+ task_results=evaluate_results,
386
408
  exceptions=exceptions,
387
409
  )
388
410
 
389
411
  overwrite_strategy = OverwriteStrategy.from_str(overwrite_strategy)
390
412
 
391
- existing_results = None
413
+ existing_results: TaskResult | None = None
392
414
  if cache and overwrite_strategy != OverwriteStrategy.ALWAYS:
393
- results = cache.load_task_result(task.metadata.name, meta)
394
- if results:
395
- existing_results = results
415
+ cache_results = cache.load_task_result(task.metadata.name, meta)
416
+ if cache_results:
417
+ existing_results = cache_results
396
418
 
397
419
  if (
398
420
  existing_results
@@ -454,6 +476,7 @@ def evaluate(
454
476
  encode_kwargs=encode_kwargs,
455
477
  prediction_folder=prediction_folder,
456
478
  public_only=public_only,
479
+ num_proc=num_proc,
457
480
  )
458
481
  except Exception as e:
459
482
  logger.error(
@@ -469,6 +492,7 @@ def evaluate(
469
492
  encode_kwargs=encode_kwargs,
470
493
  prediction_folder=prediction_folder,
471
494
  public_only=public_only,
495
+ num_proc=num_proc,
472
496
  )
473
497
  logger.info(f"✓ Finished evaluation for {task.metadata.name}")
474
498