mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 23904,
4
+ "number_of_characters": 20825122,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20797224,
7
+ "min_text_length": 74,
8
+ "average_text_length": 872.4033726246906,
9
+ "max_text_length": 19104,
10
+ "unique_texts": 23839
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 27898,
15
+ "min_text_length": 13,
16
+ "average_text_length": 429.2,
17
+ "max_text_length": 1255,
18
+ "unique_texts": 65
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 126,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.9384615384615385,
25
+ "max_relevant_docs_per_query": 6,
26
+ "unique_relevant_docs": 95
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 1549535,
30
+ "min_top_ranked_per_query": 23839,
31
+ "average_top_ranked_per_query": 23839.0,
32
+ "max_top_ranked_per_query": 23839
33
+ }
34
+ }
35
+ }
mteb/evaluate.py CHANGED
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import warnings
5
- from collections.abc import Iterable
6
5
  from pathlib import Path
7
6
  from time import time
8
7
  from typing import TYPE_CHECKING, cast
@@ -17,22 +16,25 @@ from mteb.abstasks.aggregated_task import AbsTaskAggregate
17
16
  from mteb.benchmarks.benchmark import Benchmark
18
17
  from mteb.cache import ResultCache
19
18
  from mteb.models.model_meta import ModelMeta
20
- from mteb.models.models_protocols import (
21
- MTEBModels,
22
- )
23
19
  from mteb.models.sentence_transformer_wrapper import (
24
20
  CrossEncoderWrapper,
25
21
  SentenceTransformerEncoderWrapper,
26
22
  )
27
23
  from mteb.results import ModelResult, TaskResult
28
24
  from mteb.results.task_result import TaskError
29
- from mteb.types import HFSubset, PromptType, SplitName
30
- from mteb.types._encoder_io import EncodeKwargs
31
- from mteb.types._metadata import ModelName, Revision
25
+ from mteb.types import PromptType
32
26
 
33
27
  if TYPE_CHECKING:
28
+ from collections.abc import Iterable
29
+
34
30
  from sentence_transformers import CrossEncoder, SentenceTransformer
35
31
 
32
+ from mteb.models.models_protocols import (
33
+ MTEBModels,
34
+ )
35
+ from mteb.types import EncodeKwargs, HFSubset, SplitName
36
+ from mteb.types._metadata import ModelName, Revision
37
+
36
38
  logger = logging.getLogger(__name__)
37
39
 
38
40
 
@@ -69,13 +71,13 @@ def _sanitize_model(
69
71
  meta = getattr(model, "mteb_model_meta")
70
72
  if not isinstance(meta, ModelMeta):
71
73
  meta = ModelMeta._from_hub(None)
72
- wrapped_model = cast(MTEBModels | ModelMeta, model)
74
+ wrapped_model = cast("MTEBModels | ModelMeta", model)
73
75
  else:
74
76
  meta = ModelMeta._from_hub(None) if not isinstance(model, ModelMeta) else model
75
77
  wrapped_model = meta
76
78
 
77
- model_name = cast(str, meta.name)
78
- model_revision = cast(str, meta.revision)
79
+ model_name = cast("str", meta.name)
80
+ model_revision = cast("str", meta.revision)
79
81
 
80
82
  return wrapped_model, meta, model_name, model_revision
81
83
 
@@ -123,6 +125,7 @@ def _evaluate_task(
123
125
  co2_tracker=False,
124
126
  prediction_folder=prediction_folder,
125
127
  public_only=public_only,
128
+ num_proc=num_proc,
126
129
  )
127
130
  if isinstance(result, TaskResult):
128
131
  result.kg_co2_emissions = tracker.final_emissions
@@ -132,10 +135,10 @@ def _evaluate_task(
132
135
 
133
136
  task.check_if_dataset_is_superseded()
134
137
 
135
- data_loaded = task.data_loaded
136
- if not data_loaded:
138
+ data_preloaded = task.data_loaded
139
+ if not data_preloaded:
137
140
  try:
138
- task.load_data()
141
+ task.load_data(num_proc=num_proc)
139
142
  except DatasetNotFoundError as e:
140
143
  if not task.metadata.is_public and public_only is None:
141
144
  msg = (
@@ -161,6 +164,7 @@ def _evaluate_task(
161
164
  subsets_to_run=hf_subsets,
162
165
  encode_kwargs=encode_kwargs,
163
166
  prediction_folder=prediction_folder,
167
+ num_proc=num_proc,
164
168
  )
165
169
  tock = time()
166
170
 
@@ -176,7 +180,7 @@ def _evaluate_task(
176
180
  kg_co2_emissions=None,
177
181
  )
178
182
 
179
- if data_loaded: # only unload if we loaded the data
183
+ if not data_preloaded: # only unload if we loaded the data
180
184
  task.unload_data()
181
185
 
182
186
  return result
@@ -202,10 +206,10 @@ def _check_model_modalities(
202
206
  if isinstance(tasks, AbsTask):
203
207
  check_tasks = [tasks]
204
208
  elif isinstance(tasks, Benchmark):
205
- benchmark = cast(Benchmark, tasks)
209
+ benchmark = cast("Benchmark", tasks)
206
210
  check_tasks = benchmark.tasks
207
211
  else:
208
- check_tasks = cast(Iterable[AbsTask], tasks)
212
+ check_tasks = cast("Iterable[AbsTask]", tasks)
209
213
 
210
214
  warnings, errors = [], []
211
215
 
@@ -278,6 +282,7 @@ def evaluate(
278
282
  prediction_folder: Path | str | None = None,
279
283
  show_progress_bar: bool = True,
280
284
  public_only: bool | None = None,
285
+ num_proc: int = 1,
281
286
  ) -> ModelResult:
282
287
  """This function runs a model on a given task and returns the results.
283
288
 
@@ -286,7 +291,7 @@ def evaluate(
286
291
  tasks: A task to run.
287
292
  co2_tracker: If True, track the CO₂ emissions of the evaluation, required codecarbon to be installed, which can be installed using
288
293
  `pip install mteb[codecarbon]`. If none is passed co2 tracking will only be run if codecarbon is installed.
289
- encode_kwargs: Additional keyword arguments passed to the models `encode` method.
294
+ encode_kwargs: Additional keyword arguments passed to the models `encode` and `load_data` methods;
290
295
  raise_error: If True, raise an error if the task fails. If False, return an empty list.
291
296
  cache: The cache to use for loading the results. If None, then no cache will be used. The default cache saved the cache in the
292
297
  `~/.cache/mteb` directory. It can be overridden by setting the `MTEB_CACHE` environment variable to a different directory or by directly
@@ -298,10 +303,11 @@ def evaluate(
298
303
  changed.
299
304
  - "only-cache": Only load the results from the cache folder and do not run the task. Useful if you just want to load the results from the
300
305
  cache.
301
- prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be sabed in `prediction_folder/{task_name}_predictions.json`
306
+ prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be saved in `prediction_folder/{task_name}_predictions.json`
302
307
  show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
303
308
  `encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
304
309
  public_only: Run only public tasks. If None, it will attempt to run the private task.
310
+ num_proc: Number of processes to use during data loading and transformation. Defaults to 1.
305
311
 
306
312
  Returns:
307
313
  The results of the evaluation.
@@ -342,7 +348,7 @@ def evaluate(
342
348
 
343
349
  # AbsTaskAggregate is a special case where we have to run multiple tasks and combine the results
344
350
  if isinstance(tasks, AbsTaskAggregate):
345
- aggregated_task = cast(AbsTaskAggregate, tasks)
351
+ aggregated_task = cast("AbsTaskAggregate", tasks)
346
352
  results = evaluate(
347
353
  model,
348
354
  aggregated_task.metadata.tasks,
@@ -354,8 +360,12 @@ def evaluate(
354
360
  prediction_folder=prediction_folder,
355
361
  show_progress_bar=show_progress_bar,
356
362
  public_only=public_only,
363
+ num_proc=num_proc,
357
364
  )
358
365
  combined_results = aggregated_task.combine_task_results(results.task_results)
366
+ if cache:
367
+ cache.save_to_cache(combined_results, meta)
368
+
359
369
  return ModelResult(
360
370
  model_name=results.model_name,
361
371
  model_revision=results.model_revision,
@@ -365,7 +375,7 @@ def evaluate(
365
375
  if isinstance(tasks, AbsTask):
366
376
  task = tasks
367
377
  else:
368
- tasks = cast(Iterable[AbsTask], tasks)
378
+ tasks = cast("Iterable[AbsTask]", tasks)
369
379
  evaluate_results = []
370
380
  exceptions = []
371
381
  tasks_tqdm = tqdm(
@@ -386,6 +396,7 @@ def evaluate(
386
396
  prediction_folder=prediction_folder,
387
397
  show_progress_bar=False,
388
398
  public_only=public_only,
399
+ num_proc=num_proc,
389
400
  )
390
401
  evaluate_results.extend(_res.task_results)
391
402
  if _res.exceptions:
@@ -465,6 +476,7 @@ def evaluate(
465
476
  encode_kwargs=encode_kwargs,
466
477
  prediction_folder=prediction_folder,
467
478
  public_only=public_only,
479
+ num_proc=num_proc,
468
480
  )
469
481
  except Exception as e:
470
482
  logger.error(
@@ -480,6 +492,7 @@ def evaluate(
480
492
  encode_kwargs=encode_kwargs,
481
493
  prediction_folder=prediction_folder,
482
494
  public_only=public_only,
495
+ num_proc=num_proc,
483
496
  )
484
497
  logger.info(f"✓ Finished evaluation for {task.metadata.name}")
485
498
 
mteb/filter_tasks.py CHANGED
@@ -1,19 +1,24 @@
1
1
  """This script contains functions that are used to get an overview of the MTEB benchmark."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
4
- from collections.abc import Iterable, Sequence
5
- from typing import overload
6
+ from typing import TYPE_CHECKING, overload
6
7
 
7
- from mteb.abstasks import (
8
- AbsTask,
9
- )
10
8
  from mteb.abstasks.aggregated_task import AbsTaskAggregate
11
- from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
12
9
  from mteb.languages import (
13
10
  ISO_TO_LANGUAGE,
14
11
  ISO_TO_SCRIPT,
15
12
  )
16
- from mteb.types import Modalities
13
+
14
+ if TYPE_CHECKING:
15
+ from collections.abc import Iterable, Sequence
16
+
17
+ from mteb.abstasks import (
18
+ AbsTask,
19
+ )
20
+ from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
21
+ from mteb.types import Modalities
17
22
 
18
23
  logger = logging.getLogger(__name__)
19
24
 
mteb/get_tasks.py CHANGED
@@ -1,20 +1,25 @@
1
1
  """This script contains functions that are used to get an overview of the MTEB benchmark."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import difflib
4
6
  import logging
5
7
  import warnings
6
8
  from collections import Counter, defaultdict
7
- from collections.abc import Iterable, Sequence
8
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
9
10
 
10
11
  import pandas as pd
11
12
 
12
13
  from mteb.abstasks import (
13
14
  AbsTask,
14
15
  )
15
- from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
16
16
  from mteb.filter_tasks import filter_tasks
17
- from mteb.types import Modalities
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Iterable, Sequence
20
+
21
+ from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
22
+ from mteb.types import Modalities
18
23
 
19
24
  logger = logging.getLogger(__name__)
20
25
 
@@ -1,10 +1,15 @@
1
- from collections.abc import Iterable, Sequence
2
- from dataclasses import dataclass
1
+ from __future__ import annotations
3
2
 
4
- from typing_extensions import Self
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING
5
5
 
6
6
  from mteb.languages.check_language_code import check_language_code
7
7
 
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Iterable, Sequence
10
+
11
+ from typing_extensions import Self
12
+
8
13
 
9
14
  @dataclass
10
15
  class LanguageScripts:
mteb/leaderboard/app.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import itertools
2
4
  import json
3
5
  import logging
@@ -5,15 +7,14 @@ import tempfile
5
7
  import time
6
8
  import warnings
7
9
  from pathlib import Path
8
- from typing import Literal, get_args
10
+ from typing import TYPE_CHECKING, Literal, get_args
9
11
  from urllib.parse import urlencode
10
12
 
11
13
  import cachetools
12
14
  import gradio as gr
13
- import pandas as pd
15
+ import pandas as pd # noqa: TC002 # gradio tries to validate typehints
14
16
 
15
17
  import mteb
16
- from mteb import BenchmarkResults
17
18
  from mteb.benchmarks.benchmark import RtebBenchmark
18
19
  from mteb.cache import ResultCache
19
20
  from mteb.leaderboard.benchmark_selector import (
@@ -31,6 +32,9 @@ from mteb.leaderboard.table import (
31
32
  from mteb.leaderboard.text_segments import ACKNOWLEDGEMENT, FAQ
32
33
  from mteb.models.model_meta import MODEL_TYPES
33
34
 
35
+ if TYPE_CHECKING:
36
+ from mteb import BenchmarkResults
37
+
34
38
  logger = logging.getLogger(__name__)
35
39
 
36
40
 
@@ -546,7 +550,10 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
546
550
 
547
551
  logger.info("Step 7/7: Building Gradio interface and callbacks...")
548
552
  interface_start = time.time()
549
- with gr.Blocks(fill_width=True) as demo:
553
+ with gr.Blocks(
554
+ title="MTEB Leaderboard",
555
+ fill_width=True,
556
+ ) as demo:
550
557
  with gr.Sidebar(
551
558
  position="left",
552
559
  label="Benchmark Selection and Customization",
mteb/leaderboard/table.py CHANGED
@@ -1,3 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  import gradio as gr
2
6
  import matplotlib.pyplot as plt
3
7
  import numpy as np
@@ -5,8 +9,9 @@ import pandas as pd
5
9
  from matplotlib.colors import LinearSegmentedColormap
6
10
  from pandas.api.types import is_numeric_dtype
7
11
 
8
- from mteb.benchmarks.benchmark import Benchmark
9
- from mteb.results.benchmark_results import BenchmarkResults
12
+ if TYPE_CHECKING:
13
+ from mteb.benchmarks.benchmark import Benchmark
14
+ from mteb.results.benchmark_results import BenchmarkResults
10
15
 
11
16
 
12
17
  def _borda_count(scores: pd.Series) -> pd.Series:
mteb/load_results.py CHANGED
@@ -1,13 +1,19 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import sys
4
- from collections.abc import Iterable, Sequence
5
- from pathlib import Path
6
+ from typing import TYPE_CHECKING
6
7
 
7
8
  from mteb.abstasks.abstask import AbsTask
8
9
  from mteb.models.model_meta import ModelMeta
9
10
  from mteb.results import BenchmarkResults, ModelResult, TaskResult
10
- from mteb.types import ModelName, Revision
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterable, Sequence
14
+ from pathlib import Path
15
+
16
+ from mteb.types import ModelName, Revision
11
17
 
12
18
  if sys.version_info >= (3, 13):
13
19
  from warnings import deprecated
@@ -1,14 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
5
  from abc import ABC, abstractmethod
4
- from collections.abc import Callable, Sequence
5
- from typing import Any, Literal, cast, get_args, overload
6
-
7
- from torch.utils.data import DataLoader
8
- from typing_extensions import Unpack
6
+ from typing import TYPE_CHECKING, Any, Literal, cast, get_args, overload
9
7
 
10
8
  import mteb
11
- from mteb.abstasks.task_metadata import TaskMetadata, TaskType
9
+ from mteb.abstasks.task_metadata import TaskType
12
10
  from mteb.similarity_functions import (
13
11
  cos_sim,
14
12
  dot_score,
@@ -18,13 +16,25 @@ from mteb.similarity_functions import (
18
16
  pairwise_max_sim,
19
17
  )
20
18
  from mteb.types import (
21
- Array,
22
- BatchedInput,
23
- EncodeKwargs,
24
19
  PromptType,
25
20
  )
26
21
 
27
- from .model_meta import ModelMeta, ScoringFunction
22
+ from .model_meta import ScoringFunction
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Callable, Sequence
26
+
27
+ from torch.utils.data import DataLoader
28
+ from typing_extensions import Unpack
29
+
30
+ from mteb.abstasks.task_metadata import TaskMetadata
31
+ from mteb.types import (
32
+ Array,
33
+ BatchedInput,
34
+ EncodeKwargs,
35
+ )
36
+
37
+ from .model_meta import ModelMeta
28
38
 
29
39
  logger = logging.getLogger(__name__)
30
40
 
@@ -314,7 +324,7 @@ class AbsEncoder(ABC):
314
324
  ):
315
325
  arr = self.model.similarity(embeddings1, embeddings2)
316
326
  # We assume that the model returns an Array-like object:
317
- arr = cast(Array, arr)
327
+ arr = cast("Array", arr)
318
328
  return arr
319
329
  return cos_sim(embeddings1, embeddings2)
320
330
  if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
@@ -352,7 +362,7 @@ class AbsEncoder(ABC):
352
362
  ):
353
363
  arr = self.model.similarity_pairwise(embeddings1, embeddings2)
354
364
  # We assume that the model returns an Array-like object:
355
- arr = cast(Array, arr)
365
+ arr = cast("Array", arr)
356
366
  return arr
357
367
  return pairwise_cos_sim(embeddings1, embeddings2)
358
368
  if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
@@ -1,9 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from pathlib import Path
4
- from typing import Any, Protocol, runtime_checkable
3
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
5
4
 
6
- import numpy as np
5
+ if TYPE_CHECKING:
6
+ from pathlib import Path
7
+
8
+ import numpy as np
7
9
 
8
10
 
9
11
  @runtime_checkable
@@ -1,6 +1,12 @@
1
+ from __future__ import annotations
2
+
1
3
  import hashlib
2
- from collections.abc import Mapping
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
5
+
6
+ if TYPE_CHECKING:
7
+ from collections.abc import Mapping
8
+
9
+ from PIL import Image
4
10
 
5
11
 
6
12
  def _hash_item(item: Mapping[str, Any]) -> str:
@@ -10,8 +16,6 @@ def _hash_item(item: Mapping[str, Any]) -> str:
10
16
  item_hash = hashlib.sha256(item_text.encode()).hexdigest()
11
17
 
12
18
  if "image" in item:
13
- from PIL import Image
14
-
15
19
  image: Image.Image = item["image"]
16
20
  item_hash += hashlib.sha256(image.tobytes()).hexdigest()
17
21
 
@@ -1,16 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import warnings
4
6
  from pathlib import Path
5
- from typing import Any
7
+ from typing import TYPE_CHECKING, Any
6
8
 
7
9
  import numpy as np
8
10
 
9
11
  from mteb._requires_package import requires_package
10
- from mteb.types import BatchedInput
11
12
 
12
13
  from ._hash_utils import _hash_item
13
14
 
15
+ if TYPE_CHECKING:
16
+ import faiss
17
+
18
+ from mteb.types import BatchedInput
19
+
14
20
  logger = logging.getLogger(__name__)
15
21
 
16
22
 
@@ -24,7 +30,6 @@ class FaissCache:
24
30
  "FAISS-based vector cache",
25
31
  install_instruction="pip install mteb[faiss-cpu]",
26
32
  )
27
- import faiss
28
33
 
29
34
  self.directory = Path(directory)
30
35
  self.directory.mkdir(parents=True, exist_ok=True)
@@ -1,21 +1,26 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from pathlib import Path
3
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
4
6
 
5
7
  import numpy as np
6
8
  import torch
7
9
  from datasets import Dataset
8
- from torch.utils.data import DataLoader
9
10
 
10
11
  from mteb._create_dataloaders import create_dataloader
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
- from mteb.models.cache_wrappers.cache_backend_protocol import (
13
- CacheBackendProtocol,
14
- )
15
12
  from mteb.models.cache_wrappers.cache_backends.numpy_cache import NumpyCache
16
- from mteb.models.model_meta import ModelMeta
17
- from mteb.models.models_protocols import EncoderProtocol
18
- from mteb.types import Array, BatchedInput, PromptType
13
+
14
+ if TYPE_CHECKING:
15
+ from torch.utils.data import DataLoader
16
+
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.models.cache_wrappers.cache_backend_protocol import (
19
+ CacheBackendProtocol,
20
+ )
21
+ from mteb.models.model_meta import ModelMeta
22
+ from mteb.models.models_protocols import EncoderProtocol
23
+ from mteb.types import Array, BatchedInput, PromptType
19
24
 
20
25
  logger = logging.getLogger(__name__)
21
26
 
@@ -1,15 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import difflib
2
4
  import logging
3
- from collections.abc import Iterable
4
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
5
6
 
6
- from mteb.abstasks import AbsTask
7
7
  from mteb.models import (
8
8
  ModelMeta,
9
- MTEBModels,
10
9
  )
11
10
  from mteb.models.model_implementations import MODEL_REGISTRY
12
11
 
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterable
14
+
15
+ from mteb.abstasks import AbsTask
16
+ from mteb.models import (
17
+ MTEBModels,
18
+ )
19
+
13
20
  logger = logging.getLogger(__name__)
14
21
 
15
22
 
@@ -116,7 +123,10 @@ def get_model(
116
123
 
117
124
 
118
125
  def get_model_meta(
119
- model_name: str, revision: str | None = None, fetch_from_hf: bool = True
126
+ model_name: str,
127
+ revision: str | None = None,
128
+ fetch_from_hf: bool = True,
129
+ fill_missing: bool = False,
120
130
  ) -> ModelMeta:
121
131
  """A function to fetch a model metadata object by name.
122
132
 
@@ -124,6 +134,7 @@ def get_model_meta(
124
134
  model_name: Name of the model to fetch
125
135
  revision: Revision of the model to fetch
126
136
  fetch_from_hf: Whether to fetch the model from HuggingFace Hub if not found in the registry
137
+ fill_missing: Computes missing attributes from the metadata including number of parameters and memory usage.
127
138
 
128
139
  Returns:
129
140
  A model metadata object
@@ -135,10 +146,25 @@ def get_model_meta(
135
146
  raise ValueError(
136
147
  f"Model revision {revision} not found for model {model_name}. Expected {model_meta.revision}."
137
148
  )
149
+
150
+ if fill_missing and fetch_from_hf:
151
+ original_meta_dict = model_meta.model_dump()
152
+ new_meta = ModelMeta.from_hub(model_name)
153
+ new_meta_dict = new_meta.model_dump(exclude_none=True)
154
+
155
+ updates = {
156
+ k: v
157
+ for k, v in new_meta_dict.items()
158
+ if original_meta_dict.get(k) is None
159
+ }
160
+
161
+ if updates:
162
+ return model_meta.model_copy(update=updates)
138
163
  return model_meta
164
+
139
165
  if fetch_from_hf:
140
166
  logger.info(
141
- "Model not found in model registry. Attempting to extract metadata by loading the model ({model_name}) using HuggingFace."
167
+ f"Model not found in model registry. Attempting to extract metadata by loading the model ({model_name}) using HuggingFace."
142
168
  )
143
169
  meta = ModelMeta.from_hub(model_name, revision)
144
170
  return meta
@@ -1,16 +1,24 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Callable
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import torch
6
- from torch.utils.data import DataLoader
7
7
 
8
8
  from mteb._requires_package import requires_package
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
- from mteb.types import Array, BatchedInput, PromptType
9
+ from mteb.types import PromptType
11
10
 
12
11
  from .abs_encoder import AbsEncoder
13
12
 
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Callable
15
+
16
+ from torch.utils.data import DataLoader
17
+
18
+ from mteb.abstasks.task_metadata import TaskMetadata
19
+ from mteb.types import Array, BatchedInput
20
+
21
+
14
22
  logger = logging.getLogger(__name__)
15
23
 
16
24