mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,22 @@
1
- import logging
2
- from typing import Any, Protocol, cast
1
+ from __future__ import annotations
3
2
 
4
- import numpy as np
5
- from datasets import Dataset
6
- from torch.utils.data import DataLoader
7
- from typing_extensions import Self
3
+ import logging
4
+ from typing import TYPE_CHECKING, Any, Protocol, cast
8
5
 
9
6
  from mteb._create_dataloaders import create_dataloader
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
- from mteb.models import EncoderProtocol
12
- from mteb.types import Array, BatchedInput, EncodeKwargs
13
7
 
14
8
  from .evaluator import Evaluator
15
9
 
10
+ if TYPE_CHECKING:
11
+ import numpy as np
12
+ from datasets import Dataset
13
+ from torch.utils.data import DataLoader
14
+ from typing_extensions import Self
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.models import EncoderProtocol
18
+ from mteb.types import Array, BatchedInput, EncodeKwargs
19
+
16
20
  logger = logging.getLogger(__name__)
17
21
 
18
22
 
@@ -50,18 +54,20 @@ class SklearnEvaluator(Evaluator):
50
54
  self.evaluator_model = evaluator_model
51
55
 
52
56
  def create_dataloaders(
53
- self, encode_kwargs: EncodeKwargs
57
+ self, encode_kwargs: EncodeKwargs, num_proc: int
54
58
  ) -> tuple[DataLoader[BatchedInput], DataLoader[BatchedInput]]:
55
59
  dataloader_train = create_dataloader(
56
60
  self.train_dataset,
57
61
  self.task_metadata,
58
62
  input_column=self.values_column_name,
63
+ num_proc=num_proc,
59
64
  **encode_kwargs,
60
65
  )
61
66
  dataloader_test = create_dataloader(
62
67
  self.eval_dataset,
63
68
  self.task_metadata,
64
69
  input_column=self.values_column_name,
70
+ num_proc=num_proc,
65
71
  **encode_kwargs,
66
72
  )
67
73
  return dataloader_train, dataloader_test
@@ -72,6 +78,7 @@ class SklearnEvaluator(Evaluator):
72
78
  *,
73
79
  encode_kwargs: EncodeKwargs,
74
80
  test_cache: Array | None = None,
81
+ num_proc: int = 1,
75
82
  ) -> tuple[np.ndarray, Array]:
76
83
  """Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.
77
84
 
@@ -79,6 +86,7 @@ class SklearnEvaluator(Evaluator):
79
86
  model: Encoder
80
87
  encode_kwargs: encode kwargs
81
88
  test_cache: embeddings of the test set, if already computed
89
+ num_proc: number of processes to use
82
90
 
83
91
  Returns:
84
92
  Tuple of test predictions and embeddings
@@ -86,6 +94,7 @@ class SklearnEvaluator(Evaluator):
86
94
  """
87
95
  dataloader_train, dataloader_test = self.create_dataloaders(
88
96
  encode_kwargs=encode_kwargs,
97
+ num_proc=num_proc,
89
98
  )
90
99
 
91
100
  logger.info("Running - Encoding samples...")
@@ -104,7 +113,7 @@ class SklearnEvaluator(Evaluator):
104
113
  hf_subset=self.hf_subset,
105
114
  **encode_kwargs,
106
115
  )
107
- test_cache = cast(Array, test_cache)
116
+ test_cache = cast("Array", test_cache)
108
117
 
109
118
  logger.info("Running - Fitting classifier...")
110
119
  y_train = self.train_dataset[self.label_column_name]
@@ -1,4 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  import torch
4
7
  from datasets import Dataset
@@ -6,9 +9,11 @@ from tqdm.auto import tqdm
6
9
 
7
10
  from mteb._create_dataloaders import _create_dataloader_from_texts
8
11
  from mteb._evaluators.evaluator import Evaluator
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
- from mteb.models import EncoderProtocol
11
- from mteb.types import Array, EncodeKwargs
12
+
13
+ if TYPE_CHECKING:
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.models import EncoderProtocol
16
+ from mteb.types import Array, EncodeKwargs
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
@@ -36,6 +41,7 @@ class BitextMiningEvaluator(Evaluator):
36
41
  model: EncoderProtocol,
37
42
  *,
38
43
  encode_kwargs: EncodeKwargs,
44
+ num_proc: int = 1,
39
45
  ) -> dict[str, list[dict[str, float]]]:
40
46
  pair_elements = {p for pair in self.pairs for p in pair}
41
47
  if isinstance(self.sentences, Dataset):
@@ -50,6 +56,7 @@ class BitextMiningEvaluator(Evaluator):
50
56
  for sub in tqdm(subsets):
51
57
  dataloader = _create_dataloader_from_texts(
52
58
  self.sentences[sub],
59
+ num_proc=num_proc,
53
60
  **encode_kwargs,
54
61
  )
55
62
  embeddings[sub] = model.encode(
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import sys
3
- from typing import TypedDict
5
+ from typing import TYPE_CHECKING, TypedDict
4
6
 
5
7
  import numpy as np
6
8
  import torch
@@ -9,10 +11,12 @@ from tqdm.auto import tqdm
9
11
 
10
12
  from mteb._create_dataloaders import _create_dataloader_from_texts
11
13
  from mteb._evaluators.evaluator import Evaluator
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
- from mteb.models import EncoderProtocol
14
14
  from mteb.similarity_functions import cos_sim, dot_score
15
- from mteb.types import EncodeKwargs
15
+
16
+ if TYPE_CHECKING:
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.models import EncoderProtocol
19
+ from mteb.types import EncodeKwargs
16
20
 
17
21
  # if later than python 3.13 use typing module
18
22
  if sys.version_info >= (3, 13):
@@ -96,6 +100,7 @@ class SummarizationEvaluator(Evaluator):
96
100
  model: EncoderProtocol,
97
101
  *,
98
102
  encode_kwargs: EncodeKwargs,
103
+ num_proc: int = 1,
99
104
  ) -> SummarizationDistances:
100
105
  # Get the human & machine summaries for the text in one go for all
101
106
  human_lens = [len(human_summaries) for human_summaries in self.human_summaries]
@@ -111,6 +116,7 @@ class SummarizationEvaluator(Evaluator):
111
116
  for human_summaries in self.human_summaries
112
117
  for summary in human_summaries
113
118
  ],
119
+ num_proc=num_proc,
114
120
  **encode_kwargs,
115
121
  ),
116
122
  task_metadata=self.task_metadata,
@@ -1,4 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from datasets import Dataset
4
7
 
@@ -6,13 +9,17 @@ from mteb._create_dataloaders import (
6
9
  _create_dataloader_from_texts,
7
10
  create_dataloader,
8
11
  )
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
- from mteb.models import EncoderProtocol
11
12
  from mteb.similarity_functions import similarity
12
- from mteb.types import Array, EncodeKwargs
13
13
 
14
14
  from .evaluator import Evaluator
15
15
 
16
+ if TYPE_CHECKING:
17
+ from datasets import Dataset
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.models import EncoderProtocol
21
+ from mteb.types import Array, EncodeKwargs
22
+
16
23
  logger = logging.getLogger(__name__)
17
24
 
18
25
 
@@ -41,11 +48,13 @@ class ZeroShotClassificationEvaluator(Evaluator):
41
48
  model: EncoderProtocol,
42
49
  *,
43
50
  encode_kwargs: EncodeKwargs,
51
+ num_proc: int = 1,
44
52
  ) -> Array:
45
53
  dataloader = create_dataloader(
46
54
  self.dataset,
47
55
  input_column=self.input_column_name,
48
56
  task_metadata=self.task_metadata,
57
+ num_proc=num_proc,
49
58
  **encode_kwargs,
50
59
  )
51
60
 
mteb/_helpful_enum.py CHANGED
@@ -1,6 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
4
+ from typing import TYPE_CHECKING
2
5
 
3
- from typing_extensions import Self
6
+ if TYPE_CHECKING:
7
+ from typing_extensions import Self
4
8
 
5
9
 
6
10
  class HelpfulStrEnum(str, Enum):
@@ -1,12 +1,18 @@
1
1
  """Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
6
+ from typing import TYPE_CHECKING
4
7
 
5
8
  import datasets
6
9
  import pandas as pd
7
- from datasets import Dataset, DatasetDict
10
+ from datasets import DatasetDict
11
+
12
+ if TYPE_CHECKING:
13
+ from datasets import Dataset
8
14
 
9
- from mteb import TaskMetadata
15
+ from mteb import TaskMetadata
10
16
 
11
17
  logger = logging.getLogger(__name__)
12
18
 
@@ -1,9 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from datasets import DatasetDict
4
7
 
5
- from mteb import TaskMetadata
6
- from mteb.abstasks import AbsTaskClassification
7
8
  from mteb.abstasks._data_filter.filters import (
8
9
  deduplicate,
9
10
  filter_empty,
@@ -13,6 +14,10 @@ from mteb.abstasks._data_filter.filters import (
13
14
  split_train_test,
14
15
  )
15
16
 
17
+ if TYPE_CHECKING:
18
+ from mteb import TaskMetadata
19
+ from mteb.abstasks import AbsTaskClassification
20
+
16
21
  logger = logging.getLogger(__name__)
17
22
 
18
23
 
@@ -2,10 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  from collections import Counter
5
- from collections.abc import Mapping
6
5
  from typing import TYPE_CHECKING, cast
7
6
 
8
- from mteb.types import TopRankedDocumentsType
9
7
  from mteb.types.statistics import (
10
8
  ImageStatistics,
11
9
  LabelStatistics,
@@ -16,8 +14,12 @@ from mteb.types.statistics import (
16
14
  )
17
15
 
18
16
  if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
19
19
  from PIL import Image
20
20
 
21
+ from mteb.types import TopRankedDocumentsType
22
+
21
23
 
22
24
  def calculate_text_statistics(texts: list[str]) -> TextStatistics:
23
25
  """Calculate descriptive statistics for a list of texts.
@@ -87,13 +89,13 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
87
89
 
88
90
  if not isinstance(labels[0], list):
89
91
  # single label classification
90
- single_label = cast(list[int], labels)
92
+ single_label = cast("list[int]", labels)
91
93
  label_len = [1] * len(single_label)
92
94
  total_label_len = len(single_label)
93
95
  total_labels.extend(single_label)
94
96
  elif isinstance(labels[0], list):
95
97
  # multilabel classification
96
- multilabel_labels = cast(list[list[int]], labels)
98
+ multilabel_labels = cast("list[list[int]]", labels)
97
99
  label_len = [len(l) for l in multilabel_labels]
98
100
  total_label_len = sum(label_len)
99
101
  for l in multilabel_labels:
mteb/abstasks/abstask.py CHANGED
@@ -1,30 +1,38 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import warnings
4
6
  from abc import ABC, abstractmethod
5
- from collections.abc import Mapping, Sequence
7
+ from collections.abc import Sequence
6
8
  from copy import copy
7
9
  from pathlib import Path
8
- from typing import Any, Literal, cast
10
+ from typing import TYPE_CHECKING, Any, Literal, cast
9
11
 
10
12
  import numpy as np
11
13
  from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
12
14
  from sklearn.preprocessing import MultiLabelBinarizer
13
15
  from tqdm.auto import tqdm
14
- from typing_extensions import Self
15
16
 
16
17
  from mteb._set_seed import _set_seed
17
- from mteb.abstasks.task_metadata import TaskMetadata
18
18
  from mteb.languages import LanguageScripts
19
19
  from mteb.models import (
20
20
  CrossEncoderProtocol,
21
21
  EncoderProtocol,
22
- MTEBModels,
23
22
  SearchProtocol,
24
23
  )
25
- from mteb.types import HFSubset, Modalities, ScoresDict
26
- from mteb.types._encoder_io import EncodeKwargs
27
- from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
24
+
25
+ if TYPE_CHECKING:
26
+ from collections.abc import Mapping
27
+
28
+ from typing_extensions import Self
29
+
30
+ from mteb.abstasks.task_metadata import TaskMetadata
31
+ from mteb.models import (
32
+ MTEBModels,
33
+ )
34
+ from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
35
+ from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
28
36
 
29
37
  logger = logging.getLogger(__name__)
30
38
 
@@ -108,11 +116,14 @@ class AbsTask(ABC):
108
116
  logger.warning(msg)
109
117
  warnings.warn(msg)
110
118
 
111
- def dataset_transform(self):
119
+ def dataset_transform(self, num_proc: int = 1):
112
120
  """A transform operations applied to the dataset after loading.
113
121
 
114
122
  This method is useful when the dataset from Huggingface is not in an `mteb` compatible format.
115
123
  Override this method if your dataset requires additional transformation.
124
+
125
+ Args:
126
+ num_proc: Number of processes to use for the transformation.
116
127
  """
117
128
  pass
118
129
 
@@ -124,6 +135,7 @@ class AbsTask(ABC):
124
135
  *,
125
136
  encode_kwargs: EncodeKwargs,
126
137
  prediction_folder: Path | None = None,
138
+ num_proc: int = 1,
127
139
  **kwargs: Any,
128
140
  ) -> Mapping[HFSubset, ScoresDict]:
129
141
  """Evaluates an MTEB compatible model on the task.
@@ -134,6 +146,7 @@ class AbsTask(ABC):
134
146
  subsets_to_run: List of huggingface subsets (HFSubsets) to evaluate. If None, all subsets are evaluated.
135
147
  encode_kwargs: Additional keyword arguments that are passed to the model's `encode` method.
136
148
  prediction_folder: Folder to save model predictions
149
+ num_proc: Number of processes to use for loading the dataset or processing.
137
150
  kwargs: Additional keyword arguments that are passed to the _evaluate_subset method.
138
151
 
139
152
  Returns:
@@ -163,7 +176,7 @@ class AbsTask(ABC):
163
176
  if not self.data_loaded:
164
177
  self.load_data()
165
178
 
166
- self.dataset = cast(dict[HFSubset, DatasetDict], self.dataset)
179
+ self.dataset = cast("dict[HFSubset, DatasetDict]", self.dataset)
167
180
 
168
181
  scores = {}
169
182
  if self.hf_subsets is None:
@@ -189,6 +202,7 @@ class AbsTask(ABC):
189
202
  hf_subset=hf_subset,
190
203
  encode_kwargs=encode_kwargs,
191
204
  prediction_folder=prediction_folder,
205
+ num_proc=num_proc,
192
206
  **kwargs,
193
207
  )
194
208
  self._add_main_score(scores[hf_subset])
@@ -204,6 +218,7 @@ class AbsTask(ABC):
204
218
  hf_subset: str,
205
219
  encode_kwargs: EncodeKwargs,
206
220
  prediction_folder: Path | None = None,
221
+ num_proc: int = 1,
207
222
  **kwargs: Any,
208
223
  ) -> ScoresDict:
209
224
  raise NotImplementedError(
@@ -308,11 +323,15 @@ class AbsTask(ABC):
308
323
  ) # only take the specified test split.
309
324
  return dataset_dict
310
325
 
311
- def load_data(self) -> None:
326
+ def load_data(self, num_proc: int = 1, **kwargs: Any) -> None:
312
327
  """Loads dataset from HuggingFace hub
313
328
 
314
329
  This is the main loading function for Task. Do not overwrite this, instead we recommend using `dataset_transform`, which is called after the
315
330
  dataset is loaded using `datasets.load_dataset`.
331
+
332
+ Args:
333
+ num_proc: Number of processes to use for loading the dataset.
334
+ kwargs: Additional keyword arguments passed to the load_dataset function. Keep for forward compatibility.
316
335
  """
317
336
  if self.data_loaded:
318
337
  return
@@ -325,11 +344,12 @@ class AbsTask(ABC):
325
344
  self.dataset[hf_subset] = load_dataset(
326
345
  name=hf_subset,
327
346
  **self.metadata.dataset,
347
+ num_proc=num_proc,
328
348
  )
329
349
  else:
330
350
  # some of monolingual datasets explicitly adding the split name to the dataset name
331
- self.dataset = load_dataset(**self.metadata.dataset)
332
- self.dataset_transform()
351
+ self.dataset = load_dataset(**self.metadata.dataset, num_proc=num_proc)
352
+ self.dataset_transform(num_proc=num_proc)
333
353
  self.data_loaded = True
334
354
 
335
355
  def fast_load(self) -> None:
@@ -352,12 +372,13 @@ class AbsTask(ABC):
352
372
  self.dataset[lang] = DatasetDict(subset)
353
373
 
354
374
  def calculate_descriptive_statistics(
355
- self, overwrite_results: bool = False
375
+ self, overwrite_results: bool = False, num_proc: int = 1
356
376
  ) -> dict[str, DescriptiveStatistics]:
357
377
  """Calculates descriptive statistics from the dataset.
358
378
 
359
379
  Args:
360
380
  overwrite_results: Whether to overwrite existing results. If False and results already exist, the existing results will be loaded from cache.
381
+ num_proc: Number of processes to use for loading the dataset.
361
382
 
362
383
  Returns:
363
384
  A dictionary containing descriptive statistics for each split.
@@ -371,7 +392,7 @@ class AbsTask(ABC):
371
392
  return existing_stats
372
393
 
373
394
  if not self.data_loaded:
374
- self.load_data()
395
+ self.load_data(num_proc=num_proc)
375
396
 
376
397
  descriptive_stats: dict[str, DescriptiveStatistics] = {}
377
398
  hf_subset_stat: Literal["hf_subset_descriptive_stats"] = (
@@ -509,7 +530,7 @@ class AbsTask(ABC):
509
530
  scores["main_score"] = scores[self.metadata.main_score]
510
531
 
511
532
  def _upload_dataset_to_hub(
512
- self, repo_name: str, fields: list[str] | dict[str, str]
533
+ self, repo_name: str, fields: list[str] | dict[str, str], num_proc: int = 1
513
534
  ) -> None:
514
535
  if self.dataset is None:
515
536
  raise ValueError("Dataset not loaded")
@@ -534,7 +555,10 @@ class AbsTask(ABC):
534
555
  )
535
556
  sentences = DatasetDict(sentences)
536
557
  sentences.push_to_hub(
537
- repo_name, config, commit_message=f"Add {config} dataset"
558
+ repo_name,
559
+ config,
560
+ commit_message=f"Add {config} dataset",
561
+ num_proc=num_proc,
538
562
  )
539
563
  else:
540
564
  sentences = {}
@@ -551,16 +575,19 @@ class AbsTask(ABC):
551
575
  {field: self.dataset[split][field] for field in fields}
552
576
  )
553
577
  sentences = DatasetDict(sentences)
554
- sentences.push_to_hub(repo_name, commit_message="Add dataset")
578
+ sentences.push_to_hub(
579
+ repo_name, commit_message="Add dataset", num_proc=num_proc
580
+ )
555
581
 
556
- def _push_dataset_to_hub(self, repo_name: str) -> None:
582
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
557
583
  raise NotImplementedError
558
584
 
559
- def push_dataset_to_hub(self, repo_name: str) -> None:
585
+ def push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
560
586
  """Push the dataset to the HuggingFace Hub.
561
587
 
562
588
  Args:
563
589
  repo_name: The name of the repository to push the dataset to.
590
+ num_proc: Number of processes to use for loading the dataset.
564
591
 
565
592
  Examples:
566
593
  >>> import mteb
@@ -572,7 +599,7 @@ class AbsTask(ABC):
572
599
  if not self.data_loaded:
573
600
  self.load_data()
574
601
 
575
- self._push_dataset_to_hub(repo_name)
602
+ self._push_dataset_to_hub(repo_name, num_proc)
576
603
  # dataset repo not creating when pushing card
577
604
  self.metadata.push_dataset_card_to_hub(repo_name)
578
605
 
@@ -1,28 +1,39 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
5
+ from typing import TYPE_CHECKING
3
6
 
4
7
  from pydantic import ConfigDict, Field, model_validator
5
- from typing_extensions import Self
6
8
 
7
9
  from mteb.types import (
8
- ISOLanguageScript,
9
10
  Languages,
10
- Licenses,
11
- Modalities,
12
- StrDate,
13
11
  )
14
12
 
15
13
  from .abstask import AbsTask
16
14
  from .task_metadata import (
17
- AnnotatorType,
18
15
  MetadataDatasetDict,
19
- SampleCreationMethod,
20
- TaskDomain,
21
16
  TaskMetadata,
22
- TaskSubtype,
23
17
  TaskType,
24
18
  )
25
19
 
20
+ if TYPE_CHECKING:
21
+ from typing_extensions import Self
22
+
23
+ from mteb.types import (
24
+ ISOLanguageScript,
25
+ Licenses,
26
+ Modalities,
27
+ StrDate,
28
+ )
29
+
30
+ from .task_metadata import (
31
+ AnnotatorType,
32
+ SampleCreationMethod,
33
+ TaskDomain,
34
+ TaskSubtype,
35
+ )
36
+
26
37
  logger = logging.getLogger(__name__)
27
38
 
28
39
 
@@ -1,19 +1,26 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
- from collections.abc import Mapping
4
- from pathlib import Path
5
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
6
6
 
7
7
  import numpy as np
8
- from datasets import Dataset, DatasetDict
9
8
 
10
- from mteb.models.models_protocols import MTEBModels
11
9
  from mteb.results.task_result import TaskResult
12
- from mteb.types import EncodeKwargs, HFSubset, ScoresDict
13
- from mteb.types.statistics import DescriptiveStatistics
14
10
 
15
11
  from .abstask import AbsTask
16
- from .aggregate_task_metadata import AggregateTaskMetadata
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Mapping
15
+ from pathlib import Path
16
+
17
+ from datasets import Dataset, DatasetDict
18
+
19
+ from mteb.models.models_protocols import MTEBModels
20
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
21
+ from mteb.types.statistics import DescriptiveStatistics
22
+
23
+ from .aggregate_task_metadata import AggregateTaskMetadata
17
24
 
18
25
  logger = logging.getLogger(__name__)
19
26