mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import TypedDict
4
+ from typing import TYPE_CHECKING, TypedDict
3
5
 
4
6
  from datasets import (
5
7
  Dataset,
@@ -11,13 +13,14 @@ from datasets import (
11
13
  load_dataset,
12
14
  )
13
15
 
14
- from mteb.types import (
15
- CorpusDatasetType,
16
- InstructionDatasetType,
17
- QueryDatasetType,
18
- RelevantDocumentsType,
19
- TopRankedDocumentsType,
20
- )
16
+ if TYPE_CHECKING:
17
+ from mteb.types import (
18
+ CorpusDatasetType,
19
+ InstructionDatasetType,
20
+ QueryDatasetType,
21
+ RelevantDocumentsType,
22
+ TopRankedDocumentsType,
23
+ )
21
24
 
22
25
  logger = logging.getLogger(__name__)
23
26
 
@@ -73,28 +76,36 @@ class RetrievalDatasetLoader:
73
76
  self.config = config if config != "default" else None
74
77
  self.dataset_configs = get_dataset_config_names(self.hf_repo, self.revision)
75
78
 
76
- def load(self) -> RetrievalSplitData:
79
+ def load(
80
+ self,
81
+ num_proc: int = 1,
82
+ ) -> RetrievalSplitData:
77
83
  """Loads the dataset split for the specified configuration.
78
84
 
85
+ Args:
86
+ num_proc: The number of processes to use.
87
+
79
88
  Returns:
80
89
  A dictionary containing the corpus, queries, relevant documents, instructions (if applicable), and top-ranked documents (if applicable).
81
90
  """
82
91
  top_ranked = None
83
92
 
84
- qrels = self._load_qrels()
85
- corpus = self._load_corpus()
86
- queries = self._load_queries()
93
+ qrels = self._load_qrels(num_proc)
94
+ corpus = self._load_corpus(num_proc)
95
+ queries = self._load_queries(num_proc)
87
96
 
88
97
  queries = queries.filter(
89
98
  lambda x: x["id"] in qrels.keys(), desc="Filtering queries by qrels"
90
99
  )
91
100
 
92
101
  if any(c.endswith("top_ranked") for c in self.dataset_configs):
93
- top_ranked = self._load_top_ranked()
102
+ top_ranked = self._load_top_ranked(num_proc)
94
103
 
95
104
  if any(c.endswith("instruction") for c in self.dataset_configs):
96
- instructions = self._load_instructions()
97
- queries = _combine_queries_with_instructions_datasets(queries, instructions)
105
+ instructions = self._load_instructions(num_proc)
106
+ queries = _combine_queries_with_instructions_datasets(
107
+ queries, instructions, num_proc
108
+ )
98
109
 
99
110
  return RetrievalSplitData(
100
111
  corpus=corpus,
@@ -117,49 +128,50 @@ class RetrievalDatasetLoader:
117
128
  f"Split {self.split} not found in {splits}. Please specify a valid split."
118
129
  )
119
130
 
120
- def _load_dataset_split(self, config: str) -> Dataset:
131
+ def _load_dataset_split(self, config: str, num_proc: int) -> Dataset:
121
132
  return load_dataset(
122
133
  self.hf_repo,
123
134
  config,
124
135
  split=self._get_split(config),
125
136
  trust_remote_code=self.trust_remote_code,
126
137
  revision=self.revision,
138
+ num_proc=num_proc,
127
139
  )
128
140
 
129
- def _load_corpus(self) -> CorpusDatasetType:
130
- logger.info("Loading Corpus...")
131
-
141
+ def _load_corpus(self, num_proc: int) -> CorpusDatasetType:
132
142
  config = f"{self.config}-corpus" if self.config is not None else "corpus"
133
- corpus_ds = self._load_dataset_split(config)
143
+ logger.info("Loading corpus subset: %s", config)
144
+
145
+ corpus_ds = self._load_dataset_split(config, num_proc)
134
146
  if "_id" in corpus_ds.column_names:
135
147
  corpus_ds = corpus_ds.cast_column("_id", Value("string")).rename_column(
136
148
  "_id", "id"
137
149
  )
138
150
  logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
139
- logger.info("Doc Example: %s", corpus_ds[0])
151
+ logger.debug("Doc Example: %s", corpus_ds[0])
140
152
  return corpus_ds
141
153
 
142
- def _load_queries(self) -> QueryDatasetType:
143
- logger.info("Loading Queries...")
144
-
154
+ def _load_queries(self, num_proc: int) -> QueryDatasetType:
145
155
  config = f"{self.config}-queries" if self.config is not None else "queries"
156
+ logger.info("Loading queries subset: %s", config)
157
+
146
158
  if "query" in self.dataset_configs:
147
159
  config = "query"
148
- queries_ds = self._load_dataset_split(config)
160
+ queries_ds = self._load_dataset_split(config, num_proc)
149
161
  if "_id" in queries_ds.column_names:
150
162
  queries_ds = queries_ds.cast_column("_id", Value("string")).rename_column(
151
163
  "_id", "id"
152
164
  )
153
165
 
154
166
  logger.info("Loaded %d %s queries.", len(queries_ds), self.split.upper())
155
- logger.info("Query Example: %s", queries_ds[0])
167
+ logger.debug("Query Example: %s", queries_ds[0])
156
168
 
157
169
  return queries_ds
158
170
 
159
- def _load_qrels(self) -> RelevantDocumentsType:
160
- logger.info("Loading qrels...")
161
-
171
+ def _load_qrels(self, num_proc: int) -> RelevantDocumentsType:
162
172
  config = f"{self.config}-qrels" if self.config is not None else "default"
173
+
174
+ logger.info("Loading qrels subset: %s", config)
163
175
  if config == "default" and config not in self.dataset_configs:
164
176
  if "qrels" in self.dataset_configs:
165
177
  config = "qrels"
@@ -168,7 +180,7 @@ class RetrievalDatasetLoader:
168
180
  "No qrels or default config found. Please specify a valid config or ensure the dataset has qrels."
169
181
  )
170
182
 
171
- qrels_ds = self._load_dataset_split(config)
183
+ qrels_ds = self._load_dataset_split(config, num_proc)
172
184
  qrels_ds = qrels_ds.select_columns(["query-id", "corpus-id", "score"])
173
185
 
174
186
  qrels_ds = qrels_ds.cast(
@@ -191,13 +203,12 @@ class RetrievalDatasetLoader:
191
203
  logger.info("Loaded %d %s qrels.", len(qrels_dict), self.split.upper())
192
204
  return qrels_dict
193
205
 
194
- def _load_top_ranked(self) -> TopRankedDocumentsType:
195
- logger.info("Loading Top Ranked")
196
-
206
+ def _load_top_ranked(self, num_proc: int) -> TopRankedDocumentsType:
197
207
  config = (
198
208
  f"{self.config}-top_ranked" if self.config is not None else "top_ranked"
199
209
  )
200
- top_ranked_ds = self._load_dataset_split(config)
210
+ logger.info("Loading top ranked subset: %s", config)
211
+ top_ranked_ds = self._load_dataset_split(config, num_proc)
201
212
  top_ranked_ds = top_ranked_ds.cast(
202
213
  Features(
203
214
  {
@@ -215,13 +226,12 @@ class RetrievalDatasetLoader:
215
226
  logger.info(f"Top ranked loaded: {len(top_ranked_ds)}")
216
227
  return top_ranked_dict
217
228
 
218
- def _load_instructions(self) -> InstructionDatasetType:
219
- logger.info("Loading Instructions")
220
-
229
+ def _load_instructions(self, num_proc: int) -> InstructionDatasetType:
221
230
  config = (
222
231
  f"{self.config}-instruction" if self.config is not None else "instruction"
223
232
  )
224
- instructions_ds = self._load_dataset_split(config)
233
+ logger.info("Loading instruction subset: %s", config)
234
+ instructions_ds = self._load_dataset_split(config, num_proc)
225
235
  instructions_ds = instructions_ds.cast(
226
236
  Features(
227
237
  {
@@ -236,6 +246,7 @@ class RetrievalDatasetLoader:
236
246
  def _combine_queries_with_instructions_datasets(
237
247
  queries_dataset: QueryDatasetType,
238
248
  instruction_dataset: InstructionDatasetType | dict[str, str],
249
+ num_proc: int,
239
250
  ) -> Dataset:
240
251
  if isinstance(instruction_dataset, Dataset):
241
252
  instruction_to_query_idx = {
@@ -248,4 +259,4 @@ def _combine_queries_with_instructions_datasets(
248
259
  row["instruction"] = instruction_to_query_idx[row["id"]]
249
260
  return row
250
261
 
251
- return queries_dataset.map(_add_instruction_to_query)
262
+ return queries_dataset.map(_add_instruction_to_query, num_proc=num_proc)
mteb/abstasks/sts.py CHANGED
@@ -1,19 +1,14 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import Any, TypedDict, cast
4
+ from typing import TYPE_CHECKING, Any, TypedDict, cast
4
5
 
5
- from datasets import Dataset
6
6
  from scipy.stats import pearsonr, spearmanr
7
7
 
8
8
  from mteb._evaluators import AnySTSEvaluator
9
- from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
10
9
  from mteb.models import EncoderProtocol
11
- from mteb.types import PromptType
12
10
  from mteb.types.statistics import (
13
- ImageStatistics,
14
- ScoreStatistics,
15
11
  SplitDescriptiveStatistics,
16
- TextStatistics,
17
12
  )
18
13
 
19
14
  from ._statistics_calculation import (
@@ -23,6 +18,20 @@ from ._statistics_calculation import (
23
18
  )
24
19
  from .abstask import AbsTask
25
20
 
21
+ if TYPE_CHECKING:
22
+ from pathlib import Path
23
+
24
+ from datasets import Dataset
25
+
26
+ from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
27
+ from mteb.models import MTEBModels
28
+ from mteb.types import EncodeKwargs, PromptType
29
+ from mteb.types.statistics import (
30
+ ImageStatistics,
31
+ ScoreStatistics,
32
+ TextStatistics,
33
+ )
34
+
26
35
  logger = logging.getLogger(__name__)
27
36
 
28
37
 
@@ -103,14 +112,18 @@ class AbsTaskSTS(AbsTask):
103
112
 
104
113
  def _evaluate_subset(
105
114
  self,
106
- model: EncoderProtocol,
115
+ model: MTEBModels,
107
116
  data_split: Dataset,
108
- encode_kwargs: dict[str, Any],
117
+ encode_kwargs: EncodeKwargs,
109
118
  hf_split: str,
110
119
  hf_subset: str,
111
120
  prediction_folder: Path | None = None,
121
+ num_proc: int = 1,
112
122
  **kwargs: Any,
113
123
  ) -> STSMetrics:
124
+ if not isinstance(model, EncoderProtocol):
125
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
126
+
114
127
  normalized_scores = list(map(self._normalize, data_split["score"]))
115
128
  data_split = data_split.select_columns(list(self.column_names))
116
129
 
@@ -124,7 +137,11 @@ class AbsTaskSTS(AbsTask):
124
137
  input2_prompt_type=self.input2_prompt_type,
125
138
  **kwargs,
126
139
  )
127
- scores = evaluator(model, encode_kwargs=encode_kwargs)
140
+ scores = evaluator(
141
+ model,
142
+ encode_kwargs=encode_kwargs,
143
+ num_proc=num_proc,
144
+ )
128
145
 
129
146
  if prediction_folder:
130
147
  self._save_task_predictions(
@@ -142,7 +159,7 @@ class AbsTaskSTS(AbsTask):
142
159
  ) -> STSMetrics:
143
160
  def compute_corr(x: list[float], y: list[float]) -> tuple[float, float]:
144
161
  """Return (pearson, spearman) correlations between x and y."""
145
- return pearsonr(x, y)[0], spearmanr(x, y)[0]
162
+ return float(pearsonr(x, y)[0]), float(spearmanr(x, y)[0])
146
163
 
147
164
  cosine_pearson, cosine_spearman = compute_corr(
148
165
  normalized_scores, scores["cosine_scores"]
@@ -179,7 +196,7 @@ class AbsTaskSTS(AbsTask):
179
196
  self, split: str, hf_subset: str | None = None, compute_overall: bool = False
180
197
  ) -> AnySTSDescriptiveStatistics:
181
198
  first_column, second_column = self.column_names
182
- self.dataset = cast(dict[str, dict[str, Dataset]], self.dataset)
199
+ self.dataset = cast("dict[str, dict[str, Dataset]]", self.dataset)
183
200
 
184
201
  if hf_subset:
185
202
  sentence1 = self.dataset[hf_subset][split][first_column]
@@ -233,9 +250,11 @@ class AbsTaskSTS(AbsTask):
233
250
  label_statistics=labels_statistics,
234
251
  )
235
252
 
236
- def _push_dataset_to_hub(self, repo_name: str) -> None:
253
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
237
254
  self._upload_dataset_to_hub(
238
- repo_name, [self.column_names[0], self.column_names[1], "score"]
255
+ repo_name,
256
+ [self.column_names[0], self.column_names[1], "score"],
257
+ num_proc=num_proc,
239
258
  )
240
259
 
241
260
  def _normalize(self, x: float) -> float:
@@ -1,8 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  from collections.abc import Sequence
4
6
  from pathlib import Path
5
- from typing import Any, Literal
7
+ from typing import TYPE_CHECKING, Any, Literal, cast
6
8
 
7
9
  from huggingface_hub import (
8
10
  DatasetCard,
@@ -16,13 +18,11 @@ from pydantic import (
16
18
  ConfigDict,
17
19
  field_validator,
18
20
  )
19
- from typing_extensions import Required, TypedDict
21
+ from typing_extensions import Required, TypedDict # noqa: TC002
20
22
 
21
23
  import mteb
22
24
  from mteb.languages import check_language_code
23
25
  from mteb.types import (
24
- HFSubset,
25
- ISOLanguageScript,
26
26
  Languages,
27
27
  Licenses,
28
28
  Modalities,
@@ -30,7 +30,17 @@ from mteb.types import (
30
30
  StrDate,
31
31
  StrURL,
32
32
  )
33
- from mteb.types.statistics import DescriptiveStatistics
33
+
34
+ if TYPE_CHECKING:
35
+ from huggingface_hub import (
36
+ CardData,
37
+ )
38
+
39
+ from mteb.types import (
40
+ HFSubset,
41
+ ISOLanguageScript,
42
+ )
43
+ from mteb.types.statistics import DescriptiveStatistics
34
44
 
35
45
  logger = logging.getLogger(__name__)
36
46
 
@@ -150,7 +160,7 @@ _TASK_TYPE = (
150
160
  "InstructionReranking",
151
161
  ) + MIEB_TASK_TYPE
152
162
 
153
- TaskType = Literal[_TASK_TYPE]
163
+ TaskType = Literal[_TASK_TYPE] # type: ignore[valid-type]
154
164
  """The type of the task. E.g. includes "Classification", "Retrieval" and "Clustering"."""
155
165
 
156
166
 
@@ -192,8 +202,10 @@ AnnotatorType = Literal[
192
202
  """The type of the annotators. Is often important for understanding the quality of a dataset."""
193
203
 
194
204
 
195
- PromptDict = TypedDict(
196
- "PromptDict", {prompt_type.value: str for prompt_type in PromptType}, total=False
205
+ PromptDict = TypedDict( # type: ignore[misc]
206
+ "PromptDict",
207
+ {prompt_type.value: str for prompt_type in PromptType},
208
+ total=False,
197
209
  )
198
210
  """A dictionary containing the prompt used for the task.
199
211
 
@@ -365,7 +377,7 @@ class TaskMetadata(BaseModel):
365
377
  """Return a dictionary mapping huggingface subsets to languages."""
366
378
  if isinstance(self.eval_langs, dict):
367
379
  return self.eval_langs
368
- return {"default": self.eval_langs} # type: ignore
380
+ return {"default": cast("list[str]", self.eval_langs)}
369
381
 
370
382
  @property
371
383
  def intext_citation(self, include_cite: bool = True) -> str:
@@ -376,9 +388,8 @@ class TaskMetadata(BaseModel):
376
388
  if include_cite and cite:
377
389
  # check for whitespace in the citation
378
390
  if " " in cite:
379
- logger.warning(
380
- "Citation contains whitespace. Please ensure that the citation is correctly formatted."
381
- )
391
+ msg = "Citation contains whitespace. Please ensure that the citation is correctly formatted."
392
+ logger.warning(msg)
382
393
  return f"\\cite{{{cite}}}"
383
394
  return cite
384
395
 
@@ -414,7 +425,7 @@ class TaskMetadata(BaseModel):
414
425
  for subset, subset_value in stats.items():
415
426
  if subset == "hf_subset_descriptive_stats":
416
427
  continue
417
- n_samples[subset] = subset_value["num_samples"] # type: ignore
428
+ n_samples[subset] = subset_value["num_samples"]
418
429
  return n_samples
419
430
 
420
431
  @property
@@ -447,7 +458,7 @@ class TaskMetadata(BaseModel):
447
458
  Raises:
448
459
  ValueError: If the prompt type is not recognized.
449
460
  """
450
- if prompt_type is None:
461
+ if prompt_type is None or self.category is None:
451
462
  return self.modalities
452
463
  query_modalities, doc_modalities = self.category.split("2")
453
464
  category_to_modality: dict[str, Modalities] = {
@@ -467,7 +478,7 @@ class TaskMetadata(BaseModel):
467
478
 
468
479
  def _create_dataset_card_data(
469
480
  self,
470
- existing_dataset_card_data: DatasetCardData | None = None,
481
+ existing_dataset_card_data: CardData | None = None,
471
482
  ) -> tuple[DatasetCardData, dict[str, Any]]:
472
483
  """Create a DatasetCardData object from the task metadata.
473
484
 
@@ -483,7 +494,6 @@ class TaskMetadata(BaseModel):
483
494
  dataset_type = [
484
495
  *self._hf_task_type(),
485
496
  *self._hf_task_category(),
486
- *self._hf_subtypes(),
487
497
  ]
488
498
  languages = self._hf_languages()
489
499
 
@@ -502,12 +512,13 @@ class TaskMetadata(BaseModel):
502
512
 
503
513
  tags = ["mteb"] + self.modalities
504
514
 
505
- descriptive_stats = self.descriptive_stats
506
- if descriptive_stats is not None:
507
- for split, split_stat in descriptive_stats.items():
515
+ descriptive_stats = ""
516
+ if self.descriptive_stats is not None:
517
+ descriptive_stats_ = self.descriptive_stats
518
+ for split, split_stat in descriptive_stats_.items():
508
519
  if len(split_stat.get("hf_subset_descriptive_stats", {})) > 10:
509
520
  split_stat.pop("hf_subset_descriptive_stats", {})
510
- descriptive_stats = json.dumps(descriptive_stats, indent=4)
521
+ descriptive_stats = json.dumps(descriptive_stats_, indent=4)
511
522
 
512
523
  dataset_card_data_params = existing_dataset_card_data.to_dict()
513
524
  # override the existing values
@@ -584,10 +595,8 @@ class TaskMetadata(BaseModel):
584
595
 
585
596
  def _hf_subtypes(self) -> list[str]:
586
597
  # to get full list of available task_ids execute
587
- # requests.post("https://huggingface.co/api/validate-yaml", json={
588
- # "content": "---\ntask_ids: 'test'\n---",
589
- # "repoType": "dataset"
590
- # })
598
+ # https://huggingface.co/api/datasets-tags-by-type?type=task_ids
599
+ # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
591
600
  mteb_to_hf_subtype = {
592
601
  "Article retrieval": ["document-retrieval"],
593
602
  "Conversational retrieval": ["conversational", "utterance-retrieval"],
@@ -609,7 +618,7 @@ class TaskMetadata(BaseModel):
609
618
  "hate-speech-detection",
610
619
  ],
611
620
  "Thematic clustering": [],
612
- "Scientific Reranking": [],
621
+ "Scientific Reranking": ["text-scoring"],
613
622
  "Claim verification": ["fact-checking", "fact-checking-retrieval"],
614
623
  "Topic classification": ["topic-classification"],
615
624
  "Code retrieval": [],
@@ -617,21 +626,21 @@ class TaskMetadata(BaseModel):
617
626
  "Cross-Lingual Semantic Discrimination": [],
618
627
  "Textual Entailment": ["natural-language-inference"],
619
628
  "Counterfactual Detection": [],
620
- "Emotion classification": [],
629
+ "Emotion classification": ["sentiment-classification"],
621
630
  "Reasoning as Retrieval": [],
622
631
  "Rendered Texts Understanding": [],
623
632
  "Image Text Retrieval": [],
624
633
  "Object recognition": [],
625
634
  "Scene recognition": [],
626
635
  "Caption Pairing": ["image-captioning"],
627
- "Emotion recognition": [],
636
+ "Emotion recognition": ["sentiment-scoring"],
628
637
  "Textures recognition": [],
629
638
  "Activity recognition": [],
630
639
  "Tumor detection": [],
631
640
  "Duplicate Detection": [],
632
641
  "Rendered semantic textual similarity": [
633
642
  "semantic-similarity-scoring",
634
- "rendered semantic textual similarity",
643
+ "semantic-similarity-classification",
635
644
  ],
636
645
  "Intent classification": [
637
646
  "intent-classification",
@@ -645,10 +654,8 @@ class TaskMetadata(BaseModel):
645
654
 
646
655
  def _hf_task_type(self) -> list[str]:
647
656
  # to get full list of task_types execute:
648
- # requests.post("https://huggingface.co/api/validate-yaml", json={
649
- # "content": "---\ntask_categories: ['test']\n---", "repoType": "dataset"
650
- # }).json()
651
- # or look at https://huggingface.co/tasks
657
+ # https://huggingface.co/api/datasets-tags-by-type?type=task_categories
658
+ # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
652
659
  mteb_task_type_to_datasets = {
653
660
  # Text
654
661
  "BitextMining": ["translation"],
@@ -667,7 +674,7 @@ class TaskMetadata(BaseModel):
667
674
  "Any2AnyRetrieval": ["visual-document-retrieval"],
668
675
  "Any2AnyMultilingualRetrieval": ["visual-document-retrieval"],
669
676
  "VisionCentricQA": ["visual-question-answering"],
670
- "ImageClustering": ["image-clustering"],
677
+ "ImageClustering": ["image-feature-extraction"],
671
678
  "ImageClassification": ["image-classification"],
672
679
  "ImageMultilabelClassification": ["image-classification"],
673
680
  "DocumentUnderstanding": ["visual-document-retrieval"],
@@ -695,11 +702,11 @@ class TaskMetadata(BaseModel):
695
702
 
696
703
  def _hf_languages(self) -> list[str]:
697
704
  languages: list[str] = []
698
- if self.is_multilingual:
699
- for val in list(self.eval_langs.values()):
705
+ if self.is_multilingual and isinstance(self.eval_langs, dict):
706
+ for val in self.eval_langs.values():
700
707
  languages.extend(val)
701
708
  else:
702
- languages = self.eval_langs
709
+ languages = cast("list[str]", self.eval_langs)
703
710
  # value "python" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters),
704
711
  # or a special value like "code", "multilingual".
705
712
  readme_langs = []
@@ -711,7 +718,7 @@ class TaskMetadata(BaseModel):
711
718
  readme_langs.append(lang_name)
712
719
  return sorted(set(readme_langs))
713
720
 
714
- def _hf_license(self) -> str:
721
+ def _hf_license(self) -> str | None:
715
722
  dataset_license = self.license
716
723
  if dataset_license:
717
724
  license_mapping = {