mteb-2.5.2-py3-none-any.whl → mteb-2.7.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from pathlib import Path
4
- from typing import Any, TypedDict
5
+ from typing import TYPE_CHECKING, Any, TypedDict
5
6
 
6
7
  import numpy as np
7
8
  from datasets import Dataset, DatasetDict
@@ -16,12 +17,8 @@ from sklearn.metrics import (
16
17
 
17
18
  from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
18
19
  from mteb.models import EncoderProtocol, MTEBModels
19
- from mteb.types import HFSubset, ScoresDict
20
20
  from mteb.types.statistics import (
21
- ImageStatistics,
22
- LabelStatistics,
23
21
  SplitDescriptiveStatistics,
24
- TextStatistics,
25
22
  )
26
23
 
27
24
  from ._statistics_calculation import (
@@ -31,6 +28,18 @@ from ._statistics_calculation import (
31
28
  )
32
29
  from .abstask import AbsTask
33
30
 
31
+ if TYPE_CHECKING:
32
+ from pathlib import Path
33
+
34
+ from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
35
+ from mteb.models import MTEBModels
36
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
37
+ from mteb.types.statistics import (
38
+ ImageStatistics,
39
+ LabelStatistics,
40
+ TextStatistics,
41
+ )
42
+
34
43
  logger = logging.getLogger(__name__)
35
44
 
36
45
 
@@ -98,9 +107,8 @@ class AbsTaskClassification(AbsTask):
98
107
  text: str (for text) or PIL.Image (for image). Column name can be changed via `input_column_name` attribute.
99
108
  label: int. Column name can be changed via `label_column_name` attribute.
100
109
  evaluator_model: The model to use for evaluation. Can be any sklearn compatible model. Default is `LogisticRegression`.
101
- Full details of api in [`SklearnModelProtocol`][mteb._evaluators.sklearn_evaluator.SklearnModelProtocol].
102
- samples_per_label: Number of samples per label to use for training the evaluator model. Default is 8.
103
- n_experiments: Number of experiments to run. Default is 10.
110
+ samples_per_label: Number of samples per label to use for training the evaluator model. Default is 8.
111
+ n_experiments: Number of experiments to run. Default is 10.
104
112
  train_split: Name of the split to use for training the evaluator model. Default is "train".
105
113
  label_column_name: Name of the column containing the labels. Default is "label".
106
114
  input_column_name: Name of the column containing the input data. Default is "text".
@@ -126,8 +134,9 @@ class AbsTaskClassification(AbsTask):
126
134
  split: str = "test",
127
135
  subsets_to_run: list[HFSubset] | None = None,
128
136
  *,
129
- encode_kwargs: dict[str, Any],
137
+ encode_kwargs: EncodeKwargs,
130
138
  prediction_folder: Path | None = None,
139
+ num_proc: int = 1,
131
140
  **kwargs: Any,
132
141
  ) -> dict[HFSubset, ScoresDict]:
133
142
  """Evaluate a model on the classification task.
@@ -141,7 +150,10 @@ class AbsTaskClassification(AbsTask):
141
150
  )
142
151
 
143
152
  if not self.data_loaded:
144
- self.load_data()
153
+ self.load_data(num_proc=num_proc)
154
+
155
+ if self.dataset is None:
156
+ raise RuntimeError("Dataset not loaded.")
145
157
 
146
158
  if "random_state" in self.evaluator_model.get_params():
147
159
  self.evaluator_model = self.evaluator_model.set_params(
@@ -171,23 +183,28 @@ class AbsTaskClassification(AbsTask):
171
183
  hf_subset=hf_subset,
172
184
  encode_kwargs=encode_kwargs,
173
185
  prediction_folder=prediction_folder,
186
+ num_proc=num_proc,
174
187
  **kwargs,
175
188
  )
176
189
  self._add_main_score(scores[hf_subset])
177
190
 
178
- return scores
191
+ return scores # type: ignore[return-value]
179
192
 
180
193
  def _evaluate_subset(
181
194
  self,
182
- model: EncoderProtocol,
195
+ model: MTEBModels,
183
196
  data_split: DatasetDict,
184
197
  *,
185
- encode_kwargs: dict[str, Any],
198
+ encode_kwargs: EncodeKwargs,
186
199
  hf_split: str,
187
200
  hf_subset: str,
188
201
  prediction_folder: Path | None = None,
202
+ num_proc: int = 1,
189
203
  **kwargs: Any,
190
204
  ) -> FullClassificationMetrics:
205
+ if not isinstance(model, EncoderProtocol):
206
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
207
+
191
208
  train_split = data_split[self.train_split]
192
209
  eval_split = data_split[hf_split]
193
210
 
@@ -216,7 +233,10 @@ class AbsTaskClassification(AbsTask):
216
233
  evaluator_model=self.evaluator_model,
217
234
  )
218
235
  y_pred, test_cache = evaluator(
219
- model, encode_kwargs=encode_kwargs, test_cache=test_cache
236
+ model,
237
+ encode_kwargs=encode_kwargs,
238
+ test_cache=test_cache,
239
+ num_proc=num_proc,
220
240
  )
221
241
  if prediction_folder:
222
242
  all_predictions.append(y_pred.tolist())
@@ -237,7 +257,7 @@ class AbsTaskClassification(AbsTask):
237
257
  # ap will be none for non binary classification tasks
238
258
  k: (
239
259
  float(np.mean(values))
240
- if (values := [s[k] for s in scores if s[k] is not None])
260
+ if (values := [s[k] for s in scores if s[k] is not None]) # type: ignore[literal-required]
241
261
  else np.nan
242
262
  )
243
263
  for k in scores[0].keys()
@@ -245,7 +265,7 @@ class AbsTaskClassification(AbsTask):
245
265
  logger.info(f"Running {self.metadata.name} - Finished.")
246
266
  return FullClassificationMetrics(
247
267
  scores_per_experiment=scores,
248
- **avg_scores,
268
+ **avg_scores, # type: ignore[typeddict-item]
249
269
  )
250
270
 
251
271
  def _calculate_scores(
@@ -358,11 +378,12 @@ class AbsTaskClassification(AbsTask):
358
378
  label_statistics=label_statistics,
359
379
  )
360
380
 
361
- def _push_dataset_to_hub(self, repo_name: str) -> None:
381
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
362
382
  self._upload_dataset_to_hub(
363
383
  repo_name,
364
384
  [
365
385
  self.input_column_name,
366
386
  self.label_column_name,
367
387
  ],
388
+ num_proc=num_proc,
368
389
  )
@@ -1,9 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import itertools
2
4
  import logging
3
5
  import random
4
6
  from collections import defaultdict
5
- from pathlib import Path
6
- from typing import Any
7
+ from typing import TYPE_CHECKING, Any, cast
7
8
 
8
9
  import numpy as np
9
10
  from datasets import Dataset, DatasetDict
@@ -12,12 +13,9 @@ from sklearn.metrics.cluster import v_measure_score
12
13
 
13
14
  from mteb._create_dataloaders import create_dataloader
14
15
  from mteb.models import EncoderProtocol
15
- from mteb.types import HFSubset, ScoresDict
16
+ from mteb.types import Array, HFSubset
16
17
  from mteb.types.statistics import (
17
- ImageStatistics,
18
- LabelStatistics,
19
18
  SplitDescriptiveStatistics,
20
- TextStatistics,
21
19
  )
22
20
 
23
21
  from ._statistics_calculation import (
@@ -27,6 +25,17 @@ from ._statistics_calculation import (
27
25
  )
28
26
  from .abstask import AbsTask
29
27
 
28
+ if TYPE_CHECKING:
29
+ from pathlib import Path
30
+
31
+ from mteb.models import MTEBModels
32
+ from mteb.types import Array, EncodeKwargs, ScoresDict
33
+ from mteb.types.statistics import (
34
+ ImageStatistics,
35
+ LabelStatistics,
36
+ TextStatistics,
37
+ )
38
+
30
39
  logger = logging.getLogger(__name__)
31
40
 
32
41
 
@@ -34,7 +43,7 @@ MultilingualDataset = dict[HFSubset, DatasetDict]
34
43
 
35
44
 
36
45
  def _evaluate_clustering_bootstrapped(
37
- embeddings: np.ndarray,
46
+ embeddings: Array,
38
47
  labels: list[list[str]],
39
48
  n_clusters: int,
40
49
  cluster_size: int,
@@ -61,21 +70,21 @@ def _evaluate_clustering_bootstrapped(
61
70
  max_depth = max(map(len, labels))
62
71
  # Evaluate on each level til max depth
63
72
  for i_level in range(max_depth):
64
- level_labels = []
73
+ level_labels: list[str | int] = []
65
74
  # Assign -1 to gold label if the level is not there
66
75
  for label in labels:
67
76
  if len(label) > i_level:
68
77
  level_labels.append(label[i_level])
69
78
  else:
70
79
  level_labels.append(-1)
71
- level_labels = np.array(level_labels)
80
+ np_level_labels = np.array(level_labels)
72
81
  valid_idx = np.array(
73
- [level_label != -1 for level_label in level_labels]
82
+ [level_label != -1 for level_label in np_level_labels]
74
83
  ) # Could be level_labels != -1 but fails with FutureWarning: elementwise comparison failed
75
- level_labels = level_labels[valid_idx]
84
+ np_level_labels = np_level_labels[valid_idx]
76
85
  level_embeddings = embeddings[valid_idx]
77
86
  clustering_model = MiniBatchKMeans(
78
- n_clusters=np.unique(level_labels).size,
87
+ n_clusters=np.unique(np_level_labels).size,
79
88
  batch_size=kmean_batch_size,
80
89
  init="k-means++",
81
90
  n_init=1, # default when kmeans++ is used
@@ -87,7 +96,7 @@ def _evaluate_clustering_bootstrapped(
87
96
  cluster_indices = rng_state.choices(range(n_embeddings), k=cluster_size)
88
97
 
89
98
  _embeddings = level_embeddings[cluster_indices]
90
- _labels = level_labels[cluster_indices]
99
+ _labels = np_level_labels[cluster_indices]
91
100
  cluster_assignment = clustering_model.fit_predict(_embeddings)
92
101
  v_measure = v_measure_score(_labels, cluster_assignment)
93
102
  v_measures[f"Level {i_level}"].append(v_measure)
@@ -153,15 +162,20 @@ class AbsTaskClustering(AbsTask):
153
162
 
154
163
  def _evaluate_subset(
155
164
  self,
156
- model: EncoderProtocol,
165
+ model: MTEBModels,
157
166
  data_split: Dataset,
158
167
  *,
159
- encode_kwargs: dict[str, Any],
168
+ encode_kwargs: EncodeKwargs,
160
169
  hf_split: str,
161
170
  hf_subset: str,
162
171
  prediction_folder: Path | None = None,
172
+ num_proc: int = 1,
163
173
  **kwargs: Any,
164
174
  ) -> ScoresDict:
175
+ if not isinstance(model, EncoderProtocol):
176
+ raise TypeError(
177
+ "Expected encoder model to be an instance of EncoderProtocol."
178
+ )
165
179
  if (
166
180
  self.max_document_to_embed is not None
167
181
  and self.max_fraction_of_documents_to_embed is not None
@@ -182,13 +196,13 @@ class AbsTaskClustering(AbsTask):
182
196
  self.max_fraction_of_documents_to_embed * len(data_split)
183
197
  )
184
198
  else:
185
- max_documents_to_embed = self.max_document_to_embed
199
+ max_documents_to_embed = cast("int", self.max_document_to_embed)
186
200
 
187
- max_documents_to_embed = min(len(data_split), max_documents_to_embed) # type: ignore
201
+ max_documents_to_embed = min(len(data_split), max_documents_to_embed)
188
202
  example_indices = self.rng_state.sample(
189
203
  range(len(data_split)), k=max_documents_to_embed
190
204
  )
191
- downsampled_dataset = data_split.select(example_indices) # type: ignore
205
+ downsampled_dataset = data_split.select(example_indices)
192
206
 
193
207
  downsampled_dataset = downsampled_dataset.select_columns(
194
208
  [self.input_column_name, self.label_column_name]
@@ -200,6 +214,7 @@ class AbsTaskClustering(AbsTask):
200
214
  downsampled_dataset,
201
215
  self.metadata,
202
216
  input_column=self.input_column_name,
217
+ num_proc=num_proc,
203
218
  **encode_kwargs,
204
219
  ),
205
220
  task_metadata=self.metadata,
@@ -283,9 +298,11 @@ class AbsTaskClustering(AbsTask):
283
298
  labels_statistics=label_statistics,
284
299
  )
285
300
 
286
- def _push_dataset_to_hub(self, repo_name: str) -> None:
301
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
287
302
  self._upload_dataset_to_hub(
288
- repo_name, [self.input_column_name, self.label_column_name]
303
+ repo_name,
304
+ [self.input_column_name, self.label_column_name],
305
+ num_proc=num_proc,
289
306
  )
290
307
 
291
308
 
@@ -1,6 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import Any, TypedDict
4
+ from typing import TYPE_CHECKING, Any, TypedDict
4
5
 
5
6
  import numpy as np
6
7
  from datasets import Dataset
@@ -8,13 +9,9 @@ from scipy.optimize import linear_sum_assignment
8
9
  from sklearn import metrics
9
10
 
10
11
  from mteb._evaluators import ClusteringEvaluator
11
- from mteb.models import EncoderProtocol
12
- from mteb.types import ScoresDict
12
+ from mteb.models import EncoderProtocol, MTEBModels
13
13
  from mteb.types.statistics import (
14
- ImageStatistics,
15
- LabelStatistics,
16
14
  SplitDescriptiveStatistics,
17
- TextStatistics,
18
15
  )
19
16
 
20
17
  from ._statistics_calculation import (
@@ -24,6 +21,17 @@ from ._statistics_calculation import (
24
21
  )
25
22
  from .abstask import AbsTask
26
23
 
24
+ if TYPE_CHECKING:
25
+ from pathlib import Path
26
+
27
+ from mteb.models import MTEBModels
28
+ from mteb.types import EncodeKwargs, ScoresDict
29
+ from mteb.types.statistics import (
30
+ ImageStatistics,
31
+ LabelStatistics,
32
+ TextStatistics,
33
+ )
34
+
27
35
  logger = logging.getLogger(__name__)
28
36
 
29
37
 
@@ -80,15 +88,19 @@ class AbsTaskClusteringLegacy(AbsTask):
80
88
 
81
89
  def _evaluate_subset(
82
90
  self,
83
- model: EncoderProtocol,
91
+ model: MTEBModels,
84
92
  data_split: Dataset,
85
93
  *,
86
- encode_kwargs: dict[str, Any],
94
+ encode_kwargs: EncodeKwargs,
87
95
  hf_split: str,
88
96
  hf_subset: str,
89
97
  prediction_folder: Path | None = None,
98
+ num_proc: int = 1,
90
99
  **kwargs: Any,
91
100
  ) -> ScoresDict:
101
+ if not isinstance(model, EncoderProtocol):
102
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
103
+
92
104
  data_split = data_split.select_columns(
93
105
  [self.input_column_name, self.label_column_name]
94
106
  )
@@ -139,9 +151,6 @@ class AbsTaskClusteringLegacy(AbsTask):
139
151
  }
140
152
  return scores
141
153
 
142
- data_split = data_split.select_columns(
143
- [self.input_column_name, self.label_column_name]
144
- )
145
154
  evaluator = self.evaluator(
146
155
  data_split,
147
156
  input_column_name=self.input_column_name,
@@ -151,10 +160,14 @@ class AbsTaskClusteringLegacy(AbsTask):
151
160
  hf_subset=hf_subset,
152
161
  **kwargs,
153
162
  )
154
- clusters = evaluator(model, encode_kwargs=encode_kwargs)
163
+ evaluate_clusters = evaluator(
164
+ model,
165
+ encode_kwargs=encode_kwargs,
166
+ num_proc=num_proc,
167
+ )
155
168
  if prediction_folder:
156
169
  self._save_task_predictions(
157
- clusters,
170
+ evaluate_clusters,
158
171
  model,
159
172
  prediction_folder,
160
173
  hf_subset=hf_subset,
@@ -163,7 +176,7 @@ class AbsTaskClusteringLegacy(AbsTask):
163
176
 
164
177
  return self._compute_metrics(
165
178
  data_split[self.label_column_name],
166
- clusters,
179
+ evaluate_clusters,
167
180
  )
168
181
 
169
182
  def _compute_metrics(
@@ -230,11 +243,12 @@ class AbsTaskClusteringLegacy(AbsTask):
230
243
  label_statistics=label_statistics,
231
244
  )
232
245
 
233
- def _push_dataset_to_hub(self, repo_name: str) -> None:
246
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
234
247
  self._upload_dataset_to_hub(
235
248
  repo_name,
236
249
  [
237
250
  self.input_column_name,
238
251
  self.label_column_name,
239
252
  ],
253
+ num_proc=num_proc,
240
254
  )
@@ -1,10 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections.abc import Sequence
3
- from pathlib import Path
4
- from typing import Any, TypedDict
5
+ from typing import TYPE_CHECKING, Any, TypedDict
5
6
 
6
7
  import torch
7
- from datasets import Dataset, concatenate_datasets
8
+ from datasets import concatenate_datasets
8
9
 
9
10
  from mteb._evaluators import ImageTextPairClassificationEvaluator
10
11
  from mteb.abstasks._statistics_calculation import (
@@ -14,11 +15,21 @@ from mteb.abstasks._statistics_calculation import (
14
15
  from mteb.abstasks.abstask import AbsTask
15
16
  from mteb.models.models_protocols import EncoderProtocol
16
17
  from mteb.types.statistics import (
17
- ImageStatistics,
18
18
  SplitDescriptiveStatistics,
19
- TextStatistics,
20
19
  )
21
20
 
21
+ if TYPE_CHECKING:
22
+ from pathlib import Path
23
+
24
+ from datasets import Dataset
25
+
26
+ from mteb.models.models_protocols import MTEBModels
27
+ from mteb.types import EncodeKwargs
28
+ from mteb.types.statistics import (
29
+ ImageStatistics,
30
+ TextStatistics,
31
+ )
32
+
22
33
  logger = logging.getLogger(__name__)
23
34
 
24
35
 
@@ -116,15 +127,18 @@ class AbsTaskImageTextPairClassification(AbsTask):
116
127
 
117
128
  def _evaluate_subset(
118
129
  self,
119
- model: EncoderProtocol,
130
+ model: MTEBModels,
120
131
  data_split: Dataset,
121
132
  *,
122
- encode_kwargs: dict[str, Any],
133
+ encode_kwargs: EncodeKwargs,
123
134
  hf_split: str,
124
135
  hf_subset: str,
125
136
  prediction_folder: Path | None = None,
137
+ num_proc: int = 1,
126
138
  **kwargs: Any,
127
139
  ) -> ImageTextPairClassificationMetrics:
140
+ if not isinstance(model, EncoderProtocol):
141
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
128
142
  select_columns = []
129
143
  for columns in (self.images_column_names, self.texts_column_names):
130
144
  if isinstance(columns, str):
@@ -154,7 +168,9 @@ class AbsTaskImageTextPairClassification(AbsTask):
154
168
  hf_subset=hf_subset,
155
169
  **kwargs,
156
170
  )
157
- scores = evaluator(model, encode_kwargs=encode_kwargs)
171
+ scores: list[torch.Tensor] = evaluator(
172
+ model, encode_kwargs=encode_kwargs, num_proc=num_proc
173
+ ) # type: ignore[assignment]
158
174
  if prediction_folder:
159
175
  self._save_task_predictions(
160
176
  [score.tolist() for score in scores],
@@ -202,7 +218,7 @@ class AbsTaskImageTextPairClassification(AbsTask):
202
218
  accuracy=torch.Tensor(all_correct_scores).float().mean().item(),
203
219
  )
204
220
 
205
- def _push_dataset_to_hub(self, repo_name: str) -> None:
221
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
206
222
  text_columns = (
207
223
  [self.texts_column_names]
208
224
  if isinstance(self.texts_column_names, str)
@@ -217,4 +233,5 @@ class AbsTaskImageTextPairClassification(AbsTask):
217
233
  self._upload_dataset_to_hub(
218
234
  repo_name,
219
235
  [*text_columns, *image_columns],
236
+ num_proc=num_proc,
220
237
  )
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import itertools
2
4
  import logging
3
5
  from collections import defaultdict
4
- from pathlib import Path
5
- from typing import Any, TypedDict
6
+ from typing import TYPE_CHECKING, Any, TypedDict
6
7
 
7
8
  import numpy as np
8
9
  from datasets import DatasetDict
@@ -15,23 +16,29 @@ from typing_extensions import override
15
16
 
16
17
  from mteb._create_dataloaders import create_dataloader
17
18
  from mteb._evaluators.classification_metrics import hamming_score
18
- from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
19
19
  from mteb.models import EncoderProtocol
20
20
 
21
21
  from .classification import AbsTaskClassification
22
22
 
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
27
+ from mteb.models import MTEBModels
28
+ from mteb.types import Array, EncodeKwargs
29
+
23
30
  logger = logging.getLogger(__name__)
24
31
 
25
32
 
26
33
  def _evaluate_classifier(
27
- embeddings_train: np.ndarray,
34
+ embeddings_train: Array,
28
35
  y_train: np.ndarray,
29
- embeddings_test: np.ndarray,
36
+ embeddings_test: Array,
30
37
  classifier: SklearnModelProtocol,
31
38
  ) -> tuple[np.ndarray, SklearnModelProtocol]:
32
- classifier: SklearnModelProtocol = clone(classifier)
33
- classifier.fit(embeddings_train, y_train)
34
- return classifier.predict(embeddings_test), classifier
39
+ classifier_copy: SklearnModelProtocol = clone(classifier)
40
+ classifier_copy.fit(embeddings_train, y_train)
41
+ return classifier_copy.predict(embeddings_test), classifier_copy
35
42
 
36
43
 
37
44
  class MultilabelClassificationMetrics(TypedDict):
@@ -69,25 +76,29 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
69
76
  input_column_name: Name of the column containing the input text.
70
77
  label_column_name: Name of the column containing the labels.
71
78
  samples_per_label: Number of samples to use pr. label. These samples are embedded and a classifier is fit using the labels and samples.
72
- evaluator: Classifier to use for evaluation. Must implement the SklearnModelProtocol.
79
+ evaluator_model: Classifier to use for evaluation. Must implement the SklearnModelProtocol.
73
80
  """
74
81
 
75
- evaluator: SklearnModelProtocol = KNeighborsClassifier(n_neighbors=5)
82
+ evaluator_model: SklearnModelProtocol = KNeighborsClassifier(n_neighbors=5)
76
83
  input_column_name: str = "text"
77
84
  label_column_name: str = "label"
78
85
 
79
86
  @override
80
- def _evaluate_subset(
87
+ def _evaluate_subset( # type: ignore[override]
81
88
  self,
82
- model: EncoderProtocol,
89
+ model: MTEBModels,
83
90
  data_split: DatasetDict,
84
91
  *,
85
- encode_kwargs: dict[str, Any],
92
+ encode_kwargs: EncodeKwargs,
86
93
  hf_split: str,
87
94
  hf_subset: str,
88
95
  prediction_folder: Path | None = None,
96
+ num_proc: int = 1,
89
97
  **kwargs: Any,
90
98
  ) -> FullMultilabelClassificationMetrics:
99
+ if not isinstance(model, EncoderProtocol):
100
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
101
+
91
102
  if isinstance(data_split, DatasetDict):
92
103
  data_split = data_split.select_columns(
93
104
  [self.input_column_name, self.label_column_name]
@@ -115,6 +126,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
115
126
  unique_train_dataset,
116
127
  self.metadata,
117
128
  input_column=self.input_column_name,
129
+ num_proc=num_proc,
118
130
  **encode_kwargs,
119
131
  )
120
132
 
@@ -165,7 +177,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
165
177
  y_train = train_split.select(sample_indices)[self.label_column_name]
166
178
  y_train = binarizer.transform(y_train)
167
179
  y_pred, current_classifier = _evaluate_classifier(
168
- X_train, y_train, X_test, self.evaluator
180
+ X_train, y_train, X_test, self.evaluator_model
169
181
  )
170
182
  if prediction_folder:
171
183
  all_predictions.append(y_pred.tolist())
@@ -185,19 +197,20 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
185
197
  )
186
198
 
187
199
  avg_scores: dict[str, Any] = {
188
- k: np.mean([s[k] for s in scores]) for k in scores[0].keys()
200
+ k: np.mean([s[k] for s in scores]) # type: ignore[literal-required]
201
+ for k in scores[0].keys()
189
202
  }
190
203
  logger.info("Running multilabel classification - Finished.")
191
204
  return FullMultilabelClassificationMetrics(
192
205
  scores_per_experiment=scores,
193
- **avg_scores,
206
+ **avg_scores, # type: ignore[typeddict-item]
194
207
  )
195
208
 
196
- def _calculate_scores(
209
+ def _calculate_scores( # type: ignore[override]
197
210
  self,
198
211
  y_test: np.ndarray,
199
212
  y_pred: np.ndarray,
200
- x_test_embedding: np.ndarray,
213
+ x_test_embedding: Array,
201
214
  current_classifier: SklearnModelProtocol,
202
215
  ) -> MultilabelClassificationMetrics:
203
216
  accuracy = current_classifier.score(x_test_embedding, y_test)
@@ -232,10 +245,9 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
232
245
  """
233
246
  sample_indices = []
234
247
  if idxs is None:
235
- idxs = np.arange(len(y))
248
+ idxs = list(np.arange(len(y)))
236
249
  self.np_rng.shuffle(idxs)
237
- idxs = idxs.tolist()
238
- label_counter = defaultdict(int)
250
+ label_counter: dict[int, int] = defaultdict(int)
239
251
  for i in idxs:
240
252
  if any((label_counter[label] < samples_per_label) for label in y[i]):
241
253
  sample_indices.append(i)