mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -2,9 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  from collections import Counter
5
- from typing import TYPE_CHECKING
5
+ from typing import TYPE_CHECKING, cast
6
6
 
7
- from mteb.types import TopRankedDocumentsType
8
7
  from mteb.types.statistics import (
9
8
  ImageStatistics,
10
9
  LabelStatistics,
@@ -15,8 +14,12 @@ from mteb.types.statistics import (
15
14
  )
16
15
 
17
16
  if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
18
19
  from PIL import Image
19
20
 
21
+ from mteb.types import TopRankedDocumentsType
22
+
20
23
 
21
24
  def calculate_text_statistics(texts: list[str]) -> TextStatistics:
22
25
  """Calculate descriptive statistics for a list of texts.
@@ -52,7 +55,7 @@ def calculate_image_statistics(images: list[Image.Image]) -> ImageStatistics:
52
55
  seen_hashes: set[str] = set()
53
56
 
54
57
  for img in images:
55
- width, height = img.size # type: ignore
58
+ width, height = img.size
56
59
  img_heights.append(height)
57
60
  img_widths.append(width)
58
61
 
@@ -82,17 +85,24 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
82
85
  LabelStatistics: A dictionary containing the descriptive statistics.
83
86
 
84
87
  """
88
+ total_labels: list[int | None] = []
89
+
85
90
  if not isinstance(labels[0], list):
86
- label_len = [1] * len(labels)
87
- total_label_len = len(labels)
88
- total_labels = labels
91
+ # single label classification
92
+ single_label = cast("list[int]", labels)
93
+ label_len = [1] * len(single_label)
94
+ total_label_len = len(single_label)
95
+ total_labels.extend(single_label)
89
96
  elif isinstance(labels[0], list):
90
97
  # multilabel classification
91
- label_len = [len(l) for l in labels]
98
+ multilabel_labels = cast("list[list[int]]", labels)
99
+ label_len = [len(l) for l in multilabel_labels]
92
100
  total_label_len = sum(label_len)
93
- total_labels = []
94
- for l in labels:
95
- total_labels.extend(l if len(l) > 0 else [None])
101
+ for l in multilabel_labels:
102
+ if l and len(l) > 0:
103
+ total_labels.extend(l)
104
+ else:
105
+ total_labels.append(None)
96
106
  else:
97
107
  raise ValueError(
98
108
  "Labels must be a list of integers or a list of lists of integers."
@@ -159,7 +169,7 @@ def calculate_top_ranked_statistics(
159
169
 
160
170
 
161
171
  def calculate_relevant_docs_statistics(
162
- relevant_docs: dict[str, dict[str, float]],
172
+ relevant_docs: Mapping[str, Mapping[str, int]],
163
173
  ) -> RelevantDocsStatistics:
164
174
  qrels_lengths = [len(relevant_docs[qid]) for qid in relevant_docs]
165
175
  unique_qrels = len({doc for qid in relevant_docs for doc in relevant_docs[qid]})
@@ -39,6 +39,7 @@ Bibtex:
39
39
  """
40
40
 
41
41
  import itertools
42
+ from typing import Any
42
43
 
43
44
  import numpy as np
44
45
  import scipy.sparse as sp
@@ -119,8 +120,10 @@ def _get_most_desired_combination(samples_with_combination: dict):
119
120
  if support_size == 0:
120
121
  continue
121
122
  if currently_chosen is None or (
122
- best_number_of_combinations < number_of_combinations # type: ignore
123
- and best_support_size > support_size # type: ignore
123
+ best_number_of_combinations is not None
124
+ and best_support_size is not None
125
+ and best_number_of_combinations < number_of_combinations
126
+ and best_support_size > support_size
124
127
  ):
125
128
  currently_chosen = combination
126
129
  best_number_of_combinations, best_support_size = (
@@ -162,7 +165,7 @@ class IterativeStratification(_BaseKFold):
162
165
  self._rng_state = check_random_state(random_state)
163
166
  need_shuffle = shuffle or random_state is not None
164
167
  self.order = order
165
- super().__init__( # type: ignore
168
+ super().__init__(
166
169
  n_splits,
167
170
  shuffle=need_shuffle,
168
171
  random_state=self._rng_state if need_shuffle else None,
@@ -172,8 +175,7 @@ class IterativeStratification(_BaseKFold):
172
175
  self.percentage_per_fold = sample_distribution_per_fold
173
176
  else:
174
177
  self.percentage_per_fold = [
175
- 1 / float(self.n_splits)
176
- for _ in range(self.n_splits) # type: ignore
178
+ 1 / float(self.n_splits) for _ in range(self.n_splits)
177
179
  ]
178
180
 
179
181
  def _prepare_stratification(
@@ -182,9 +184,9 @@ class IterativeStratification(_BaseKFold):
182
184
  list[list[int]],
183
185
  dict[int, bool],
184
186
  list[list[int]],
185
- list[list[list[int]]],
186
- dict[tuple[int, ...], list[int]],
187
- list[list[int]],
187
+ list[list[Any]],
188
+ dict[str, list[Any]],
189
+ list[list[Any]],
188
190
  ]:
189
191
  """Prepares variables for performing stratification
190
192
 
@@ -206,14 +208,14 @@ class IterativeStratification(_BaseKFold):
206
208
  """
207
209
  self.n_samples, self.n_labels = y.shape
208
210
  self.desired_samples_per_fold = np.array(
209
- [self.percentage_per_fold[i] * self.n_samples for i in range(self.n_splits)] # type: ignore
211
+ [self.percentage_per_fold[i] * self.n_samples for i in range(self.n_splits)]
210
212
  )
211
213
  rows = sp.lil_matrix(y).rows
212
214
  rows_used = dict.fromkeys(range(self.n_samples), False)
213
215
  all_combinations = []
214
- per_row_combinations = [[] for i in range(self.n_samples)]
215
- samples_with_combination = {}
216
- folds = [[] for _ in range(self.n_splits)] # type: ignore
216
+ per_row_combinations: list[list[Any]] = [[] for i in range(self.n_samples)]
217
+ samples_with_combination: dict[str, list[Any]] = {}
218
+ folds: list[list[int]] = [[] for _ in range(self.n_splits)]
217
219
 
218
220
  # for every row
219
221
  for sample_index, label_assignment in enumerate(rows):
@@ -229,21 +231,19 @@ class IterativeStratification(_BaseKFold):
229
231
  all_combinations.append(combination)
230
232
  per_row_combinations[sample_index].append(combination)
231
233
 
232
- all_combinations = [list(x) for x in set(all_combinations)]
233
-
234
234
  self.desired_samples_per_combination_per_fold = {
235
235
  combination: np.array(
236
236
  [
237
237
  len(evidence_for_combination) * self.percentage_per_fold[j]
238
- for j in range(self.n_splits) # type: ignore
238
+ for j in range(self.n_splits)
239
239
  ]
240
240
  )
241
241
  for combination, evidence_for_combination in samples_with_combination.items()
242
242
  }
243
243
  return (
244
- rows,
244
+ rows.tolist(),
245
245
  rows_used,
246
- all_combinations,
246
+ [list(x) for x in set(all_combinations)],
247
247
  per_row_combinations,
248
248
  samples_with_combination,
249
249
  folds,
@@ -328,7 +328,7 @@ class IterativeStratification(_BaseKFold):
328
328
  per_row_combinations,
329
329
  samples_with_combination,
330
330
  folds,
331
- ) = self._prepare_stratification(y) # type: ignore
331
+ ) = self._prepare_stratification(y)
332
332
 
333
333
  self._distribute_positive_evidence(
334
334
  rows_used, folds, samples_with_combination, per_row_combinations
mteb/abstasks/abstask.py CHANGED
@@ -1,28 +1,38 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
5
+ import warnings
3
6
  from abc import ABC, abstractmethod
4
7
  from collections.abc import Sequence
5
8
  from copy import copy
6
9
  from pathlib import Path
7
- from typing import Any, cast
10
+ from typing import TYPE_CHECKING, Any, Literal, cast
8
11
 
9
12
  import numpy as np
10
13
  from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
11
14
  from sklearn.preprocessing import MultiLabelBinarizer
12
15
  from tqdm.auto import tqdm
13
- from typing_extensions import Self
14
16
 
15
17
  from mteb._set_seed import _set_seed
16
- from mteb.abstasks.task_metadata import TaskMetadata
17
18
  from mteb.languages import LanguageScripts
18
19
  from mteb.models import (
19
20
  CrossEncoderProtocol,
20
21
  EncoderProtocol,
21
- MTEBModels,
22
22
  SearchProtocol,
23
23
  )
24
- from mteb.types import HFSubset, Modalities, ScoresDict
25
- from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
24
+
25
+ if TYPE_CHECKING:
26
+ from collections.abc import Mapping
27
+
28
+ from typing_extensions import Self
29
+
30
+ from mteb.abstasks.task_metadata import TaskMetadata
31
+ from mteb.models import (
32
+ MTEBModels,
33
+ )
34
+ from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
35
+ from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
26
36
 
27
37
  logger = logging.getLogger(__name__)
28
38
 
@@ -78,8 +88,8 @@ class AbsTask(ABC):
78
88
  """
79
89
 
80
90
  metadata: TaskMetadata
81
- abstask_prompt: str | None = None
82
- _eval_splits: list[str] | None = None
91
+ abstask_prompt: str
92
+ _eval_splits: Sequence[str] | None = None
83
93
  dataset: dict[HFSubset, DatasetDict] | None = None
84
94
  data_loaded: bool = False
85
95
  hf_subsets: list[HFSubset]
@@ -102,15 +112,18 @@ class AbsTask(ABC):
102
112
  def check_if_dataset_is_superseded(self) -> None:
103
113
  """Check if the dataset is superseded by a newer version."""
104
114
  if self.superseded_by:
105
- logger.warning(
106
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
107
- )
115
+ msg = f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}'. We recommend using the newer version of the dataset unless you are running a specific benchmark. See `get_task('{self.superseded_by}').metadata.description` to get a description of the task and changes."
116
+ logger.warning(msg)
117
+ warnings.warn(msg)
108
118
 
109
- def dataset_transform(self):
119
+ def dataset_transform(self, num_proc: int = 1):
110
120
  """A transform operations applied to the dataset after loading.
111
121
 
112
122
  This method is useful when the dataset from Huggingface is not in an `mteb` compatible format.
113
123
  Override this method if your dataset requires additional transformation.
124
+
125
+ Args:
126
+ num_proc: Number of processes to use for the transformation.
114
127
  """
115
128
  pass
116
129
 
@@ -120,10 +133,11 @@ class AbsTask(ABC):
120
133
  split: str = "test",
121
134
  subsets_to_run: list[HFSubset] | None = None,
122
135
  *,
123
- encode_kwargs: dict[str, Any],
136
+ encode_kwargs: EncodeKwargs,
124
137
  prediction_folder: Path | None = None,
138
+ num_proc: int = 1,
125
139
  **kwargs: Any,
126
- ) -> dict[HFSubset, ScoresDict]:
140
+ ) -> Mapping[HFSubset, ScoresDict]:
127
141
  """Evaluates an MTEB compatible model on the task.
128
142
 
129
143
  Args:
@@ -132,6 +146,7 @@ class AbsTask(ABC):
132
146
  subsets_to_run: List of huggingface subsets (HFSubsets) to evaluate. If None, all subsets are evaluated.
133
147
  encode_kwargs: Additional keyword arguments that are passed to the model's `encode` method.
134
148
  prediction_folder: Folder to save model predictions
149
+ num_proc: Number of processes to use for loading the dataset or processing.
135
150
  kwargs: Additional keyword arguments that are passed to the _evaluate_subset method.
136
151
 
137
152
  Returns:
@@ -161,7 +176,7 @@ class AbsTask(ABC):
161
176
  if not self.data_loaded:
162
177
  self.load_data()
163
178
 
164
- self.dataset = cast(dict[HFSubset, DatasetDict], self.dataset)
179
+ self.dataset = cast("dict[HFSubset, DatasetDict]", self.dataset)
165
180
 
166
181
  scores = {}
167
182
  if self.hf_subsets is None:
@@ -187,6 +202,7 @@ class AbsTask(ABC):
187
202
  hf_subset=hf_subset,
188
203
  encode_kwargs=encode_kwargs,
189
204
  prediction_folder=prediction_folder,
205
+ num_proc=num_proc,
190
206
  **kwargs,
191
207
  )
192
208
  self._add_main_score(scores[hf_subset])
@@ -195,13 +211,14 @@ class AbsTask(ABC):
195
211
  @abstractmethod
196
212
  def _evaluate_subset(
197
213
  self,
198
- model: EncoderProtocol,
214
+ model: MTEBModels,
199
215
  data_split: Dataset,
200
216
  *,
201
- encode_kwargs: dict[str, Any],
202
217
  hf_split: str,
203
218
  hf_subset: str,
219
+ encode_kwargs: EncodeKwargs,
204
220
  prediction_folder: Path | None = None,
221
+ num_proc: int = 1,
205
222
  **kwargs: Any,
206
223
  ) -> ScoresDict:
207
224
  raise NotImplementedError(
@@ -210,7 +227,7 @@ class AbsTask(ABC):
210
227
 
211
228
  def _save_task_predictions(
212
229
  self,
213
- predictions: dict[str, Any] | list[Any],
230
+ predictions: Mapping[str, Any] | list[Any],
214
231
  model: MTEBModels,
215
232
  prediction_folder: Path,
216
233
  hf_split: str,
@@ -226,7 +243,7 @@ class AbsTask(ABC):
226
243
  hf_subset: The subset of the dataset (e.g. "en").
227
244
  """
228
245
  predictions_path = self._predictions_path(prediction_folder)
229
- existing_results = {
246
+ existing_results: dict[str, Any] = {
230
247
  "mteb_model_meta": {
231
248
  "model_name": model.mteb_model_meta.name,
232
249
  "revision": model.mteb_model_meta.revision,
@@ -306,11 +323,15 @@ class AbsTask(ABC):
306
323
  ) # only take the specified test split.
307
324
  return dataset_dict
308
325
 
309
- def load_data(self) -> None:
326
+ def load_data(self, num_proc: int = 1, **kwargs: Any) -> None:
310
327
  """Loads dataset from HuggingFace hub
311
328
 
312
329
  This is the main loading function for Task. Do not overwrite this, instead we recommend using `dataset_transform`, which is called after the
313
330
  dataset is loaded using `datasets.load_dataset`.
331
+
332
+ Args:
333
+ num_proc: Number of processes to use for loading the dataset.
334
+ kwargs: Additional keyword arguments passed to the load_dataset function. Keep for forward compatibility.
314
335
  """
315
336
  if self.data_loaded:
316
337
  return
@@ -323,11 +344,12 @@ class AbsTask(ABC):
323
344
  self.dataset[hf_subset] = load_dataset(
324
345
  name=hf_subset,
325
346
  **self.metadata.dataset,
347
+ num_proc=num_proc,
326
348
  )
327
349
  else:
328
350
  # some of monolingual datasets explicitly adding the split name to the dataset name
329
- self.dataset = load_dataset(**self.metadata.dataset) # type: ignore
330
- self.dataset_transform()
351
+ self.dataset = load_dataset(**self.metadata.dataset, num_proc=num_proc)
352
+ self.dataset_transform(num_proc=num_proc)
331
353
  self.data_loaded = True
332
354
 
333
355
  def fast_load(self) -> None:
@@ -350,27 +372,32 @@ class AbsTask(ABC):
350
372
  self.dataset[lang] = DatasetDict(subset)
351
373
 
352
374
  def calculate_descriptive_statistics(
353
- self, overwrite_results: bool = False
375
+ self, overwrite_results: bool = False, num_proc: int = 1
354
376
  ) -> dict[str, DescriptiveStatistics]:
355
377
  """Calculates descriptive statistics from the dataset.
356
378
 
357
379
  Args:
358
380
  overwrite_results: Whether to overwrite existing results. If False and results already exist, the existing results will be loaded from cache.
381
+ num_proc: Number of processes to use for loading the dataset.
359
382
 
360
383
  Returns:
361
384
  A dictionary containing descriptive statistics for each split.
362
385
  """
363
386
  from mteb.abstasks import AbsTaskClassification
364
387
 
365
- if self.metadata.descriptive_stat_path.exists() and not overwrite_results:
388
+ existing_stats = self.metadata.descriptive_stats
389
+
390
+ if existing_stats is not None and not overwrite_results:
366
391
  logger.info("Loading metadata descriptive statistics from cache.")
367
- return self.metadata.descriptive_stats
392
+ return existing_stats
368
393
 
369
394
  if not self.data_loaded:
370
- self.load_data()
395
+ self.load_data(num_proc=num_proc)
371
396
 
372
397
  descriptive_stats: dict[str, DescriptiveStatistics] = {}
373
- hf_subset_stat = "hf_subset_descriptive_stats"
398
+ hf_subset_stat: Literal["hf_subset_descriptive_stats"] = (
399
+ "hf_subset_descriptive_stats"
400
+ )
374
401
  eval_splits = self.metadata.eval_splits
375
402
  if isinstance(self, AbsTaskClassification):
376
403
  eval_splits.append(self.train_split)
@@ -381,7 +408,7 @@ class AbsTask(ABC):
381
408
  logger.info(f"Processing metadata for split {split}")
382
409
  if self.metadata.is_multilingual:
383
410
  descriptive_stats[split] = (
384
- self._calculate_descriptive_statistics_from_split(
411
+ self._calculate_descriptive_statistics_from_split( # type: ignore[assignment]
385
412
  split, compute_overall=True
386
413
  )
387
414
  )
@@ -400,7 +427,7 @@ class AbsTask(ABC):
400
427
  descriptive_stats[split][hf_subset_stat][hf_subset] = split_details
401
428
  else:
402
429
  split_details = self._calculate_descriptive_statistics_from_split(split)
403
- descriptive_stats[split] = split_details
430
+ descriptive_stats[split] = split_details # type: ignore[assignment]
404
431
 
405
432
  with self.metadata.descriptive_stat_path.open("w") as f:
406
433
  json.dump(descriptive_stats, f, indent=4)
@@ -437,7 +464,7 @@ class AbsTask(ABC):
437
464
 
438
465
  return self.metadata.languages
439
466
 
440
- def filter_eval_splits(self, eval_splits: list[str] | None) -> Self:
467
+ def filter_eval_splits(self, eval_splits: Sequence[str] | None) -> Self:
441
468
  """Filter the evaluation splits of the task.
442
469
 
443
470
  Args:
@@ -451,9 +478,9 @@ class AbsTask(ABC):
451
478
 
452
479
  def filter_languages(
453
480
  self,
454
- languages: list[str] | None,
455
- script: list[str] | None = None,
456
- hf_subsets: list[HFSubset] | None = None,
481
+ languages: Sequence[str] | None,
482
+ script: Sequence[str] | None = None,
483
+ hf_subsets: Sequence[HFSubset] | None = None,
457
484
  exclusive_language_filter: bool = False,
458
485
  ) -> Self:
459
486
  """Filter the languages of the task.
@@ -499,12 +526,14 @@ class AbsTask(ABC):
499
526
  self.hf_subsets = subsets_to_keep
500
527
  return self
501
528
 
502
- def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None:
529
+ def _add_main_score(self, scores: ScoresDict) -> None:
503
530
  scores["main_score"] = scores[self.metadata.main_score]
504
531
 
505
532
  def _upload_dataset_to_hub(
506
- self, repo_name: str, fields: list[str] | dict[str, str]
533
+ self, repo_name: str, fields: list[str] | dict[str, str], num_proc: int = 1
507
534
  ) -> None:
535
+ if self.dataset is None:
536
+ raise ValueError("Dataset not loaded")
508
537
  if self.metadata.is_multilingual:
509
538
  for config in self.metadata.eval_langs:
510
539
  logger.info(f"Converting {config} of {self.metadata.name}")
@@ -526,7 +555,10 @@ class AbsTask(ABC):
526
555
  )
527
556
  sentences = DatasetDict(sentences)
528
557
  sentences.push_to_hub(
529
- repo_name, config, commit_message=f"Add {config} dataset"
558
+ repo_name,
559
+ config,
560
+ commit_message=f"Add {config} dataset",
561
+ num_proc=num_proc,
530
562
  )
531
563
  else:
532
564
  sentences = {}
@@ -543,16 +575,19 @@ class AbsTask(ABC):
543
575
  {field: self.dataset[split][field] for field in fields}
544
576
  )
545
577
  sentences = DatasetDict(sentences)
546
- sentences.push_to_hub(repo_name, commit_message="Add dataset")
578
+ sentences.push_to_hub(
579
+ repo_name, commit_message="Add dataset", num_proc=num_proc
580
+ )
547
581
 
548
- def _push_dataset_to_hub(self, repo_name: str) -> None:
582
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
549
583
  raise NotImplementedError
550
584
 
551
- def push_dataset_to_hub(self, repo_name: str) -> None:
585
+ def push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
552
586
  """Push the dataset to the HuggingFace Hub.
553
587
 
554
588
  Args:
555
589
  repo_name: The name of the repository to push the dataset to.
590
+ num_proc: Number of processes to use for loading the dataset.
556
591
 
557
592
  Examples:
558
593
  >>> import mteb
@@ -564,7 +599,7 @@ class AbsTask(ABC):
564
599
  if not self.data_loaded:
565
600
  self.load_data()
566
601
 
567
- self._push_dataset_to_hub(repo_name)
602
+ self._push_dataset_to_hub(repo_name, num_proc)
568
603
  # dataset repo not creating when pushing card
569
604
  self.metadata.push_dataset_card_to_hub(repo_name)
570
605
 
@@ -574,7 +609,7 @@ class AbsTask(ABC):
574
609
  return False
575
610
 
576
611
  @property
577
- def eval_splits(self) -> list[str]:
612
+ def eval_splits(self) -> Sequence[str]:
578
613
  """Returns the evaluation splits of the task."""
579
614
  if self._eval_splits:
580
615
  return self._eval_splits
@@ -607,9 +642,8 @@ class AbsTask(ABC):
607
642
  self.data_loaded = False
608
643
  logger.info(f"Unloaded dataset {self.metadata.name} from memory.")
609
644
  else:
610
- logger.warning(
611
- f"Dataset {self.metadata.name} is not loaded, cannot unload it."
612
- )
645
+ msg = f"Dataset `{self.metadata.name}` is not loaded, cannot unload it."
646
+ logger.warning(msg)
613
647
 
614
648
  @property
615
649
  def superseded_by(self) -> str | None:
@@ -1,29 +1,39 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
5
+ from typing import TYPE_CHECKING
3
6
 
4
7
  from pydantic import ConfigDict, Field, model_validator
5
- from typing_extensions import Self
6
8
 
7
9
  from mteb.types import (
8
- HFSubset,
9
- ISOLanguageScript,
10
10
  Languages,
11
- Licenses,
12
- Modalities,
13
- StrDate,
14
11
  )
15
12
 
16
13
  from .abstask import AbsTask
17
14
  from .task_metadata import (
18
- AnnotatorType,
19
15
  MetadataDatasetDict,
20
- SampleCreationMethod,
21
- TaskDomain,
22
16
  TaskMetadata,
23
- TaskSubtype,
24
17
  TaskType,
25
18
  )
26
19
 
20
+ if TYPE_CHECKING:
21
+ from typing_extensions import Self
22
+
23
+ from mteb.types import (
24
+ ISOLanguageScript,
25
+ Licenses,
26
+ Modalities,
27
+ StrDate,
28
+ )
29
+
30
+ from .task_metadata import (
31
+ AnnotatorType,
32
+ SampleCreationMethod,
33
+ TaskDomain,
34
+ TaskSubtype,
35
+ )
36
+
27
37
  logger = logging.getLogger(__name__)
28
38
 
29
39
 
@@ -60,14 +70,7 @@ class AggregateTaskMetadata(TaskMetadata):
60
70
  reference: str | None = None
61
71
  bibtex_citation: str | None = None
62
72
 
63
- @property
64
- def hf_subsets_to_langscripts(self) -> dict[HFSubset, list[ISOLanguageScript]]:
65
- """Return a dictionary mapping huggingface subsets to languages."""
66
- if isinstance(self.eval_langs, dict):
67
- return self.eval_langs
68
- return {"default": self.eval_langs} # type: ignore
69
-
70
- @model_validator(mode="after") # type: ignore
73
+ @model_validator(mode="after")
71
74
  def _compute_unfilled_cases(self) -> Self:
72
75
  if not self.eval_langs:
73
76
  self.eval_langs = self._compute_eval_langs()
@@ -1,18 +1,26 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import Any
4
+ import warnings
5
+ from typing import TYPE_CHECKING, Any
4
6
 
5
7
  import numpy as np
6
- from datasets import Dataset, DatasetDict
7
- from typing_extensions import Self
8
8
 
9
- from mteb.models.models_protocols import MTEBModels
10
9
  from mteb.results.task_result import TaskResult
11
- from mteb.types import HFSubset, ScoresDict
12
- from mteb.types.statistics import DescriptiveStatistics
13
10
 
14
11
  from .abstask import AbsTask
15
- from .aggregate_task_metadata import AggregateTaskMetadata
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Mapping
15
+ from pathlib import Path
16
+
17
+ from datasets import Dataset, DatasetDict
18
+
19
+ from mteb.models.models_protocols import MTEBModels
20
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
21
+ from mteb.types.statistics import DescriptiveStatistics
22
+
23
+ from .aggregate_task_metadata import AggregateTaskMetadata
16
24
 
17
25
  logger = logging.getLogger(__name__)
18
26
 
@@ -32,7 +40,7 @@ class AbsTaskAggregate(AbsTask):
32
40
 
33
41
  def task_results_to_scores(
34
42
  self, task_results: list[TaskResult]
35
- ) -> dict[str, dict[HFSubset, ScoresDict]]:
43
+ ) -> dict[str, Mapping[HFSubset, ScoresDict]]:
36
44
  """The function that aggregated scores. Can be redefined to allow for custom aggregations.
37
45
 
38
46
  Args:
@@ -41,7 +49,7 @@ class AbsTaskAggregate(AbsTask):
41
49
  Returns:
42
50
  A dictionary with the aggregated scores.
43
51
  """
44
- scores = {}
52
+ scores: dict[str, Mapping[HFSubset, ScoresDict]] = {}
45
53
  subsets = (
46
54
  self.metadata.eval_langs.keys()
47
55
  if isinstance(self.metadata.eval_langs, dict)
@@ -113,40 +121,20 @@ class AbsTaskAggregate(AbsTask):
113
121
  )
114
122
  mteb_versions = {tr.mteb_version for tr in task_results}
115
123
  if len(mteb_versions) != 1:
116
- logger.warning(
117
- f"All tasks of {self.metadata.name} is not run using the same version."
118
- )
124
+ msg = f"All tasks of {self.metadata.name} is not run using the same version. different versions found are: {mteb_versions}"
125
+ logger.warning(msg)
126
+ warnings.warn(msg)
119
127
  task_res.mteb_version = None
120
128
  task_res.mteb_version = task_results[0].mteb_version
121
129
  return task_res
122
130
 
123
- def check_if_dataset_is_superseded(self) -> None:
124
- """Check if the dataset is superseded by a newer version"""
125
- if self.superseded_by:
126
- logger.warning(
127
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
128
- )
129
-
130
- def filter_eval_splits(self, eval_splits: list[str] | None) -> Self:
131
- """Filter the evaluation splits of the task.
132
-
133
- Args:
134
- eval_splits: List of splits to evaluate on. If None, all splits in metadata
135
- are used.
136
-
137
- Returns:
138
- The task with filtered evaluation splits.
139
- """
140
- self._eval_splits = eval_splits
141
- return self
142
-
143
131
  def evaluate(
144
132
  self,
145
133
  model: MTEBModels,
146
134
  split: str = "test",
147
135
  subsets_to_run: list[HFSubset] | None = None,
148
136
  *,
149
- encode_kwargs: dict[str, Any],
137
+ encode_kwargs: EncodeKwargs,
150
138
  prediction_folder: Path | None = None,
151
139
  **kwargs: Any,
152
140
  ) -> dict[HFSubset, ScoresDict]:
@@ -160,7 +148,7 @@ class AbsTaskAggregate(AbsTask):
160
148
  self,
161
149
  model: MTEBModels,
162
150
  data_split: DatasetDict | Dataset,
163
- encode_kwargs: dict[str, Any],
151
+ encode_kwargs: EncodeKwargs,
164
152
  **kwargs: Any,
165
153
  ) -> ScoresDict:
166
154
  raise NotImplementedError(