mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import hashlib
2
4
  from collections import Counter
3
-
4
- from PIL import Image
5
+ from collections.abc import Mapping
6
+ from typing import TYPE_CHECKING, cast
5
7
 
6
8
  from mteb.types import TopRankedDocumentsType
7
9
  from mteb.types.statistics import (
@@ -13,6 +15,9 @@ from mteb.types.statistics import (
13
15
  TopRankedStatistics,
14
16
  )
15
17
 
18
+ if TYPE_CHECKING:
19
+ from PIL import Image
20
+
16
21
 
17
22
  def calculate_text_statistics(texts: list[str]) -> TextStatistics:
18
23
  """Calculate descriptive statistics for a list of texts.
@@ -48,7 +53,7 @@ def calculate_image_statistics(images: list[Image.Image]) -> ImageStatistics:
48
53
  seen_hashes: set[str] = set()
49
54
 
50
55
  for img in images:
51
- width, height = img.size # type: ignore
56
+ width, height = img.size
52
57
  img_heights.append(height)
53
58
  img_widths.append(width)
54
59
 
@@ -78,17 +83,24 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
78
83
  LabelStatistics: A dictionary containing the descriptive statistics.
79
84
 
80
85
  """
86
+ total_labels: list[int | None] = []
87
+
81
88
  if not isinstance(labels[0], list):
82
- label_len = [1] * len(labels)
83
- total_label_len = len(labels)
84
- total_labels = labels
89
+ # single label classification
90
+ single_label = cast(list[int], labels)
91
+ label_len = [1] * len(single_label)
92
+ total_label_len = len(single_label)
93
+ total_labels.extend(single_label)
85
94
  elif isinstance(labels[0], list):
86
95
  # multilabel classification
87
- label_len = [len(l) for l in labels]
96
+ multilabel_labels = cast(list[list[int]], labels)
97
+ label_len = [len(l) for l in multilabel_labels]
88
98
  total_label_len = sum(label_len)
89
- total_labels = []
90
- for l in labels:
91
- total_labels.extend(l if len(l) > 0 else [None])
99
+ for l in multilabel_labels:
100
+ if l and len(l) > 0:
101
+ total_labels.extend(l)
102
+ else:
103
+ total_labels.append(None)
92
104
  else:
93
105
  raise ValueError(
94
106
  "Labels must be a list of integers or a list of lists of integers."
@@ -155,7 +167,7 @@ def calculate_top_ranked_statistics(
155
167
 
156
168
 
157
169
  def calculate_relevant_docs_statistics(
158
- relevant_docs: dict[str, dict[str, float]],
170
+ relevant_docs: Mapping[str, Mapping[str, int]],
159
171
  ) -> RelevantDocsStatistics:
160
172
  qrels_lengths = [len(relevant_docs[qid]) for qid in relevant_docs]
161
173
  unique_qrels = len({doc for qid in relevant_docs for doc in relevant_docs[qid]})
@@ -39,6 +39,7 @@ Bibtex:
39
39
  """
40
40
 
41
41
  import itertools
42
+ from typing import Any
42
43
 
43
44
  import numpy as np
44
45
  import scipy.sparse as sp
@@ -119,8 +120,10 @@ def _get_most_desired_combination(samples_with_combination: dict):
119
120
  if support_size == 0:
120
121
  continue
121
122
  if currently_chosen is None or (
122
- best_number_of_combinations < number_of_combinations # type: ignore
123
- and best_support_size > support_size # type: ignore
123
+ best_number_of_combinations is not None
124
+ and best_support_size is not None
125
+ and best_number_of_combinations < number_of_combinations
126
+ and best_support_size > support_size
124
127
  ):
125
128
  currently_chosen = combination
126
129
  best_number_of_combinations, best_support_size = (
@@ -162,7 +165,7 @@ class IterativeStratification(_BaseKFold):
162
165
  self._rng_state = check_random_state(random_state)
163
166
  need_shuffle = shuffle or random_state is not None
164
167
  self.order = order
165
- super().__init__( # type: ignore
168
+ super().__init__(
166
169
  n_splits,
167
170
  shuffle=need_shuffle,
168
171
  random_state=self._rng_state if need_shuffle else None,
@@ -172,8 +175,7 @@ class IterativeStratification(_BaseKFold):
172
175
  self.percentage_per_fold = sample_distribution_per_fold
173
176
  else:
174
177
  self.percentage_per_fold = [
175
- 1 / float(self.n_splits)
176
- for _ in range(self.n_splits) # type: ignore
178
+ 1 / float(self.n_splits) for _ in range(self.n_splits)
177
179
  ]
178
180
 
179
181
  def _prepare_stratification(
@@ -182,9 +184,9 @@ class IterativeStratification(_BaseKFold):
182
184
  list[list[int]],
183
185
  dict[int, bool],
184
186
  list[list[int]],
185
- list[list[list[int]]],
186
- dict[tuple[int, ...], list[int]],
187
- list[list[int]],
187
+ list[list[Any]],
188
+ dict[str, list[Any]],
189
+ list[list[Any]],
188
190
  ]:
189
191
  """Prepares variables for performing stratification
190
192
 
@@ -206,14 +208,14 @@ class IterativeStratification(_BaseKFold):
206
208
  """
207
209
  self.n_samples, self.n_labels = y.shape
208
210
  self.desired_samples_per_fold = np.array(
209
- [self.percentage_per_fold[i] * self.n_samples for i in range(self.n_splits)] # type: ignore
211
+ [self.percentage_per_fold[i] * self.n_samples for i in range(self.n_splits)]
210
212
  )
211
213
  rows = sp.lil_matrix(y).rows
212
214
  rows_used = dict.fromkeys(range(self.n_samples), False)
213
215
  all_combinations = []
214
- per_row_combinations = [[] for i in range(self.n_samples)]
215
- samples_with_combination = {}
216
- folds = [[] for _ in range(self.n_splits)] # type: ignore
216
+ per_row_combinations: list[list[Any]] = [[] for i in range(self.n_samples)]
217
+ samples_with_combination: dict[str, list[Any]] = {}
218
+ folds: list[list[int]] = [[] for _ in range(self.n_splits)]
217
219
 
218
220
  # for every row
219
221
  for sample_index, label_assignment in enumerate(rows):
@@ -229,21 +231,19 @@ class IterativeStratification(_BaseKFold):
229
231
  all_combinations.append(combination)
230
232
  per_row_combinations[sample_index].append(combination)
231
233
 
232
- all_combinations = [list(x) for x in set(all_combinations)]
233
-
234
234
  self.desired_samples_per_combination_per_fold = {
235
235
  combination: np.array(
236
236
  [
237
237
  len(evidence_for_combination) * self.percentage_per_fold[j]
238
- for j in range(self.n_splits) # type: ignore
238
+ for j in range(self.n_splits)
239
239
  ]
240
240
  )
241
241
  for combination, evidence_for_combination in samples_with_combination.items()
242
242
  }
243
243
  return (
244
- rows,
244
+ rows.tolist(),
245
245
  rows_used,
246
- all_combinations,
246
+ [list(x) for x in set(all_combinations)],
247
247
  per_row_combinations,
248
248
  samples_with_combination,
249
249
  folds,
@@ -328,7 +328,7 @@ class IterativeStratification(_BaseKFold):
328
328
  per_row_combinations,
329
329
  samples_with_combination,
330
330
  folds,
331
- ) = self._prepare_stratification(y) # type: ignore
331
+ ) = self._prepare_stratification(y)
332
332
 
333
333
  self._distribute_positive_evidence(
334
334
  rows_used, folds, samples_with_combination, per_row_combinations
mteb/abstasks/abstask.py CHANGED
@@ -1,10 +1,11 @@
1
1
  import json
2
2
  import logging
3
+ import warnings
3
4
  from abc import ABC, abstractmethod
4
- from collections.abc import Sequence
5
+ from collections.abc import Mapping, Sequence
5
6
  from copy import copy
6
7
  from pathlib import Path
7
- from typing import Any, cast
8
+ from typing import Any, Literal, cast
8
9
 
9
10
  import numpy as np
10
11
  from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
@@ -22,6 +23,7 @@ from mteb.models import (
22
23
  SearchProtocol,
23
24
  )
24
25
  from mteb.types import HFSubset, Modalities, ScoresDict
26
+ from mteb.types._encoder_io import EncodeKwargs
25
27
  from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
26
28
 
27
29
  logger = logging.getLogger(__name__)
@@ -78,8 +80,8 @@ class AbsTask(ABC):
78
80
  """
79
81
 
80
82
  metadata: TaskMetadata
81
- abstask_prompt: str | None = None
82
- _eval_splits: list[str] | None = None
83
+ abstask_prompt: str
84
+ _eval_splits: Sequence[str] | None = None
83
85
  dataset: dict[HFSubset, DatasetDict] | None = None
84
86
  data_loaded: bool = False
85
87
  hf_subsets: list[HFSubset]
@@ -102,9 +104,9 @@ class AbsTask(ABC):
102
104
  def check_if_dataset_is_superseded(self) -> None:
103
105
  """Check if the dataset is superseded by a newer version."""
104
106
  if self.superseded_by:
105
- logger.warning(
106
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
107
- )
107
+ msg = f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}'. We recommend using the newer version of the dataset unless you are running a specific benchmark. See `get_task('{self.superseded_by}').metadata.description` to get a description of the task and changes."
108
+ logger.warning(msg)
109
+ warnings.warn(msg)
108
110
 
109
111
  def dataset_transform(self):
110
112
  """A transform operations applied to the dataset after loading.
@@ -120,10 +122,10 @@ class AbsTask(ABC):
120
122
  split: str = "test",
121
123
  subsets_to_run: list[HFSubset] | None = None,
122
124
  *,
123
- encode_kwargs: dict[str, Any],
125
+ encode_kwargs: EncodeKwargs,
124
126
  prediction_folder: Path | None = None,
125
127
  **kwargs: Any,
126
- ) -> dict[HFSubset, ScoresDict]:
128
+ ) -> Mapping[HFSubset, ScoresDict]:
127
129
  """Evaluates an MTEB compatible model on the task.
128
130
 
129
131
  Args:
@@ -195,12 +197,12 @@ class AbsTask(ABC):
195
197
  @abstractmethod
196
198
  def _evaluate_subset(
197
199
  self,
198
- model: EncoderProtocol,
200
+ model: MTEBModels,
199
201
  data_split: Dataset,
200
202
  *,
201
- encode_kwargs: dict[str, Any],
202
203
  hf_split: str,
203
204
  hf_subset: str,
205
+ encode_kwargs: EncodeKwargs,
204
206
  prediction_folder: Path | None = None,
205
207
  **kwargs: Any,
206
208
  ) -> ScoresDict:
@@ -210,7 +212,7 @@ class AbsTask(ABC):
210
212
 
211
213
  def _save_task_predictions(
212
214
  self,
213
- predictions: dict[str, Any] | list[Any],
215
+ predictions: Mapping[str, Any] | list[Any],
214
216
  model: MTEBModels,
215
217
  prediction_folder: Path,
216
218
  hf_split: str,
@@ -226,7 +228,7 @@ class AbsTask(ABC):
226
228
  hf_subset: The subset of the dataset (e.g. "en").
227
229
  """
228
230
  predictions_path = self._predictions_path(prediction_folder)
229
- existing_results = {
231
+ existing_results: dict[str, Any] = {
230
232
  "mteb_model_meta": {
231
233
  "model_name": model.mteb_model_meta.name,
232
234
  "revision": model.mteb_model_meta.revision,
@@ -326,7 +328,7 @@ class AbsTask(ABC):
326
328
  )
327
329
  else:
328
330
  # some of monolingual datasets explicitly adding the split name to the dataset name
329
- self.dataset = load_dataset(**self.metadata.dataset) # type: ignore
331
+ self.dataset = load_dataset(**self.metadata.dataset)
330
332
  self.dataset_transform()
331
333
  self.data_loaded = True
332
334
 
@@ -362,15 +364,19 @@ class AbsTask(ABC):
362
364
  """
363
365
  from mteb.abstasks import AbsTaskClassification
364
366
 
365
- if self.metadata.descriptive_stat_path.exists() and not overwrite_results:
367
+ existing_stats = self.metadata.descriptive_stats
368
+
369
+ if existing_stats is not None and not overwrite_results:
366
370
  logger.info("Loading metadata descriptive statistics from cache.")
367
- return self.metadata.descriptive_stats
371
+ return existing_stats
368
372
 
369
373
  if not self.data_loaded:
370
374
  self.load_data()
371
375
 
372
376
  descriptive_stats: dict[str, DescriptiveStatistics] = {}
373
- hf_subset_stat = "hf_subset_descriptive_stats"
377
+ hf_subset_stat: Literal["hf_subset_descriptive_stats"] = (
378
+ "hf_subset_descriptive_stats"
379
+ )
374
380
  eval_splits = self.metadata.eval_splits
375
381
  if isinstance(self, AbsTaskClassification):
376
382
  eval_splits.append(self.train_split)
@@ -381,7 +387,7 @@ class AbsTask(ABC):
381
387
  logger.info(f"Processing metadata for split {split}")
382
388
  if self.metadata.is_multilingual:
383
389
  descriptive_stats[split] = (
384
- self._calculate_descriptive_statistics_from_split(
390
+ self._calculate_descriptive_statistics_from_split( # type: ignore[assignment]
385
391
  split, compute_overall=True
386
392
  )
387
393
  )
@@ -400,7 +406,7 @@ class AbsTask(ABC):
400
406
  descriptive_stats[split][hf_subset_stat][hf_subset] = split_details
401
407
  else:
402
408
  split_details = self._calculate_descriptive_statistics_from_split(split)
403
- descriptive_stats[split] = split_details
409
+ descriptive_stats[split] = split_details # type: ignore[assignment]
404
410
 
405
411
  with self.metadata.descriptive_stat_path.open("w") as f:
406
412
  json.dump(descriptive_stats, f, indent=4)
@@ -437,7 +443,7 @@ class AbsTask(ABC):
437
443
 
438
444
  return self.metadata.languages
439
445
 
440
- def filter_eval_splits(self, eval_splits: list[str] | None) -> Self:
446
+ def filter_eval_splits(self, eval_splits: Sequence[str] | None) -> Self:
441
447
  """Filter the evaluation splits of the task.
442
448
 
443
449
  Args:
@@ -451,9 +457,9 @@ class AbsTask(ABC):
451
457
 
452
458
  def filter_languages(
453
459
  self,
454
- languages: list[str] | None,
455
- script: list[str] | None = None,
456
- hf_subsets: list[HFSubset] | None = None,
460
+ languages: Sequence[str] | None,
461
+ script: Sequence[str] | None = None,
462
+ hf_subsets: Sequence[HFSubset] | None = None,
457
463
  exclusive_language_filter: bool = False,
458
464
  ) -> Self:
459
465
  """Filter the languages of the task.
@@ -499,12 +505,14 @@ class AbsTask(ABC):
499
505
  self.hf_subsets = subsets_to_keep
500
506
  return self
501
507
 
502
- def _add_main_score(self, scores: dict[HFSubset, ScoresDict]) -> None:
508
+ def _add_main_score(self, scores: ScoresDict) -> None:
503
509
  scores["main_score"] = scores[self.metadata.main_score]
504
510
 
505
511
  def _upload_dataset_to_hub(
506
512
  self, repo_name: str, fields: list[str] | dict[str, str]
507
513
  ) -> None:
514
+ if self.dataset is None:
515
+ raise ValueError("Dataset not loaded")
508
516
  if self.metadata.is_multilingual:
509
517
  for config in self.metadata.eval_langs:
510
518
  logger.info(f"Converting {config} of {self.metadata.name}")
@@ -574,7 +582,7 @@ class AbsTask(ABC):
574
582
  return False
575
583
 
576
584
  @property
577
- def eval_splits(self) -> list[str]:
585
+ def eval_splits(self) -> Sequence[str]:
578
586
  """Returns the evaluation splits of the task."""
579
587
  if self._eval_splits:
580
588
  return self._eval_splits
@@ -607,9 +615,8 @@ class AbsTask(ABC):
607
615
  self.data_loaded = False
608
616
  logger.info(f"Unloaded dataset {self.metadata.name} from memory.")
609
617
  else:
610
- logger.warning(
611
- f"Dataset {self.metadata.name} is not loaded, cannot unload it."
612
- )
618
+ msg = f"Dataset `{self.metadata.name}` is not loaded, cannot unload it."
619
+ logger.warning(msg)
613
620
 
614
621
  @property
615
622
  def superseded_by(self) -> str | None:
@@ -5,7 +5,6 @@ from pydantic import ConfigDict, Field, model_validator
5
5
  from typing_extensions import Self
6
6
 
7
7
  from mteb.types import (
8
- HFSubset,
9
8
  ISOLanguageScript,
10
9
  Languages,
11
10
  Licenses,
@@ -60,14 +59,7 @@ class AggregateTaskMetadata(TaskMetadata):
60
59
  reference: str | None = None
61
60
  bibtex_citation: str | None = None
62
61
 
63
- @property
64
- def hf_subsets_to_langscripts(self) -> dict[HFSubset, list[ISOLanguageScript]]:
65
- """Return a dictionary mapping huggingface subsets to languages."""
66
- if isinstance(self.eval_langs, dict):
67
- return self.eval_langs
68
- return {"default": self.eval_langs} # type: ignore
69
-
70
- @model_validator(mode="after") # type: ignore
62
+ @model_validator(mode="after")
71
63
  def _compute_unfilled_cases(self) -> Self:
72
64
  if not self.eval_langs:
73
65
  self.eval_langs = self._compute_eval_langs()
@@ -1,14 +1,15 @@
1
1
  import logging
2
+ import warnings
3
+ from collections.abc import Mapping
2
4
  from pathlib import Path
3
5
  from typing import Any
4
6
 
5
7
  import numpy as np
6
8
  from datasets import Dataset, DatasetDict
7
- from typing_extensions import Self
8
9
 
9
10
  from mteb.models.models_protocols import MTEBModels
10
11
  from mteb.results.task_result import TaskResult
11
- from mteb.types import HFSubset, ScoresDict
12
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
12
13
  from mteb.types.statistics import DescriptiveStatistics
13
14
 
14
15
  from .abstask import AbsTask
@@ -32,7 +33,7 @@ class AbsTaskAggregate(AbsTask):
32
33
 
33
34
  def task_results_to_scores(
34
35
  self, task_results: list[TaskResult]
35
- ) -> dict[str, dict[HFSubset, ScoresDict]]:
36
+ ) -> dict[str, Mapping[HFSubset, ScoresDict]]:
36
37
  """The function that aggregated scores. Can be redefined to allow for custom aggregations.
37
38
 
38
39
  Args:
@@ -41,7 +42,7 @@ class AbsTaskAggregate(AbsTask):
41
42
  Returns:
42
43
  A dictionary with the aggregated scores.
43
44
  """
44
- scores = {}
45
+ scores: dict[str, Mapping[HFSubset, ScoresDict]] = {}
45
46
  subsets = (
46
47
  self.metadata.eval_langs.keys()
47
48
  if isinstance(self.metadata.eval_langs, dict)
@@ -113,40 +114,20 @@ class AbsTaskAggregate(AbsTask):
113
114
  )
114
115
  mteb_versions = {tr.mteb_version for tr in task_results}
115
116
  if len(mteb_versions) != 1:
116
- logger.warning(
117
- f"All tasks of {self.metadata.name} is not run using the same version."
118
- )
117
+ msg = f"All tasks of {self.metadata.name} is not run using the same version. different versions found are: {mteb_versions}"
118
+ logger.warning(msg)
119
+ warnings.warn(msg)
119
120
  task_res.mteb_version = None
120
121
  task_res.mteb_version = task_results[0].mteb_version
121
122
  return task_res
122
123
 
123
- def check_if_dataset_is_superseded(self) -> None:
124
- """Check if the dataset is superseded by a newer version"""
125
- if self.superseded_by:
126
- logger.warning(
127
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
128
- )
129
-
130
- def filter_eval_splits(self, eval_splits: list[str] | None) -> Self:
131
- """Filter the evaluation splits of the task.
132
-
133
- Args:
134
- eval_splits: List of splits to evaluate on. If None, all splits in metadata
135
- are used.
136
-
137
- Returns:
138
- The task with filtered evaluation splits.
139
- """
140
- self._eval_splits = eval_splits
141
- return self
142
-
143
124
  def evaluate(
144
125
  self,
145
126
  model: MTEBModels,
146
127
  split: str = "test",
147
128
  subsets_to_run: list[HFSubset] | None = None,
148
129
  *,
149
- encode_kwargs: dict[str, Any],
130
+ encode_kwargs: EncodeKwargs,
150
131
  prediction_folder: Path | None = None,
151
132
  **kwargs: Any,
152
133
  ) -> dict[HFSubset, ScoresDict]:
@@ -160,7 +141,7 @@ class AbsTaskAggregate(AbsTask):
160
141
  self,
161
142
  model: MTEBModels,
162
143
  data_split: DatasetDict | Dataset,
163
- encode_kwargs: dict[str, Any],
144
+ encode_kwargs: EncodeKwargs,
164
145
  **kwargs: Any,
165
146
  ) -> ScoresDict:
166
147
  raise NotImplementedError(
@@ -5,7 +5,6 @@ from typing import Any, TypedDict
5
5
 
6
6
  import numpy as np
7
7
  from datasets import Dataset, DatasetDict
8
- from PIL import ImageFile
9
8
  from sklearn.linear_model import LogisticRegression
10
9
  from sklearn.metrics import (
11
10
  accuracy_score,
@@ -17,7 +16,7 @@ from sklearn.metrics import (
17
16
 
18
17
  from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
19
18
  from mteb.models import EncoderProtocol, MTEBModels
20
- from mteb.types import HFSubset, ScoresDict
19
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
21
20
  from mteb.types.statistics import (
22
21
  ImageStatistics,
23
22
  LabelStatistics,
@@ -32,7 +31,6 @@ from ._statistics_calculation import (
32
31
  )
33
32
  from .abstask import AbsTask
34
33
 
35
- ImageFile.LOAD_TRUNCATED_IMAGES = True
36
34
  logger = logging.getLogger(__name__)
37
35
 
38
36
 
@@ -100,9 +98,8 @@ class AbsTaskClassification(AbsTask):
100
98
  text: str (for text) or PIL.Image (for image). Column name can be changed via `input_column_name` attribute.
101
99
  label: int. Column name can be changed via `label_column_name` attribute.
102
100
  evaluator_model: The model to use for evaluation. Can be any sklearn compatible model. Default is `LogisticRegression`.
103
- Full details of api in [`SklearnModelProtocol`][mteb._evaluators.sklearn_evaluator.SklearnModelProtocol].
104
- samples_per_label: Number of samples per label to use for training the evaluator model. Default is 8.
105
- n_experiments: Number of experiments to run. Default is 10.
101
+ samples_per_label: Number of samples per label to use for training the evaluator model. Default is 8.
102
+ n_experiments: Number of experiments to run. Default is 10.
106
103
  train_split: Name of the split to use for training the evaluator model. Default is "train".
107
104
  label_column_name: Name of the column containing the labels. Default is "label".
108
105
  input_column_name: Name of the column containing the input data. Default is "text".
@@ -128,7 +125,7 @@ class AbsTaskClassification(AbsTask):
128
125
  split: str = "test",
129
126
  subsets_to_run: list[HFSubset] | None = None,
130
127
  *,
131
- encode_kwargs: dict[str, Any],
128
+ encode_kwargs: EncodeKwargs,
132
129
  prediction_folder: Path | None = None,
133
130
  **kwargs: Any,
134
131
  ) -> dict[HFSubset, ScoresDict]:
@@ -145,6 +142,9 @@ class AbsTaskClassification(AbsTask):
145
142
  if not self.data_loaded:
146
143
  self.load_data()
147
144
 
145
+ if self.dataset is None:
146
+ raise RuntimeError("Dataset not loaded.")
147
+
148
148
  if "random_state" in self.evaluator_model.get_params():
149
149
  self.evaluator_model = self.evaluator_model.set_params(
150
150
  random_state=self.seed
@@ -177,19 +177,22 @@ class AbsTaskClassification(AbsTask):
177
177
  )
178
178
  self._add_main_score(scores[hf_subset])
179
179
 
180
- return scores
180
+ return scores # type: ignore[return-value]
181
181
 
182
182
  def _evaluate_subset(
183
183
  self,
184
- model: EncoderProtocol,
184
+ model: MTEBModels,
185
185
  data_split: DatasetDict,
186
186
  *,
187
- encode_kwargs: dict[str, Any],
187
+ encode_kwargs: EncodeKwargs,
188
188
  hf_split: str,
189
189
  hf_subset: str,
190
190
  prediction_folder: Path | None = None,
191
191
  **kwargs: Any,
192
192
  ) -> FullClassificationMetrics:
193
+ if not isinstance(model, EncoderProtocol):
194
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
195
+
193
196
  train_split = data_split[self.train_split]
194
197
  eval_split = data_split[hf_split]
195
198
 
@@ -239,7 +242,7 @@ class AbsTaskClassification(AbsTask):
239
242
  # ap will be none for non binary classification tasks
240
243
  k: (
241
244
  float(np.mean(values))
242
- if (values := [s[k] for s in scores if s[k] is not None])
245
+ if (values := [s[k] for s in scores if s[k] is not None]) # type: ignore[literal-required]
243
246
  else np.nan
244
247
  )
245
248
  for k in scores[0].keys()
@@ -247,7 +250,7 @@ class AbsTaskClassification(AbsTask):
247
250
  logger.info(f"Running {self.metadata.name} - Finished.")
248
251
  return FullClassificationMetrics(
249
252
  scores_per_experiment=scores,
250
- **avg_scores,
253
+ **avg_scores, # type: ignore[typeddict-item]
251
254
  )
252
255
 
253
256
  def _calculate_scores(