mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,14 @@
1
1
  import logging
2
2
  from pathlib import Path
3
- from typing import Any, TypedDict
3
+ from typing import TypedDict
4
4
 
5
5
  import torch
6
6
  from datasets import Dataset
7
7
  from sklearn import metrics
8
8
 
9
9
  from mteb._evaluators import ZeroShotClassificationEvaluator
10
- from mteb.models import EncoderProtocol
10
+ from mteb.models import EncoderProtocol, MTEBModels
11
+ from mteb.types import EncodeKwargs
11
12
  from mteb.types.statistics import (
12
13
  ImageStatistics,
13
14
  LabelStatistics,
@@ -111,15 +112,18 @@ class AbsTaskZeroShotClassification(AbsTask):
111
112
 
112
113
  def _evaluate_subset(
113
114
  self,
114
- model: EncoderProtocol,
115
+ model: MTEBModels,
115
116
  data_split: Dataset,
116
117
  *,
117
118
  hf_split: str,
118
119
  hf_subset: str,
119
- encode_kwargs: dict[str, Any],
120
+ encode_kwargs: EncodeKwargs,
120
121
  prediction_folder: Path | None = None,
121
122
  **kwargs,
122
123
  ) -> ZeroShotClassificationMetrics:
124
+ if not isinstance(model, EncoderProtocol):
125
+ raise TypeError("Expected model to be an instance of EncoderProtocol")
126
+
123
127
  candidate_labels = self.get_candidate_labels()
124
128
  data_split = data_split.select_columns(
125
129
  [self.input_column_name, self.label_column_name]
@@ -1,6 +1,6 @@
1
- import math
2
1
  import re
3
2
  from collections import defaultdict
3
+ from typing import Literal
4
4
 
5
5
  import numpy as np
6
6
  import pandas as pd
@@ -32,26 +32,18 @@ def _split_on_capital(s: str) -> str:
32
32
  return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s))
33
33
 
34
34
 
35
- def _format_n_parameters(n_parameters) -> str:
36
- if (n_parameters is None) or (not int(n_parameters)):
37
- return "Unknown"
38
- n_thousand = int(n_parameters // 1e3)
39
- if n_thousand < 1:
40
- return str(int(n_parameters))
41
- n_zeros = math.log10(n_thousand)
42
- if n_zeros >= 6:
43
- return str(n_thousand // (10**6)) + "B"
44
- if n_zeros >= 3:
45
- return str(n_thousand // (10**3)) + "M"
46
- return str(n_thousand) + "K"
35
+ def _format_n_parameters(n_parameters) -> float | None:
36
+ """Format n_parameters to be in billions with decimals down to 1 million. I.e. 7M -> 0.007B, 1.5B -> 1.5B, None -> None"""
37
+ if n_parameters:
38
+ n_parameters = float(n_parameters)
39
+ return round(n_parameters / 1e9, 3)
40
+ return None
47
41
 
48
42
 
49
- def _format_max_tokens(max_tokens: float | None) -> str:
50
- if max_tokens is None:
51
- return "Unknown"
52
- if max_tokens == np.inf:
53
- return "Infinite"
54
- return str(int(max_tokens))
43
+ def _format_max_tokens(max_tokens: float | None) -> float | None:
44
+ if max_tokens is None or max_tokens == np.inf:
45
+ return None
46
+ return float(max_tokens)
55
47
 
56
48
 
57
49
  def _get_means_per_types(per_task: pd.DataFrame):
@@ -144,18 +136,18 @@ def _create_summary_table_from_benchmark_results(
144
136
  joint_table.insert(
145
137
  1,
146
138
  "Embedding Dimensions",
147
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
139
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
148
140
  )
149
141
  joint_table.insert(
150
142
  1,
151
- "Number of Parameters",
143
+ "Number of Parameters (B)",
152
144
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
153
145
  )
154
146
  joint_table.insert(
155
147
  1,
156
148
  "Memory Usage (MB)",
157
149
  model_metas.map(
158
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
150
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
159
151
  ),
160
152
  )
161
153
 
@@ -250,6 +242,65 @@ def _create_per_task_table_from_benchmark_results(
250
242
  return per_task
251
243
 
252
244
 
245
+ def _create_per_language_table_from_benchmark_results(
246
+ benchmark_results: BenchmarkResults,
247
+ language_view: list[str] | Literal["all"],
248
+ ) -> pd.DataFrame:
249
+ """Create per-language table from BenchmarkResults.
250
+
251
+ Returns a DataFrame with one row per model and one column per language.
252
+
253
+ Args:
254
+ benchmark_results: BenchmarkResults object containing model results
255
+ language_view: List of languages to include in the per-language table, or "all" for all languages present in the results
256
+ Returns:
257
+ DataFrame with per-language scores, ready for styling in the leaderboard
258
+ """
259
+ if language_view != "all" and not isinstance(language_view, list):
260
+ raise ValueError("language_view must be a list of languages or 'all'")
261
+
262
+ data = benchmark_results.to_dataframe(aggregation_level="language", format="long")
263
+
264
+ if data.empty:
265
+ no_results_frame = pd.DataFrame(
266
+ {"No results": ["You can try relaxing your criteria"]}
267
+ )
268
+ return no_results_frame
269
+
270
+ if language_view != "all":
271
+ data = data[data["language"].isin(language_view)]
272
+
273
+ per_language = data.pivot_table(
274
+ index="model_name", columns="language", values="score", aggfunc="mean"
275
+ )
276
+
277
+ to_remove = per_language.isna().all(axis="columns")
278
+ if to_remove.all():
279
+ no_results_frame = pd.DataFrame(
280
+ {"No results": ["You can try relaxing your criteria"]}
281
+ )
282
+ return no_results_frame
283
+
284
+ models_to_remove = list(per_language[to_remove].index)
285
+ per_language = per_language.drop(models_to_remove, axis=0)
286
+
287
+ per_language["borda_rank"] = _get_borda_rank(per_language)
288
+ per_language = per_language.sort_values("borda_rank", ascending=True)
289
+ per_language = per_language.drop(columns=["borda_rank"])
290
+ per_language = per_language.reset_index()
291
+
292
+ per_language["model_name"] = per_language["model_name"].map(
293
+ lambda name: name.split("/")[-1]
294
+ )
295
+ per_language = per_language.rename(
296
+ columns={
297
+ "model_name": "Model",
298
+ }
299
+ )
300
+
301
+ return per_language
302
+
303
+
253
304
  def _create_summary_table_mean_public_private(
254
305
  benchmark_results: BenchmarkResults,
255
306
  ) -> pd.DataFrame:
@@ -323,18 +374,18 @@ def _create_summary_table_mean_public_private(
323
374
  joint_table.insert(
324
375
  1,
325
376
  "Embedding Dimensions",
326
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
377
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
327
378
  )
328
379
  joint_table.insert(
329
380
  1,
330
- "Number of Parameters",
381
+ "Number of Parameters (B)",
331
382
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
332
383
  )
333
384
  joint_table.insert(
334
385
  1,
335
386
  "Memory Usage (MB)",
336
387
  model_metas.map(
337
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
388
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
338
389
  ),
339
390
  )
340
391
 
@@ -358,9 +409,7 @@ def _create_summary_table_mean_public_private(
358
409
  "mean(public)": "Mean (Public)",
359
410
  "mean(private)": "Mean (Private)",
360
411
  }
361
- # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
362
- if "Retrieval" in joint_table.columns:
363
- rename_dict["Retrieval"] = "Mean (Task)"
412
+
364
413
  joint_table = joint_table.rename(columns=rename_dict)
365
414
 
366
415
  # Move borda rank to front
@@ -447,18 +496,18 @@ def _create_summary_table_mean_subset(
447
496
  joint_table.insert(
448
497
  1,
449
498
  "Embedding Dimensions",
450
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
499
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
451
500
  )
452
501
  joint_table.insert(
453
502
  1,
454
- "Number of Parameters",
503
+ "Number of Parameters (B)",
455
504
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
456
505
  )
457
506
  joint_table.insert(
458
507
  1,
459
508
  "Memory Usage (MB)",
460
509
  model_metas.map(
461
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
510
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
462
511
  ),
463
512
  )
464
513
 
@@ -560,25 +609,23 @@ def _create_summary_table_mean_task_type(
560
609
 
561
610
  # Insert model metadata columns
562
611
  joint_table.insert(
563
- 1,
564
- "Max Tokens",
565
- model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
612
+ 1, "Max Tokens", model_metas.map(lambda m: _format_max_tokens(m.max_tokens))
566
613
  )
567
614
  joint_table.insert(
568
615
  1,
569
616
  "Embedding Dimensions",
570
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
617
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
571
618
  )
572
619
  joint_table.insert(
573
620
  1,
574
- "Number of Parameters",
621
+ "Number of Parameters (B)",
575
622
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
576
623
  )
577
624
  joint_table.insert(
578
625
  1,
579
626
  "Memory Usage (MB)",
580
627
  model_metas.map(
581
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
628
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
582
629
  ),
583
630
  )
584
631
 
@@ -1,21 +1,16 @@
1
- from collections.abc import Iterable, Sequence
2
- from dataclasses import dataclass
3
- from typing import TYPE_CHECKING
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator, Sequence
4
+ from dataclasses import dataclass, field
5
+ from typing import TYPE_CHECKING, Literal
4
6
 
5
7
  import pandas as pd
6
8
 
7
- from mteb.benchmarks._create_table import (
8
- _create_per_task_table_from_benchmark_results,
9
- _create_summary_table_from_benchmark_results,
10
- _create_summary_table_mean_public_private,
11
- _create_summary_table_mean_subset,
12
- _create_summary_table_mean_task_type,
13
- )
14
- from mteb.results import BenchmarkResults
9
+ from mteb.abstasks.abstask import AbsTask
15
10
  from mteb.types import StrURL
16
11
 
17
12
  if TYPE_CHECKING:
18
- from mteb.abstasks import AbsTask
13
+ from mteb.results import BenchmarkResults
19
14
 
20
15
 
21
16
  @dataclass
@@ -24,6 +19,7 @@ class Benchmark:
24
19
 
25
20
  Args:
26
21
  name: The name of the benchmark
22
+ aliases: Alternative names for the benchmark
27
23
  tasks: The tasks within the benchmark.
28
24
  description: A description of the benchmark, should include its intended goal and potentially a description of its construction
29
25
  reference: A link reference, to a source containing additional information typically to a paper, leaderboard or github.
@@ -42,7 +38,8 @@ class Benchmark:
42
38
  """
43
39
 
44
40
  name: str
45
- tasks: Sequence["AbsTask"]
41
+ tasks: Sequence[AbsTask]
42
+ aliases: Sequence[str] = field(default_factory=tuple)
46
43
  description: str | None = None
47
44
  reference: StrURL | None = None
48
45
  citation: str | None = None
@@ -50,14 +47,15 @@ class Benchmark:
50
47
  display_on_leaderboard: bool = True
51
48
  icon: str | None = None
52
49
  display_name: str | None = None
50
+ language_view: list[str] | Literal["all"] = field(default_factory=list)
53
51
 
54
- def __iter__(self) -> Iterable["AbsTask"]:
52
+ def __iter__(self) -> Iterator[AbsTask]:
55
53
  return iter(self.tasks)
56
54
 
57
55
  def __len__(self) -> int:
58
56
  return len(self.tasks)
59
57
 
60
- def __getitem__(self, index: int) -> "AbsTask":
58
+ def __getitem__(self, index: int) -> AbsTask:
61
59
  return self.tasks[index]
62
60
 
63
61
  def _create_summary_table(
@@ -68,6 +66,10 @@ class Benchmark:
68
66
  Returns:
69
67
  A pandas DataFrame representing the summary results.
70
68
  """
69
+ from mteb.benchmarks._create_table import (
70
+ _create_summary_table_from_benchmark_results,
71
+ )
72
+
71
73
  return _create_summary_table_from_benchmark_results(benchmark_results)
72
74
 
73
75
  def _create_per_task_table(
@@ -78,8 +80,38 @@ class Benchmark:
78
80
  Returns:
79
81
  A pandas DataFrame representing the per-task results.
80
82
  """
83
+ from mteb.benchmarks._create_table import (
84
+ _create_per_task_table_from_benchmark_results,
85
+ )
86
+
81
87
  return _create_per_task_table_from_benchmark_results(benchmark_results)
82
88
 
89
+ def _create_per_language_table(
90
+ self, benchmark_results: BenchmarkResults
91
+ ) -> pd.DataFrame:
92
+ """Create per-language table. Called by the leaderboard app.
93
+
94
+ Returns:
95
+ A pandas DataFrame representing the per-language results.
96
+ """
97
+ from mteb.benchmarks._create_table import (
98
+ _create_per_language_table_from_benchmark_results,
99
+ )
100
+
101
+ if self.language_view == "all" or len(self.language_view) > 0:
102
+ return _create_per_language_table_from_benchmark_results(
103
+ benchmark_results, self.language_view
104
+ )
105
+ else:
106
+ no_results_frame = pd.DataFrame(
107
+ {
108
+ "No results": [
109
+ "The per-language table is not available for this benchmark."
110
+ ]
111
+ }
112
+ )
113
+ return no_results_frame
114
+
83
115
 
84
116
  class RtebBenchmark(Benchmark):
85
117
  """Wrapper for RTEB benchmark."""
@@ -87,7 +119,14 @@ class RtebBenchmark(Benchmark):
87
119
  def _create_summary_table(
88
120
  self, benchmark_results: BenchmarkResults
89
121
  ) -> pd.DataFrame:
90
- return _create_summary_table_mean_public_private(benchmark_results)
122
+ from mteb.benchmarks._create_table import (
123
+ _create_summary_table_mean_public_private,
124
+ )
125
+
126
+ joint_table = _create_summary_table_mean_public_private(benchmark_results)
127
+ # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
128
+ joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
129
+ return joint_table
91
130
 
92
131
 
93
132
  class HUMEBenchmark(Benchmark):
@@ -96,6 +135,8 @@ class HUMEBenchmark(Benchmark):
96
135
  def _create_summary_table(
97
136
  self, benchmark_results: BenchmarkResults
98
137
  ) -> pd.DataFrame:
138
+ from mteb.benchmarks._create_table import _create_summary_table_mean_subset
139
+
99
140
  return _create_summary_table_mean_subset(benchmark_results)
100
141
 
101
142
 
@@ -105,4 +146,24 @@ class MIEBBenchmark(Benchmark):
105
146
  def _create_summary_table(
106
147
  self, benchmark_results: BenchmarkResults
107
148
  ) -> pd.DataFrame:
149
+ from mteb.benchmarks._create_table import _create_summary_table_mean_task_type
150
+
108
151
  return _create_summary_table_mean_task_type(benchmark_results)
152
+
153
+
154
+ class VidoreBenchmark(Benchmark):
155
+ """Wrapper for Vidore3 benchmark."""
156
+
157
+ def _create_summary_table(
158
+ self, benchmark_results: BenchmarkResults
159
+ ) -> pd.DataFrame:
160
+ from mteb.benchmarks._create_table import (
161
+ _create_summary_table_mean_public_private,
162
+ )
163
+
164
+ joint_table = _create_summary_table_mean_public_private(benchmark_results)
165
+ # For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
166
+ joint_table = joint_table.rename(
167
+ columns={"Document Understanding": "Mean (Task)"}
168
+ )
169
+ return joint_table
@@ -6,12 +6,16 @@ from mteb.benchmarks.benchmarks.benchmarks import (
6
6
  BUILT_MTEB,
7
7
  C_MTEB,
8
8
  CHEMTEB,
9
+ CHEMTEB_V1_1,
9
10
  CODE_RAG,
10
11
  ENCODECHKA,
11
12
  FA_MTEB,
12
13
  FA_MTEB_2,
13
14
  HUME,
14
15
  JINA_VDR,
16
+ JMTEB_LITE_V1,
17
+ JMTEB_V2,
18
+ KOVIDORE_V2,
15
19
  LONG_EMBED,
16
20
  MIEB_ENG,
17
21
  MIEB_IMG,
@@ -38,10 +42,12 @@ from mteb.benchmarks.benchmarks.benchmarks import (
38
42
  SEB,
39
43
  VIDORE,
40
44
  VIDORE_V2,
45
+ VIDORE_V3,
41
46
  VISUAL_DOCUMENT_RETRIEVAL,
42
47
  VN_MTEB,
43
48
  CoIR,
44
49
  MTEB_code,
50
+ MTEB_MAIN_RU_v1_1,
45
51
  MTEB_multilingual_v1,
46
52
  MTEB_multilingual_v2,
47
53
  RAR_b,
@@ -65,6 +71,7 @@ __all__ = [
65
71
  "BRIGHT_LONG",
66
72
  "BUILT_MTEB",
67
73
  "CHEMTEB",
74
+ "CHEMTEB_V1_1",
68
75
  "CODE_RAG",
69
76
  "C_MTEB",
70
77
  "ENCODECHKA",
@@ -73,6 +80,9 @@ __all__ = [
73
80
  "HUME",
74
81
  "HUME",
75
82
  "JINA_VDR",
83
+ "JMTEB_LITE_V1",
84
+ "JMTEB_V2",
85
+ "KOVIDORE_V2",
76
86
  "LONG_EMBED",
77
87
  "MIEB_ENG",
78
88
  "MIEB_IMG",
@@ -108,9 +118,11 @@ __all__ = [
108
118
  "SEB",
109
119
  "VIDORE",
110
120
  "VIDORE_V2",
121
+ "VIDORE_V3",
111
122
  "VISUAL_DOCUMENT_RETRIEVAL",
112
123
  "VN_MTEB",
113
124
  "CoIR",
125
+ "MTEB_MAIN_RU_v1_1",
114
126
  "MTEB_code",
115
127
  "MTEB_multilingual_v1",
116
128
  "MTEB_multilingual_v2",