mteb-2.5.2-py3-none-any.whl → mteb-2.7.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/cache.py CHANGED
@@ -1,19 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import io
1
5
  import json
2
6
  import logging
3
7
  import os
4
8
  import shutil
5
9
  import subprocess
10
+ import warnings
6
11
  from collections import defaultdict
7
- from collections.abc import Sequence
8
12
  from pathlib import Path
9
- from typing import cast
13
+ from typing import TYPE_CHECKING, cast
14
+
15
+ import requests
16
+ from pydantic import ValidationError
10
17
 
11
18
  import mteb
12
19
  from mteb.abstasks import AbsTask
13
20
  from mteb.benchmarks.benchmark import Benchmark
14
21
  from mteb.models import ModelMeta
15
22
  from mteb.results import BenchmarkResults, ModelResult, TaskResult
16
- from mteb.types import ModelName, Revision
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Iterable, Sequence
26
+
27
+ from mteb.types import ModelName, Revision
17
28
 
18
29
  logger = logging.getLogger(__name__)
19
30
 
@@ -22,8 +33,8 @@ class ResultCache:
22
33
  """Class to handle the local cache of MTEB results.
23
34
 
24
35
  Examples:
25
- >>> from mteb.cache import ResultCache
26
- >>> cache = ResultCache(cache_path="~/.cache/mteb") # default
36
+ >>> import mteb
37
+ >>> cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
27
38
  >>> cache.download_from_remote() # download the latest results from the remote repository
28
39
  >>> result = cache.load_results("task_name", "model_name")
29
40
  """
@@ -83,9 +94,9 @@ class ResultCache:
83
94
  model_path = results_folder / model_name
84
95
 
85
96
  if model_revision is None:
86
- logger.warning(
87
- "model_revision is not specified, attempting to load the latest revision. To disable this behavior, specify model_revision explicitly."
88
- )
97
+ msg = "`model_revision` is not specified, attempting to load the latest revision. To disable this behavior, specify the 'model_revision` explicitly."
98
+ logger.warning(msg)
99
+ warnings.warn(msg)
89
100
  # get revs from paths
90
101
  revisions = [p for p in model_path.glob("*") if p.is_dir()]
91
102
  if not revisions:
@@ -275,21 +286,165 @@ class ResultCache:
275
286
 
276
287
  return results_directory
277
288
 
289
+ def _download_cached_results_from_branch(
290
+ self,
291
+ branch: str = "cached-data",
292
+ filename: str = "__cached_results.json.gz",
293
+ output_path: Path | None = None,
294
+ remote: str = "https://github.com/embeddings-benchmark/results",
295
+ timeout: int = 60,
296
+ max_size_mb: int = 500,
297
+ ) -> Path:
298
+ """Download pre-computed cached results from a specific branch.
299
+
300
+ This is significantly faster than download_from_remote() since it downloads
301
+ only a compressed cache file instead of cloning the entire repository.
302
+
303
+ The method performs the following steps:
304
+ 1. Downloads a gzipped JSON file from the specified branch
305
+ 2. Validates file size and content type
306
+ 3. Decompresses the gzip content
307
+ 4. Writes the decompressed JSON to disk
308
+
309
+ Args:
310
+ branch: Branch name to download from (default: "cached-data")
311
+ filename: Name of the cached results file (default: "__cached_results.json.gz")
312
+ output_path: Where to save the file. If None, uses mteb/leaderboard/__cached_results.json
313
+ remote: Base URL of the results repository
314
+ timeout: Request timeout in seconds (default: 60)
315
+ max_size_mb: Maximum allowed file size in megabytes (default: 500)
316
+
317
+ Returns:
318
+ Path to the downloaded and decompressed cache file
319
+
320
+ Raises:
321
+ requests.exceptions.RequestException: On HTTP errors
322
+ ValueError: On validation failures (size, content-type)
323
+ gzip.BadGzipFile: If content is not valid gzip
324
+ UnicodeDecodeError: If content cannot be decoded as UTF-8
325
+ PermissionError: If file cannot be written due to permissions
326
+ OSError: On other file system errors
327
+
328
+ Examples:
329
+ >>> import mteb
330
+ >>> cache = mteb.ResultCache()
331
+ >>> # Download optimized cached results
332
+ >>> cache_file = cache._download_cached_results_from_branch()
333
+ >>> # Use custom output path
334
+ >>> cache_file = cache._download_cached_results_from_branch(
335
+ ... output_path=Path("/tmp/my_cache.json")
336
+ ... )
337
+ """
338
+ if output_path is None:
339
+ # Default to saving in mteb/leaderboard/__cached_results.json
340
+ # Get the mteb package directory (parent of this file)
341
+ mteb_package_dir = Path(__file__).parent
342
+ output_path = mteb_package_dir / "leaderboard" / "__cached_results.json"
343
+
344
+ # Extract repository owner and name from the remote URL
345
+ # e.g., "https://github.com/embeddings-benchmark/results" -> "embeddings-benchmark/results"
346
+ repo_path = remote.replace("https://github.com/", "").replace(
347
+ "http://github.com/", ""
348
+ )
349
+
350
+ url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{filename}"
351
+ logger.info(f"Downloading cached results from {url}")
352
+
353
+ # Step 1: Download with validation
354
+ max_size_bytes = max_size_mb * 1024 * 1024
355
+
356
+ try:
357
+ response = requests.get(url, timeout=timeout)
358
+ response.raise_for_status()
359
+
360
+ # Check if this is a Git LFS pointer file
361
+ content_type = response.headers.get("content-type", "").lower()
362
+ if (
363
+ content_type == "text/plain; charset=utf-8"
364
+ and b"git-lfs" in response.content
365
+ ):
366
+ # Try Git LFS media URL instead
367
+ media_url = f"https://media.githubusercontent.com/media/{repo_path}/{branch}/{filename}"
368
+ logger.info(f"Detected Git LFS file, trying media URL: {media_url}")
369
+ response = requests.get(media_url, timeout=timeout)
370
+ response.raise_for_status()
371
+ content_type = response.headers.get("content-type", "").lower()
372
+
373
+ # Validate content-type header
374
+ expected_content_types = [
375
+ "application/gzip",
376
+ "application/octet-stream",
377
+ "application/x-gzip",
378
+ ]
379
+ if content_type and not any(
380
+ ct in content_type for ct in expected_content_types
381
+ ):
382
+ raise Exception(
383
+ f"Unexpected content-type: {content_type}. Expected one of: {expected_content_types}"
384
+ )
385
+
386
+ # Validate file size
387
+ content_length = len(response.content)
388
+ if content_length > max_size_bytes:
389
+ raise ValueError(
390
+ f"Downloaded file too large: {content_length} bytes (max: {max_size_bytes})"
391
+ )
392
+
393
+ logger.info(
394
+ f"HTTP request successful, content length: {content_length} bytes"
395
+ )
396
+ content = response.content
397
+
398
+ except Exception as e:
399
+ logger.error(f"Unexpected HTTP error: {type(e).__name__}: {e}")
400
+ raise e
401
+
402
+ # Step 2: Decompress gzip data
403
+ logger.info("Attempting gzip decompression...")
404
+
405
+ try:
406
+ with gzip.open(io.BytesIO(content), "rt", encoding="utf-8") as gz_file:
407
+ data = gz_file.read()
408
+ logger.info(f"Decompression successful, data length: {len(data)} chars")
409
+
410
+ except Exception as e:
411
+ logger.error(f"Unexpected decompression error: {type(e).__name__}: {e}")
412
+ raise e
413
+
414
+ # Step 3: Write to disk
415
+ logger.info(f"Attempting to write to: {output_path}")
416
+
417
+ # Check parent directory exists and is writable
418
+ output_path.parent.mkdir(parents=True, exist_ok=True)
419
+
420
+ try:
421
+ output_path.write_text(data, encoding="utf-8")
422
+ logger.info(
423
+ f"File write successful, size: {output_path.stat().st_size} bytes"
424
+ )
425
+ except Exception as e:
426
+ logger.error(f"Unexpected file write error: {type(e).__name__}: {e}")
427
+ raise e
428
+
429
+ return output_path
430
+
278
431
  def clear_cache(self) -> None:
279
432
  """Clear the local cache directory."""
280
433
  if self.cache_path.exists() and self.cache_path.is_dir():
281
434
  shutil.rmtree(self.cache_path)
282
435
  logger.info(f"Cache directory {self.cache_path} cleared.")
283
436
  else:
284
- logger.warning(f"Cache directory {self.cache_path} does not exist.")
437
+ msg = f"Cache directory `{self.cache_path}` does not exist."
438
+ logger.warning(msg)
439
+ warnings.warn(msg)
285
440
 
286
441
  def __repr__(self) -> str:
287
442
  return f"ResultCache(cache_path={self.cache_path})"
288
443
 
289
444
  def get_cache_paths(
290
445
  self,
291
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
292
- tasks: Sequence[str] | Sequence[AbsTask] | None = None,
446
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
447
+ tasks: Sequence[str] | Iterable[AbsTask] | None = None,
293
448
  require_model_meta: bool = True,
294
449
  include_remote: bool = True,
295
450
  ) -> list[Path]:
@@ -311,8 +466,8 @@ class ResultCache:
311
466
  A list of paths in the cache directory.
312
467
 
313
468
  Examples:
314
- >>> from mteb.cache import ResultCache
315
- >>> cache = ResultCache()
469
+ >>> import mteb
470
+ >>> cache = mteb.ResultCache()
316
471
  >>>
317
472
  >>> # Get all cache paths
318
473
  >>> paths = cache.get_cache_paths()
@@ -422,7 +577,7 @@ class ResultCache:
422
577
  @staticmethod
423
578
  def _filter_paths_by_model_and_revision(
424
579
  paths: list[Path],
425
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
580
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
426
581
  ) -> list[Path]:
427
582
  """Filter a list of paths by model name and optional revision.
428
583
 
@@ -432,8 +587,9 @@ class ResultCache:
432
587
  if not models:
433
588
  return paths
434
589
 
435
- if isinstance(models[0], ModelMeta):
436
- models = cast(list[ModelMeta], models)
590
+ first_model = next(iter(models))
591
+ if isinstance(first_model, ModelMeta):
592
+ models = cast("Iterable[ModelMeta]", models)
437
593
  name_and_revision = {
438
594
  (m.model_name_as_path(), m.revision or "no_revision_available")
439
595
  for m in models
@@ -444,13 +600,14 @@ class ResultCache:
444
600
  if (p.parent.parent.name, p.parent.name) in name_and_revision
445
601
  ]
446
602
 
447
- model_names = {m.replace("/", "__").replace(" ", "_") for m in models}
603
+ str_models = cast("Sequence[str]", models)
604
+ model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
448
605
  return [p for p in paths if p.parent.parent.name in model_names]
449
606
 
450
607
  @staticmethod
451
608
  def _filter_paths_by_task(
452
609
  paths: list[Path],
453
- tasks: Sequence[str] | Sequence[AbsTask] | None = None,
610
+ tasks: Sequence[str] | Iterable[AbsTask] | None = None,
454
611
  ) -> list[Path]:
455
612
  if tasks is not None:
456
613
  task_names = set()
@@ -466,8 +623,8 @@ class ResultCache:
466
623
 
467
624
  def load_results(
468
625
  self,
469
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
470
- tasks: Sequence[str] | Sequence[AbsTask] | Benchmark | str | None = None,
626
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
627
+ tasks: Sequence[str] | Iterable[AbsTask] | Benchmark | str | None = None,
471
628
  require_model_meta: bool = True,
472
629
  include_remote: bool = True,
473
630
  validate_and_filter: bool = False,
@@ -478,6 +635,7 @@ class ResultCache:
478
635
  Args:
479
636
  models: A list of model names to load the results for. If None it will load the results for all models.
480
637
  tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
638
+ If Benchmark is passed, then all tasks in the benchmark will be loaded.
481
639
  If None it will load the results for all tasks.
482
640
  require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
483
641
  extract the model name and revision from the path.
@@ -490,8 +648,8 @@ class ResultCache:
490
648
  A BenchmarkResults object containing the results for the specified models and tasks.
491
649
 
492
650
  Examples:
493
- >>> from mteb.cache import ResultCache
494
- >>> cache = ResultCache()
651
+ >>> import mteb
652
+ >>> cache = mteb.ResultCache()
495
653
  >>>
496
654
  >>> # Load results for specific models and tasks
497
655
  >>> results = cache.load_results(
@@ -511,7 +669,7 @@ class ResultCache:
511
669
  )
512
670
  models_results = defaultdict(list)
513
671
 
514
- task_names = {}
672
+ task_names: dict[str, AbsTask | None] = {}
515
673
  if tasks is not None:
516
674
  for task in tasks:
517
675
  if isinstance(task, AbsTask):
@@ -529,10 +687,12 @@ class ResultCache:
529
687
  )
530
688
 
531
689
  if validate_and_filter:
532
- task = task_names[task_result.task_name]
690
+ task_instance = task_names[task_result.task_name]
533
691
  try:
534
- task_result = task_result.validate_and_filter_scores(task=task)
535
- except Exception as e:
692
+ task_result = task_result.validate_and_filter_scores(
693
+ task=task_instance
694
+ )
695
+ except ValidationError as e:
536
696
  logger.info(
537
697
  f"Validation failed for {task_result.task_name} in {model_name} {revision}: {e}"
538
698
  )
@@ -541,7 +701,7 @@ class ResultCache:
541
701
  models_results[(model_name, revision)].append(task_result)
542
702
 
543
703
  # create BenchmarkResults object
544
- models_results = [
704
+ models_results_object = [
545
705
  ModelResult(
546
706
  model_name=model_name,
547
707
  model_revision=revision,
@@ -550,9 +710,7 @@ class ResultCache:
550
710
  for (model_name, revision), task_results in models_results.items()
551
711
  ]
552
712
 
553
- benchmark_results = BenchmarkResults(
554
- model_results=models_results,
713
+ return BenchmarkResults(
714
+ model_results=models_results_object,
555
715
  benchmark=tasks if isinstance(tasks, Benchmark) else None,
556
716
  )
557
-
558
- return benchmark_results
mteb/cli/_display_tasks.py CHANGED
@@ -1,9 +1,15 @@
1
- from collections.abc import Sequence
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
2
4
 
3
- from mteb.abstasks import AbsTask
4
- from mteb.benchmarks import Benchmark
5
5
  from mteb.get_tasks import MTEBTasks
6
6
 
7
+ if TYPE_CHECKING:
8
+ from collections.abc import Iterable, Sequence
9
+
10
+ from mteb.abstasks import AbsTask
11
+ from mteb.benchmarks import Benchmark
12
+
7
13
 
8
14
  def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
9
15
  """Get all benchmarks available in the MTEB."""
@@ -31,7 +37,7 @@ def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
31
37
  _display_tasks(benchmark.tasks, name=name)
32
38
 
33
39
 
34
- def _display_tasks(task_list: Sequence[AbsTask], name: str | None = None) -> None:
40
+ def _display_tasks(task_list: Iterable[AbsTask], name: str | None = None) -> None:
35
41
  from rich.console import Console
36
42
 
37
43
  console = Console()
mteb/cli/build_cli.py CHANGED
@@ -1,17 +1,22 @@
1
1
  import argparse
2
2
  import logging
3
3
  import os
4
+ import warnings
4
5
  from pathlib import Path
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  import torch
7
9
  from rich.logging import RichHandler
8
10
 
9
11
  import mteb
10
12
  from mteb.cache import ResultCache
13
+ from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
11
14
  from mteb.cli.generate_model_card import generate_model_card
12
15
  from mteb.evaluate import OverwriteStrategy
13
16
 
14
- from ._display_tasks import _display_benchmarks, _display_tasks
17
+ if TYPE_CHECKING:
18
+ from mteb.abstasks.abstask import AbsTask
19
+ from mteb.types import EncodeKwargs
15
20
 
16
21
  logger = logging.getLogger(__name__)
17
22
 
@@ -53,7 +58,7 @@ def run(args: argparse.Namespace) -> None:
53
58
 
54
59
  if args.benchmarks:
55
60
  benchmarks = mteb.get_benchmarks(names=args.benchmarks)
56
- tasks = [t for b in benchmarks for t in b.tasks]
61
+ tasks = tuple(t for b in benchmarks for t in b.tasks)
57
62
  else:
58
63
  tasks = mteb.get_tasks(
59
64
  categories=args.categories,
@@ -63,21 +68,23 @@ def run(args: argparse.Namespace) -> None:
63
68
  eval_splits=args.eval_splits,
64
69
  )
65
70
 
66
- encode_kwargs = {}
71
+ encode_kwargs: EncodeKwargs = {}
67
72
  if args.batch_size is not None:
68
73
  encode_kwargs["batch_size"] = args.batch_size
69
74
 
70
75
  overwrite_strategy = args.overwrite_strategy
71
76
  if args.overwrite:
72
- logger.warning(
73
- "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead."
77
+ warnings.warn(
78
+ "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead.",
79
+ DeprecationWarning,
74
80
  )
75
81
  overwrite_strategy = OverwriteStrategy.ALWAYS.value
76
82
 
77
83
  prediction_folder = args.prediction_folder
78
84
  if args.save_predictions:
79
- logger.warning(
80
- "`--save_predictions` is deprecated, please use `--prediction-folder` instead."
85
+ warnings.warn(
86
+ "`--save_predictions` is deprecated, please use `--prediction-folder` instead.",
87
+ DeprecationWarning,
81
88
  )
82
89
  prediction_folder = args.output_folder
83
90
 
@@ -279,23 +286,25 @@ def _create_meta(args: argparse.Namespace) -> None:
279
286
  from_existing = Path(from_existing)
280
287
 
281
288
  if output_path.exists() and overwrite:
282
- logger.warning("Output path already exists, overwriting.")
289
+ msg = "Output path already exists, overwriting."
290
+ logger.warning(msg)
291
+ warnings.warn(msg)
283
292
  elif output_path.exists():
284
293
  raise FileExistsError(
285
294
  "Output path already exists, use --overwrite to overwrite."
286
295
  )
287
296
 
288
- tasks = []
297
+ benchmarks = None
298
+ tasks: list[AbsTask] = []
289
299
  if tasks_names is not None:
290
- tasks = mteb.get_tasks(tasks_names)
300
+ tasks = list(mteb.get_tasks(tasks_names))
291
301
  if benchmarks is not None:
292
302
  benchmarks = mteb.get_benchmarks(benchmarks)
293
- for benchmark in benchmarks:
294
- tasks.extend(benchmark.tasks)
295
303
 
296
304
  generate_model_card(
297
305
  model_name,
298
- tasks if len(tasks) > 0 else None,
306
+ tasks,
307
+ benchmarks,
299
308
  existing_model_card_id_or_path=from_existing,
300
309
  results_cache=ResultCache(results_folder),
301
310
  output_path=output_path,
@@ -356,6 +365,95 @@ def _add_create_meta_parser(subparsers) -> None:
356
365
  parser.set_defaults(func=_create_meta)
357
366
 
358
367
 
368
+ def _add_leaderboard_parser(subparsers) -> None:
369
+ parser = subparsers.add_parser("leaderboard", help="Launch the MTEB leaderboard")
370
+
371
+ parser.add_argument(
372
+ "--cache-path",
373
+ type=str,
374
+ help="Path to the cache folder containing model results",
375
+ required=False,
376
+ default=None,
377
+ )
378
+ parser.add_argument(
379
+ "--host",
380
+ type=str,
381
+ default="0.0.0.0",
382
+ help="Host to run the leaderboard server on",
383
+ )
384
+ parser.add_argument(
385
+ "--port",
386
+ type=int,
387
+ default=7860,
388
+ help="Port to run the leaderboard server on",
389
+ )
390
+ parser.add_argument(
391
+ "--share",
392
+ action="store_true",
393
+ default=False,
394
+ help="Create a public URL for the leaderboard",
395
+ )
396
+
397
+ parser.set_defaults(func=_leaderboard)
398
+
399
+
400
+ def _leaderboard(args: argparse.Namespace) -> None:
401
+ """Launch the MTEB leaderboard with specified cache path."""
402
+ # Import leaderboard module only when needed to avoid requiring leaderboard dependencies
403
+ # for other CLI commands
404
+ try:
405
+ import gradio as gr
406
+
407
+ from mteb.leaderboard import get_leaderboard_app
408
+ except ImportError as e:
409
+ raise ImportError(
410
+ "Seems like some dependencies are not installed. "
411
+ + "You can likely install these using: `pip install mteb[leaderboard]`. "
412
+ + f"{e}"
413
+ )
414
+
415
+ cache_path = args.cache_path
416
+
417
+ if cache_path:
418
+ logger.info(f"Using cache path: {cache_path}")
419
+ cache = ResultCache(cache_path)
420
+ else:
421
+ cache = ResultCache()
422
+ logger.info(f"Using default cache path: {cache.cache_path}")
423
+
424
+ app = get_leaderboard_app(cache)
425
+
426
+ logger.info(f"Starting leaderboard on {args.host}:{args.port}")
427
+ if args.share:
428
+ logger.info("Creating public URL...")
429
+
430
+ logging.getLogger("mteb.load_results.task_results").setLevel(
431
+ logging.ERROR
432
+ ) # Warnings related to task split
433
+ logging.getLogger("mteb.model_meta").setLevel(
434
+ logging.ERROR
435
+ ) # Warning related to model metadata (fetch_from_hf=False)
436
+ logging.getLogger("mteb.load_results.benchmark_results").setLevel(
437
+ logging.ERROR
438
+ ) # Warning related to model metadata (fetch_from_hf=False)
439
+ warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
440
+
441
+ # Head content for Tailwind CSS
442
+ head = """
443
+ <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
444
+ """
445
+
446
+ app.launch(
447
+ server_name=args.host,
448
+ server_port=args.port,
449
+ share=args.share,
450
+ theme=gr.themes.Soft(
451
+ font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
452
+ ),
453
+ head=head,
454
+ )
455
+
456
+
359
457
  def build_cli() -> argparse.ArgumentParser:
360
458
  """Builds the argument parser for the MTEB CLI.
361
459
 
@@ -375,6 +473,7 @@ def build_cli() -> argparse.ArgumentParser:
375
473
  _add_available_tasks_parser(subparsers)
376
474
  _add_available_benchmarks_parser(subparsers)
377
475
  _add_create_meta_parser(subparsers)
476
+ _add_leaderboard_parser(subparsers)
378
477
 
379
478
  return parser
380
479