mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/cache.py CHANGED
@@ -1,14 +1,22 @@
1
+ import gzip
2
+ import io
1
3
  import json
2
4
  import logging
3
5
  import os
4
6
  import shutil
5
7
  import subprocess
8
+ import warnings
6
9
  from collections import defaultdict
7
- from collections.abc import Sequence
10
+ from collections.abc import Iterable, Sequence
8
11
  from pathlib import Path
9
12
  from typing import cast
10
13
 
14
+ import requests
15
+ from pydantic import ValidationError
16
+
17
+ import mteb
11
18
  from mteb.abstasks import AbsTask
19
+ from mteb.benchmarks.benchmark import Benchmark
12
20
  from mteb.models import ModelMeta
13
21
  from mteb.results import BenchmarkResults, ModelResult, TaskResult
14
22
  from mteb.types import ModelName, Revision
@@ -20,8 +28,8 @@ class ResultCache:
20
28
  """Class to handle the local cache of MTEB results.
21
29
 
22
30
  Examples:
23
- >>> from mteb.cache import ResultCache
24
- >>> cache = ResultCache(cache_path="~/.cache/mteb") # default
31
+ >>> import mteb
32
+ >>> cache = mteb.ResultCache(cache_path="~/.cache/mteb") # default
25
33
  >>> cache.download_from_remote() # download the latest results from the remote repository
26
34
  >>> result = cache.load_results("task_name", "model_name")
27
35
  """
@@ -62,7 +70,11 @@ class ResultCache:
62
70
  Returns:
63
71
  The path to the results of the task.
64
72
  """
65
- results_folder = "results" if not remote else "remote"
73
+ results_folder = (
74
+ self.cache_path / "results"
75
+ if not remote
76
+ else self.cache_path / "remote" / "results"
77
+ )
66
78
 
67
79
  if isinstance(model_name, ModelMeta):
68
80
  if model_revision is not None:
@@ -74,12 +86,12 @@ class ResultCache:
74
86
  elif isinstance(model_name, str):
75
87
  model_name = model_name.replace("/", "__").replace(" ", "_")
76
88
 
77
- model_path = self.cache_path / results_folder / model_name
89
+ model_path = results_folder / model_name
78
90
 
79
91
  if model_revision is None:
80
- logger.warning(
81
- "model_revision is not specified, attempting to load the latest revision. To disable this behavior, specify model_revision explicitly."
82
- )
92
+ msg = "`model_revision` is not specified, attempting to load the latest revision. To disable this behavior, specify the 'model_revision` explicitly."
93
+ logger.warning(msg)
94
+ warnings.warn(msg)
83
95
  # get revs from paths
84
96
  revisions = [p for p in model_path.glob("*") if p.is_dir()]
85
97
  if not revisions:
@@ -191,12 +203,14 @@ class ResultCache:
191
203
  self,
192
204
  remote: str = "https://github.com/embeddings-benchmark/results",
193
205
  download_latest: bool = True,
206
+ revision: str | None = None,
194
207
  ) -> Path:
195
208
  """Downloads the latest version of the results repository from GitHub to a local cache directory. Required git to be installed.
196
209
 
197
210
  Args:
198
211
  remote: The URL of the results repository on GitHub.
199
212
  download_latest: If True it will download the latest version of the repository, otherwise it will only update the existing repository.
213
+ revision: If specified, it will checkout the given revision after cloning or pulling the repository.
200
214
 
201
215
  Returns:
202
216
  The path to the local cache directory.
@@ -224,14 +238,27 @@ class ResultCache:
224
238
  )
225
239
  raise ValueError(msg)
226
240
 
227
- if download_latest:
241
+ if revision or download_latest:
228
242
  logger.info(
229
- f"remote repository already exists in {results_directory}, updating it using git pull"
243
+ f"remote repository already exists in {results_directory}, fetching updates"
244
+ )
245
+ subprocess.run(
246
+ ["git", "fetch", "--all", "--tags"],
247
+ cwd=results_directory,
248
+ check=True,
230
249
  )
231
- subprocess.run(["git", "pull"], cwd=results_directory)
232
250
  else:
233
251
  logger.debug(
234
- f"Results repository already exists in {results_directory}, skipping update, set download_latest=True to update it"
252
+ f"Results repository already exists in {results_directory}, skipping update, "
253
+ f"set download_latest=True to update it"
254
+ )
255
+
256
+ if revision:
257
+ logger.info(f"Checking out revision '{revision}'")
258
+ subprocess.run(
259
+ ["git", "checkout", revision],
260
+ cwd=results_directory,
261
+ check=True,
235
262
  )
236
263
  return results_directory
237
264
 
@@ -239,25 +266,180 @@ class ResultCache:
239
266
  f"No results repository found in {results_directory}, cloning it from {remote}"
240
267
  )
241
268
 
242
- subprocess.run(["git", "clone", remote, "remote"], cwd=self.cache_path)
269
+ clone_cmd = ["git", "clone", "--depth", "1"]
270
+
271
+ if revision:
272
+ logger.info(f"Cloning repository at revision '{revision}'")
273
+ clone_cmd.append(f"--revision={revision}")
274
+ clone_cmd.extend([remote, "remote"])
275
+
276
+ subprocess.run(
277
+ clone_cmd,
278
+ cwd=self.cache_path,
279
+ check=True,
280
+ )
243
281
 
244
282
  return results_directory
245
283
 
284
def _download_cached_results_from_branch(
    self,
    branch: str = "cached-data",
    filename: str = "__cached_results.json.gz",
    output_path: Path | None = None,
    remote: str = "https://github.com/embeddings-benchmark/results",
    timeout: int = 60,
    max_size_mb: int = 500,
) -> Path:
    """Download pre-computed cached results from a specific branch.

    This is significantly faster than download_from_remote() since it downloads
    only a compressed cache file instead of cloning the entire repository.

    The method performs the following steps:
    1. Downloads a gzipped JSON file from the specified branch (retrying via the
       Git LFS media URL when the first response looks like an LFS pointer)
    2. Validates content type and file size
    3. Decompresses the gzip content
    4. Writes the decompressed JSON to disk

    Args:
        branch: Branch name to download from (default: "cached-data")
        filename: Name of the cached results file (default: "__cached_results.json.gz")
        output_path: Where to save the file. If None, uses
            ``leaderboard/__cached_results.json`` next to this module.
        remote: Base URL of the results repository on GitHub. A trailing ``/``
            or ``.git`` suffix is tolerated.
        timeout: Request timeout in seconds (default: 60)
        max_size_mb: Maximum allowed size of the downloaded (compressed) file in
            megabytes (default: 500)

    Returns:
        Path to the downloaded and decompressed cache file

    Raises:
        requests.exceptions.RequestException: On HTTP errors
        ValueError: On validation failures (size, content-type)
        gzip.BadGzipFile: If content is not valid gzip
        UnicodeDecodeError: If content cannot be decoded as UTF-8
        PermissionError: If file cannot be written due to permissions
        OSError: On other file system errors

    Examples:
        >>> import mteb
        >>> cache = mteb.ResultCache()
        >>> # Download optimized cached results
        >>> cache_file = cache._download_cached_results_from_branch()
        >>> # Use custom output path
        >>> cache_file = cache._download_cached_results_from_branch(
        ...     output_path=Path("/tmp/my_cache.json")
        ... )
    """
    if output_path is None:
        # Default location: the "leaderboard" directory that sits beside this module.
        mteb_package_dir = Path(__file__).parent
        output_path = mteb_package_dir / "leaderboard" / "__cached_results.json"

    # Reduce the remote URL to "<owner>/<repo>", e.g.
    # "https://github.com/embeddings-benchmark/results" -> "embeddings-benchmark/results".
    # A trailing slash or ".git" suffix would otherwise end up inside the raw URL.
    repo_path = (
        remote.replace("https://github.com/", "")
        .replace("http://github.com/", "")
        .rstrip("/")
        .removesuffix(".git")
    )

    url = f"https://raw.githubusercontent.com/{repo_path}/{branch}/{filename}"
    logger.info(f"Downloading cached results from {url}")

    # Step 1: Download with validation
    max_size_bytes = max_size_mb * 1024 * 1024

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # A plain-text body mentioning "git-lfs" is treated as an LFS pointer file;
        # the real payload is then requested from the media endpoint.
        content_type = response.headers.get("content-type", "").lower()
        if (
            content_type == "text/plain; charset=utf-8"
            and b"git-lfs" in response.content
        ):
            media_url = f"https://media.githubusercontent.com/media/{repo_path}/{branch}/{filename}"
            logger.info(f"Detected Git LFS file, trying media URL: {media_url}")
            response = requests.get(media_url, timeout=timeout)
            response.raise_for_status()
            content_type = response.headers.get("content-type", "").lower()

        # Validate content-type header (a missing header is accepted)
        expected_content_types = [
            "application/gzip",
            "application/octet-stream",
            "application/x-gzip",
        ]
        if content_type and not any(
            ct in content_type for ct in expected_content_types
        ):
            # ValueError, as documented, rather than a bare Exception.
            raise ValueError(
                f"Unexpected content-type: {content_type}. Expected one of: {expected_content_types}"
            )

        # Validate file size; note the body has already been fetched at this point,
        # so this guards what is decompressed and written, not the transfer itself.
        content = response.content
        content_length = len(content)
        if content_length > max_size_bytes:
            raise ValueError(
                f"Downloaded file too large: {content_length} bytes (max: {max_size_bytes})"
            )

        logger.info(
            f"HTTP request successful, content length: {content_length} bytes"
        )

    except Exception as e:
        logger.error(f"Unexpected HTTP error: {type(e).__name__}: {e}")
        raise

    # Step 2: Decompress gzip data
    logger.info("Attempting gzip decompression...")

    try:
        with gzip.open(io.BytesIO(content), "rt", encoding="utf-8") as gz_file:
            data = gz_file.read()
        logger.info(f"Decompression successful, data length: {len(data)} chars")

    except Exception as e:
        logger.error(f"Unexpected decompression error: {type(e).__name__}: {e}")
        raise

    # Step 3: Write to disk
    logger.info(f"Attempting to write to: {output_path}")

    # Make sure the destination directory exists before writing
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        output_path.write_text(data, encoding="utf-8")
        logger.info(
            f"File write successful, size: {output_path.stat().st_size} bytes"
        )
    except Exception as e:
        logger.error(f"Unexpected file write error: {type(e).__name__}: {e}")
        raise

    return output_path
425
+
246
426
  def clear_cache(self) -> None:
247
427
  """Clear the local cache directory."""
248
428
  if self.cache_path.exists() and self.cache_path.is_dir():
249
429
  shutil.rmtree(self.cache_path)
250
430
  logger.info(f"Cache directory {self.cache_path} cleared.")
251
431
  else:
252
- logger.warning(f"Cache directory {self.cache_path} does not exist.")
432
+ msg = f"Cache directory `{self.cache_path}` does not exist."
433
+ logger.warning(msg)
434
+ warnings.warn(msg)
253
435
 
254
436
  def __repr__(self) -> str:
255
437
  return f"ResultCache(cache_path={self.cache_path})"
256
438
 
257
439
  def get_cache_paths(
258
440
  self,
259
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
260
- tasks: Sequence[str] | Sequence[AbsTask] | None = None,
441
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
442
+ tasks: Sequence[str] | Iterable[AbsTask] | None = None,
261
443
  require_model_meta: bool = True,
262
444
  include_remote: bool = True,
263
445
  ) -> list[Path]:
@@ -279,8 +461,8 @@ class ResultCache:
279
461
  A list of paths in the cache directory.
280
462
 
281
463
  Examples:
282
- >>> from mteb.cache import ResultCache
283
- >>> cache = ResultCache()
464
+ >>> import mteb
465
+ >>> cache = mteb.ResultCache()
284
466
  >>>
285
467
  >>> # Get all cache paths
286
468
  >>> paths = cache.get_cache_paths()
@@ -390,7 +572,7 @@ class ResultCache:
390
572
  @staticmethod
391
573
  def _filter_paths_by_model_and_revision(
392
574
  paths: list[Path],
393
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
575
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
394
576
  ) -> list[Path]:
395
577
  """Filter a list of paths by model name and optional revision.
396
578
 
@@ -400,8 +582,9 @@ class ResultCache:
400
582
  if not models:
401
583
  return paths
402
584
 
403
- if isinstance(models[0], ModelMeta):
404
- models = cast(list[ModelMeta], models)
585
+ first_model = next(iter(models))
586
+ if isinstance(first_model, ModelMeta):
587
+ models = cast(Iterable[ModelMeta], models)
405
588
  name_and_revision = {
406
589
  (m.model_name_as_path(), m.revision or "no_revision_available")
407
590
  for m in models
@@ -412,13 +595,14 @@ class ResultCache:
412
595
  if (p.parent.parent.name, p.parent.name) in name_and_revision
413
596
  ]
414
597
 
415
- model_names = {m.replace("/", "__").replace(" ", "_") for m in models}
598
+ str_models = cast(Sequence[str], models)
599
+ model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
416
600
  return [p for p in paths if p.parent.parent.name in model_names]
417
601
 
418
602
  @staticmethod
419
603
  def _filter_paths_by_task(
420
604
  paths: list[Path],
421
- tasks: Sequence[str] | Sequence[AbsTask] | None = None,
605
+ tasks: Sequence[str] | Iterable[AbsTask] | None = None,
422
606
  ) -> list[Path]:
423
607
  if tasks is not None:
424
608
  task_names = set()
@@ -434,8 +618,8 @@ class ResultCache:
434
618
 
435
619
  def load_results(
436
620
  self,
437
- models: Sequence[str] | Sequence[ModelMeta] | None = None,
438
- tasks: Sequence[str] | Sequence[AbsTask] | None = None,
621
+ models: Sequence[str] | Iterable[ModelMeta] | None = None,
622
+ tasks: Sequence[str] | Iterable[AbsTask] | Benchmark | str | None = None,
439
623
  require_model_meta: bool = True,
440
624
  include_remote: bool = True,
441
625
  validate_and_filter: bool = False,
@@ -445,7 +629,9 @@ class ResultCache:
445
629
 
446
630
  Args:
447
631
  models: A list of model names to load the results for. If None it will load the results for all models.
448
- tasks: A list of task names to load the results for. If None it will load the results for all tasks.
632
+ tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
633
+ If Benchmark is passed, then all tasks in the benchmark will be loaded.
634
+ If None it will load the results for all tasks.
449
635
  require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
450
636
  extract the model name and revision from the path.
451
637
  include_remote: If True, it will include results from the remote repository.
@@ -457,8 +643,8 @@ class ResultCache:
457
643
  A BenchmarkResults object containing the results for the specified models and tasks.
458
644
 
459
645
  Examples:
460
- >>> from mteb.cache import ResultCache
461
- >>> cache = ResultCache()
646
+ >>> import mteb
647
+ >>> cache = mteb.ResultCache()
462
648
  >>>
463
649
  >>> # Load results for specific models and tasks
464
650
  >>> results = cache.load_results(
@@ -467,6 +653,9 @@ class ResultCache:
467
653
  ... require_model_meta=True,
468
654
  ... )
469
655
  """
656
+ if isinstance(tasks, str):
657
+ tasks = mteb.get_benchmark(tasks)
658
+
470
659
  paths = self.get_cache_paths(
471
660
  models=models,
472
661
  tasks=tasks,
@@ -475,7 +664,7 @@ class ResultCache:
475
664
  )
476
665
  models_results = defaultdict(list)
477
666
 
478
- task_names = {}
667
+ task_names: dict[str, AbsTask | None] = {}
479
668
  if tasks is not None:
480
669
  for task in tasks:
481
670
  if isinstance(task, AbsTask):
@@ -493,10 +682,12 @@ class ResultCache:
493
682
  )
494
683
 
495
684
  if validate_and_filter:
496
- task = task_names[task_result.task_name]
685
+ task_instance = task_names[task_result.task_name]
497
686
  try:
498
- task_result.validate_and_filter_scores(task=task)
499
- except Exception as e:
687
+ task_result = task_result.validate_and_filter_scores(
688
+ task=task_instance
689
+ )
690
+ except ValidationError as e:
500
691
  logger.info(
501
692
  f"Validation failed for {task_result.task_name} in {model_name} {revision}: {e}"
502
693
  )
@@ -505,7 +696,7 @@ class ResultCache:
505
696
  models_results[(model_name, revision)].append(task_result)
506
697
 
507
698
  # create BenchmarkResults object
508
- models_results = [
699
+ models_results_object = [
509
700
  ModelResult(
510
701
  model_name=model_name,
511
702
  model_revision=revision,
@@ -514,8 +705,7 @@ class ResultCache:
514
705
  for (model_name, revision), task_results in models_results.items()
515
706
  ]
516
707
 
517
- benchmark_results = BenchmarkResults(
518
- model_results=models_results,
708
+ return BenchmarkResults(
709
+ model_results=models_results_object,
710
+ benchmark=tasks if isinstance(tasks, Benchmark) else None,
519
711
  )
520
-
521
- return benchmark_results
@@ -1,4 +1,4 @@
1
- from collections.abc import Sequence
1
+ from collections.abc import Iterable, Sequence
2
2
 
3
3
  from mteb.abstasks import AbsTask
4
4
  from mteb.benchmarks import Benchmark
@@ -31,7 +31,7 @@ def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
31
31
  _display_tasks(benchmark.tasks, name=name)
32
32
 
33
33
 
34
- def _display_tasks(task_list: Sequence[AbsTask], name: str | None = None) -> None:
34
+ def _display_tasks(task_list: Iterable[AbsTask], name: str | None = None) -> None:
35
35
  from rich.console import Console
36
36
 
37
37
  console = Console()
mteb/cli/build_cli.py CHANGED
@@ -1,17 +1,19 @@
1
1
  import argparse
2
2
  import logging
3
3
  import os
4
+ import warnings
4
5
  from pathlib import Path
5
6
 
6
7
  import torch
7
8
  from rich.logging import RichHandler
8
9
 
9
10
  import mteb
11
+ from mteb.abstasks.abstask import AbsTask
10
12
  from mteb.cache import ResultCache
13
+ from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
11
14
  from mteb.cli.generate_model_card import generate_model_card
12
15
  from mteb.evaluate import OverwriteStrategy
13
-
14
- from ._display_tasks import _display_benchmarks, _display_tasks
16
+ from mteb.types._encoder_io import EncodeKwargs
15
17
 
16
18
  logger = logging.getLogger(__name__)
17
19
 
@@ -53,7 +55,7 @@ def run(args: argparse.Namespace) -> None:
53
55
 
54
56
  if args.benchmarks:
55
57
  benchmarks = mteb.get_benchmarks(names=args.benchmarks)
56
- tasks = [t for b in benchmarks for t in b.tasks]
58
+ tasks = tuple(t for b in benchmarks for t in b.tasks)
57
59
  else:
58
60
  tasks = mteb.get_tasks(
59
61
  categories=args.categories,
@@ -63,21 +65,23 @@ def run(args: argparse.Namespace) -> None:
63
65
  eval_splits=args.eval_splits,
64
66
  )
65
67
 
66
- encode_kwargs = {}
68
+ encode_kwargs: EncodeKwargs = {}
67
69
  if args.batch_size is not None:
68
70
  encode_kwargs["batch_size"] = args.batch_size
69
71
 
70
72
  overwrite_strategy = args.overwrite_strategy
71
73
  if args.overwrite:
72
- logger.warning(
73
- "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead."
74
+ warnings.warn(
75
+ "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead.",
76
+ DeprecationWarning,
74
77
  )
75
78
  overwrite_strategy = OverwriteStrategy.ALWAYS.value
76
79
 
77
80
  prediction_folder = args.prediction_folder
78
81
  if args.save_predictions:
79
- logger.warning(
80
- "`--save_predictions` is deprecated, please use `--prediction-folder` instead."
82
+ warnings.warn(
83
+ "`--save_predictions` is deprecated, please use `--prediction-folder` instead.",
84
+ DeprecationWarning,
81
85
  )
82
86
  prediction_folder = args.output_folder
83
87
 
@@ -279,23 +283,25 @@ def _create_meta(args: argparse.Namespace) -> None:
279
283
  from_existing = Path(from_existing)
280
284
 
281
285
  if output_path.exists() and overwrite:
282
- logger.warning("Output path already exists, overwriting.")
286
+ msg = "Output path already exists, overwriting."
287
+ logger.warning(msg)
288
+ warnings.warn(msg)
283
289
  elif output_path.exists():
284
290
  raise FileExistsError(
285
291
  "Output path already exists, use --overwrite to overwrite."
286
292
  )
287
293
 
288
- tasks = []
294
+ benchmarks = None
295
+ tasks: list[AbsTask] = []
289
296
  if tasks_names is not None:
290
- tasks = mteb.get_tasks(tasks_names)
297
+ tasks = list(mteb.get_tasks(tasks_names))
291
298
  if benchmarks is not None:
292
299
  benchmarks = mteb.get_benchmarks(benchmarks)
293
- for benchmark in benchmarks:
294
- tasks.extend(benchmark.tasks)
295
300
 
296
301
  generate_model_card(
297
302
  model_name,
298
- tasks if len(tasks) > 0 else None,
303
+ tasks,
304
+ benchmarks,
299
305
  existing_model_card_id_or_path=from_existing,
300
306
  results_cache=ResultCache(results_folder),
301
307
  output_path=output_path,
@@ -356,6 +362,95 @@ def _add_create_meta_parser(subparsers) -> None:
356
362
  parser.set_defaults(func=_create_meta)
357
363
 
358
364
 
365
def _add_leaderboard_parser(subparsers) -> None:
    """Register the ``leaderboard`` sub-command and its options on *subparsers*.

    The sub-command accepts ``--cache-path``, ``--host``, ``--port`` and
    ``--share`` and dispatches to ``_leaderboard`` through the ``func`` default.

    Args:
        subparsers: The sub-parser collection to which the ``leaderboard``
            command is added.
    """
    leaderboard_parser = subparsers.add_parser(
        "leaderboard", help="Launch the MTEB leaderboard"
    )

    # (flag, add_argument keyword arguments), registered in this order
    option_specs = (
        (
            "--cache-path",
            {
                "type": str,
                "help": "Path to the cache folder containing model results",
                "required": False,
                "default": None,
            },
        ),
        (
            "--host",
            {
                "type": str,
                "default": "0.0.0.0",
                "help": "Host to run the leaderboard server on",
            },
        ),
        (
            "--port",
            {
                "type": int,
                "default": 7860,
                "help": "Port to run the leaderboard server on",
            },
        ),
        (
            "--share",
            {
                "action": "store_true",
                "default": False,
                "help": "Create a public URL for the leaderboard",
            },
        ),
    )
    for flag, options in option_specs:
        leaderboard_parser.add_argument(flag, **options)

    leaderboard_parser.set_defaults(func=_leaderboard)
395
+
396
+
397
def _leaderboard(args: argparse.Namespace) -> None:
    """Launch the MTEB leaderboard with specified cache path.

    Args:
        args: Parsed CLI arguments; reads ``cache_path``, ``host``, ``port``
            and ``share``.

    Raises:
        ImportError: If the leaderboard dependencies cannot be imported. The
            original import failure is attached as the cause.
    """
    # Import leaderboard module only when needed to avoid requiring leaderboard dependencies
    # for other CLI commands
    try:
        import gradio as gr

        from mteb.leaderboard import get_leaderboard_app
    except ImportError as e:
        # Chain the original error so the failing module stays visible in the traceback.
        raise ImportError(
            "Seems like some dependencies are not installed. "
            + "You can likely install these using: `pip install mteb[leaderboard]`. "
            + f"{e}"
        ) from e

    cache_path = args.cache_path

    if cache_path:
        logger.info(f"Using cache path: {cache_path}")
        cache = ResultCache(cache_path)
    else:
        cache = ResultCache()
        logger.info(f"Using default cache path: {cache.cache_path}")

    app = get_leaderboard_app(cache)

    logger.info(f"Starting leaderboard on {args.host}:{args.port}")
    if args.share:
        logger.info("Creating public URL...")

    # Raise these loggers to ERROR and filter one warning pattern so the
    # running server's output is not flooded with known, non-actionable messages.
    logging.getLogger("mteb.load_results.task_results").setLevel(
        logging.ERROR
    )  # Warnings related to task split
    logging.getLogger("mteb.model_meta").setLevel(
        logging.ERROR
    )  # Warning related to model metadata (fetch_from_hf=False)
    logging.getLogger("mteb.load_results.benchmark_results").setLevel(
        logging.ERROR
    )  # Warning related to model metadata (fetch_from_hf=False)
    warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")

    # Head content for Tailwind CSS
    head = """
    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
    """

    app.launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        theme=gr.themes.Soft(
            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
        ),
        head=head,
    )
452
+
453
+
359
454
  def build_cli() -> argparse.ArgumentParser:
360
455
  """Builds the argument parser for the MTEB CLI.
361
456
 
@@ -375,6 +470,7 @@ def build_cli() -> argparse.ArgumentParser:
375
470
  _add_available_tasks_parser(subparsers)
376
471
  _add_available_benchmarks_parser(subparsers)
377
472
  _add_create_meta_parser(subparsers)
473
+ _add_leaderboard_parser(subparsers)
378
474
 
379
475
  return parser
380
476