mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486)
  1. mteb/_create_dataloaders.py +63 -14
  2. mteb/_evaluators/any_sts_evaluator.py +12 -5
  3. mteb/_evaluators/clustering_evaluator.py +12 -4
  4. mteb/_evaluators/evaluator.py +11 -5
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +13 -5
  7. mteb/_evaluators/retrieval_evaluator.py +22 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +20 -11
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +10 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +48 -21
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +25 -9
  21. mteb/abstasks/clustering.py +23 -10
  22. mteb/abstasks/clustering_legacy.py +22 -8
  23. mteb/abstasks/image/image_text_pair_classification.py +23 -9
  24. mteb/abstasks/multilabel_classification.py +13 -5
  25. mteb/abstasks/pair_classification.py +27 -11
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +56 -30
  28. mteb/abstasks/retrieval_dataset_loaders.py +48 -37
  29. mteb/abstasks/sts.py +29 -13
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +23 -12
  32. mteb/abstasks/text/reranking.py +2 -2
  33. mteb/abstasks/text/summarization.py +19 -8
  34. mteb/abstasks/zeroshot_classification.py +23 -9
  35. mteb/benchmarks/_create_table.py +13 -7
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/__init__.py +2 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  39. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  40. mteb/cache.py +10 -5
  41. mteb/cli/_display_tasks.py +9 -3
  42. mteb/cli/build_cli.py +5 -2
  43. mteb/cli/generate_model_card.py +9 -2
  44. mteb/deprecated_evaluator.py +16 -12
  45. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  65. mteb/evaluate.py +33 -20
  66. mteb/filter_tasks.py +12 -7
  67. mteb/get_tasks.py +9 -4
  68. mteb/languages/language_scripts.py +8 -3
  69. mteb/leaderboard/app.py +11 -4
  70. mteb/leaderboard/table.py +7 -2
  71. mteb/load_results.py +9 -3
  72. mteb/models/abs_encoder.py +22 -12
  73. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  74. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  75. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  76. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  77. mteb/models/get_model_meta.py +32 -6
  78. mteb/models/instruct_wrapper.py +13 -5
  79. mteb/models/model_implementations/align_models.py +10 -4
  80. mteb/models/model_implementations/amazon_models.py +1 -0
  81. mteb/models/model_implementations/andersborges.py +2 -0
  82. mteb/models/model_implementations/ara_models.py +1 -0
  83. mteb/models/model_implementations/arctic_models.py +8 -0
  84. mteb/models/model_implementations/b1ade_models.py +1 -0
  85. mteb/models/model_implementations/bedrock_models.py +20 -6
  86. mteb/models/model_implementations/bge_models.py +40 -1
  87. mteb/models/model_implementations/bica_model.py +1 -0
  88. mteb/models/model_implementations/blip2_models.py +11 -4
  89. mteb/models/model_implementations/blip_models.py +17 -4
  90. mteb/models/model_implementations/bm25.py +24 -14
  91. mteb/models/model_implementations/bmretriever_models.py +10 -2
  92. mteb/models/model_implementations/cadet_models.py +1 -0
  93. mteb/models/model_implementations/cde_models.py +11 -5
  94. mteb/models/model_implementations/clip_models.py +12 -4
  95. mteb/models/model_implementations/clips_models.py +3 -0
  96. mteb/models/model_implementations/codefuse_models.py +5 -0
  97. mteb/models/model_implementations/codesage_models.py +3 -0
  98. mteb/models/model_implementations/cohere_models.py +14 -4
  99. mteb/models/model_implementations/cohere_v.py +14 -4
  100. mteb/models/model_implementations/colpali_models.py +7 -3
  101. mteb/models/model_implementations/colqwen_models.py +17 -31
  102. mteb/models/model_implementations/colsmol_models.py +3 -1
  103. mteb/models/model_implementations/conan_models.py +11 -4
  104. mteb/models/model_implementations/dino_models.py +28 -4
  105. mteb/models/model_implementations/e5_instruct.py +4 -0
  106. mteb/models/model_implementations/e5_models.py +9 -0
  107. mteb/models/model_implementations/e5_v.py +10 -4
  108. mteb/models/model_implementations/eagerworks_models.py +11 -4
  109. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  110. mteb/models/model_implementations/en_code_retriever.py +1 -0
  111. mteb/models/model_implementations/euler_models.py +1 -0
  112. mteb/models/model_implementations/evaclip_models.py +13 -4
  113. mteb/models/model_implementations/fa_models.py +9 -0
  114. mteb/models/model_implementations/facebookai.py +2 -0
  115. mteb/models/model_implementations/geogpt_models.py +1 -0
  116. mteb/models/model_implementations/gme_v_models.py +7 -3
  117. mteb/models/model_implementations/google_models.py +15 -4
  118. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  119. mteb/models/model_implementations/gritlm_models.py +3 -0
  120. mteb/models/model_implementations/gte_models.py +9 -0
  121. mteb/models/model_implementations/hinvec_models.py +6 -1
  122. mteb/models/model_implementations/human.py +1 -0
  123. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  124. mteb/models/model_implementations/inf_models.py +2 -0
  125. mteb/models/model_implementations/jasper_models.py +14 -5
  126. mteb/models/model_implementations/jina_clip.py +10 -4
  127. mteb/models/model_implementations/jina_models.py +17 -5
  128. mteb/models/model_implementations/kalm_models.py +24 -12
  129. mteb/models/model_implementations/kblab.py +1 -0
  130. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  131. mteb/models/model_implementations/kfst.py +1 -0
  132. mteb/models/model_implementations/kowshik24_models.py +1 -0
  133. mteb/models/model_implementations/lens_models.py +2 -0
  134. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  135. mteb/models/model_implementations/linq_models.py +7 -1
  136. mteb/models/model_implementations/listconranker.py +10 -4
  137. mteb/models/model_implementations/llm2clip_models.py +12 -4
  138. mteb/models/model_implementations/llm2vec_models.py +20 -6
  139. mteb/models/model_implementations/mcinext_models.py +8 -2
  140. mteb/models/model_implementations/mdbr_models.py +2 -0
  141. mteb/models/model_implementations/misc_models.py +63 -0
  142. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  143. mteb/models/model_implementations/mme5_models.py +2 -1
  144. mteb/models/model_implementations/moco_models.py +11 -4
  145. mteb/models/model_implementations/mod_models.py +2 -1
  146. mteb/models/model_implementations/model2vec_models.py +23 -4
  147. mteb/models/model_implementations/moka_models.py +3 -0
  148. mteb/models/model_implementations/nbailab.py +3 -0
  149. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  150. mteb/models/model_implementations/nomic_models.py +17 -4
  151. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  152. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  153. mteb/models/model_implementations/nvidia_models.py +15 -4
  154. mteb/models/model_implementations/octen_models.py +3 -1
  155. mteb/models/model_implementations/openai_models.py +14 -4
  156. mteb/models/model_implementations/openclip_models.py +17 -4
  157. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  158. mteb/models/model_implementations/ops_moa_models.py +9 -2
  159. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  160. mteb/models/model_implementations/pawan_models.py +1 -0
  161. mteb/models/model_implementations/piccolo_models.py +2 -0
  162. mteb/models/model_implementations/promptriever_models.py +16 -6
  163. mteb/models/model_implementations/pylate_models.py +32 -13
  164. mteb/models/model_implementations/qodo_models.py +2 -0
  165. mteb/models/model_implementations/qtack_models.py +1 -0
  166. mteb/models/model_implementations/qwen3_models.py +11 -1
  167. mteb/models/model_implementations/qzhou_models.py +2 -0
  168. mteb/models/model_implementations/random_baseline.py +4 -3
  169. mteb/models/model_implementations/rasgaard_models.py +1 -0
  170. mteb/models/model_implementations/reasonir_model.py +65 -0
  171. mteb/models/model_implementations/repllama_models.py +15 -6
  172. mteb/models/model_implementations/rerankers_custom.py +13 -4
  173. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  174. mteb/models/model_implementations/richinfoai_models.py +1 -0
  175. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  176. mteb/models/model_implementations/ruri_models.py +10 -0
  177. mteb/models/model_implementations/salesforce_models.py +10 -1
  178. mteb/models/model_implementations/samilpwc_models.py +1 -0
  179. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  180. mteb/models/model_implementations/searchmap_models.py +1 -0
  181. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  182. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  183. mteb/models/model_implementations/seed_models.py +2 -1
  184. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  185. mteb/models/model_implementations/shuu_model.py +1 -0
  186. mteb/models/model_implementations/siglip_models.py +19 -4
  187. mteb/models/model_implementations/slm_models.py +7 -4
  188. mteb/models/model_implementations/sonar_models.py +2 -1
  189. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  190. mteb/models/model_implementations/stella_models.py +6 -0
  191. mteb/models/model_implementations/tarka_models.py +2 -0
  192. mteb/models/model_implementations/text2vec_models.py +3 -0
  193. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  194. mteb/models/model_implementations/uae_models.py +10 -4
  195. mteb/models/model_implementations/vdr_models.py +8 -1
  196. mteb/models/model_implementations/vi_vn_models.py +6 -0
  197. mteb/models/model_implementations/vista_models.py +11 -4
  198. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  199. mteb/models/model_implementations/voyage_models.py +52 -4
  200. mteb/models/model_implementations/voyage_v.py +11 -6
  201. mteb/models/model_implementations/xyz_models.py +1 -0
  202. mteb/models/model_implementations/youtu_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models.py +1 -0
  204. mteb/models/model_implementations/yuan_models_en.py +2 -1
  205. mteb/models/model_meta.py +47 -9
  206. mteb/models/models_protocols.py +23 -18
  207. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  208. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  209. mteb/models/search_wrappers.py +31 -12
  210. mteb/models/sentence_transformer_wrapper.py +4 -3
  211. mteb/models/vllm_wrapper.py +8 -6
  212. mteb/results/benchmark_results.py +22 -17
  213. mteb/results/model_result.py +21 -15
  214. mteb/results/task_result.py +32 -16
  215. mteb/similarity_functions.py +8 -2
  216. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  220. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  223. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  224. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  225. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  226. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  227. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  228. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  229. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  230. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  231. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  232. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  233. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  234. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  235. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  236. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  237. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  238. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  239. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  240. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  241. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  242. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  243. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  244. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  245. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  246. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  247. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  248. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  249. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  250. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  251. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  252. mteb/tasks/classification/est/estonian_valence.py +1 -1
  253. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  254. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  256. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  257. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  258. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  259. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  260. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  261. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  262. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  263. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  264. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  265. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  266. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  267. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  268. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  269. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  270. mteb/tasks/classification/kor/klue_tc.py +2 -2
  271. mteb/tasks/classification/kor/kor_fin.py +1 -1
  272. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  273. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  274. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  275. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  276. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  277. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  278. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  279. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  280. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  281. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  282. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  283. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  284. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  285. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  286. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  287. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  288. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  289. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  290. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  291. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  292. mteb/tasks/classification/ron/moroco.py +1 -1
  293. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  294. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  295. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  296. mteb/tasks/classification/rus/headline_classification.py +2 -2
  297. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  298. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  299. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  300. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  301. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  302. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  303. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  304. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  305. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  306. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  307. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  308. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  309. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  310. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  311. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  312. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  313. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  314. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  315. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  316. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  317. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  318. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  319. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  320. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  321. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  322. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  323. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  324. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  325. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  326. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  327. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  328. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  329. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  330. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  331. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  332. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  333. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  334. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  335. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  336. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  337. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  338. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  341. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  342. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  343. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  344. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  345. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  346. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  347. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  348. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  349. mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
  350. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  351. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  352. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  353. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  354. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  355. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  356. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  357. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  358. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  359. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  360. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  361. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  362. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  363. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  364. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  365. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  366. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  367. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  368. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  369. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  370. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  371. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  372. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  373. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  374. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  375. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  376. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  377. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  378. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  379. mteb/tasks/pair_classification/rus/terra.py +2 -2
  380. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  381. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  382. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  383. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  384. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  385. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  386. mteb/tasks/retrieval/code/code_rag.py +4 -4
  387. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  388. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  389. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  390. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  391. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  392. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  393. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  394. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  395. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
  396. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
  397. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  398. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  399. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  400. mteb/tasks/retrieval/eng/__init__.py +42 -0
  401. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  402. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  403. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  404. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  405. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  406. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  407. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  408. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  409. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  410. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  411. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  412. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  413. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  414. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  415. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  416. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  417. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  418. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  419. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  420. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  421. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  422. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  423. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  424. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  425. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  426. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  428. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  435. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  438. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  439. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  440. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  441. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  442. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  443. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  444. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  445. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  446. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  447. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  448. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  449. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  450. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  451. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  452. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  453. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  454. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  455. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  456. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  457. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  458. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  459. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  460. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  461. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  462. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  463. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  464. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  465. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  466. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  467. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  468. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  469. mteb/tasks/retrieval/nob/norquad.py +1 -1
  470. mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
  471. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  472. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  473. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  474. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  475. mteb/tasks/sts/kor/klue_sts.py +1 -1
  476. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  477. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  478. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  479. mteb/types/_encoder_io.py +1 -1
  480. mteb/types/statistics.py +9 -2
  481. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
  482. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
  483. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  484. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  485. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  486. {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -34,7 +34,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
34
34
  For dataformat and other information, see [AbsTaskRetrieval][mteb.abstasks.retrieval.AbsTaskRetrieval].
35
35
  """
36
36
 
37
- def load_data(self) -> None:
37
+ def load_data(self, num_proc: int = 1, **kwargs) -> None:
38
38
  """Load the dataset."""
39
39
  if self.data_loaded:
40
40
  return
@@ -43,7 +43,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
43
43
  self.transform_old_dataset_format()
44
44
  else:
45
45
  # use AbsTaskRetrieval default to load the data
46
- return super().load_data()
46
+ return super().load_data(num_proc=num_proc)
47
47
 
48
48
  def _process_example(self, example: dict, split: str, query_idx: int) -> dict:
49
49
  """Process a single example from the dataset.
@@ -1,24 +1,34 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
4
+ from typing import TYPE_CHECKING
3
5
 
4
6
  import numpy as np
5
- from datasets import Dataset
6
7
 
7
8
  from mteb._evaluators import SummarizationEvaluator
8
- from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
9
9
  from mteb.abstasks._statistics_calculation import (
10
10
  calculate_score_statistics,
11
11
  calculate_text_statistics,
12
12
  )
13
13
  from mteb.abstasks.abstask import AbsTask
14
- from mteb.models import EncoderProtocol, MTEBModels
15
- from mteb.types import EncodeKwargs
14
+ from mteb.models import EncoderProtocol
16
15
  from mteb.types.statistics import (
17
- ScoreStatistics,
18
16
  SplitDescriptiveStatistics,
19
- TextStatistics,
20
17
  )
21
18
 
19
+ if TYPE_CHECKING:
20
+ from pathlib import Path
21
+
22
+ from datasets import Dataset
23
+
24
+ from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
25
+ from mteb.models import MTEBModels
26
+ from mteb.types import EncodeKwargs
27
+ from mteb.types.statistics import (
28
+ ScoreStatistics,
29
+ TextStatistics,
30
+ )
31
+
22
32
  logger = logging.getLogger(__name__)
23
33
 
24
34
 
@@ -84,6 +94,7 @@ class AbsTaskSummarization(AbsTask):
84
94
  hf_subset: str,
85
95
  encode_kwargs: EncodeKwargs,
86
96
  prediction_folder: Path | None = None,
97
+ num_proc: int = 1,
87
98
  **kwargs,
88
99
  ) -> SummarizationMetrics:
89
100
  if not isinstance(model, EncoderProtocol):
@@ -105,7 +116,7 @@ class AbsTaskSummarization(AbsTask):
105
116
  hf_subset=hf_subset,
106
117
  **kwargs,
107
118
  )
108
- scores = evaluator(model, encode_kwargs=encode_kwargs)
119
+ scores = evaluator(model, encode_kwargs=encode_kwargs, num_proc=num_proc)
109
120
  if prediction_folder:
110
121
  self._save_task_predictions(
111
122
  scores,
@@ -1,19 +1,16 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from pathlib import Path
3
- from typing import TypedDict
4
+ from typing import TYPE_CHECKING, TypedDict
4
5
 
5
6
  import torch
6
7
  from datasets import Dataset
7
8
  from sklearn import metrics
8
9
 
9
10
  from mteb._evaluators import ZeroShotClassificationEvaluator
10
- from mteb.models import EncoderProtocol, MTEBModels
11
- from mteb.types import EncodeKwargs
11
+ from mteb.models import EncoderProtocol
12
12
  from mteb.types.statistics import (
13
- ImageStatistics,
14
- LabelStatistics,
15
13
  SplitDescriptiveStatistics,
16
- TextStatistics,
17
14
  )
18
15
 
19
16
  from ._statistics_calculation import (
@@ -23,6 +20,17 @@ from ._statistics_calculation import (
23
20
  )
24
21
  from .abstask import AbsTask
25
22
 
23
+ if TYPE_CHECKING:
24
+ from pathlib import Path
25
+
26
+ from mteb.models import MTEBModels
27
+ from mteb.types import EncodeKwargs
28
+ from mteb.types.statistics import (
29
+ ImageStatistics,
30
+ LabelStatistics,
31
+ TextStatistics,
32
+ )
33
+
26
34
  logger = logging.getLogger(__name__)
27
35
 
28
36
 
@@ -119,6 +127,7 @@ class AbsTaskZeroShotClassification(AbsTask):
119
127
  hf_subset: str,
120
128
  encode_kwargs: EncodeKwargs,
121
129
  prediction_folder: Path | None = None,
130
+ num_proc: int = 1,
122
131
  **kwargs,
123
132
  ) -> ZeroShotClassificationMetrics:
124
133
  if not isinstance(model, EncoderProtocol):
@@ -137,7 +146,11 @@ class AbsTaskZeroShotClassification(AbsTask):
137
146
  hf_subset=hf_subset,
138
147
  **kwargs,
139
148
  )
140
- probs = evaluator(model, encode_kwargs=encode_kwargs)
149
+ probs = evaluator(
150
+ model,
151
+ encode_kwargs=encode_kwargs,
152
+ num_proc=num_proc,
153
+ )
141
154
 
142
155
  if prediction_folder:
143
156
  self._save_task_predictions(
@@ -162,13 +175,14 @@ class AbsTaskZeroShotClassification(AbsTask):
162
175
  accuracy=metrics.accuracy_score(labels, predictions),
163
176
  )
164
177
 
165
- def _push_dataset_to_hub(self, repo_name: str) -> None:
178
+ def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
166
179
  self._upload_dataset_to_hub(
167
180
  repo_name,
168
181
  [
169
182
  self.input_column_name,
170
183
  self.label_column_name,
171
184
  ],
185
+ num_proc=num_proc,
172
186
  )
173
187
  labels_dataset = Dataset.from_dict({"labels": self.get_candidate_labels()})
174
188
  labels_dataset.push_to_hub(repo_name, config_name="labels")
@@ -1,13 +1,17 @@
1
+ from __future__ import annotations
2
+
1
3
  import re
2
4
  from collections import defaultdict
3
- from typing import Literal
5
+ from typing import TYPE_CHECKING, Literal
4
6
 
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
8
10
  import mteb
9
11
  from mteb.get_tasks import get_task, get_tasks
10
- from mteb.results.benchmark_results import BenchmarkResults
12
+
13
+ if TYPE_CHECKING:
14
+ from mteb.results.benchmark_results import BenchmarkResults
11
15
 
12
16
 
13
17
  def _borda_count(scores: pd.Series) -> pd.Series:
@@ -115,7 +119,6 @@ def _create_summary_table_from_benchmark_results(
115
119
 
116
120
  # Build joint table
117
121
  joint_table = mean_per_type.copy()
118
- joint_table = joint_table.drop(models_to_remove, axis=0)
119
122
  joint_table.insert(0, "mean", overall_mean)
120
123
  joint_table.insert(1, "mean_by_task_type", typed_mean)
121
124
  joint_table["borda_rank"] = _get_borda_rank(per_task)
@@ -303,6 +306,7 @@ def _create_per_language_table_from_benchmark_results(
303
306
 
304
307
  def _create_summary_table_mean_public_private(
305
308
  benchmark_results: BenchmarkResults,
309
+ exclude_private_from_borda: bool = False,
306
310
  ) -> pd.DataFrame:
307
311
  """Create summary table from BenchmarkResults.
308
312
 
@@ -311,6 +315,7 @@ def _create_summary_table_mean_public_private(
311
315
 
312
316
  Args:
313
317
  benchmark_results: BenchmarkResults object containing model results
318
+ exclude_private_from_borda: If True, calculate Borda rank using only public tasks
314
319
 
315
320
  Returns:
316
321
  DataFrame with model summaries, ready for styling in the leaderboard
@@ -353,10 +358,13 @@ def _create_summary_table_mean_public_private(
353
358
 
354
359
  # Build joint table
355
360
  joint_table = mean_per_type.copy()
356
- joint_table = joint_table.drop(models_to_remove, axis=0)
357
361
  joint_table.insert(0, "mean(public)", public_mean)
358
362
  joint_table.insert(1, "mean(private)", private_mean)
359
- joint_table["borda_rank"] = _get_borda_rank(per_task)
363
+ if exclude_private_from_borda:
364
+ borda_per_task = per_task[public_task_name]
365
+ else:
366
+ borda_per_task = per_task
367
+ joint_table["borda_rank"] = _get_borda_rank(borda_per_task)
360
368
  joint_table = joint_table.sort_values("borda_rank", ascending=True)
361
369
  joint_table = joint_table.reset_index()
362
370
 
@@ -476,7 +484,6 @@ def _create_summary_table_mean_subset(
476
484
 
477
485
  # Build joint table
478
486
  joint_table = mean_per_type.copy()
479
- joint_table = joint_table.drop(models_to_remove, axis=0)
480
487
  joint_table.insert(0, "mean(subset)", overall_subset_mean)
481
488
  joint_table["borda_rank"] = _get_borda_rank(per_subset)
482
489
  joint_table = joint_table.sort_values("mean(subset)", ascending=False)
@@ -595,7 +602,6 @@ def _create_summary_table_mean_task_type(
595
602
 
596
603
  # Build joint table
597
604
  joint_table = mean_per_type.copy()
598
- joint_table = joint_table.drop(models_to_remove, axis=0)
599
605
  joint_table.insert(0, "mean_by_task_type", typed_mean)
600
606
  joint_table = joint_table.sort_values("mean_by_task_type", ascending=False)
601
607
  joint_table["borda_rank"] = _get_borda_rank(per_task)
@@ -123,9 +123,19 @@ class RtebBenchmark(Benchmark):
123
123
  _create_summary_table_mean_public_private,
124
124
  )
125
125
 
126
- joint_table = _create_summary_table_mean_public_private(benchmark_results)
126
+ joint_table = _create_summary_table_mean_public_private(
127
+ benchmark_results, exclude_private_from_borda=True
128
+ )
129
+ # issue 3902: temporary remove the private column from RTEB summary table
130
+ if "Mean (Private)" in joint_table.columns:
131
+ joint_table = joint_table.drop(columns=["Mean (Private)"])
127
132
  # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
133
+ # but due to 3902, if Private column existed, Mean (Task) was the mean of Public and Private so instead we drop Mean (Task) and rename Mean (Public) to Mean (Task)
128
134
  joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
135
+ if "Mean (Task)" in joint_table.columns:
136
+ joint_table = joint_table.drop(columns=["Mean (Task)"])
137
+ joint_table = joint_table.rename(columns={"Mean (Public)": "Mean (Task)"})
138
+
129
139
  return joint_table
130
140
 
131
141
 
@@ -3,6 +3,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
3
3
  BEIR_NL,
4
4
  BRIGHT,
5
5
  BRIGHT_LONG,
6
+ BRIGHT_V1_1,
6
7
  BUILT_MTEB,
7
8
  C_MTEB,
8
9
  CHEMTEB,
@@ -69,6 +70,7 @@ __all__ = [
69
70
  "BEIR_NL",
70
71
  "BRIGHT",
71
72
  "BRIGHT_LONG",
73
+ "BRIGHT_V1_1",
72
74
  "BUILT_MTEB",
73
75
  "CHEMTEB",
74
76
  "CHEMTEB_V1_1",
@@ -1330,6 +1330,46 @@ This is the long version of the benchmark, which only filter longer documents.
1330
1330
  """,
1331
1331
  )
1332
1332
 
1333
+ BRIGHT_V1_1 = Benchmark(
1334
+ name="BRIGHT(v1.1)",
1335
+ display_name="Reasoning Retrieval",
1336
+ tasks=get_tasks(
1337
+ tasks=[
1338
+ "BrightBiologyRetrieval",
1339
+ "BrightEarthScienceRetrieval",
1340
+ "BrightEconomicsRetrieval",
1341
+ "BrightPsychologyRetrieval",
1342
+ "BrightRoboticsRetrieval",
1343
+ "BrightStackoverflowRetrieval",
1344
+ "BrightSustainableLivingRetrieval",
1345
+ "BrightPonyRetrieval",
1346
+ "BrightLeetcodeRetrieval",
1347
+ "BrightAopsRetrieval",
1348
+ "BrightTheoremQATheoremsRetrieval",
1349
+ "BrightTheoremQAQuestionsRetrieval",
1350
+ "BrightBiologyLongRetrieval",
1351
+ "BrightEarthScienceLongRetrieval",
1352
+ "BrightEconomicsLongRetrieval",
1353
+ "BrightPsychologyLongRetrieval",
1354
+ "BrightRoboticsLongRetrieval",
1355
+ "BrightStackoverflowLongRetrieval",
1356
+ "BrightSustainableLivingLongRetrieval",
1357
+ "BrightPonyLongRetrieval",
1358
+ ],
1359
+ ),
1360
+ description="v1.1 refactors the BRIGHT into a different tasks and added prompt to individual tasks.",
1361
+ reference="https://brightbenchmark.github.io/",
1362
+ citation=r"""
1363
+ @article{su2024bright,
1364
+ author = {Su, Hongjin and Yen, Howard and Xia, Mengzhou and Shi, Weijia and Muennighoff, Niklas and Wang, Han-yu and Liu, Haisu and Shi, Quan and Siegel, Zachary S and Tang, Michael and others},
1365
+ journal = {arXiv preprint arXiv:2407.12883},
1366
+ title = {Bright: A realistic and challenging benchmark for reasoning-intensive retrieval},
1367
+ year = {2024},
1368
+ }
1369
+ """,
1370
+ )
1371
+
1372
+
1333
1373
  CODE_RAG = Benchmark(
1334
1374
  name="CodeRAG",
1335
1375
  tasks=get_tasks(
@@ -1781,8 +1821,7 @@ BEIR_NL = Benchmark(
1781
1821
  "TRECCOVID-NL",
1782
1822
  ],
1783
1823
  ),
1784
- description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated "
1785
- "translation.",
1824
+ description="BEIR-NL is a Dutch adaptation of the publicly available BEIR benchmark, created through automated translation.",
1786
1825
  reference="https://arxiv.org/abs/2412.08329",
1787
1826
  contacts=["nikolay-banar"],
1788
1827
  citation=r"""
@@ -10,6 +10,8 @@ RTEB_CITATION = r"""@article{rteb2025,
10
10
  year = {2025},
11
11
  }"""
12
12
 
13
+ removal_note = "\n\nNote: We have temporarily removed the 'Private' column to read more about this decision out the [announcement](https://github.com/embeddings-benchmark/mteb/issues/3934)."
14
+
13
15
  RTEB_MAIN = RtebBenchmark(
14
16
  name="RTEB(beta)",
15
17
  display_name="RTEB Multilingual",
@@ -48,7 +50,8 @@ RTEB_MAIN = RtebBenchmark(
48
50
  "JapaneseLegal1Retrieval",
49
51
  ],
50
52
  ),
51
- description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
53
+ description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
54
+ + removal_note,
52
55
  citation=RTEB_CITATION,
53
56
  contacts=["fzowl"],
54
57
  )
@@ -83,7 +86,8 @@ RTEB_ENGLISH = RtebBenchmark(
83
86
  ],
84
87
  languages=["eng"],
85
88
  ),
86
- description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
89
+ description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
90
+ + removal_note,
87
91
  citation=RTEB_CITATION,
88
92
  contacts=["fzowl"],
89
93
  )
@@ -101,7 +105,8 @@ RTEB_FRENCH = RtebBenchmark(
101
105
  ],
102
106
  languages=["fra"],
103
107
  ),
104
- description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
108
+ description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
109
+ + removal_note,
105
110
  citation=RTEB_CITATION,
106
111
  contacts=["fzowl"],
107
112
  )
@@ -119,7 +124,8 @@ RTEB_GERMAN = RtebBenchmark(
119
124
  "GermanLegal1Retrieval",
120
125
  ],
121
126
  ),
122
- description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
127
+ description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
128
+ + removal_note,
123
129
  citation=RTEB_CITATION,
124
130
  contacts=["fzowl"],
125
131
  )
@@ -135,7 +141,8 @@ RTEB_JAPANESE = RtebBenchmark(
135
141
  "JapaneseLegal1Retrieval",
136
142
  ],
137
143
  ),
138
- description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
144
+ description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
145
+ + removal_note,
139
146
  citation=RTEB_CITATION,
140
147
  contacts=["fzowl"],
141
148
  )
@@ -156,7 +163,8 @@ RTEB_FINANCE = RtebBenchmark(
156
163
  "EnglishFinance4Retrieval",
157
164
  ],
158
165
  ),
159
- description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
166
+ description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
167
+ + removal_note,
160
168
  citation=RTEB_CITATION,
161
169
  contacts=["fzowl"],
162
170
  )
@@ -177,7 +185,8 @@ RTEB_LEGAL = RtebBenchmark(
177
185
  "JapaneseLegal1Retrieval",
178
186
  ],
179
187
  ),
180
- description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
188
+ description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
189
+ + removal_note,
181
190
  citation=RTEB_CITATION,
182
191
  contacts=["fzowl"],
183
192
  )
@@ -199,7 +208,8 @@ RTEB_CODE = RtebBenchmark(
199
208
  "JapaneseCode1Retrieval",
200
209
  ],
201
210
  ),
202
- description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
211
+ description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
212
+ + removal_note,
203
213
  citation=RTEB_CITATION,
204
214
  contacts=["fzowl"],
205
215
  )
@@ -217,7 +227,8 @@ RTEB_HEALTHCARE = RtebBenchmark(
217
227
  "GermanHealthcare1Retrieval",
218
228
  ],
219
229
  ),
220
- description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
230
+ description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
231
+ + removal_note,
221
232
  citation=RTEB_CITATION,
222
233
  contacts=["fzowl"],
223
234
  )
mteb/cache.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import gzip
2
4
  import io
3
5
  import json
@@ -7,9 +9,8 @@ import shutil
7
9
  import subprocess
8
10
  import warnings
9
11
  from collections import defaultdict
10
- from collections.abc import Iterable, Sequence
11
12
  from pathlib import Path
12
- from typing import cast
13
+ from typing import TYPE_CHECKING, cast
13
14
 
14
15
  import requests
15
16
  from pydantic import ValidationError
@@ -19,7 +20,11 @@ from mteb.abstasks import AbsTask
19
20
  from mteb.benchmarks.benchmark import Benchmark
20
21
  from mteb.models import ModelMeta
21
22
  from mteb.results import BenchmarkResults, ModelResult, TaskResult
22
- from mteb.types import ModelName, Revision
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Iterable, Sequence
26
+
27
+ from mteb.types import ModelName, Revision
23
28
 
24
29
  logger = logging.getLogger(__name__)
25
30
 
@@ -584,7 +589,7 @@ class ResultCache:
584
589
 
585
590
  first_model = next(iter(models))
586
591
  if isinstance(first_model, ModelMeta):
587
- models = cast(Iterable[ModelMeta], models)
592
+ models = cast("Iterable[ModelMeta]", models)
588
593
  name_and_revision = {
589
594
  (m.model_name_as_path(), m.revision or "no_revision_available")
590
595
  for m in models
@@ -595,7 +600,7 @@ class ResultCache:
595
600
  if (p.parent.parent.name, p.parent.name) in name_and_revision
596
601
  ]
597
602
 
598
- str_models = cast(Sequence[str], models)
603
+ str_models = cast("Sequence[str]", models)
599
604
  model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
600
605
  return [p for p in paths if p.parent.parent.name in model_names]
601
606
 
@@ -1,9 +1,15 @@
1
- from collections.abc import Iterable, Sequence
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
2
4
 
3
- from mteb.abstasks import AbsTask
4
- from mteb.benchmarks import Benchmark
5
5
  from mteb.get_tasks import MTEBTasks
6
6
 
7
+ if TYPE_CHECKING:
8
+ from collections.abc import Iterable, Sequence
9
+
10
+ from mteb.abstasks import AbsTask
11
+ from mteb.benchmarks import Benchmark
12
+
7
13
 
8
14
  def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
9
15
  """Get all benchmarks available in the MTEB."""
mteb/cli/build_cli.py CHANGED
@@ -3,17 +3,20 @@ import logging
3
3
  import os
4
4
  import warnings
5
5
  from pathlib import Path
6
+ from typing import TYPE_CHECKING
6
7
 
7
8
  import torch
8
9
  from rich.logging import RichHandler
9
10
 
10
11
  import mteb
11
- from mteb.abstasks.abstask import AbsTask
12
12
  from mteb.cache import ResultCache
13
13
  from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
14
14
  from mteb.cli.generate_model_card import generate_model_card
15
15
  from mteb.evaluate import OverwriteStrategy
16
- from mteb.types._encoder_io import EncodeKwargs
16
+
17
+ if TYPE_CHECKING:
18
+ from mteb.abstasks.abstask import AbsTask
19
+ from mteb.types import EncodeKwargs
17
20
 
18
21
  logger = logging.getLogger(__name__)
19
22
 
@@ -1,14 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
- from collections.abc import Sequence
4
5
  from pathlib import Path
6
+ from typing import TYPE_CHECKING
5
7
 
6
8
  from huggingface_hub import ModelCard, ModelCardData, repo_exists
7
9
 
8
10
  from mteb.abstasks.abstask import AbsTask
9
- from mteb.benchmarks.benchmark import Benchmark
10
11
  from mteb.cache import ResultCache
11
12
 
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Sequence
15
+
16
+ from mteb.abstasks.abstask import AbsTask
17
+ from mteb.benchmarks.benchmark import Benchmark
18
+
12
19
  logger = logging.getLogger(__name__)
13
20
 
14
21
 
@@ -6,7 +6,6 @@ import os
6
6
  import sys
7
7
  import traceback
8
8
  import warnings
9
- from collections.abc import Iterable, Sequence
10
9
  from copy import deepcopy
11
10
  from datetime import datetime
12
11
  from itertools import chain
@@ -18,26 +17,31 @@ import datasets
18
17
 
19
18
  import mteb
20
19
  from mteb.abstasks import AbsTask
21
- from mteb.abstasks.aggregated_task import AbsTaskAggregate
22
- from mteb.abstasks.task_metadata import TaskCategory, TaskType
23
20
  from mteb.benchmarks import Benchmark
24
21
  from mteb.models import (
25
22
  CrossEncoderWrapper,
26
23
  ModelMeta,
27
- MTEBModels,
28
24
  SentenceTransformerEncoderWrapper,
29
25
  )
30
26
  from mteb.results import TaskResult
31
- from mteb.types import EncodeKwargs, ScoresDict
27
+
28
+ if TYPE_CHECKING:
29
+ from collections.abc import Iterable, Sequence
30
+
31
+ from sentence_transformers import CrossEncoder, SentenceTransformer
32
+
33
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
34
+ from mteb.abstasks.task_metadata import TaskCategory, TaskType
35
+ from mteb.models import (
36
+ MTEBModels,
37
+ )
38
+ from mteb.types import EncodeKwargs, ScoresDict
32
39
 
33
40
  if sys.version_info >= (3, 13):
34
41
  from warnings import deprecated
35
42
  else:
36
43
  from typing_extensions import deprecated
37
44
 
38
- if TYPE_CHECKING:
39
- from sentence_transformers import CrossEncoder, SentenceTransformer
40
-
41
45
  logger = logging.getLogger(__name__)
42
46
 
43
47
 
@@ -66,9 +70,9 @@ class MTEB:
66
70
  """
67
71
  if isinstance(next(iter(tasks)), Benchmark):
68
72
  self.benchmarks = tasks
69
- self.tasks = list(chain.from_iterable(cast(Iterable[Benchmark], tasks)))
73
+ self.tasks = list(chain.from_iterable(cast("Iterable[Benchmark]", tasks)))
70
74
  elif isinstance(next(iter(tasks)), AbsTask):
71
- self.tasks = list(cast(Iterable[AbsTask], tasks))
75
+ self.tasks = list(cast("Iterable[AbsTask]", tasks))
72
76
 
73
77
  self.err_logs_path = Path(err_logs_path)
74
78
  self._last_evaluated_splits: dict[str, list[str]] = {}
@@ -313,7 +317,7 @@ class MTEB:
313
317
  elif isinstance(model, CrossEncoder):
314
318
  mteb_model = CrossEncoderWrapper(model)
315
319
  else:
316
- mteb_model = cast(MTEBModels, model)
320
+ mteb_model = cast("MTEBModels", model)
317
321
 
318
322
  meta = self.create_model_meta(mteb_model)
319
323
  output_path = self._create_output_folder(meta, output_folder)
@@ -346,7 +350,7 @@ class MTEB:
346
350
  )
347
351
 
348
352
  if task.is_aggregate:
349
- aggregated_task = cast(AbsTaskAggregate, task)
353
+ aggregated_task = cast("AbsTaskAggregate", task)
350
354
  self_ = MTEB(tasks=aggregated_task.metadata.tasks)
351
355
  aggregated_task_results = self_.run(
352
356
  mteb_model,
@@ -0,0 +1,35 @@
1
+ {
2
+ "standard": {
3
+ "num_samples": 188113,
4
+ "number_of_characters": 141769714,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 141734227,
7
+ "min_text_length": 58,
8
+ "average_text_length": 753.8974425803981,
9
+ "max_text_length": 7334,
10
+ "unique_texts": 176508
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 35487,
15
+ "min_text_length": 85,
16
+ "average_text_length": 319.7027027027027,
17
+ "max_text_length": 1167,
18
+ "unique_texts": 111
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 524,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 4.7207207207207205,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 111
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 20264921,
30
+ "min_top_ranked_per_query": 176954,
31
+ "average_top_ranked_per_query": 182566.85585585586,
32
+ "max_top_ranked_per_query": 186176
33
+ }
34
+ }
35
+ }