mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING
3
5
 
4
6
  from mteb._create_dataloaders import _create_text_queries_dataloader
5
7
  from mteb._requires_package import requires_package
6
- from mteb.abstasks.task_metadata import TaskMetadata
7
8
  from mteb.models.model_meta import ModelMeta
8
- from mteb.models.models_protocols import SearchProtocol
9
- from mteb.types import (
10
- CorpusDatasetType,
11
- InstructionDatasetType,
12
- QueryDatasetType,
13
- RetrievalOutputType,
14
- TopRankedDocumentsType,
15
- )
9
+
10
+ if TYPE_CHECKING:
11
+ from mteb.abstasks.task_metadata import TaskMetadata
12
+ from mteb.models.models_protocols import SearchProtocol
13
+ from mteb.types import (
14
+ CorpusDatasetType,
15
+ EncodeKwargs,
16
+ QueryDatasetType,
17
+ RetrievalOutputType,
18
+ TopRankedDocumentsType,
19
+ )
16
20
 
17
21
  logger = logging.getLogger(__name__)
18
22
 
@@ -49,7 +53,8 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
49
53
  task_metadata: TaskMetadata,
50
54
  hf_split: str,
51
55
  hf_subset: str,
52
- encode_kwargs: dict[str, Any],
56
+ encode_kwargs: EncodeKwargs,
57
+ num_proc: int = 1,
53
58
  ) -> None:
54
59
  logger.info("Encoding Corpus...")
55
60
  corpus_texts = [
@@ -74,9 +79,9 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
74
79
  hf_split: str,
75
80
  hf_subset: str,
76
81
  top_k: int,
77
- encode_kwargs: dict[str, Any],
78
- instructions: InstructionDatasetType | None = None,
82
+ encode_kwargs: EncodeKwargs,
79
83
  top_ranked: TopRankedDocumentsType | None = None,
84
+ num_proc: int = 1,
80
85
  ) -> RetrievalOutputType:
81
86
  logger.info("Encoding Queries...")
82
87
  query_ids = list(queries["id"])
@@ -98,13 +103,17 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
98
103
  query_results = queries_results[qi]
99
104
  scores = queries_scores[qi]
100
105
  doc_id_to_score = {}
106
+ query_documents = (
107
+ top_ranked[qid] if top_ranked and qid in top_ranked else None
108
+ )
101
109
 
102
110
  # Iterate over results
103
- for ri in range(len(query_results)):
104
- doc_idx = query_results[ri]
105
- score = scores[ri]
111
+ for doc_idx, score in zip(query_results, scores):
106
112
  doc_id = self.corpus_idx_to_id[doc_idx]
107
113
 
114
+ # handle reranking with a filtered set of documents
115
+ if query_documents is not None and doc_id not in query_documents:
116
+ continue
108
117
  doc_id_to_score[doc_id] = float(score)
109
118
 
110
119
  results[qid] = doc_id_to_score
@@ -113,7 +122,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
113
122
 
114
123
  def encode(self, texts: list[str]):
115
124
  """Encode input text as term vectors"""
116
- return bm25s.tokenize(texts, stopwords=self.stopwords, stemmer=self.stemmer) # type: ignore
125
+ return bm25s.tokenize(texts, stopwords=self.stopwords, stemmer=self.stemmer)
117
126
 
118
127
  return BM25Search(**kwargs)
119
128
 
@@ -127,6 +136,7 @@ bm25_s = ModelMeta(
127
136
  revision="0_1_10",
128
137
  release_date="2024-07-10", # release of version 0.1.10
129
138
  n_parameters=None,
139
+ n_embedding_parameters=None,
130
140
  memory_usage_mb=None,
131
141
  embed_dim=None,
132
142
  license=None,
@@ -1,5 +1,6 @@
1
- from collections.abc import Callable
2
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
3
4
 
4
5
  import torch
5
6
  from sentence_transformers import SentenceTransformer
@@ -9,6 +10,9 @@ from mteb.models import ModelMeta
9
10
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
10
11
  from mteb.types import PromptType
11
12
 
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Callable
15
+
12
16
 
13
17
  def instruction_template(
14
18
  instruction: str, prompt_type: PromptType | None = None
@@ -25,6 +29,7 @@ class BMRetrieverWrapper(InstructSentenceTransformerModel):
25
29
  self,
26
30
  model_name: str,
27
31
  revision: str,
32
+ device: str | None = None,
28
33
  instruction_template: str
29
34
  | Callable[[str, PromptType | None], str]
30
35
  | None = None,
@@ -52,6 +57,7 @@ class BMRetrieverWrapper(InstructSentenceTransformerModel):
52
57
 
53
58
  transformer = Transformer(
54
59
  model_name,
60
+ device=device,
55
61
  **kwargs,
56
62
  )
57
63
  pooling = Pooling(
@@ -97,12 +103,13 @@ BMRetriever_410M = ModelMeta(
97
103
  release_date="2024-04-29",
98
104
  embed_dim=1024,
99
105
  n_parameters=353_822_720,
106
+ n_embedding_parameters=51_511_296,
100
107
  memory_usage_mb=1349,
101
108
  max_tokens=2048,
102
109
  license="mit",
103
110
  reference="https://huggingface.co/BMRetriever/BMRetriever-410M",
104
111
  similarity_fn_name="cosine",
105
- framework=["Sentence Transformers", "PyTorch"],
112
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
106
113
  use_instructions=True,
107
114
  public_training_code=None,
108
115
  public_training_data=None,
@@ -127,12 +134,13 @@ BMRetriever_1B = ModelMeta(
127
134
  release_date="2024-04-29",
128
135
  embed_dim=2048,
129
136
  n_parameters=908_759_040,
137
+ n_embedding_parameters=103_022_592,
130
138
  memory_usage_mb=3466,
131
139
  max_tokens=2048,
132
140
  license="mit",
133
141
  reference="https://huggingface.co/BMRetriever/BMRetriever-1B",
134
142
  similarity_fn_name="cosine",
135
- framework=["Sentence Transformers", "PyTorch"],
143
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
136
144
  use_instructions=True,
137
145
  public_training_code=None,
138
146
  public_training_data=None,
@@ -157,12 +165,13 @@ BMRetriever_2B = ModelMeta(
157
165
  release_date="2024-04-29",
158
166
  embed_dim=2048,
159
167
  n_parameters=2_506_172_416,
168
+ n_embedding_parameters=524_288_000,
160
169
  memory_usage_mb=9560,
161
170
  max_tokens=8192,
162
171
  license="mit",
163
172
  reference="https://huggingface.co/BMRetriever/BMRetriever-2B",
164
173
  similarity_fn_name="cosine",
165
- framework=["Sentence Transformers", "PyTorch"],
174
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
166
175
  use_instructions=True,
167
176
  public_training_code=None,
168
177
  public_training_data=None,
@@ -187,12 +196,13 @@ BMRetriever_7B = ModelMeta(
187
196
  release_date="2024-04-29",
188
197
  embed_dim=4096,
189
198
  n_parameters=7_110_660_096,
199
+ n_embedding_parameters=131_072_000,
190
200
  memory_usage_mb=27124,
191
201
  max_tokens=32768,
192
202
  license="mit",
193
203
  reference="https://huggingface.co/BMRetriever/BMRetriever-7B",
194
204
  similarity_fn_name="cosine",
195
- framework=["Sentence Transformers", "PyTorch"],
205
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
196
206
  use_instructions=True,
197
207
  public_training_code=None,
198
208
  public_training_data=None,
@@ -41,13 +41,14 @@ cadet_embed = ModelMeta(
41
41
  open_weights=True,
42
42
  release_date="2025-05-11",
43
43
  n_parameters=109_000_000,
44
+ n_embedding_parameters=23_440_896,
44
45
  memory_usage_mb=418,
45
46
  embed_dim=768,
46
47
  license="apache-2.0",
47
48
  max_tokens=512,
48
49
  reference="https://huggingface.co/manveertamber/cadet-embed-base-v1",
49
50
  similarity_fn_name="cosine",
50
- framework=["Sentence Transformers", "PyTorch"],
51
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
51
52
  use_instructions=True,
52
53
  public_training_code="https://github.com/manveertamber/cadet-dense-retrieval",
53
54
  # we provide the code to generate the training data
@@ -1,27 +1,31 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from collections.abc import Sequence
3
4
  from typing import TYPE_CHECKING, Any
4
5
 
5
6
  import numpy as np
6
7
  import torch
7
- from torch.utils.data import DataLoader
8
8
 
9
9
  import mteb
10
10
  from mteb._create_dataloaders import _corpus_to_dict
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
13
- from mteb.models.models_protocols import PromptType
14
12
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
15
- from mteb.types import Array, BatchedInput
13
+ from mteb.types import PromptType
16
14
 
17
15
  from .bge_models import bge_full_data
18
16
 
19
17
  if TYPE_CHECKING:
18
+ from collections.abc import Sequence
19
+
20
+ from torch.utils.data import DataLoader
21
+
20
22
  from mteb.abstasks import (
21
23
  AbsTaskClassification,
22
24
  AbsTaskRetrieval,
23
25
  AbsTaskSummarization,
24
26
  )
27
+ from mteb.abstasks.task_metadata import TaskMetadata
28
+ from mteb.types import Array, BatchedInput
25
29
  logger = logging.getLogger(__name__)
26
30
 
27
31
  CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
@@ -49,10 +53,17 @@ class CDEWrapper(SentenceTransformerEncoderWrapper):
49
53
  "InstructionReranking",
50
54
  )
51
55
 
52
- def __init__(self, model: str, *args, **kwargs: Any) -> None:
56
+ def __init__(
57
+ self,
58
+ model: str,
59
+ revision: str | None = None,
60
+ device: str | None = None,
61
+ *args,
62
+ **kwargs: Any,
63
+ ) -> None:
53
64
  from transformers import AutoConfig
54
65
 
55
- super().__init__(model, *args, **kwargs)
66
+ super().__init__(model, revision=revision, device=device, *args, **kwargs)
56
67
  model_config = AutoConfig.from_pretrained(model, trust_remote_code=True)
57
68
  self.max_sentences = model_config.transductive_corpus_size
58
69
 
@@ -215,12 +226,13 @@ cde_small_v1 = ModelMeta(
215
226
  revision="e151df18af0d7f1d1c37b074fee58406ececf19f",
216
227
  release_date="2024-09-24",
217
228
  n_parameters=int(281 * 1e6),
229
+ n_embedding_parameters=None,
218
230
  memory_usage_mb=1072, # Though the second-stage model is only 140M
219
231
  max_tokens=512,
220
232
  embed_dim=768,
221
233
  license="mit",
222
234
  similarity_fn_name=ScoringFunction.COSINE,
223
- framework=["Sentence Transformers"],
235
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
224
236
  reference="https://huggingface.co/jxm/cde-small-v1",
225
237
  use_instructions=True,
226
238
  adapted_from="nomic-ai/nomic-bert-2048",
@@ -244,12 +256,13 @@ cde_small_v2 = ModelMeta(
244
256
  revision="4e1d021a6c3fd7ce8aa0a7204057eee5ae61d390",
245
257
  release_date="2025-01-13",
246
258
  n_parameters=int(306 * 1e6),
259
+ n_embedding_parameters=None,
247
260
  memory_usage_mb=1166, # Though the second-stage model is only 140M
248
261
  max_tokens=512,
249
262
  embed_dim=768,
250
263
  license="mit",
251
264
  similarity_fn_name=ScoringFunction.COSINE,
252
- framework=["Sentence Transformers"],
265
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
253
266
  reference="https://huggingface.co/jxm/cde-small-v1",
254
267
  use_instructions=True,
255
268
  adapted_from="answerdotai/ModernBERT-base",
@@ -1,13 +1,18 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.abs_encoder import AbsEncoder
9
9
  from mteb.models.model_meta import ModelMeta, ScoringFunction
10
- from mteb.types import Array, BatchedInput, PromptType
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import Array, BatchedInput, PromptType
11
16
 
12
17
 
13
18
  class CLIPModel(AbsEncoder):
@@ -115,7 +120,7 @@ CLIP_CITATION = """
115
120
 
116
121
 
117
122
  clip_vit_large_patch14 = ModelMeta(
118
- loader=CLIPModel, # type: ignore
123
+ loader=CLIPModel,
119
124
  name="openai/clip-vit-large-patch14",
120
125
  model_type=["dense"],
121
126
  languages=["eng-Latn"],
@@ -123,6 +128,7 @@ clip_vit_large_patch14 = ModelMeta(
123
128
  release_date="2021-02-26",
124
129
  modalities=["image", "text"],
125
130
  n_parameters=428_000_000,
131
+ n_embedding_parameters=None,
126
132
  memory_usage_mb=1631,
127
133
  max_tokens=77,
128
134
  embed_dim=768,
@@ -130,7 +136,7 @@ clip_vit_large_patch14 = ModelMeta(
130
136
  open_weights=True,
131
137
  public_training_code=None,
132
138
  public_training_data=None,
133
- framework=["PyTorch"],
139
+ framework=["PyTorch", "Transformers", "safetensors"],
134
140
  reference="https://huggingface.co/openai/clip-vit-large-patch14",
135
141
  similarity_fn_name=ScoringFunction.COSINE,
136
142
  use_instructions=False,
@@ -139,7 +145,7 @@ clip_vit_large_patch14 = ModelMeta(
139
145
  )
140
146
 
141
147
  clip_vit_base_patch32 = ModelMeta(
142
- loader=CLIPModel, # type: ignore
148
+ loader=CLIPModel,
143
149
  name="openai/clip-vit-base-patch32",
144
150
  model_type=["dense"],
145
151
  languages=["eng-Latn"],
@@ -147,6 +153,7 @@ clip_vit_base_patch32 = ModelMeta(
147
153
  release_date="2021-02-26",
148
154
  modalities=["image", "text"],
149
155
  n_parameters=151_000_000,
156
+ n_embedding_parameters=None,
150
157
  memory_usage_mb=576,
151
158
  max_tokens=77,
152
159
  embed_dim=512,
@@ -154,7 +161,7 @@ clip_vit_base_patch32 = ModelMeta(
154
161
  open_weights=True,
155
162
  public_training_code=None,
156
163
  public_training_data=None,
157
- framework=["PyTorch"],
164
+ framework=["PyTorch", "Transformers"],
158
165
  reference="https://huggingface.co/openai/clip-vit-base-patch32",
159
166
  similarity_fn_name=ScoringFunction.COSINE,
160
167
  use_instructions=False,
@@ -163,7 +170,7 @@ clip_vit_base_patch32 = ModelMeta(
163
170
  )
164
171
 
165
172
  clip_vit_base_patch16 = ModelMeta(
166
- loader=CLIPModel, # type: ignore
173
+ loader=CLIPModel,
167
174
  name="openai/clip-vit-base-patch16",
168
175
  model_type=["dense"],
169
176
  languages=["eng-Latn"],
@@ -171,6 +178,7 @@ clip_vit_base_patch16 = ModelMeta(
171
178
  release_date="2021-02-26",
172
179
  modalities=["image", "text"],
173
180
  n_parameters=151_000_000,
181
+ n_embedding_parameters=None,
174
182
  memory_usage_mb=576,
175
183
  max_tokens=77,
176
184
  embed_dim=512,
@@ -178,7 +186,7 @@ clip_vit_base_patch16 = ModelMeta(
178
186
  open_weights=True,
179
187
  public_training_code=None,
180
188
  public_training_data=None,
181
- framework=["PyTorch"],
189
+ framework=["PyTorch", "Transformers"],
182
190
  reference="https://huggingface.co/openai/clip-vit-base-patch16",
183
191
  similarity_fn_name=ScoringFunction.COSINE,
184
192
  use_instructions=False,
@@ -30,13 +30,14 @@ e5_nl_small = ModelMeta(
30
30
  revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
31
31
  release_date="2025-09-23",
32
32
  n_parameters=40_800_000,
33
+ n_embedding_parameters=19_200_768,
33
34
  memory_usage_mb=78,
34
35
  embed_dim=384,
35
36
  license="mit",
36
37
  max_tokens=512,
37
38
  reference="https://huggingface.co/clips/e5-small-trm-nl",
38
39
  similarity_fn_name=ScoringFunction.COSINE,
39
- framework=["Sentence Transformers", "PyTorch"],
40
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
40
41
  use_instructions=True,
41
42
  public_training_code="https://github.com/ELotfi/e5-nl",
42
43
  public_training_data="https://huggingface.co/collections/clips/beir-nl",
@@ -57,13 +58,14 @@ e5_nl_base = ModelMeta(
57
58
  revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
58
59
  release_date="2025-09-23",
59
60
  n_parameters=124_400_000,
61
+ n_embedding_parameters=38_401_536,
60
62
  memory_usage_mb=237,
61
63
  embed_dim=768,
62
64
  license="mit",
63
65
  max_tokens=514,
64
66
  reference="https://huggingface.co/clips/e5-base-trm-nl",
65
67
  similarity_fn_name=ScoringFunction.COSINE,
66
- framework=["Sentence Transformers", "PyTorch"],
68
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
67
69
  use_instructions=True,
68
70
  public_training_code="https://github.com/ELotfi/e5-nl",
69
71
  public_training_data="https://huggingface.co/collections/clips/beir-nl",
@@ -84,13 +86,14 @@ e5_nl_large = ModelMeta(
84
86
  revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
85
87
  release_date="2025-09-23",
86
88
  n_parameters=355_000_000,
89
+ n_embedding_parameters=51_202_048,
87
90
  memory_usage_mb=1355,
88
91
  embed_dim=1024,
89
92
  license="mit",
90
93
  max_tokens=514,
91
94
  reference="https://huggingface.co/clips/e5-large-trm-nl",
92
95
  similarity_fn_name=ScoringFunction.COSINE,
93
- framework=["Sentence Transformers", "PyTorch"],
96
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
94
97
  use_instructions=True,
95
98
  public_training_code="https://github.com/ELotfi/e5-nl",
96
99
  public_training_data="https://huggingface.co/collections/clips/beir-nl",
@@ -236,13 +236,14 @@ F2LLM_0B6 = ModelMeta(
236
236
  revision="36416618b83d4bd84a8ca30c2ee01ed518f9f2e7",
237
237
  release_date="2025-09-18",
238
238
  n_parameters=595_776_512,
239
+ n_embedding_parameters=None,
239
240
  memory_usage_mb=1137,
240
241
  embed_dim=1024,
241
242
  license="apache-2.0",
242
243
  max_tokens=8192,
243
244
  reference="https://huggingface.co/codefuse-ai/F2LLM-0.6B",
244
245
  similarity_fn_name="cosine",
245
- framework=["Sentence Transformers", "PyTorch"],
246
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
246
247
  use_instructions=True,
247
248
  public_training_code="https://github.com/codefuse-ai/F2LLM",
248
249
  public_training_data="https://huggingface.co/datasets/codefuse-ai/F2LLM",
@@ -266,13 +267,14 @@ F2LLM_1B7 = ModelMeta(
266
267
  revision="fdce0e09655f42cea26f7f66f5a70cd4507ea45c",
267
268
  release_date="2025-09-18",
268
269
  n_parameters=1_720_574_976,
270
+ n_embedding_parameters=None,
269
271
  memory_usage_mb=3282,
270
272
  embed_dim=2560,
271
273
  license="apache-2.0",
272
274
  max_tokens=8192,
273
275
  reference="https://huggingface.co/codefuse-ai/F2LLM-1.7B",
274
276
  similarity_fn_name="cosine",
275
- framework=["Sentence Transformers", "PyTorch"],
277
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
276
278
  use_instructions=True,
277
279
  public_training_code="https://github.com/codefuse-ai/F2LLM",
278
280
  public_training_data="https://huggingface.co/datasets/codefuse-ai/F2LLM",
@@ -296,13 +298,14 @@ F2LLM_4B = ModelMeta(
296
298
  revision="9fe95901ed2b6b59dd7673d6e93c9d76766a1e25",
297
299
  release_date="2025-09-18",
298
300
  n_parameters=4_021_774_336,
301
+ n_embedding_parameters=None,
299
302
  memory_usage_mb=7672,
300
303
  embed_dim=2560,
301
304
  license="apache-2.0",
302
305
  max_tokens=8192,
303
306
  reference="https://huggingface.co/codefuse-ai/F2LLM-4B",
304
307
  similarity_fn_name="cosine",
305
- framework=["Sentence Transformers", "PyTorch"],
308
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
306
309
  use_instructions=True,
307
310
  public_training_code="https://github.com/codefuse-ai/F2LLM",
308
311
  public_training_data="https://huggingface.co/datasets/codefuse-ai/F2LLM",
@@ -318,6 +321,7 @@ C2LLM_0B5 = ModelMeta(
318
321
  release_date="2025-12-22",
319
322
  languages=c2llm_languages,
320
323
  n_parameters=497252096,
324
+ n_embedding_parameters=None,
321
325
  memory_usage_mb=948.0,
322
326
  max_tokens=32768,
323
327
  embed_dim=896,
@@ -325,7 +329,7 @@ C2LLM_0B5 = ModelMeta(
325
329
  open_weights=True,
326
330
  public_training_code=None,
327
331
  public_training_data=None,
328
- framework=["PyTorch", "Sentence Transformers"],
332
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
329
333
  reference="https://huggingface.co/codefuse-ai/C2LLM-0.5B",
330
334
  similarity_fn_name=ScoringFunction.COSINE,
331
335
  use_instructions=True,
@@ -346,6 +350,7 @@ C2LLM_7B = ModelMeta(
346
350
  release_date="2025-12-22",
347
351
  languages=c2llm_languages,
348
352
  n_parameters=7667028992,
353
+ n_embedding_parameters=None,
349
354
  memory_usage_mb=14624.0,
350
355
  max_tokens=32768,
351
356
  embed_dim=3584,
@@ -353,7 +358,7 @@ C2LLM_7B = ModelMeta(
353
358
  open_weights=True,
354
359
  public_training_code=None,
355
360
  public_training_data=None,
356
- framework=["PyTorch", "Sentence Transformers"],
361
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
357
362
  reference="https://huggingface.co/codefuse-ai/C2LLM-7B",
358
363
  similarity_fn_name=ScoringFunction.COSINE,
359
364
  use_instructions=True,
@@ -28,6 +28,7 @@ codesage_large = ModelMeta(
28
28
  release_date="2024-02-03",
29
29
  modalities=["text"],
30
30
  n_parameters=1_300_000_000,
31
+ n_embedding_parameters=100_667_392,
31
32
  memory_usage_mb=4959,
32
33
  max_tokens=2048,
33
34
  embed_dim=2048,
@@ -35,7 +36,7 @@ codesage_large = ModelMeta(
35
36
  open_weights=True,
36
37
  public_training_code=None,
37
38
  public_training_data=None,
38
- framework=["PyTorch"],
39
+ framework=["PyTorch", "Transformers"],
39
40
  reference="https://huggingface.co/codesage/codesage-large-v2",
40
41
  similarity_fn_name=ScoringFunction.COSINE,
41
42
  use_instructions=False,
@@ -55,6 +56,7 @@ codesage_base = ModelMeta(
55
56
  release_date="2024-02-03",
56
57
  modalities=["text"],
57
58
  n_parameters=356_000_000,
59
+ n_embedding_parameters=50_333_696,
58
60
  memory_usage_mb=1358,
59
61
  max_tokens=2048,
60
62
  embed_dim=1024,
@@ -62,7 +64,7 @@ codesage_base = ModelMeta(
62
64
  open_weights=True,
63
65
  public_training_code=None,
64
66
  public_training_data=None,
65
- framework=["PyTorch"],
67
+ framework=["PyTorch", "Transformers"],
66
68
  reference="https://huggingface.co/codesage/codesage-base-v2",
67
69
  similarity_fn_name=ScoringFunction.COSINE,
68
70
  use_instructions=False,
@@ -82,6 +84,7 @@ codesage_small = ModelMeta(
82
84
  release_date="2024-02-03",
83
85
  modalities=["text"],
84
86
  n_parameters=130_000_000,
87
+ n_embedding_parameters=50_333_696,
85
88
  memory_usage_mb=496,
86
89
  max_tokens=2048,
87
90
  embed_dim=1024,
@@ -89,7 +92,7 @@ codesage_small = ModelMeta(
89
92
  open_weights=True,
90
93
  public_training_code=None,
91
94
  public_training_data=None,
92
- framework=["PyTorch"],
95
+ framework=["PyTorch", "Transformers"],
93
96
  reference="https://huggingface.co/codesage/codesage-small-v2",
94
97
  similarity_fn_name=ScoringFunction.COSINE,
95
98
  use_instructions=False,
@@ -1,18 +1,24 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import time
3
5
  from functools import wraps
4
- from typing import Any, Literal, get_args
6
+ from typing import TYPE_CHECKING, Any, Literal, get_args
5
7
 
6
8
  import numpy as np
7
9
  import torch
8
- from torch.utils.data import DataLoader
9
10
  from tqdm.auto import tqdm
10
11
 
11
12
  from mteb._requires_package import requires_package
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
13
  from mteb.models.abs_encoder import AbsEncoder
14
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
15
- from mteb.types import Array, BatchedInput, PromptType
15
+ from mteb.types import PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import Array, BatchedInput
16
22
 
17
23
  logger = logging.getLogger(__name__)
18
24
 
@@ -222,7 +228,7 @@ class CohereTextEmbeddingModel(AbsEncoder):
222
228
  ) -> None:
223
229
  requires_package(self, "cohere", model_name, "pip install 'mteb[cohere]'")
224
230
 
225
- import cohere # type: ignore
231
+ import cohere
226
232
 
227
233
  self.model_name = model_name.removeprefix("Cohere/Cohere-")
228
234
  self.sep = sep
@@ -386,13 +392,14 @@ cohere_mult_3 = ModelMeta(
386
392
  revision="1",
387
393
  release_date="2023-11-02",
388
394
  n_parameters=None,
395
+ n_embedding_parameters=None,
389
396
  memory_usage_mb=None,
390
397
  max_tokens=None,
391
398
  embed_dim=512,
392
399
  reference="https://cohere.com/blog/introducing-embed-v3",
393
400
  license=None,
394
401
  similarity_fn_name=ScoringFunction.COSINE,
395
- framework=["API"],
402
+ framework=["API", "Transformers"],
396
403
  use_instructions=True,
397
404
  public_training_code=None,
398
405
  public_training_data=None, # assumed
@@ -412,12 +419,13 @@ cohere_eng_3 = ModelMeta(
412
419
  revision="1",
413
420
  release_date="2023-11-02",
414
421
  n_parameters=None,
422
+ n_embedding_parameters=None,
415
423
  memory_usage_mb=None,
416
424
  max_tokens=512,
417
425
  embed_dim=1024,
418
426
  license=None,
419
427
  similarity_fn_name=ScoringFunction.COSINE,
420
- framework=["API"],
428
+ framework=["API", "Transformers"],
421
429
  use_instructions=True,
422
430
  public_training_code=None,
423
431
  public_training_data=None, # assumed
@@ -437,12 +445,13 @@ cohere_mult_light_3 = ModelMeta(
437
445
  reference="https://cohere.com/blog/introducing-embed-v3",
438
446
  release_date="2023-11-02",
439
447
  n_parameters=None,
448
+ n_embedding_parameters=None,
440
449
  memory_usage_mb=None,
441
450
  max_tokens=512,
442
451
  embed_dim=384,
443
452
  license=None,
444
453
  similarity_fn_name=ScoringFunction.COSINE,
445
- framework=["API"],
454
+ framework=["API", "Transformers"],
446
455
  use_instructions=True,
447
456
  public_training_code=None,
448
457
  public_training_data=None, # assumed
@@ -462,12 +471,13 @@ cohere_eng_light_3 = ModelMeta(
462
471
  revision="1",
463
472
  release_date="2023-11-02",
464
473
  n_parameters=None,
474
+ n_embedding_parameters=None,
465
475
  memory_usage_mb=None,
466
476
  max_tokens=512,
467
477
  embed_dim=384,
468
478
  license=None,
469
479
  similarity_fn_name=ScoringFunction.COSINE,
470
- framework=["API"],
480
+ framework=["API", "Transformers"],
471
481
  use_instructions=True,
472
482
  public_training_code=None,
473
483
  public_training_data=None, # assumed