mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (529)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +78 -30
  3. mteb/_evaluators/any_sts_evaluator.py +13 -6
  4. mteb/_evaluators/clustering_evaluator.py +13 -5
  5. mteb/_evaluators/evaluator.py +12 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
  7. mteb/_evaluators/pair_classification_evaluator.py +17 -7
  8. mteb/_evaluators/retrieval_evaluator.py +23 -14
  9. mteb/_evaluators/retrieval_metrics.py +26 -19
  10. mteb/_evaluators/sklearn_evaluator.py +27 -17
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
  12. mteb/_evaluators/text/summarization_evaluator.py +31 -20
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +9 -3
  16. mteb/abstasks/_data_filter/task_pipelines.py +10 -2
  17. mteb/abstasks/_statistics_calculation.py +21 -11
  18. mteb/abstasks/_stratification.py +18 -18
  19. mteb/abstasks/abstask.py +78 -44
  20. mteb/abstasks/aggregate_task_metadata.py +21 -18
  21. mteb/abstasks/aggregated_task.py +23 -35
  22. mteb/abstasks/classification.py +39 -18
  23. mteb/abstasks/clustering.py +37 -20
  24. mteb/abstasks/clustering_legacy.py +30 -16
  25. mteb/abstasks/image/image_text_pair_classification.py +26 -9
  26. mteb/abstasks/multilabel_classification.py +33 -21
  27. mteb/abstasks/pair_classification.py +44 -19
  28. mteb/abstasks/regression.py +18 -10
  29. mteb/abstasks/retrieval.py +82 -52
  30. mteb/abstasks/retrieval_dataset_loaders.py +50 -39
  31. mteb/abstasks/sts.py +34 -15
  32. mteb/abstasks/task_metadata.py +44 -37
  33. mteb/abstasks/text/bitext_mining.py +57 -35
  34. mteb/abstasks/text/reranking.py +10 -8
  35. mteb/abstasks/text/summarization.py +26 -10
  36. mteb/abstasks/zeroshot_classification.py +27 -9
  37. mteb/benchmarks/_create_table.py +13 -7
  38. mteb/benchmarks/benchmark.py +15 -3
  39. mteb/benchmarks/benchmarks/__init__.py +6 -0
  40. mteb/benchmarks/benchmarks/benchmarks.py +153 -13
  41. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  42. mteb/benchmarks/get_benchmark.py +14 -55
  43. mteb/cache.py +189 -31
  44. mteb/cli/_display_tasks.py +10 -4
  45. mteb/cli/build_cli.py +112 -13
  46. mteb/cli/generate_model_card.py +50 -23
  47. mteb/deprecated_evaluator.py +72 -54
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  52. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  64. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  65. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  66. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  67. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  68. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  69. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  70. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  71. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  72. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  73. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  74. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  75. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  81. mteb/evaluate.py +71 -47
  82. mteb/filter_tasks.py +36 -32
  83. mteb/get_tasks.py +37 -33
  84. mteb/languages/language_scripts.py +11 -4
  85. mteb/leaderboard/app.py +172 -37
  86. mteb/leaderboard/table.py +7 -2
  87. mteb/load_results.py +20 -14
  88. mteb/models/abs_encoder.py +30 -16
  89. mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
  90. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
  91. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
  92. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  93. mteb/models/cache_wrappers/cache_wrapper.py +16 -11
  94. mteb/models/get_model_meta.py +53 -9
  95. mteb/models/instruct_wrapper.py +41 -13
  96. mteb/models/model_implementations/align_models.py +11 -5
  97. mteb/models/model_implementations/amazon_models.py +1 -0
  98. mteb/models/model_implementations/andersborges.py +6 -4
  99. mteb/models/model_implementations/ara_models.py +2 -1
  100. mteb/models/model_implementations/arctic_models.py +16 -8
  101. mteb/models/model_implementations/b1ade_models.py +2 -1
  102. mteb/models/model_implementations/bedrock_models.py +20 -6
  103. mteb/models/model_implementations/bge_models.py +85 -22
  104. mteb/models/model_implementations/bica_model.py +4 -3
  105. mteb/models/model_implementations/blip2_models.py +13 -6
  106. mteb/models/model_implementations/blip_models.py +33 -20
  107. mteb/models/model_implementations/bm25.py +27 -17
  108. mteb/models/model_implementations/bmretriever_models.py +16 -6
  109. mteb/models/model_implementations/cadet_models.py +2 -1
  110. mteb/models/model_implementations/cde_models.py +22 -9
  111. mteb/models/model_implementations/clip_models.py +18 -10
  112. mteb/models/model_implementations/clips_models.py +6 -3
  113. mteb/models/model_implementations/codefuse_models.py +10 -5
  114. mteb/models/model_implementations/codesage_models.py +6 -3
  115. mteb/models/model_implementations/cohere_models.py +19 -9
  116. mteb/models/model_implementations/cohere_v.py +16 -6
  117. mteb/models/model_implementations/colpali_models.py +10 -6
  118. mteb/models/model_implementations/colqwen_models.py +24 -38
  119. mteb/models/model_implementations/colsmol_models.py +5 -3
  120. mteb/models/model_implementations/conan_models.py +12 -5
  121. mteb/models/model_implementations/dino_models.py +70 -46
  122. mteb/models/model_implementations/e5_instruct.py +27 -4
  123. mteb/models/model_implementations/e5_models.py +18 -9
  124. mteb/models/model_implementations/e5_v.py +16 -10
  125. mteb/models/model_implementations/eagerworks_models.py +12 -5
  126. mteb/models/model_implementations/emillykkejensen_models.py +9 -6
  127. mteb/models/model_implementations/en_code_retriever.py +2 -1
  128. mteb/models/model_implementations/euler_models.py +3 -2
  129. mteb/models/model_implementations/evaclip_models.py +13 -4
  130. mteb/models/model_implementations/fa_models.py +18 -9
  131. mteb/models/model_implementations/facebookai.py +16 -2
  132. mteb/models/model_implementations/geogpt_models.py +2 -1
  133. mteb/models/model_implementations/gme_v_models.py +13 -8
  134. mteb/models/model_implementations/google_models.py +16 -5
  135. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
  136. mteb/models/model_implementations/gritlm_models.py +5 -2
  137. mteb/models/model_implementations/gte_models.py +34 -13
  138. mteb/models/model_implementations/hinvec_models.py +7 -2
  139. mteb/models/model_implementations/human.py +1 -0
  140. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  141. mteb/models/model_implementations/inf_models.py +4 -2
  142. mteb/models/model_implementations/jasper_models.py +16 -7
  143. mteb/models/model_implementations/jina_clip.py +58 -14
  144. mteb/models/model_implementations/jina_models.py +35 -16
  145. mteb/models/model_implementations/kalm_models.py +24 -12
  146. mteb/models/model_implementations/kblab.py +13 -6
  147. mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
  148. mteb/models/model_implementations/kfst.py +2 -1
  149. mteb/models/model_implementations/kowshik24_models.py +2 -1
  150. mteb/models/model_implementations/lens_models.py +2 -0
  151. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  152. mteb/models/model_implementations/linq_models.py +8 -2
  153. mteb/models/model_implementations/listconranker.py +11 -5
  154. mteb/models/model_implementations/llm2clip_models.py +18 -10
  155. mteb/models/model_implementations/llm2vec_models.py +28 -14
  156. mteb/models/model_implementations/mcinext_models.py +12 -3
  157. mteb/models/model_implementations/mdbr_models.py +19 -3
  158. mteb/models/model_implementations/misc_models.py +131 -68
  159. mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
  160. mteb/models/model_implementations/mme5_models.py +3 -2
  161. mteb/models/model_implementations/moco_models.py +15 -8
  162. mteb/models/model_implementations/mod_models.py +3 -2
  163. mteb/models/model_implementations/model2vec_models.py +37 -18
  164. mteb/models/model_implementations/moka_models.py +4 -1
  165. mteb/models/model_implementations/nbailab.py +6 -3
  166. mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
  167. mteb/models/model_implementations/nomic_models.py +47 -19
  168. mteb/models/model_implementations/nomic_models_vision.py +6 -4
  169. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
  170. mteb/models/model_implementations/nvidia_models.py +165 -22
  171. mteb/models/model_implementations/octen_models.py +64 -3
  172. mteb/models/model_implementations/openai_models.py +14 -4
  173. mteb/models/model_implementations/openclip_models.py +30 -17
  174. mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
  175. mteb/models/model_implementations/ops_moa_models.py +10 -3
  176. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
  177. mteb/models/model_implementations/pawan_models.py +2 -1
  178. mteb/models/model_implementations/piccolo_models.py +3 -1
  179. mteb/models/model_implementations/pixie_models.py +56 -0
  180. mteb/models/model_implementations/promptriever_models.py +20 -10
  181. mteb/models/model_implementations/pylate_models.py +41 -21
  182. mteb/models/model_implementations/qodo_models.py +4 -2
  183. mteb/models/model_implementations/qtack_models.py +2 -1
  184. mteb/models/model_implementations/qwen3_models.py +14 -4
  185. mteb/models/model_implementations/qzhou_models.py +4 -2
  186. mteb/models/model_implementations/random_baseline.py +7 -6
  187. mteb/models/model_implementations/rasgaard_models.py +3 -2
  188. mteb/models/model_implementations/reasonir_model.py +66 -1
  189. mteb/models/model_implementations/repllama_models.py +18 -9
  190. mteb/models/model_implementations/rerankers_custom.py +25 -10
  191. mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
  192. mteb/models/model_implementations/richinfoai_models.py +2 -1
  193. mteb/models/model_implementations/ru_sentence_models.py +40 -20
  194. mteb/models/model_implementations/ruri_models.py +20 -10
  195. mteb/models/model_implementations/salesforce_models.py +13 -4
  196. mteb/models/model_implementations/samilpwc_models.py +2 -1
  197. mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
  198. mteb/models/model_implementations/searchmap_models.py +2 -1
  199. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  200. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
  201. mteb/models/model_implementations/seed_models.py +2 -1
  202. mteb/models/model_implementations/sentence_transformers_models.py +142 -22
  203. mteb/models/model_implementations/shuu_model.py +2 -1
  204. mteb/models/model_implementations/siglip_models.py +39 -24
  205. mteb/models/model_implementations/slm_models.py +419 -0
  206. mteb/models/model_implementations/sonar_models.py +2 -1
  207. mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
  208. mteb/models/model_implementations/stella_models.py +23 -4
  209. mteb/models/model_implementations/tarka_models.py +4 -2
  210. mteb/models/model_implementations/text2vec_models.py +12 -3
  211. mteb/models/model_implementations/ua_sentence_models.py +2 -1
  212. mteb/models/model_implementations/uae_models.py +17 -5
  213. mteb/models/model_implementations/vdr_models.py +9 -2
  214. mteb/models/model_implementations/vi_vn_models.py +12 -6
  215. mteb/models/model_implementations/vista_models.py +11 -4
  216. mteb/models/model_implementations/vlm2vec_models.py +14 -7
  217. mteb/models/model_implementations/voyage_models.py +136 -4
  218. mteb/models/model_implementations/voyage_v.py +17 -10
  219. mteb/models/model_implementations/xyz_models.py +1 -0
  220. mteb/models/model_implementations/youtu_models.py +2 -1
  221. mteb/models/model_implementations/yuan_models.py +2 -1
  222. mteb/models/model_implementations/yuan_models_en.py +3 -2
  223. mteb/models/model_meta.py +127 -40
  224. mteb/models/models_protocols.py +43 -22
  225. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  226. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
  227. mteb/models/search_wrappers.py +63 -29
  228. mteb/models/sentence_transformer_wrapper.py +52 -26
  229. mteb/models/vllm_wrapper.py +329 -0
  230. mteb/py.typed +0 -0
  231. mteb/results/benchmark_results.py +48 -35
  232. mteb/results/model_result.py +68 -32
  233. mteb/results/task_result.py +110 -72
  234. mteb/similarity_functions.py +19 -9
  235. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  236. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  237. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  238. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  239. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  240. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  241. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  242. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  243. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  244. mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
  245. mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
  246. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  247. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  248. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  249. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
  251. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  252. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
  253. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
  256. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
  257. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
  258. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
  259. mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
  260. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
  261. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
  262. mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
  263. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
  264. mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
  265. mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
  266. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
  267. mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
  268. mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
  269. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
  270. mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
  271. mteb/tasks/classification/est/estonian_valence.py +2 -2
  272. mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
  273. mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
  274. mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
  275. mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
  276. mteb/tasks/classification/fra/french_book_reviews.py +2 -2
  277. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
  278. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
  279. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
  280. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
  281. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
  282. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
  283. mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
  284. mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
  285. mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
  286. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
  287. mteb/tasks/classification/jpn/wrime_classification.py +1 -1
  288. mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
  289. mteb/tasks/classification/kor/klue_tc.py +2 -2
  290. mteb/tasks/classification/kor/kor_fin.py +1 -1
  291. mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
  292. mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
  293. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  294. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
  295. mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
  296. mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
  297. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
  298. mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
  299. mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
  300. mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
  301. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
  302. mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
  303. mteb/tasks/classification/multilingual/scala_classification.py +2 -2
  304. mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
  305. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  306. mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
  307. mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
  308. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
  309. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
  310. mteb/tasks/classification/ory/odia_news_classification.py +2 -2
  311. mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
  312. mteb/tasks/classification/ron/moroco.py +1 -1
  313. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
  314. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
  315. mteb/tasks/classification/rus/georeview_classification.py +1 -1
  316. mteb/tasks/classification/rus/headline_classification.py +2 -2
  317. mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
  318. mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
  319. mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
  320. mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
  321. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
  322. mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
  323. mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
  324. mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
  325. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
  326. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
  327. mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
  328. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
  329. mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
  330. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
  331. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  332. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
  333. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  334. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
  335. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
  336. mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
  337. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
  338. mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
  339. mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
  340. mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
  341. mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
  342. mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
  343. mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
  344. mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
  345. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  346. mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
  347. mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
  348. mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
  349. mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
  350. mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
  351. mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
  352. mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
  353. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  354. mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
  355. mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
  356. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  357. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  358. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
  359. mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
  360. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
  361. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
  362. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
  363. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
  364. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
  365. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
  366. mteb/tasks/clustering/nob/snl_clustering.py +8 -3
  367. mteb/tasks/clustering/nob/vg_clustering.py +8 -3
  368. mteb/tasks/clustering/pol/polish_clustering.py +3 -3
  369. mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
  370. mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
  371. mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
  372. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  373. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
  374. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  375. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  376. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
  377. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
  378. mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
  379. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  380. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
  381. mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
  382. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
  383. mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
  384. mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
  385. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
  386. mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
  387. mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
  388. mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
  389. mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
  390. mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
  391. mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
  392. mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
  393. mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
  394. mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
  395. mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
  396. mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
  397. mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
  398. mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
  399. mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
  400. mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
  401. mteb/tasks/pair_classification/rus/terra.py +2 -2
  402. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
  403. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
  404. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
  405. mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
  406. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  407. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  408. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  409. mteb/tasks/retrieval/code/code_rag.py +16 -16
  410. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  411. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  412. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  413. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  414. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  415. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  416. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  417. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  418. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
  419. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
  420. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  421. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  422. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  423. mteb/tasks/retrieval/eng/__init__.py +44 -0
  424. mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
  425. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  426. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  427. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  428. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  429. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  430. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  431. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  432. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  433. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  434. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  435. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  436. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  437. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  438. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  439. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  440. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  441. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  442. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  443. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  445. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  446. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  447. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  448. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  449. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  450. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  451. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  452. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  453. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  454. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  455. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  456. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  457. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  458. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  459. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  460. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  461. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  462. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  463. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  464. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  465. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  466. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  467. mteb/tasks/retrieval/kor/__init__.py +15 -1
  468. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  469. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  470. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  471. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  472. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  473. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  474. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  475. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  476. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  477. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  478. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
  479. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  480. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  481. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
  482. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  483. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  484. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  485. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  486. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  487. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  488. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  489. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  490. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  491. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  492. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  493. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  494. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  495. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  496. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  497. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  498. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  499. mteb/tasks/retrieval/nob/norquad.py +3 -3
  500. mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
  501. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  502. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  503. mteb/tasks/retrieval/vie/__init__.py +14 -6
  504. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  505. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  506. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  507. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  508. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  509. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  510. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  511. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  512. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  513. mteb/tasks/sts/fao/faroese_sts.py +1 -1
  514. mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
  515. mteb/tasks/sts/kor/klue_sts.py +1 -1
  516. mteb/tasks/sts/por/sick_br_sts.py +1 -1
  517. mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
  518. mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
  519. mteb/types/__init__.py +2 -0
  520. mteb/types/_encoder_io.py +13 -1
  521. mteb/types/_result.py +2 -1
  522. mteb/types/statistics.py +18 -5
  523. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
  524. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
  525. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
  526. mteb/models/model_implementations/mxbai_models.py +0 -111
  527. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
  528. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
  529. {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,20 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
+ from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
10
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput, PromptType
12
18
 
13
19
  JINA_CLIP_CITATION = """@article{koukounas2024jinaclip,
14
20
  title={Jina CLIP: Your CLIP Model Is Also Your Text Retriever},
@@ -120,8 +126,17 @@ class JinaCLIPModel(AbsEncoder):
120
126
  raise ValueError
121
127
 
122
128
 
129
+ _JINA_CLIP_TRAIN_DATASETS_V1 = {
130
+ # LAION400M
131
+ # ShareGPT4V
132
+ "MSMARCO",
133
+ "NQ",
134
+ "HotpotQA",
135
+ # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
136
+ }
137
+
123
138
  jina_clip_v1 = ModelMeta(
124
- loader=JinaCLIPModel, # type: ignore
139
+ loader=JinaCLIPModel,
125
140
  name="jinaai/jina-clip-v1",
126
141
  model_type=["dense"],
127
142
  languages=["eng-Latn"],
@@ -129,6 +144,7 @@ jina_clip_v1 = ModelMeta(
129
144
  release_date="2024-05-30",
130
145
  modalities=["image", "text"],
131
146
  n_parameters=223_000_000,
147
+ n_embedding_parameters=None,
132
148
  memory_usage_mb=849,
133
149
  max_tokens=8192,
134
150
  embed_dim=768,
@@ -136,17 +152,45 @@ jina_clip_v1 = ModelMeta(
136
152
  open_weights=True,
137
153
  public_training_code=None,
138
154
  public_training_data=None,
139
- framework=["PyTorch"],
155
+ framework=["PyTorch", "Transformers", "ONNX", "safetensors"],
140
156
  reference="https://huggingface.co/jinaai/jina-clip-v1",
141
157
  similarity_fn_name=ScoringFunction.COSINE,
142
158
  use_instructions=True,
143
- training_datasets={
144
- # LAION400M
145
- # ShareGPT4V
146
- "MSMARCO",
147
- # NQ
148
- # HotpotQA
149
- # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
150
- },
159
+ training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1,
151
160
  citation=JINA_CLIP_CITATION,
161
+ superseded_by="jinaai/jina-clip-v2",
162
+ )
163
+
164
+ jina_clip_v2 = ModelMeta(
165
+ loader=JinaCLIPModel,
166
+ name="jinaai/jina-clip-v2",
167
+ revision="344d954da76eb8ad47a7aaff42d012e30c15b8fe",
168
+ release_date="2024-10-09",
169
+ languages=["eng-Latn"],
170
+ n_parameters=865278477,
171
+ memory_usage_mb=1650.0,
172
+ max_tokens=8192,
173
+ embed_dim=1024,
174
+ license="cc-by-nc-4.0",
175
+ open_weights=True,
176
+ public_training_code=None,
177
+ public_training_data=None,
178
+ framework=["PyTorch", "Sentence Transformers"],
179
+ reference="https://huggingface.co/jinaai/jina-clip-v2",
180
+ similarity_fn_name=ScoringFunction.COSINE,
181
+ use_instructions=False,
182
+ training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1 | COLPALI_TRAINING_DATA,
183
+ modalities=["text", "image"],
184
+ model_type=["dense"],
185
+ citation="""
186
+ @misc{koukounas2024jinaclipv2multilingualmultimodalembeddings,
187
+ title={jina-clip-v2: Multilingual Multimodal Embeddings for Text and Images},
188
+ author={Andreas Koukounas and Georgios Mastrapas and Bo Wang and Mohammad Kalim Akram and Sedigheh Eslami and Michael Günther and Isabelle Mohr and Saba Sturua and Scott Martens and Nan Wang and Han Xiao},
189
+ year={2024},
190
+ eprint={2412.08802},
191
+ archivePrefix={arXiv},
192
+ primaryClass={cs.CL},
193
+ url={https://arxiv.org/abs/2412.08802},
194
+ }
195
+ """,
152
196
  )
@@ -1,14 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from typing import Any, ClassVar
5
+ from typing import TYPE_CHECKING, Any, ClassVar
4
6
 
5
7
  import numpy as np
6
8
  import torch
7
- from sentence_transformers import CrossEncoder
8
- from torch.utils.data import DataLoader
9
9
 
10
10
  from mteb._requires_package import requires_package
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
11
  from mteb.languages import PROGRAMMING_LANGS
13
12
  from mteb.models.abs_encoder import AbsEncoder
14
13
  from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -16,7 +15,13 @@ from mteb.models.sentence_transformer_wrapper import (
16
15
  CrossEncoderWrapper,
17
16
  SentenceTransformerEncoderWrapper,
18
17
  )
19
- from mteb.types import Array, BatchedInput, PromptType
18
+
19
+ if TYPE_CHECKING:
20
+ from sentence_transformers import CrossEncoder
21
+ from torch.utils.data import DataLoader
22
+
23
+ from mteb.abstasks.task_metadata import TaskMetadata
24
+ from mteb.types import Array, BatchedInput, PromptType
20
25
 
21
26
  logger = logging.getLogger(__name__)
22
27
 
@@ -257,6 +262,7 @@ class JinaRerankerV3Wrapper(CrossEncoderWrapper):
257
262
  self,
258
263
  model: CrossEncoder | str,
259
264
  revision: str | None = None,
265
+ device: str | None = None,
260
266
  trust_remote_code: bool = True,
261
267
  **kwargs: Any,
262
268
  ) -> None:
@@ -267,10 +273,7 @@ class JinaRerankerV3Wrapper(CrossEncoderWrapper):
267
273
  model, trust_remote_code=trust_remote_code, dtype="auto"
268
274
  )
269
275
 
270
- device = kwargs.get("device", None)
271
- if device is None:
272
- device = get_device_name()
273
- logger.info(f"Use pytorch device: {device}")
276
+ device = device or get_device_name()
274
277
 
275
278
  self.model.to(device)
276
279
  self.model.eval()
@@ -320,6 +323,7 @@ class JinaWrapper(SentenceTransformerEncoderWrapper):
320
323
  self,
321
324
  model: str,
322
325
  revision: str,
326
+ device: str | None = None,
323
327
  model_prompts: dict[str, str] | None = None,
324
328
  **kwargs,
325
329
  ) -> None:
@@ -339,7 +343,9 @@ class JinaWrapper(SentenceTransformerEncoderWrapper):
339
343
  )
340
344
  import flash_attn # noqa: F401
341
345
 
342
- super().__init__(model, revision, model_prompts, **kwargs)
346
+ super().__init__(
347
+ model, revision, device=device, model_prompts=model_prompts, **kwargs
348
+ )
343
349
 
344
350
  def encode(
345
351
  self,
@@ -727,12 +733,13 @@ jina_reranker_v3 = ModelMeta(
727
733
  release_date="2025-09-18", # official release date
728
734
  modalities=["text"],
729
735
  n_parameters=int(0.6 * 1e9),
736
+ n_embedding_parameters=None,
730
737
  memory_usage_mb=1138,
731
738
  max_tokens=131072,
732
739
  embed_dim=None,
733
740
  license="cc-by-nc-4.0",
734
741
  similarity_fn_name=None,
735
- framework=["PyTorch"],
742
+ framework=["PyTorch", "Transformers", "safetensors"],
736
743
  use_instructions=None,
737
744
  reference="https://huggingface.co/jinaai/jina-reranker-v3",
738
745
  public_training_code=None,
@@ -770,12 +777,13 @@ jina_embeddings_v4 = ModelMeta(
770
777
  release_date="2025-06-24", # official release date
771
778
  modalities=["image", "text"],
772
779
  n_parameters=int(3.8 * 1e9),
780
+ n_embedding_parameters=None,
773
781
  memory_usage_mb=7500,
774
782
  max_tokens=32768,
775
783
  embed_dim=2048,
776
784
  license="cc-by-nc-4.0",
777
785
  similarity_fn_name="cosine",
778
- framework=["Sentence Transformers", "PyTorch"],
786
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
779
787
  use_instructions=True,
780
788
  reference="https://huggingface.co/jinaai/jina-embeddings-v4",
781
789
  public_training_code=None,
@@ -795,7 +803,7 @@ jina_embeddings_v4 = ModelMeta(
795
803
 
796
804
 
797
805
  jina_embeddings_v3 = ModelMeta(
798
- loader=JinaWrapper, # type: ignore
806
+ loader=JinaWrapper,
799
807
  loader_kwargs=dict(
800
808
  trust_remote_code=True,
801
809
  model_prompts={
@@ -818,12 +826,19 @@ jina_embeddings_v3 = ModelMeta(
818
826
  revision="215a6e121fa0183376388ac6b1ae230326bfeaed",
819
827
  release_date="2024-09-18", # official release date
820
828
  n_parameters=int(572 * 1e6),
829
+ n_embedding_parameters=None,
821
830
  memory_usage_mb=1092,
822
831
  max_tokens=8194,
823
832
  embed_dim=1024,
824
833
  license="cc-by-nc-4.0",
825
834
  similarity_fn_name=ScoringFunction.COSINE,
826
- framework=["Sentence Transformers", "PyTorch"],
835
+ framework=[
836
+ "Sentence Transformers",
837
+ "PyTorch",
838
+ "Transformers",
839
+ "ONNX",
840
+ "safetensors",
841
+ ],
827
842
  use_instructions=True,
828
843
  reference="https://huggingface.co/jinaai/jina-embeddings-v3",
829
844
  public_training_code=None,
@@ -872,13 +887,14 @@ jina_embeddings_v2_base_en = ModelMeta(
872
887
  revision="6e85f575bc273f1fd840a658067d0157933c83f0",
873
888
  release_date="2023-09-27",
874
889
  n_parameters=137_000_000,
890
+ n_embedding_parameters=23_445_504,
875
891
  memory_usage_mb=262,
876
892
  embed_dim=768,
877
893
  license="apache-2.0",
878
894
  max_tokens=8192,
879
895
  reference="https://huggingface.co/jinaai/jina-embeddings-v2-base-en",
880
896
  similarity_fn_name=ScoringFunction.COSINE,
881
- framework=["Sentence Transformers", "PyTorch"],
897
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
882
898
  use_instructions=False,
883
899
  superseded_by=None,
884
900
  adapted_from="jina-bert-base-en-v1", # pretrained on C4 with Alibi to support longer context.
@@ -936,13 +952,14 @@ jina_embeddings_v2_small_en = ModelMeta(
936
952
  revision="44e7d1d6caec8c883c2d4b207588504d519788d0",
937
953
  release_date="2023-09-27",
938
954
  n_parameters=32_700_000,
955
+ n_embedding_parameters=15_630_336,
939
956
  memory_usage_mb=62,
940
957
  embed_dim=512,
941
958
  license="apache-2.0",
942
959
  max_tokens=8192,
943
960
  reference="https://huggingface.co/jinaai/jina-embeddings-v2-small-en",
944
961
  similarity_fn_name=ScoringFunction.COSINE,
945
- framework=["Sentence Transformers", "PyTorch"],
962
+ framework=["Sentence Transformers", "PyTorch", "ONNX", "safetensors"],
946
963
  use_instructions=False,
947
964
  superseded_by=None,
948
965
  adapted_from="jina-bert-smalll-en-v1", # pretrained on C4 with Alibi to support longer context
@@ -997,6 +1014,7 @@ jina_embedding_b_en_v1 = ModelMeta(
997
1014
  revision="32aa658e5ceb90793454d22a57d8e3a14e699516",
998
1015
  release_date="2023-07-07",
999
1016
  n_parameters=110_000_000,
1017
+ n_embedding_parameters=24_674_304,
1000
1018
  memory_usage_mb=420,
1001
1019
  embed_dim=768,
1002
1020
  license="apache-2.0",
@@ -1054,6 +1072,7 @@ jina_embedding_s_en_v1 = ModelMeta(
1054
1072
  revision="5ac6cd473e2324c6d5f9e558a6a9f65abb57143e",
1055
1073
  release_date="2023-07-07",
1056
1074
  n_parameters=35_000_000,
1075
+ n_embedding_parameters=16_449_536,
1057
1076
  memory_usage_mb=134,
1058
1077
  embed_dim=512,
1059
1078
  license="apache-2.0",
@@ -1,14 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
 
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
8
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
9
9
  from mteb.models.model_meta import ModelMeta
10
10
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
11
- from mteb.types import Array, BatchedInput, PromptType
11
+ from mteb.types import PromptType
12
+
13
+ if TYPE_CHECKING:
14
+ from torch.utils.data import DataLoader
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.types import Array, BatchedInput
12
18
 
13
19
  logger = logging.getLogger(__name__)
14
20
 
@@ -774,6 +780,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1 = ModelMeta(
774
780
  release_date="2024-10-23",
775
781
  languages=["eng-Latn", "zho-Hans"],
776
782
  n_parameters=494032768,
783
+ n_embedding_parameters=136_134_656,
777
784
  memory_usage_mb=1885,
778
785
  max_tokens=512,
779
786
  embed_dim=896,
@@ -799,6 +806,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_v1 = ModelMeta(
799
806
  release_date="2024-08-27",
800
807
  languages=["eng-Latn", "zho-Hans"],
801
808
  n_parameters=494032768,
809
+ n_embedding_parameters=136_134_656,
802
810
  memory_usage_mb=1885,
803
811
  max_tokens=512,
804
812
  embed_dim=896,
@@ -830,6 +838,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1_5 = ModelMeta(
830
838
  release_date="2024-12-26",
831
839
  languages=["eng-Latn", "zho-Hans"],
832
840
  n_parameters=494032768,
841
+ n_embedding_parameters=136_134_656,
833
842
  memory_usage_mb=1885,
834
843
  max_tokens=512,
835
844
  embed_dim=896,
@@ -861,6 +870,7 @@ HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v2 = ModelMeta(
861
870
  release_date="2025-06-25",
862
871
  languages=["eng-Latn", "zho-Hans"],
863
872
  n_parameters=494032768,
873
+ n_embedding_parameters=136_134_656,
864
874
  memory_usage_mb=942,
865
875
  max_tokens=512,
866
876
  embed_dim=896,
@@ -892,6 +902,7 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
892
902
  release_date="2025-09-30",
893
903
  languages=["eng-Latn", "zho-Hans"],
894
904
  n_parameters=494032768,
905
+ n_embedding_parameters=136_134_656,
895
906
  memory_usage_mb=1885,
896
907
  max_tokens=512,
897
908
  embed_dim=896,
@@ -907,23 +918,23 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
907
918
  adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
908
919
  superseded_by=None,
909
920
  citation="""@misc{zhao2025kalmembeddingv2,
910
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
921
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
911
922
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
912
923
  year={2025},
913
924
  eprint={2506.20923},
914
925
  archivePrefix={arXiv},
915
926
  primaryClass={cs.CL},
916
- url={https://arxiv.org/abs/2506.20923},
927
+ url={https://arxiv.org/abs/2506.20923},
917
928
  }
918
929
 
919
930
  @misc{hu2025kalmembedding,
920
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
931
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
921
932
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
922
933
  year={2025},
923
934
  eprint={2501.01028},
924
935
  archivePrefix={arXiv},
925
936
  primaryClass={cs.CL},
926
- url={https://arxiv.org/abs/2501.01028},
937
+ url={https://arxiv.org/abs/2501.01028},
927
938
  }""",
928
939
  )
929
940
 
@@ -942,6 +953,7 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
942
953
  open_weights=True,
943
954
  release_date="2025-11-06",
944
955
  n_parameters=11.76 * 1e9,
956
+ n_embedding_parameters=None,
945
957
  memory_usage_mb=44884,
946
958
  max_tokens=32768,
947
959
  embed_dim=3840,
@@ -954,22 +966,22 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
954
966
  public_training_data=None,
955
967
  training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
956
968
  citation="""@misc{zhao2025kalmembeddingv2,
957
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
969
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
958
970
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
959
971
  year={2025},
960
972
  eprint={2506.20923},
961
973
  archivePrefix={arXiv},
962
974
  primaryClass={cs.CL},
963
- url={https://arxiv.org/abs/2506.20923},
975
+ url={https://arxiv.org/abs/2506.20923},
964
976
  }
965
977
 
966
978
  @misc{hu2025kalmembedding,
967
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
979
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
968
980
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
969
981
  year={2025},
970
982
  eprint={2501.01028},
971
983
  archivePrefix={arXiv},
972
984
  primaryClass={cs.CL},
973
- url={https://arxiv.org/abs/2501.01028},
985
+ url={https://arxiv.org/abs/2501.01028},
974
986
  }""",
975
987
  )
@@ -10,22 +10,29 @@ sbert_swedish = ModelMeta(
10
10
  revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
11
11
  release_date="2023-01-11",
12
12
  n_parameters=124690944,
13
+ n_embedding_parameters=38_649_600,
13
14
  memory_usage_mb=476,
14
15
  embed_dim=768,
15
16
  license="apache-2.0",
16
17
  max_tokens=384,
17
18
  reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
18
19
  similarity_fn_name=ScoringFunction.COSINE,
19
- framework=["Sentence Transformers", "PyTorch"],
20
+ framework=[
21
+ "Sentence Transformers",
22
+ "PyTorch",
23
+ "safetensors",
24
+ "GGUF",
25
+ "Transformers",
26
+ ],
20
27
  use_instructions=False,
21
28
  public_training_code=None,
22
29
  public_training_data=None,
23
30
  training_datasets=None,
24
31
  adapted_from="sentence-transformers/all-mpnet-base-v2",
25
- citation="""@misc{rekathati2021introducing,
26
- author = {Rekathati, Faton},
27
- title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
28
- url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
29
- year = {2021}
32
+ citation="""@misc{rekathati2021introducing,
33
+ author = {Rekathati, Faton},
34
+ title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
35
+ url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
36
+ year = {2021}
30
37
  }""",
31
38
  )
@@ -4,7 +4,7 @@ from mteb.models.sentence_transformer_wrapper import (
4
4
  )
5
5
 
6
6
  dfm_enc_large = ModelMeta(
7
- loader=sentence_transformers_loader, # type: ignore
7
+ loader=sentence_transformers_loader,
8
8
  name="KennethEnevoldsen/dfm-sentence-encoder-large",
9
9
  model_type=["dense"],
10
10
  languages=["dan-Latn"],
@@ -12,13 +12,14 @@ dfm_enc_large = ModelMeta(
12
12
  revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
13
13
  release_date="2023-07-12",
14
14
  n_parameters=355087360,
15
+ n_embedding_parameters=51_200_000,
15
16
  memory_usage_mb=1554,
16
17
  embed_dim=1024,
17
18
  license="mit",
18
19
  max_tokens=512,
19
20
  reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large",
20
21
  similarity_fn_name=ScoringFunction.COSINE,
21
- framework=["Sentence Transformers", "PyTorch"],
22
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
22
23
  use_instructions=False,
23
24
  superseded_by=None,
24
25
  adapted_from="chcaa/dfm-encoder-large-v1",
@@ -39,7 +40,7 @@ dfm_enc_large = ModelMeta(
39
40
  )
40
41
 
41
42
  dfm_enc_med = ModelMeta(
42
- loader=sentence_transformers_loader, # type: ignore
43
+ loader=sentence_transformers_loader,
43
44
  name="KennethEnevoldsen/dfm-sentence-encoder-medium",
44
45
  model_type=["dense"],
45
46
  languages=["dan-Latn"],
@@ -47,13 +48,14 @@ dfm_enc_med = ModelMeta(
47
48
  revision="701bce95d499fa97610d57e8823c54fd1fb79930",
48
49
  release_date="2023-07-12",
49
50
  n_parameters=124445952,
51
+ n_embedding_parameters=38_403_840,
50
52
  memory_usage_mb=475,
51
53
  embed_dim=768,
52
54
  license="mit",
53
55
  max_tokens=512,
54
56
  reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-medium",
55
57
  similarity_fn_name=ScoringFunction.COSINE,
56
- framework=["Sentence Transformers", "PyTorch"],
58
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
57
59
  use_instructions=False,
58
60
  superseded_by=None,
59
61
  adapted_from=None,
@@ -10,13 +10,14 @@ xlmr_scandi = ModelMeta(
10
10
  revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
11
11
  release_date="2022-02-22",
12
12
  n_parameters=278043648,
13
+ n_embedding_parameters=192_001_536,
13
14
  memory_usage_mb=1061,
14
15
  embed_dim=768,
15
16
  license="not specified",
16
17
  max_tokens=512,
17
18
  reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
18
19
  similarity_fn_name=ScoringFunction.COSINE,
19
- framework=["Sentence Transformers", "PyTorch"],
20
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
20
21
  use_instructions=False,
21
22
  public_training_code=None,
22
23
  public_training_data=None,
@@ -9,13 +9,14 @@ kowshik24_bangla_embedding_model = ModelMeta(
9
9
  revision="6689c21e69be5950596bad084457cbaa138728d8",
10
10
  release_date="2025-11-10",
11
11
  n_parameters=278_000_000,
12
+ n_embedding_parameters=192_001_536,
12
13
  memory_usage_mb=1061,
13
14
  embed_dim=768,
14
15
  license="apache-2.0",
15
16
  max_tokens=128,
16
17
  reference="https://huggingface.co/Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
17
18
  similarity_fn_name="cosine",
18
- framework=["Sentence Transformers", "PyTorch"],
19
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
19
20
  use_instructions=False,
20
21
  public_training_code="https://github.com/kowshik24/Bangla-Embedding",
21
22
  public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
@@ -18,6 +18,7 @@ lens_d4000 = ModelMeta(
18
18
  revision="e473b33364e6c48a324796fd1411d3b93670c6fe",
19
19
  release_date="2025-01-17",
20
20
  n_parameters=int(7.11 * 1e9),
21
+ n_embedding_parameters=131_084_288,
21
22
  memory_usage_mb=27125,
22
23
  embed_dim=4000,
23
24
  license="apache-2.0",
@@ -41,6 +42,7 @@ lens_d8000 = ModelMeta(
41
42
  revision="a0b87bd91cb27b6f2f0b0fe22c28026da1d464ef",
42
43
  release_date="2025-01-17",
43
44
  n_parameters=int(7.11 * 1e9),
45
+ n_embedding_parameters=131_084_288,
44
46
  memory_usage_mb=27125,
45
47
  embed_dim=8000,
46
48
  license="apache-2.0",
@@ -52,13 +52,14 @@ lgai_embedding_en = ModelMeta(
52
52
  revision="5e0b2316acc8c2e2941ded6b9cb200b1cb313e65",
53
53
  release_date="2025-06-11",
54
54
  n_parameters=7_110_000_000,
55
+ n_embedding_parameters=131_084_288,
55
56
  memory_usage_mb=27125,
56
57
  embed_dim=4096,
57
58
  license="apache-2.0",
58
59
  max_tokens=32768,
59
60
  reference="https://huggingface.co/annamodels/LGAI-Embedding-Preview",
60
61
  similarity_fn_name="cosine",
61
- framework=["Sentence Transformers", "PyTorch"],
62
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
62
63
  use_instructions=True,
63
64
  public_training_code=None,
64
65
  public_training_data=None,
@@ -1,11 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  import torch
2
6
 
3
7
  from mteb.models.instruct_wrapper import instruct_wrapper
4
8
  from mteb.models.model_meta import ModelMeta, ScoringFunction
5
- from mteb.types import PromptType
6
9
 
7
10
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
8
11
 
12
+ if TYPE_CHECKING:
13
+ from mteb.types import PromptType
9
14
  LINQ_EMBED_MISTRAL_CITATION = """@misc{LinqAIResearch2024,
10
15
  title={Linq-Embed-Mistral:Elevating Text Retrieval with Improved GPT Data Through Task-Specific Control and Quality Refinement},
11
16
  author={Junseong Kim and Seolhwa Lee and Jihoon Kwon and Sangmo Gu and Yejin Kim and Minkyung Cho and Jy-yong Sohn and Chanyeol Choi},
@@ -38,13 +43,14 @@ Linq_Embed_Mistral = ModelMeta(
38
43
  revision="0c1a0b0589177079acc552433cad51d7c9132379",
39
44
  release_date="2024-05-29", # initial commit of hf model.
40
45
  n_parameters=7_110_000_000,
46
+ n_embedding_parameters=None,
41
47
  memory_usage_mb=13563,
42
48
  embed_dim=4096,
43
49
  license="cc-by-nc-4.0",
44
50
  max_tokens=32768,
45
51
  reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral",
46
52
  similarity_fn_name=ScoringFunction.COSINE,
47
- framework=["Sentence Transformers", "PyTorch"],
53
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
48
54
  use_instructions=True,
49
55
  public_training_code=None,
50
56
  public_training_data=None,
@@ -1,14 +1,19 @@
1
- from typing import Any
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
 
6
- from mteb.abstasks.task_metadata import TaskMetadata
7
7
  from mteb.models.model_meta import ModelMeta
8
- from mteb.types import BatchedInput, PromptType
9
8
 
10
9
  from .rerankers_custom import RerankerWrapper
11
10
 
11
+ if TYPE_CHECKING:
12
+ from torch.utils.data import DataLoader
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.types import BatchedInput, PromptType
16
+
12
17
  LISTCONRANKER_CITATION = """@article{liu2025listconranker,
13
18
  title={ListConRanker: A Contrastive Text Reranker with Listwise Encoding},
14
19
  author={Liu, Junlong and Ma, Yue and Zhao, Ruihui and Zheng, Junhao and Ma, Qianli and Kang, Yangyang},
@@ -118,6 +123,7 @@ listconranker = ModelMeta(
118
123
  revision="95ae6a5f422a916bc36520f0f3e198e7d91520a0",
119
124
  release_date="2024-12-11",
120
125
  n_parameters=401_000_000,
126
+ n_embedding_parameters=None,
121
127
  memory_usage_mb=1242,
122
128
  similarity_fn_name="cosine",
123
129
  training_datasets=listconranker_training_datasets,
@@ -125,7 +131,7 @@ listconranker = ModelMeta(
125
131
  license="mit",
126
132
  max_tokens=512,
127
133
  reference="https://huggingface.co/ByteDance/ListConRanker",
128
- framework=["PyTorch"],
134
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
129
135
  use_instructions=False,
130
136
  public_training_code=None,
131
137
  public_training_data=None,