mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527)
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,7 @@ from .sentence_transformers_models import sent_trf_training_dataset
13
13
  Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
14
14
  loader=sentence_transformers_loader,
15
15
  name="Haon-Chen/speed-embedding-7b-instruct",
16
+ model_type=["dense"],
16
17
  revision="c167e9a8144b397622ce47b85d9edcdeecef3d3f",
17
18
  release_date="2024-10-31",
18
19
  languages=["eng-Latn"],
@@ -24,16 +25,23 @@ Haon_Chen__speed_embedding_7b_instruct = ModelMeta(
24
25
  open_weights=True,
25
26
  public_training_code=None,
26
27
  public_training_data=None,
27
- framework=["PyTorch"],
28
+ framework=["PyTorch", "Transformers", "safetensors"],
28
29
  reference="https://huggingface.co/Haon-Chen/speed-embedding-7b-instruct",
29
30
  similarity_fn_name=ScoringFunction.COSINE,
30
31
  use_instructions=None,
31
32
  training_datasets=None,
32
33
  adapted_from="mistralai/Mistral-7B-v0.1",
33
34
  superseded_by=None,
35
+ citation="""@article{chen2024little,
36
+ title={Little Giants: Synthesizing High-Quality Embedding Data at Scale},
37
+ author={Chen, Haonan and Wang, Liang and Yang, Nan and Zhu, Yutao and Zhao, Ziliang and Wei, Furu and Dou, Zhicheng},
38
+ journal={arXiv preprint arXiv:2410.18634},
39
+ year={2024}
40
+ }""",
34
41
  )
35
42
  Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
36
43
  name="Gameselo/STS-multilingual-mpnet-base-v2",
44
+ model_type=["dense"],
37
45
  revision="449f917af30f590fc31f9ffb226c94f21a2f47b8",
38
46
  release_date="2024-06-07",
39
47
  languages=[],
@@ -46,7 +54,7 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
46
54
  open_weights=True,
47
55
  public_training_code=None,
48
56
  public_training_data=None,
49
- framework=["PyTorch", "Sentence Transformers"],
57
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
50
58
  reference="https://huggingface.co/Gameselo/STS-multilingual-mpnet-base-v2",
51
59
  similarity_fn_name=ScoringFunction.COSINE,
52
60
  use_instructions=None,
@@ -121,10 +129,20 @@ Gameselo__STS_multilingual_mpnet_base_v2 = ModelMeta(
121
129
  },
122
130
  adapted_from="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
123
131
  superseded_by=None,
132
+ citation="""@inproceedings{reimers-2019-sentence-bert,
133
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
134
+ author = "Reimers, Nils and Gurevych, Iryna",
135
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
136
+ month = "11",
137
+ year = "2019",
138
+ publisher = "Association for Computational Linguistics",
139
+ url = "https://arxiv.org/abs/1908.10084",
140
+ }""",
124
141
  )
125
142
 
126
143
  Hum_Works__lodestone_base_4096_v1 = ModelMeta(
127
144
  name="Hum-Works/lodestone-base-4096-v1",
145
+ model_type=["dense"],
128
146
  revision="9bbc2d0b57dd2198aea029404b0f976712a7d966",
129
147
  release_date="2023-08-25",
130
148
  languages=["eng-Latn"],
@@ -137,7 +155,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
137
155
  open_weights=True,
138
156
  public_training_code=None,
139
157
  public_training_data=None,
140
- framework=["PyTorch"],
158
+ framework=["PyTorch", "Sentence Transformers"],
141
159
  reference="https://huggingface.co/Hum-Works/lodestone-base-4096-v1",
142
160
  similarity_fn_name=ScoringFunction.COSINE,
143
161
  use_instructions=None,
@@ -191,6 +209,7 @@ Hum_Works__lodestone_base_4096_v1 = ModelMeta(
191
209
  )
192
210
  Jaume__gemma_2b_embeddings = ModelMeta(
193
211
  name="Jaume/gemma-2b-embeddings",
212
+ model_type=["dense"],
194
213
  revision="86431f65d7c3f66b2af096c61e614a2958f191f1",
195
214
  release_date="2024-06-29",
196
215
  languages=[],
@@ -203,7 +222,7 @@ Jaume__gemma_2b_embeddings = ModelMeta(
203
222
  open_weights=True,
204
223
  public_training_code=None,
205
224
  public_training_data=None,
206
- framework=["PyTorch", "Sentence Transformers"],
225
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
207
226
  reference="https://huggingface.co/Jaume/gemma-2b-embeddings",
208
227
  similarity_fn_name=ScoringFunction.COSINE,
209
228
  use_instructions=None,
@@ -222,6 +241,7 @@ bilingual_embedding_training_data = {
222
241
 
223
242
  Lajavaness__bilingual_embedding_base = ModelMeta(
224
243
  name="Lajavaness/bilingual-embedding-base",
244
+ model_type=["dense"],
225
245
  revision="0bfc54bb2aa2666dd84715289c7ef58a95eb4d8d",
226
246
  release_date="2024-06-26",
227
247
  languages=None,
@@ -237,16 +257,40 @@ Lajavaness__bilingual_embedding_base = ModelMeta(
237
257
  open_weights=True,
238
258
  public_training_code=None,
239
259
  public_training_data=None,
240
- framework=["PyTorch", "Sentence Transformers"],
260
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
241
261
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-base",
242
262
  similarity_fn_name=ScoringFunction.COSINE,
243
263
  use_instructions=None,
244
264
  training_datasets=bilingual_embedding_training_data,
245
265
  adapted_from="dangvantuan/bilingual_impl",
246
266
  superseded_by=None,
267
+ citation="""
268
+ @article{conneau2019unsupervised,
269
+ title={Unsupervised cross-lingual representation learning at scale},
270
+ author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
271
+ journal={arXiv preprint arXiv:1911.02116},
272
+ year={2019}
273
+ }
274
+
275
+ @article{reimers2019sentence,
276
+ title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
277
+ author={Nils Reimers, Iryna Gurevych},
278
+ journal={https://arxiv.org/abs/1908.10084},
279
+ year={2019}
280
+ }
281
+
282
+ @article{thakur2020augmented,
283
+ title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
284
+ author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
285
+ journal={arXiv e-prints},
286
+ pages={arXiv--2010},
287
+ year={2020}
288
+ }
289
+ """,
247
290
  )
248
291
  Lajavaness__bilingual_embedding_large = ModelMeta(
249
292
  name="Lajavaness/bilingual-embedding-large",
293
+ model_type=["dense"],
250
294
  revision="e83179d7a66e8aed1b3015e98bb5ae234ed89598",
251
295
  release_date="2024-06-24",
252
296
  languages=["fra-Latn", "eng-Latn"],
@@ -262,16 +306,40 @@ Lajavaness__bilingual_embedding_large = ModelMeta(
262
306
  open_weights=True,
263
307
  public_training_code=None,
264
308
  public_training_data=None,
265
- framework=["PyTorch", "Sentence Transformers"],
309
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
266
310
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-large",
267
311
  similarity_fn_name=ScoringFunction.COSINE,
268
312
  use_instructions=None,
269
313
  training_datasets=bilingual_embedding_training_data,
270
314
  adapted_from="dangvantuan/bilingual_impl",
271
315
  superseded_by=None,
316
+ citation="""
317
+ @article{conneau2019unsupervised,
318
+ title={Unsupervised cross-lingual representation learning at scale},
319
+ author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
320
+ journal={arXiv preprint arXiv:1911.02116},
321
+ year={2019}
322
+ }
323
+
324
+ @article{reimers2019sentence,
325
+ title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
326
+ author={Nils Reimers, Iryna Gurevych},
327
+ journal={https://arxiv.org/abs/1908.10084},
328
+ year={2019}
329
+ }
330
+
331
+ @article{thakur2020augmented,
332
+ title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
333
+ author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
334
+ journal={arXiv e-prints},
335
+ pages={arXiv--2010},
336
+ year={2020}
337
+ }
338
+ """,
272
339
  )
273
340
  Lajavaness__bilingual_embedding_small = ModelMeta(
274
341
  name="Lajavaness/bilingual-embedding-small",
342
+ model_type=["dense"],
275
343
  revision="ed4a1dd814de0db81d4a4e287c296a03194463e3",
276
344
  release_date="2024-07-17",
277
345
  languages=["fra-Latn", "eng-Latn"],
@@ -287,16 +355,40 @@ Lajavaness__bilingual_embedding_small = ModelMeta(
287
355
  open_weights=True,
288
356
  public_training_code=None,
289
357
  public_training_data=None,
290
- framework=["PyTorch", "Sentence Transformers"],
358
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
291
359
  reference="https://huggingface.co/Lajavaness/bilingual-embedding-small",
292
360
  similarity_fn_name=ScoringFunction.COSINE,
293
361
  use_instructions=None,
294
362
  training_datasets=bilingual_embedding_training_data,
295
363
  adapted_from="dangvantuan/bilingual_impl",
296
364
  superseded_by=None,
365
+ citation="""
366
+ @article{conneau2019unsupervised,
367
+ title={Unsupervised cross-lingual representation learning at scale},
368
+ author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
369
+ journal={arXiv preprint arXiv:1911.02116},
370
+ year={2019}
371
+ }
372
+
373
+ @article{reimers2019sentence,
374
+ title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
375
+ author={Nils Reimers, Iryna Gurevych},
376
+ journal={https://arxiv.org/abs/1908.10084},
377
+ year={2019}
378
+ }
379
+
380
+ @article{thakur2020augmented,
381
+ title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
382
+ author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
383
+ journal={arXiv e-prints},
384
+ pages={arXiv--2010},
385
+ year={2020}
386
+ }
387
+ """,
297
388
  )
298
389
  Mihaiii__Bulbasaur = ModelMeta(
299
390
  name="Mihaiii/Bulbasaur",
391
+ model_type=["dense"],
300
392
  revision="6876f839e18ae36224049a41194a431953f08747",
301
393
  release_date="2024-04-27",
302
394
  languages=None,
@@ -309,7 +401,7 @@ Mihaiii__Bulbasaur = ModelMeta(
309
401
  open_weights=True,
310
402
  public_training_code=None,
311
403
  public_training_data=None,
312
- framework=["PyTorch", "Sentence Transformers"],
404
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
313
405
  reference="https://huggingface.co/Mihaiii/Bulbasaur",
314
406
  similarity_fn_name=ScoringFunction.COSINE,
315
407
  use_instructions=None,
@@ -320,6 +412,7 @@ Mihaiii__Bulbasaur = ModelMeta(
320
412
  )
321
413
  Mihaiii__Ivysaur = ModelMeta(
322
414
  name="Mihaiii/Ivysaur",
415
+ model_type=["dense"],
323
416
  revision="65914d976f45beb4bda7485c39d88865b4ce6554",
324
417
  release_date="2024-04-27",
325
418
  languages=None,
@@ -332,7 +425,7 @@ Mihaiii__Ivysaur = ModelMeta(
332
425
  open_weights=True,
333
426
  public_training_code=None,
334
427
  public_training_data=None,
335
- framework=["PyTorch", "Sentence Transformers"],
428
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
336
429
  reference="https://huggingface.co/Mihaiii/Ivysaur",
337
430
  similarity_fn_name=ScoringFunction.COSINE,
338
431
  use_instructions=None,
@@ -343,6 +436,7 @@ Mihaiii__Ivysaur = ModelMeta(
343
436
  )
344
437
  Mihaiii__Squirtle = ModelMeta(
345
438
  name="Mihaiii/Squirtle",
439
+ model_type=["dense"],
346
440
  revision="5b991da48a9286637a256d4a35aab87a1a57b78a",
347
441
  release_date="2024-04-30",
348
442
  languages=None,
@@ -355,7 +449,7 @@ Mihaiii__Squirtle = ModelMeta(
355
449
  open_weights=True,
356
450
  public_training_code=None,
357
451
  public_training_data=None,
358
- framework=["PyTorch", "Sentence Transformers"],
452
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
359
453
  reference="https://huggingface.co/Mihaiii/Squirtle",
360
454
  similarity_fn_name=ScoringFunction.COSINE,
361
455
  use_instructions=None,
@@ -366,6 +460,7 @@ Mihaiii__Squirtle = ModelMeta(
366
460
  )
367
461
  Mihaiii__Venusaur = ModelMeta(
368
462
  name="Mihaiii/Venusaur",
463
+ model_type=["dense"],
369
464
  revision="0dc817f0addbb7bab8feeeeaded538f9ffeb3419",
370
465
  release_date="2024-04-29",
371
466
  languages=None,
@@ -378,7 +473,7 @@ Mihaiii__Venusaur = ModelMeta(
378
473
  open_weights=True,
379
474
  public_training_code=None,
380
475
  public_training_data=None,
381
- framework=["PyTorch", "Sentence Transformers"],
476
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
382
477
  reference="https://huggingface.co/Mihaiii/Venusaur",
383
478
  similarity_fn_name=ScoringFunction.COSINE,
384
479
  use_instructions=None,
@@ -389,6 +484,7 @@ Mihaiii__Venusaur = ModelMeta(
389
484
  )
390
485
  Mihaiii__Wartortle = ModelMeta(
391
486
  name="Mihaiii/Wartortle",
487
+ model_type=["dense"],
392
488
  revision="14caca5253414d38a7d28b62d1b7c30ef3293a87",
393
489
  release_date="2024-04-30",
394
490
  languages=None,
@@ -401,7 +497,7 @@ Mihaiii__Wartortle = ModelMeta(
401
497
  open_weights=True,
402
498
  public_training_code=None,
403
499
  public_training_data=None,
404
- framework=["PyTorch", "Sentence Transformers"],
500
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
405
501
  reference="https://huggingface.co/Mihaiii/Wartortle",
406
502
  similarity_fn_name=ScoringFunction.COSINE,
407
503
  use_instructions=None,
@@ -412,6 +508,7 @@ Mihaiii__Wartortle = ModelMeta(
412
508
  )
413
509
  Mihaiii__gte_micro = ModelMeta(
414
510
  name="Mihaiii/gte-micro",
511
+ model_type=["dense"],
415
512
  revision="6fd2397cb9dfa7c901aedf9a2a44d3c888ccafdd",
416
513
  release_date="2024-04-21",
417
514
  languages=None,
@@ -424,7 +521,7 @@ Mihaiii__gte_micro = ModelMeta(
424
521
  open_weights=True,
425
522
  public_training_code=None,
426
523
  public_training_data=None,
427
- framework=["PyTorch", "Sentence Transformers"],
524
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
428
525
  reference="https://huggingface.co/Mihaiii/gte-micro",
429
526
  similarity_fn_name=ScoringFunction.COSINE,
430
527
  use_instructions=None,
@@ -434,6 +531,7 @@ Mihaiii__gte_micro = ModelMeta(
434
531
  )
435
532
  Mihaiii__gte_micro_v4 = ModelMeta(
436
533
  name="Mihaiii/gte-micro-v4",
534
+ model_type=["dense"],
437
535
  revision="78e1a4b348f8524c3ab2e3e3475788f5adb8c98f",
438
536
  release_date="2024-04-22",
439
537
  languages=None,
@@ -446,7 +544,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
446
544
  open_weights=True,
447
545
  public_training_code=None,
448
546
  public_training_data=None,
449
- framework=["PyTorch", "Sentence Transformers"],
547
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
450
548
  reference="https://huggingface.co/Mihaiii/gte-micro-v4",
451
549
  similarity_fn_name=ScoringFunction.COSINE,
452
550
  use_instructions=None,
@@ -456,6 +554,7 @@ Mihaiii__gte_micro_v4 = ModelMeta(
456
554
  )
457
555
  OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
458
556
  name="OrdalieTech/Solon-embeddings-large-0.1",
557
+ model_type=["dense"],
459
558
  revision="9f6465f6ea2f6d10c6294bc15d84edf87d47cdef",
460
559
  release_date="2023-12-09",
461
560
  languages=["fra-Latn"],
@@ -468,7 +567,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
468
567
  open_weights=True,
469
568
  public_training_code=None,
470
569
  public_training_data=None,
471
- framework=["PyTorch"],
570
+ framework=["PyTorch", "Transformers", "safetensors"],
472
571
  reference="https://huggingface.co/OrdalieTech/Solon-embeddings-large-0.1",
473
572
  similarity_fn_name=ScoringFunction.COSINE,
474
573
  use_instructions=None,
@@ -478,6 +577,7 @@ OrdalieTech__Solon_embeddings_large_0_1 = ModelMeta(
478
577
  )
479
578
  Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
480
579
  name="Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
580
+ model_type=["dense"],
481
581
  revision="d0361a36f6fe69febfc8550d0918abab174f6f30",
482
582
  release_date="2024-06-16",
483
583
  languages=["ara-Arab"],
@@ -490,16 +590,26 @@ Omartificial_Intelligence_Space__Arabert_all_nli_triplet_Matryoshka = ModelMeta(
490
590
  open_weights=True,
491
591
  public_training_code=None,
492
592
  public_training_data=None,
493
- framework=["PyTorch", "Sentence Transformers"],
593
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
494
594
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabert-all-nli-triplet-Matryoshka",
495
595
  similarity_fn_name=ScoringFunction.COSINE,
496
596
  use_instructions=None,
497
597
  training_datasets=set(), # not in MTEB: {"Omartificial-Intelligence-Space/Arabic-NLi-Triplet"},
498
598
  adapted_from="aubmindlab/bert-base-arabertv02",
499
599
  superseded_by=None,
600
+ citation="""@inproceedings{reimers-2019-sentence-bert,
601
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
602
+ author = "Reimers, Nils and Gurevych, Iryna",
603
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
604
+ month = "11",
605
+ year = "2019",
606
+ publisher = "Association for Computational Linguistics",
607
+ url = "https://arxiv.org/abs/1908.10084",
608
+ }""",
500
609
  )
501
610
  Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMeta(
502
611
  name="Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet",
612
+ model_type=["dense"],
503
613
  revision="6916465c43b984e955aa6dc72851474f0128f428",
504
614
  release_date="2024-06-25",
505
615
  languages=["ara-Arab"],
@@ -512,7 +622,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
512
622
  open_weights=True,
513
623
  public_training_code=None,
514
624
  public_training_data=None,
515
- framework=["PyTorch", "Sentence Transformers"],
625
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
516
626
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-MiniLM-L12-v2-all-nli-triplet",
517
627
  similarity_fn_name=ScoringFunction.COSINE,
518
628
  use_instructions=None,
@@ -524,6 +634,7 @@ Omartificial_Intelligence_Space__Arabic_MiniLM_L12_v2_all_nli_triplet = ModelMet
524
634
  )
525
635
  Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
526
636
  name="Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
637
+ model_type=["dense"],
527
638
  revision="1ca467cc576bd76666a4d21b24ee43afa914dd10",
528
639
  release_date="2024-06-14",
529
640
  languages=["ara-Arab"],
@@ -536,7 +647,7 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
536
647
  open_weights=True,
537
648
  public_training_code=None,
538
649
  public_training_data=None,
539
- framework=["PyTorch", "Sentence Transformers"],
650
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
540
651
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-all-nli-triplet-Matryoshka",
541
652
  similarity_fn_name=ScoringFunction.COSINE,
542
653
  use_instructions=None,
@@ -545,9 +656,19 @@ Omartificial_Intelligence_Space__Arabic_all_nli_triplet_Matryoshka = ModelMeta(
545
656
  # {"Omartificial-Intelligence-Space/Arabic-NLi-Triplet"},
546
657
  adapted_from="sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
547
658
  superseded_by=None,
659
+ citation="""@inproceedings{reimers-2019-sentence-bert,
660
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
661
+ author = "Reimers, Nils and Gurevych, Iryna",
662
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
663
+ month = "11",
664
+ year = "2019",
665
+ publisher = "Association for Computational Linguistics",
666
+ url = "https://arxiv.org/abs/1908.10084",
667
+ }""",
548
668
  )
549
669
  Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
550
670
  name="Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
671
+ model_type=["dense"],
551
672
  revision="ee6d5e33c78ed582ade47fd452a74ea52aa5bfe2",
552
673
  release_date="2024-06-16",
553
674
  languages=["ara-Arab"],
@@ -560,7 +681,7 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
560
681
  open_weights=True,
561
682
  public_training_code=None,
562
683
  public_training_data=None,
563
- framework=["PyTorch", "Sentence Transformers"],
684
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
564
685
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-labse-Matryoshka",
565
686
  similarity_fn_name=ScoringFunction.COSINE,
566
687
  use_instructions=None,
@@ -569,9 +690,19 @@ Omartificial_Intelligence_Space__Arabic_labse_Matryoshka = ModelMeta(
569
690
  # {"Omartificial-Intelligence-Space/Arabic-NLi-Triplet"},
570
691
  adapted_from="sentence-transformers/LaBSE",
571
692
  superseded_by=None,
693
+ citation="""@inproceedings{reimers-2019-sentence-bert,
694
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
695
+ author = "Reimers, Nils and Gurevych, Iryna",
696
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
697
+ month = "11",
698
+ year = "2019",
699
+ publisher = "Association for Computational Linguistics",
700
+ url = "https://arxiv.org/abs/1908.10084",
701
+ }""",
572
702
  )
573
703
  Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
574
704
  name="Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
705
+ model_type=["dense"],
575
706
  revision="2628cb641e040f44328195fadcdfb58e6d5cffa7",
576
707
  release_date="2024-06-15",
577
708
  languages=["ara-Arab"],
@@ -584,7 +715,7 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
584
715
  open_weights=True,
585
716
  public_training_code=None,
586
717
  public_training_data=None,
587
- framework=["PyTorch", "Sentence Transformers"],
718
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
588
719
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Arabic-mpnet-base-all-nli-triplet",
589
720
  similarity_fn_name=ScoringFunction.COSINE,
590
721
  use_instructions=None,
@@ -593,9 +724,19 @@ Omartificial_Intelligence_Space__Arabic_mpnet_base_all_nli_triplet = ModelMeta(
593
724
  # {"Omartificial-Intelligence-Space/Arabic-NLi-Triplet"},
594
725
  adapted_from="tomaarsen/mpnet-base-all-nli-triplet",
595
726
  superseded_by=None,
727
+ citation="""@inproceedings{reimers-2019-sentence-bert,
728
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
729
+ author = "Reimers, Nils and Gurevych, Iryna",
730
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
731
+ month = "11",
732
+ year = "2019",
733
+ publisher = "Association for Computational Linguistics",
734
+ url = "https://arxiv.org/abs/1908.10084",
735
+ }""",
596
736
  )
597
737
  Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
598
738
  name="Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka",
739
+ model_type=["dense"],
599
740
  revision="ecf3274e164f057c4a3dd70691cae0265d87a9d0",
600
741
  release_date="2024-06-17",
601
742
  languages=["ara-Arab"],
@@ -608,16 +749,26 @@ Omartificial_Intelligence_Space__Marbert_all_nli_triplet_Matryoshka = ModelMeta(
608
749
  open_weights=True,
609
750
  public_training_code=None,
610
751
  public_training_data=None,
611
- framework=["PyTorch", "Sentence Transformers"],
752
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
612
753
  reference="https://huggingface.co/Omartificial-Intelligence-Space/Marbert-all-nli-triplet-Matryoshka",
613
754
  similarity_fn_name=ScoringFunction.COSINE,
614
755
  use_instructions=None,
615
756
  training_datasets=set(), # not in MTEB: "Omartificial-Intelligence-Space/Arabic-NLi-Triplet"},
616
757
  adapted_from="UBC-NLP/MARBERTv2",
617
758
  superseded_by=None,
759
+ citation="""@inproceedings{reimers-2019-sentence-bert,
760
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
761
+ author = "Reimers, Nils and Gurevych, Iryna",
762
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
763
+ month = "11",
764
+ year = "2019",
765
+ publisher = "Association for Computational Linguistics",
766
+ url = "https://arxiv.org/abs/1908.10084",
767
+ }""",
618
768
  )
619
769
  consciousai__cai_lunaris_text_embeddings = ModelMeta(
620
770
  name="consciousAI/cai-lunaris-text-embeddings",
771
+ model_type=["dense"],
621
772
  revision="8332c464d13505968ff7a6e2213f36fd8730b4c7",
622
773
  release_date="2023-06-22",
623
774
  languages=None,
@@ -630,7 +781,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
630
781
  open_weights=True,
631
782
  public_training_code=None,
632
783
  public_training_data=None,
633
- framework=["PyTorch"],
784
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
634
785
  reference="https://huggingface.co/consciousAI/cai-lunaris-text-embeddings",
635
786
  similarity_fn_name=ScoringFunction.COSINE,
636
787
  use_instructions=None,
@@ -640,6 +791,7 @@ consciousai__cai_lunaris_text_embeddings = ModelMeta(
640
791
  )
641
792
  consciousai__cai_stellaris_text_embeddings = ModelMeta(
642
793
  name="consciousAI/cai-stellaris-text-embeddings",
794
+ model_type=["dense"],
643
795
  revision="c000ec4b29588daf0f4a0b2ad4e72ee807d8efc0",
644
796
  release_date="2023-06-23",
645
797
  languages=None,
@@ -652,7 +804,7 @@ consciousai__cai_stellaris_text_embeddings = ModelMeta(
652
804
  open_weights=True,
653
805
  public_training_code=None,
654
806
  public_training_data=None,
655
- framework=["PyTorch"],
807
+ framework=["PyTorch", "Sentence Transformers"],
656
808
  reference="https://huggingface.co/consciousAI/cai-stellaris-text-embeddings",
657
809
  similarity_fn_name=ScoringFunction.COSINE,
658
810
  use_instructions=None,
@@ -671,6 +823,7 @@ SENTENCE_CROISSANT_TRAINING_DATA = {
671
823
  }
672
824
  manu__sentence_croissant_alpha_v0_2 = ModelMeta(
673
825
  name="manu/sentence_croissant_alpha_v0.2",
826
+ model_type=["dense"],
674
827
  revision="4610b8cea65d7dd59e0b04af50753933fe5b29b2",
675
828
  release_date="2024-03-15",
676
829
  languages=None,
@@ -683,7 +836,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
683
836
  open_weights=True,
684
837
  public_training_code=None,
685
838
  public_training_data=None,
686
- framework=["PyTorch", "Sentence Transformers"],
839
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
687
840
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.2",
688
841
  similarity_fn_name=ScoringFunction.COSINE,
689
842
  use_instructions=None,
@@ -693,6 +846,7 @@ manu__sentence_croissant_alpha_v0_2 = ModelMeta(
693
846
  )
694
847
  manu__sentence_croissant_alpha_v0_3 = ModelMeta(
695
848
  name="manu/sentence_croissant_alpha_v0.3",
849
+ model_type=["dense"],
696
850
  revision="4ac16754f3d81aba76cc32955dc9ee4122df96eb",
697
851
  release_date="2024-04-26",
698
852
  languages=None,
@@ -705,7 +859,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
705
859
  open_weights=True,
706
860
  public_training_code=None,
707
861
  public_training_data=None,
708
- framework=["PyTorch", "Sentence Transformers"],
862
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
709
863
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.3",
710
864
  similarity_fn_name=ScoringFunction.COSINE,
711
865
  use_instructions=None,
@@ -715,6 +869,7 @@ manu__sentence_croissant_alpha_v0_3 = ModelMeta(
715
869
  )
716
870
  manu__sentence_croissant_alpha_v0_4 = ModelMeta(
717
871
  name="manu/sentence_croissant_alpha_v0.4",
872
+ model_type=["dense"],
718
873
  revision="0ce6372e6a3c21134dcf26dcde13cca869c767fc",
719
874
  release_date="2024-04-27",
720
875
  languages=["fra-Latn", "eng-Latn"],
@@ -727,7 +882,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
727
882
  open_weights=True,
728
883
  public_training_code=None,
729
884
  public_training_data=None,
730
- framework=["PyTorch", "Sentence Transformers"],
885
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
731
886
  reference="https://huggingface.co/manu/sentence_croissant_alpha_v0.4",
732
887
  similarity_fn_name=ScoringFunction.COSINE,
733
888
  use_instructions=None,
@@ -738,6 +893,7 @@ manu__sentence_croissant_alpha_v0_4 = ModelMeta(
738
893
  )
739
894
  thenlper__gte_base = ModelMeta(
740
895
  name="thenlper/gte-base",
896
+ model_type=["dense"],
741
897
  revision="c078288308d8dee004ab72c6191778064285ec0c",
742
898
  release_date="2023-07-27",
743
899
  languages=["eng-Latn"],
@@ -750,16 +906,23 @@ thenlper__gte_base = ModelMeta(
750
906
  open_weights=True,
751
907
  public_training_code=None,
752
908
  public_training_data=None,
753
- framework=["PyTorch"],
909
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
754
910
  reference="https://huggingface.co/thenlper/gte-base",
755
911
  similarity_fn_name=ScoringFunction.COSINE,
756
912
  use_instructions=None,
757
913
  training_datasets=None,
758
914
  adapted_from=None,
759
915
  superseded_by=None,
916
+ citation="""@article{li2023towards,
917
+ title={Towards general text embeddings with multi-stage contrastive learning},
918
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
919
+ journal={arXiv preprint arXiv:2308.03281},
920
+ year={2023}
921
+ }""",
760
922
  )
761
923
  thenlper__gte_large = ModelMeta(
762
924
  name="thenlper/gte-large",
925
+ model_type=["dense"],
763
926
  revision="4bef63f39fcc5e2d6b0aae83089f307af4970164",
764
927
  release_date="2023-07-27",
765
928
  languages=["eng-Latn"],
@@ -772,16 +935,23 @@ thenlper__gte_large = ModelMeta(
772
935
  open_weights=True,
773
936
  public_training_code=None,
774
937
  public_training_data=None,
775
- framework=["PyTorch"],
938
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
776
939
  reference="https://huggingface.co/thenlper/gte-large",
777
940
  similarity_fn_name=ScoringFunction.COSINE,
778
941
  use_instructions=None,
779
942
  training_datasets=None,
780
943
  adapted_from=None,
781
944
  superseded_by=None,
945
+ citation="""@article{li2023towards,
946
+ title={Towards general text embeddings with multi-stage contrastive learning},
947
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
948
+ journal={arXiv preprint arXiv:2308.03281},
949
+ year={2023}
950
+ }""",
782
951
  )
783
952
  thenlper__gte_small = ModelMeta(
784
953
  name="thenlper/gte-small",
954
+ model_type=["dense"],
785
955
  revision="17e1f347d17fe144873b1201da91788898c639cd",
786
956
  release_date="2023-07-27",
787
957
  languages=["eng-Latn"],
@@ -794,16 +964,23 @@ thenlper__gte_small = ModelMeta(
794
964
  open_weights=True,
795
965
  public_training_code=None,
796
966
  public_training_data=None,
797
- framework=["PyTorch"],
967
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
798
968
  reference="https://huggingface.co/thenlper/gte-small",
799
969
  similarity_fn_name=ScoringFunction.COSINE,
800
970
  use_instructions=None,
801
971
  training_datasets=None,
802
972
  adapted_from=None,
803
973
  superseded_by=None,
974
+ citation="""@article{li2023towards,
975
+ title={Towards general text embeddings with multi-stage contrastive learning},
976
+ author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
977
+ journal={arXiv preprint arXiv:2308.03281},
978
+ year={2023}
979
+ }""",
804
980
  )
805
981
  OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
806
982
  name="OrlikB/KartonBERT-USE-base-v1",
983
+ model_type=["dense"],
807
984
  revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
808
985
  release_date="2024-09-30",
809
986
  languages=["pol-Latn"],
@@ -826,6 +1003,7 @@ OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
826
1003
  )
827
1004
  OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
828
1005
  name="OrlikB/st-polish-kartonberta-base-alpha-v1",
1006
+ model_type=["dense"],
829
1007
  revision="5590a0e2d7bb43674e44d7076b3ff157f7d4a1cb",
830
1008
  release_date="2023-11-12",
831
1009
  languages=["pol-Latn"],
@@ -848,6 +1026,7 @@ OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
848
1026
  )
849
1027
  sdadas__mmlw_e5_base = ModelMeta(
850
1028
  name="sdadas/mmlw-e5-base",
1029
+ model_type=["dense"],
851
1030
  revision="f10628ed55b5ec400502aff439bd714a6da0af30",
852
1031
  release_date="2023-11-17",
853
1032
  languages=["pol-Latn"],
@@ -860,16 +1039,25 @@ sdadas__mmlw_e5_base = ModelMeta(
860
1039
  open_weights=True,
861
1040
  public_training_code=None,
862
1041
  public_training_data=None,
863
- framework=["PyTorch"],
1042
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
864
1043
  reference="https://huggingface.co/sdadas/mmlw-e5-base",
865
1044
  similarity_fn_name=ScoringFunction.COSINE,
866
1045
  use_instructions=None,
867
1046
  training_datasets=E5_TRAINING_DATA,
868
1047
  adapted_from="intfloat/multilingual-e5-base",
869
1048
  superseded_by=None,
1049
+ citation="""@article{dadas2024pirb,
1050
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1051
+ author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1052
+ year={2024},
1053
+ eprint={2402.13350},
1054
+ archivePrefix={arXiv},
1055
+ primaryClass={cs.CL}
1056
+ }""",
870
1057
  )
871
1058
  dwzhu__e5_base_4k = ModelMeta(
872
1059
  name="dwzhu/e5-base-4k",
1060
+ model_type=["dense"],
873
1061
  revision="1b5664b8cb2bccd8c309429b7bfe5864402e8fbc",
874
1062
  release_date="2024-03-28",
875
1063
  languages=["eng-Latn"],
@@ -882,16 +1070,23 @@ dwzhu__e5_base_4k = ModelMeta(
882
1070
  open_weights=True,
883
1071
  public_training_code=None,
884
1072
  public_training_data=None,
885
- framework=["PyTorch"],
1073
+ framework=["PyTorch", "Transformers"],
886
1074
  reference="https://huggingface.co/dwzhu/e5-base-4k",
887
1075
  similarity_fn_name=ScoringFunction.COSINE,
888
1076
  use_instructions=None,
889
1077
  training_datasets=E5_TRAINING_DATA,
890
1078
  adapted_from="intfloat/e5-base-v2",
891
1079
  superseded_by=None,
1080
+ citation="""@article{zhu2024longembed,
1081
+ title={LongEmbed: Extending Embedding Models for Long Context Retrieval},
1082
+ author={Zhu, Dawei and Wang, Liang and Yang, Nan and Song, Yifan and Wu, Wenhao and Wei, Furu and Li, Sujian},
1083
+ journal={arXiv preprint arXiv:2404.12096},
1084
+ year={2024}
1085
+ }""",
892
1086
  )
893
1087
  sdadas__mmlw_e5_large = ModelMeta(
894
1088
  name="sdadas/mmlw-e5-large",
1089
+ model_type=["dense"],
895
1090
  revision="5c143fb045ebed664fd85b43fc45155999eb110f",
896
1091
  release_date="2023-11-17",
897
1092
  languages=["pol-Latn"],
@@ -904,16 +1099,25 @@ sdadas__mmlw_e5_large = ModelMeta(
904
1099
  open_weights=True,
905
1100
  public_training_code=None,
906
1101
  public_training_data=None,
907
- framework=["PyTorch"],
1102
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
908
1103
  reference="https://huggingface.co/sdadas/mmlw-e5-large",
909
1104
  similarity_fn_name=ScoringFunction.COSINE,
910
1105
  use_instructions=None,
911
1106
  training_datasets=E5_TRAINING_DATA,
912
1107
  adapted_from="intfloat/multilingual-e5-large",
913
1108
  superseded_by=None,
1109
+ citation="""@article{dadas2024pirb,
1110
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1111
+ author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1112
+ year={2024},
1113
+ eprint={2402.13350},
1114
+ archivePrefix={arXiv},
1115
+ primaryClass={cs.CL}
1116
+ }""",
914
1117
  )
915
1118
  sdadas__mmlw_e5_small = ModelMeta(
916
1119
  name="sdadas/mmlw-e5-small",
1120
+ model_type=["dense"],
917
1121
  revision="ff1298cb6d997f18b794d2f3d73cad2ba2ad739a",
918
1122
  release_date="2023-11-17",
919
1123
  languages=["pol-Latn"],
@@ -926,16 +1130,25 @@ sdadas__mmlw_e5_small = ModelMeta(
926
1130
  open_weights=True,
927
1131
  public_training_code=None,
928
1132
  public_training_data=None,
929
- framework=["PyTorch"],
1133
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
930
1134
  reference="https://huggingface.co/sdadas/mmlw-e5-small",
931
1135
  similarity_fn_name=ScoringFunction.COSINE,
932
1136
  use_instructions=None,
933
1137
  training_datasets=E5_TRAINING_DATA,
934
1138
  adapted_from="intfloat/multilingual-e5-small",
935
1139
  superseded_by=None,
1140
+ citation="""@article{dadas2024pirb,
1141
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1142
+ author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1143
+ year={2024},
1144
+ eprint={2402.13350},
1145
+ archivePrefix={arXiv},
1146
+ primaryClass={cs.CL}
1147
+ }""",
936
1148
  )
937
1149
  sdadas__mmlw_roberta_base = ModelMeta(
938
1150
  name="sdadas/mmlw-roberta-base",
1151
+ model_type=["dense"],
939
1152
  revision="0ac7f23f6c96af601fa6a17852bd08d5136d6365",
940
1153
  release_date="2023-11-17",
941
1154
  languages=["pol-Latn"],
@@ -948,16 +1161,25 @@ sdadas__mmlw_roberta_base = ModelMeta(
948
1161
  open_weights=True,
949
1162
  public_training_code=None,
950
1163
  public_training_data=None,
951
- framework=["PyTorch"],
1164
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
952
1165
  reference="https://huggingface.co/sdadas/mmlw-roberta-base",
953
1166
  similarity_fn_name=ScoringFunction.COSINE,
954
1167
  use_instructions=None,
955
1168
  training_datasets={"MSMARCO"},
956
1169
  adapted_from="sdadas/polish-roberta-base-v2",
957
1170
  superseded_by=None,
1171
+ citation="""@article{dadas2024pirb,
1172
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1173
+ author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1174
+ year={2024},
1175
+ eprint={2402.13350},
1176
+ archivePrefix={arXiv},
1177
+ primaryClass={cs.CL}
1178
+ }""",
958
1179
  )
959
1180
  sdadas__mmlw_roberta_large = ModelMeta(
960
1181
  name="sdadas/mmlw-roberta-large",
1182
+ model_type=["dense"],
961
1183
  revision="b8058066a8de32d0737b3cd82d8b4f4108745af9",
962
1184
  release_date="2023-11-17",
963
1185
  languages=["pol-Latn"],
@@ -970,13 +1192,21 @@ sdadas__mmlw_roberta_large = ModelMeta(
970
1192
  open_weights=True,
971
1193
  public_training_code=None,
972
1194
  public_training_data=None,
973
- framework=["PyTorch"],
1195
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
974
1196
  reference="https://huggingface.co/sdadas/mmlw-roberta-large",
975
1197
  similarity_fn_name=ScoringFunction.COSINE,
976
1198
  use_instructions=None,
977
1199
  training_datasets={"MSMARCO"},
978
1200
  adapted_from="sdadas/polish-roberta-large-v2",
979
1201
  superseded_by=None,
1202
+ citation="""@article{dadas2024pirb,
1203
+ title={{PIRB}: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods},
1204
+ author={Sławomir Dadas and Michał Perełkiewicz and Rafał Poświata},
1205
+ year={2024},
1206
+ eprint={2402.13350},
1207
+ archivePrefix={arXiv},
1208
+ primaryClass={cs.CL}
1209
+ }""",
980
1210
  )
981
1211
 
982
1212
  udever_dataset = { # discussed here: https://github.com/embeddings-benchmark/mteb/issues/2193
@@ -1035,6 +1265,7 @@ udever_languages = [
1035
1265
 
1036
1266
  izhx__udever_bloom_1b1 = ModelMeta(
1037
1267
  name="izhx/udever-bloom-1b1",
1268
+ model_type=["dense"],
1038
1269
  revision="7bf1ee29878cb040b2708a691aa4b61f27eaa252",
1039
1270
  release_date="2023-10-24",
1040
1271
  languages=udever_languages,
@@ -1047,16 +1278,23 @@ izhx__udever_bloom_1b1 = ModelMeta(
1047
1278
  open_weights=True,
1048
1279
  public_training_code=None,
1049
1280
  public_training_data=None,
1050
- framework=["PyTorch"],
1281
+ framework=["PyTorch", "Transformers"],
1051
1282
  reference="https://huggingface.co/izhx/udever-bloom-1b1",
1052
1283
  similarity_fn_name=ScoringFunction.COSINE,
1053
1284
  use_instructions=None,
1054
1285
  training_datasets=udever_dataset,
1055
1286
  adapted_from="bigscience/bloom-1b1",
1056
1287
  superseded_by=None,
1288
+ citation="""@article{zhang2023language,
1289
+ title={Language Models are Universal Embedders},
1290
+ author={Zhang, Xin and Li, Zehan and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Zhang, Min},
1291
+ journal={arXiv preprint arXiv:2310.08232},
1292
+ year={2023}
1293
+ }""",
1057
1294
  )
1058
1295
  izhx__udever_bloom_3b = ModelMeta(
1059
1296
  name="izhx/udever-bloom-3b",
1297
+ model_type=["dense"],
1060
1298
  revision="4edd8affe80ca89ba0f6b6ba4103fc7f25fc57b2",
1061
1299
  release_date="2023-10-24",
1062
1300
  languages=udever_languages,
@@ -1069,16 +1307,23 @@ izhx__udever_bloom_3b = ModelMeta(
1069
1307
  open_weights=True,
1070
1308
  public_training_code=None,
1071
1309
  public_training_data=None,
1072
- framework=["PyTorch"],
1310
+ framework=["PyTorch", "Transformers"],
1073
1311
  reference="https://huggingface.co/izhx/udever-bloom-3b",
1074
1312
  similarity_fn_name=ScoringFunction.COSINE,
1075
1313
  use_instructions=None,
1076
1314
  training_datasets=udever_dataset,
1077
1315
  adapted_from="bigscience/bloom-3b",
1078
1316
  superseded_by=None,
1317
+ citation="""@article{zhang2023language,
1318
+ title={Language Models are Universal Embedders},
1319
+ author={Zhang, Xin and Li, Zehan and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Zhang, Min},
1320
+ journal={arXiv preprint arXiv:2310.08232},
1321
+ year={2023}
1322
+ }""",
1079
1323
  )
1080
1324
  izhx__udever_bloom_560m = ModelMeta(
1081
1325
  name="izhx/udever-bloom-560m",
1326
+ model_type=["dense"],
1082
1327
  revision="b2a723e355946ec5a5c5fbed3459766627ded2bb",
1083
1328
  release_date="2023-10-24",
1084
1329
  languages=udever_languages,
@@ -1091,16 +1336,23 @@ izhx__udever_bloom_560m = ModelMeta(
1091
1336
  open_weights=True,
1092
1337
  public_training_code=None,
1093
1338
  public_training_data=None,
1094
- framework=["PyTorch"],
1339
+ framework=["PyTorch", "Transformers"],
1095
1340
  reference="https://huggingface.co/izhx/udever-bloom-560m",
1096
1341
  similarity_fn_name=ScoringFunction.COSINE,
1097
1342
  use_instructions=None,
1098
1343
  training_datasets=udever_dataset,
1099
1344
  adapted_from="bigscience/bloom-560m",
1100
1345
  superseded_by=None,
1346
+ citation="""@article{zhang2023language,
1347
+ title={Language Models are Universal Embedders},
1348
+ author={Zhang, Xin and Li, Zehan and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Zhang, Min},
1349
+ journal={arXiv preprint arXiv:2310.08232},
1350
+ year={2023}
1351
+ }""",
1101
1352
  )
1102
1353
  izhx__udever_bloom_7b1 = ModelMeta(
1103
1354
  name="izhx/udever-bloom-7b1",
1355
+ model_type=["dense"],
1104
1356
  revision="18e8d3e6dbd94868584877f2e72a105a17df22ef",
1105
1357
  release_date="2023-10-24",
1106
1358
  languages=udever_languages,
@@ -1113,16 +1365,23 @@ izhx__udever_bloom_7b1 = ModelMeta(
1113
1365
  open_weights=True,
1114
1366
  public_training_code=None,
1115
1367
  public_training_data=None,
1116
- framework=["PyTorch"],
1368
+ framework=["PyTorch", "Transformers"],
1117
1369
  reference="https://huggingface.co/izhx/udever-bloom-7b1",
1118
1370
  similarity_fn_name=ScoringFunction.COSINE,
1119
1371
  use_instructions=None,
1120
1372
  training_datasets=udever_dataset,
1121
1373
  adapted_from="bigscience/bloom-7b1",
1122
1374
  superseded_by=None,
1375
+ citation="""@article{zhang2023language,
1376
+ title={Language Models are Universal Embedders},
1377
+ author={Zhang, Xin and Li, Zehan and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan and Zhang, Min},
1378
+ journal={arXiv preprint arXiv:2310.08232},
1379
+ year={2023}
1380
+ }""",
1123
1381
  )
1124
1382
  avsolatorio__gist_embedding_v0 = ModelMeta(
1125
1383
  name="avsolatorio/GIST-Embedding-v0",
1384
+ model_type=["dense"],
1126
1385
  revision="bf6b2e55e92f510a570ad4d7d2da2ec8cd22590c",
1127
1386
  release_date="2024-01-31",
1128
1387
  languages=["eng-Latn"],
@@ -1135,7 +1394,7 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1135
1394
  open_weights=True,
1136
1395
  public_training_code=None,
1137
1396
  public_training_data=None,
1138
- framework=["PyTorch", "Sentence Transformers"],
1397
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1139
1398
  reference="https://huggingface.co/avsolatorio/GIST-Embedding-v0",
1140
1399
  similarity_fn_name=ScoringFunction.COSINE,
1141
1400
  use_instructions=None,
@@ -1159,9 +1418,20 @@ avsolatorio__gist_embedding_v0 = ModelMeta(
1159
1418
  | bge_training_data,
1160
1419
  adapted_from="BAAI/bge-large-en-v1.5",
1161
1420
  superseded_by=None,
1421
+ citation="""@article{solatorio2024gistembed,
1422
+ title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
1423
+ author={Aivin V. Solatorio},
1424
+ journal={arXiv preprint arXiv:2402.16829},
1425
+ year={2024},
1426
+ URL={https://arxiv.org/abs/2402.16829}
1427
+ eprint={2402.16829},
1428
+ archivePrefix={arXiv},
1429
+ primaryClass={cs.LG}
1430
+ }""",
1162
1431
  )
1163
1432
  avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1164
1433
  name="avsolatorio/GIST-all-MiniLM-L6-v2",
1434
+ model_type=["dense"],
1165
1435
  revision="ea89dfad053bba14677bb784a4269898abbdce44",
1166
1436
  release_date="2024-02-03",
1167
1437
  languages=["eng-Latn"],
@@ -1174,7 +1444,7 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1174
1444
  open_weights=True,
1175
1445
  public_training_code=None,
1176
1446
  public_training_data=None,
1177
- framework=["PyTorch", "Sentence Transformers"],
1447
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1178
1448
  reference="https://huggingface.co/avsolatorio/GIST-all-MiniLM-L6-v2",
1179
1449
  similarity_fn_name=ScoringFunction.COSINE,
1180
1450
  use_instructions=None,
@@ -1198,9 +1468,20 @@ avsolatorio__gist_all_minilm_l6_v2 = ModelMeta(
1198
1468
  | bge_training_data,
1199
1469
  adapted_from=None,
1200
1470
  superseded_by=None,
1471
+ citation="""@article{solatorio2024gistembed,
1472
+ title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
1473
+ author={Aivin V. Solatorio},
1474
+ journal={arXiv preprint arXiv:2402.16829},
1475
+ year={2024},
1476
+ URL={https://arxiv.org/abs/2402.16829}
1477
+ eprint={2402.16829},
1478
+ archivePrefix={arXiv},
1479
+ primaryClass={cs.LG}
1480
+ }""",
1201
1481
  )
1202
1482
  avsolatorio__gist_large_embedding_v0 = ModelMeta(
1203
1483
  name="avsolatorio/GIST-large-Embedding-v0",
1484
+ model_type=["dense"],
1204
1485
  revision="7831200e2f7819b994490c091cf3258a2b821f0c",
1205
1486
  release_date="2024-02-14",
1206
1487
  languages=["eng-Latn"],
@@ -1213,7 +1494,7 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1213
1494
  open_weights=True,
1214
1495
  public_training_code=None,
1215
1496
  public_training_data=None,
1216
- framework=["PyTorch", "Sentence Transformers"],
1497
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1217
1498
  reference="https://huggingface.co/avsolatorio/GIST-large-Embedding-v0",
1218
1499
  similarity_fn_name=ScoringFunction.COSINE,
1219
1500
  use_instructions=None,
@@ -1237,9 +1518,20 @@ avsolatorio__gist_large_embedding_v0 = ModelMeta(
1237
1518
  | bge_training_data,
1238
1519
  adapted_from=None,
1239
1520
  superseded_by=None,
1521
+ citation="""@article{solatorio2024gistembed,
1522
+ title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
1523
+ author={Aivin V. Solatorio},
1524
+ journal={arXiv preprint arXiv:2402.16829},
1525
+ year={2024},
1526
+ URL={https://arxiv.org/abs/2402.16829}
1527
+ eprint={2402.16829},
1528
+ archivePrefix={arXiv},
1529
+ primaryClass={cs.LG}
1530
+ }""",
1240
1531
  )
1241
1532
  avsolatorio__gist_small_embedding_v0 = ModelMeta(
1242
1533
  name="avsolatorio/GIST-small-Embedding-v0",
1534
+ model_type=["dense"],
1243
1535
  revision="d6c4190f9e01b9994dc7cac99cf2f2b85cfb57bc",
1244
1536
  release_date="2024-02-03",
1245
1537
  languages=["eng-Latn"],
@@ -1252,7 +1544,7 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1252
1544
  open_weights=True,
1253
1545
  public_training_code=None,
1254
1546
  public_training_data=None,
1255
- framework=["PyTorch", "Sentence Transformers"],
1547
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1256
1548
  reference="https://huggingface.co/avsolatorio/GIST-small-Embedding-v0",
1257
1549
  similarity_fn_name=ScoringFunction.COSINE,
1258
1550
  use_instructions=None,
@@ -1276,9 +1568,20 @@ avsolatorio__gist_small_embedding_v0 = ModelMeta(
1276
1568
  | bge_training_data,
1277
1569
  adapted_from=None,
1278
1570
  superseded_by=None,
1571
+ citation="""@article{solatorio2024gistembed,
1572
+ title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
1573
+ author={Aivin V. Solatorio},
1574
+ journal={arXiv preprint arXiv:2402.16829},
1575
+ year={2024},
1576
+ URL={https://arxiv.org/abs/2402.16829}
1577
+ eprint={2402.16829},
1578
+ archivePrefix={arXiv},
1579
+ primaryClass={cs.LG}
1580
+ }""",
1279
1581
  )
1280
1582
  bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1281
1583
  name="bigscience/sgpt-bloom-7b1-msmarco",
1584
+ model_type=["dense"],
1282
1585
  revision="dc579f3d2d5a0795eba2049e16c3e36c74007ad3",
1283
1586
  release_date="2022-08-26",
1284
1587
  languages=None,
@@ -1291,16 +1594,23 @@ bigscience__sgpt_bloom_7b1_msmarco = ModelMeta(
1291
1594
  open_weights=True,
1292
1595
  public_training_code=None,
1293
1596
  public_training_data=None,
1294
- framework=["PyTorch"],
1597
+ framework=["PyTorch", "Sentence Transformers"],
1295
1598
  reference="https://huggingface.co/bigscience/sgpt-bloom-7b1-msmarco",
1296
1599
  similarity_fn_name=ScoringFunction.COSINE,
1297
1600
  use_instructions=None,
1298
1601
  training_datasets=None,
1299
1602
  adapted_from="/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3/bloom-7b1",
1300
1603
  superseded_by=None,
1604
+ citation="""@article{muennighoff2022sgpt,
1605
+ title={SGPT: GPT Sentence Embeddings for Semantic Search},
1606
+ author={Muennighoff, Niklas},
1607
+ journal={arXiv preprint arXiv:2202.08904},
1608
+ year={2022}
1609
+ }""",
1301
1610
  )
1302
1611
  aari1995__german_semantic_sts_v2 = ModelMeta(
1303
1612
  name="aari1995/German_Semantic_STS_V2",
1613
+ model_type=["dense"],
1304
1614
  revision="22912542b0ec7a7ef369837e28ffe6352a27afc9",
1305
1615
  release_date="2022-11-17",
1306
1616
  languages=["deu-Latn"],
@@ -1313,7 +1623,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1313
1623
  open_weights=True,
1314
1624
  public_training_code=None,
1315
1625
  public_training_data=None,
1316
- framework=["PyTorch"],
1626
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1317
1627
  reference="https://huggingface.co/aari1995/German_Semantic_STS_V2",
1318
1628
  similarity_fn_name=ScoringFunction.COSINE,
1319
1629
  use_instructions=None,
@@ -1324,6 +1634,7 @@ aari1995__german_semantic_sts_v2 = ModelMeta(
1324
1634
  )
1325
1635
  abhinand__medembed_small_v0_1 = ModelMeta(
1326
1636
  name="abhinand/MedEmbed-small-v0.1",
1637
+ model_type=["dense"],
1327
1638
  revision="40a5850d046cfdb56154e332b4d7099b63e8d50e",
1328
1639
  release_date="2024-10-20",
1329
1640
  languages=["eng-Latn"],
@@ -1336,7 +1647,7 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1336
1647
  open_weights=True,
1337
1648
  public_training_code=None,
1338
1649
  public_training_data=None,
1339
- framework=["PyTorch"],
1650
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1340
1651
  reference="https://huggingface.co/abhinand/MedEmbed-small-v0.1",
1341
1652
  similarity_fn_name=ScoringFunction.COSINE,
1342
1653
  use_instructions=None,
@@ -1352,9 +1663,16 @@ abhinand__medembed_small_v0_1 = ModelMeta(
1352
1663
  },
1353
1664
  adapted_from="BAAI/bge-base-en-v1.5",
1354
1665
  superseded_by=None,
1666
+ citation="""@software{balachandran2024medembed,
1667
+ author = {Balachandran, Abhinand},
1668
+ title = {MedEmbed: Medical-Focused Embedding Models},
1669
+ year = {2024},
1670
+ url = {https://github.com/abhinand5/MedEmbed}
1671
+ }""",
1355
1672
  )
1356
1673
  avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
1357
1674
  name="avsolatorio/NoInstruct-small-Embedding-v0",
1675
+ model_type=["dense"],
1358
1676
  revision="b38747000553d8268915c95a55fc87e707c9aadd",
1359
1677
  release_date="2024-05-01",
1360
1678
  languages=["eng-Latn"],
@@ -1377,6 +1695,7 @@ avsolatorio__noinstruct_small_embedding_v0 = ModelMeta(
1377
1695
  )
1378
1696
  brahmairesearch__slx_v0_1 = ModelMeta(
1379
1697
  name="brahmairesearch/slx-v0.1",
1698
+ model_type=["dense"],
1380
1699
  revision="688c83fd1a7f34b25575a2bc26cfd87c11b4ce71",
1381
1700
  release_date="2024-08-13",
1382
1701
  languages=["eng-Latn"],
@@ -1389,7 +1708,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1389
1708
  open_weights=True,
1390
1709
  public_training_code=None,
1391
1710
  public_training_data=None,
1392
- framework=["PyTorch", "Sentence Transformers"],
1711
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1393
1712
  reference="https://huggingface.co/brahmairesearch/slx-v0.1",
1394
1713
  similarity_fn_name=ScoringFunction.COSINE,
1395
1714
  use_instructions=None,
@@ -1399,6 +1718,7 @@ brahmairesearch__slx_v0_1 = ModelMeta(
1399
1718
  )
1400
1719
  deepfile__embedder_100p = ModelMeta(
1401
1720
  name="deepfile/embedder-100p",
1721
+ model_type=["dense"],
1402
1722
  revision="aa02f08f11517977fbcdc94dc9dbf9a1ca152d9b",
1403
1723
  release_date="2023-07-24",
1404
1724
  languages=None,
@@ -1411,7 +1731,7 @@ deepfile__embedder_100p = ModelMeta(
1411
1731
  open_weights=True,
1412
1732
  public_training_code=None,
1413
1733
  public_training_data=None,
1414
- framework=["PyTorch"],
1734
+ framework=["PyTorch", "Transformers", "safetensors"],
1415
1735
  reference="https://huggingface.co/deepfile/embedder-100p",
1416
1736
  similarity_fn_name=ScoringFunction.COSINE,
1417
1737
  use_instructions=None,
@@ -1421,6 +1741,7 @@ deepfile__embedder_100p = ModelMeta(
1421
1741
  )
1422
1742
  infgrad__stella_base_en_v2 = ModelMeta(
1423
1743
  name="infgrad/stella-base-en-v2",
1744
+ model_type=["dense"],
1424
1745
  revision="c9e80ff9892d80b39dc54e30a7873f91ea161034",
1425
1746
  release_date="2023-10-19",
1426
1747
  languages=["eng-Latn"],
@@ -1433,7 +1754,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1433
1754
  open_weights=True,
1434
1755
  public_training_code=None,
1435
1756
  public_training_data=None,
1436
- framework=["PyTorch"],
1757
+ framework=["PyTorch", "Sentence Transformers"],
1437
1758
  reference="https://huggingface.co/infgrad/stella-base-en-v2",
1438
1759
  similarity_fn_name=ScoringFunction.COSINE,
1439
1760
  use_instructions=None,
@@ -1443,6 +1764,7 @@ infgrad__stella_base_en_v2 = ModelMeta(
1443
1764
  )
1444
1765
  malenia1__ternary_weight_embedding = ModelMeta(
1445
1766
  name="malenia1/ternary-weight-embedding",
1767
+ model_type=["dense"],
1446
1768
  revision="a1208fb7f646647bb62639fd2e1eb6cc2ef3738e",
1447
1769
  release_date="2024-10-23",
1448
1770
  languages=None,
@@ -1455,7 +1777,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1455
1777
  open_weights=True,
1456
1778
  public_training_code=None,
1457
1779
  public_training_data=None,
1458
- framework=["PyTorch"],
1780
+ framework=["PyTorch", "safetensors"],
1459
1781
  reference="https://huggingface.co/malenia1/ternary-weight-embedding",
1460
1782
  similarity_fn_name=ScoringFunction.COSINE,
1461
1783
  use_instructions=None,
@@ -1465,6 +1787,7 @@ malenia1__ternary_weight_embedding = ModelMeta(
1465
1787
  )
1466
1788
  omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1467
1789
  name="omarelshehy/arabic-english-sts-matryoshka",
1790
+ model_type=["dense"],
1468
1791
  revision="763d116fbe8bf7883c64635c862feeaa3768bb64",
1469
1792
  release_date="2024-10-13",
1470
1793
  languages=["ara-Arab", "eng-Latn"],
@@ -1477,13 +1800,22 @@ omarelshehy__arabic_english_sts_matryoshka = ModelMeta(
1477
1800
  open_weights=True,
1478
1801
  public_training_code=None,
1479
1802
  public_training_data=None,
1480
- framework=["PyTorch", "Sentence Transformers"],
1803
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1481
1804
  reference="https://huggingface.co/omarelshehy/arabic-english-sts-matryoshka",
1482
1805
  similarity_fn_name=ScoringFunction.COSINE,
1483
1806
  use_instructions=None,
1484
1807
  training_datasets=None,
1485
1808
  adapted_from="FacebookAI/xlm-roberta-large",
1486
1809
  superseded_by=None,
1810
+ citation="""@inproceedings{reimers-2019-sentence-bert,
1811
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
1812
+ author = "Reimers, Nils and Gurevych, Iryna",
1813
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
1814
+ month = "11",
1815
+ year = "2019",
1816
+ publisher = "Association for Computational Linguistics",
1817
+ url = "https://arxiv.org/abs/1908.10084",
1818
+ }""",
1487
1819
  )
1488
1820
  openbmb__minicpm_embedding = ModelMeta(
1489
1821
  loader=sentence_transformers_loader,
@@ -1496,6 +1828,7 @@ openbmb__minicpm_embedding = ModelMeta(
1496
1828
  # https://huggingface.co/openbmb/MiniCPM-Embedding/blob/c0cb2de33fb366e17c30f9d53142ff11bc18e049/README.md?code=true#L405
1497
1829
  ),
1498
1830
  name="openbmb/MiniCPM-Embedding",
1831
+ model_type=["dense"],
1499
1832
  revision="c0cb2de33fb366e17c30f9d53142ff11bc18e049",
1500
1833
  release_date="2024-09-04",
1501
1834
  languages=["zho-Hans", "eng-Latn"],
@@ -1507,7 +1840,7 @@ openbmb__minicpm_embedding = ModelMeta(
1507
1840
  open_weights=True,
1508
1841
  public_training_code=None,
1509
1842
  public_training_data=None,
1510
- framework=["PyTorch", "Sentence Transformers"],
1843
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
1511
1844
  reference="https://huggingface.co/openbmb/MiniCPM-Embedding",
1512
1845
  similarity_fn_name=ScoringFunction.COSINE,
1513
1846
  use_instructions=None,
@@ -1518,6 +1851,7 @@ openbmb__minicpm_embedding = ModelMeta(
1518
1851
 
1519
1852
  silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1520
1853
  name="silma-ai/silma-embeddding-matryoshka-v0.1",
1854
+ model_type=["dense"],
1521
1855
  revision="a520977a9542ebdb8a7206df6b7ff6977f1886ea",
1522
1856
  release_date="2024-10-12",
1523
1857
  languages=["ara-Arab", "eng-Latn"],
@@ -1530,17 +1864,25 @@ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1530
1864
  open_weights=True,
1531
1865
  public_training_code=None,
1532
1866
  public_training_data=None,
1533
- framework=["PyTorch", "Sentence Transformers"],
1867
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1534
1868
  reference="https://huggingface.co/silma-ai/silma-embeddding-matryoshka-v0.1",
1535
1869
  similarity_fn_name=ScoringFunction.COSINE,
1536
1870
  use_instructions=None,
1537
1871
  training_datasets=None,
1538
1872
  adapted_from="/workspace/v3-matryoshka_aubmindlab-bert-base-arabertv02-2024-10-12_13-55-06/checkpoint-26250",
1539
1873
  superseded_by=None,
1874
+ citation="""@misc{silma2024embedding,
1875
+ author = {Abu Bakr Soliman, Karim Ouda, SILMA AI},
1876
+ title = {SILMA Embedding Matryoshka 0.1},
1877
+ year = {2024},
1878
+ publisher = {Hugging Face},
1879
+ howpublished = {https://huggingface.co/silma-ai/silma-embeddding-matryoshka-0.1},
1880
+ }""",
1540
1881
  )
1541
1882
 
1542
1883
  sbert_chinese_general_v1 = ModelMeta(
1543
1884
  name="DMetaSoul/sbert-chinese-general-v1",
1885
+ model_type=["dense"],
1544
1886
  revision="bd27765956bcc2fcf682de0097819947ac10037e",
1545
1887
  release_date="2022-03-25",
1546
1888
  languages=["zho-Hans"],
@@ -1553,7 +1895,7 @@ sbert_chinese_general_v1 = ModelMeta(
1553
1895
  open_weights=True,
1554
1896
  public_training_code=None,
1555
1897
  public_training_data=None,
1556
- framework=["PyTorch", "Sentence Transformers"],
1898
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1557
1899
  reference="https://huggingface.co/DMetaSoul/sbert-chinese-general-v1",
1558
1900
  similarity_fn_name=ScoringFunction.COSINE,
1559
1901
  use_instructions=None,
@@ -1568,6 +1910,7 @@ sbert_chinese_general_v1 = ModelMeta(
1568
1910
 
1569
1911
  dmeta_embedding_zh_small = ModelMeta(
1570
1912
  name="DMetaSoul/Dmeta-embedding-zh-small",
1913
+ model_type=["dense"],
1571
1914
  revision="2050d3439a2f68999dd648c1697471acaac37a29",
1572
1915
  release_date="2024-03-25",
1573
1916
  languages=["zho-Hans"],
@@ -1580,7 +1923,7 @@ dmeta_embedding_zh_small = ModelMeta(
1580
1923
  open_weights=True,
1581
1924
  public_training_code=None,
1582
1925
  public_training_data=None,
1583
- framework=["PyTorch", "Sentence Transformers"],
1926
+ framework=["PyTorch", "Sentence Transformers", "Transformers", "safetensors"],
1584
1927
  reference="https://huggingface.co/DMetaSoul/Dmeta-embedding-zh-small/",
1585
1928
  similarity_fn_name=ScoringFunction.COSINE,
1586
1929
  use_instructions=None,
@@ -1590,6 +1933,7 @@ dmeta_embedding_zh_small = ModelMeta(
1590
1933
 
1591
1934
  xiaobu_embedding = ModelMeta(
1592
1935
  name="lier007/xiaobu-embedding",
1936
+ model_type=["dense"],
1593
1937
  revision="59c79d82eb5223cd9895f6eb8e825c7fa10e4e92",
1594
1938
  release_date="2024-01-09",
1595
1939
  languages=["zho-Hans"],
@@ -1602,7 +1946,7 @@ xiaobu_embedding = ModelMeta(
1602
1946
  open_weights=True,
1603
1947
  public_training_code=None,
1604
1948
  public_training_data=None,
1605
- framework=["PyTorch", "Sentence Transformers"],
1949
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1606
1950
  reference="https://huggingface.co/lier007/xiaobu-embedding",
1607
1951
  similarity_fn_name=ScoringFunction.COSINE,
1608
1952
  use_instructions=None,
@@ -1613,6 +1957,7 @@ xiaobu_embedding = ModelMeta(
1613
1957
 
1614
1958
  xiaobu_embedding_v2 = ModelMeta(
1615
1959
  name="lier007/xiaobu-embedding-v2",
1960
+ model_type=["dense"],
1616
1961
  revision="1912f2e59a5c2ef802a471d735a38702a5c9485e",
1617
1962
  release_date="2024-06-30",
1618
1963
  languages=["zho-Hans"],
@@ -1625,7 +1970,7 @@ xiaobu_embedding_v2 = ModelMeta(
1625
1970
  open_weights=True,
1626
1971
  public_training_code=None,
1627
1972
  public_training_data=None,
1628
- framework=["PyTorch", "Sentence Transformers"],
1973
+ framework=["PyTorch", "Sentence Transformers", "ONNX", "safetensors"],
1629
1974
  reference="https://huggingface.co/lier007/xiaobu-embedding-v2",
1630
1975
  similarity_fn_name=ScoringFunction.COSINE,
1631
1976
  use_instructions=None,
@@ -1636,6 +1981,7 @@ xiaobu_embedding_v2 = ModelMeta(
1636
1981
 
1637
1982
  yinka_embedding = ModelMeta(
1638
1983
  name="Classical/Yinka",
1984
+ model_type=["dense"],
1639
1985
  revision="59c79d82eb5223cd9895f6eb8e825c7fa10e4e92",
1640
1986
  release_date="2024-01-09",
1641
1987
  languages=["zho-Hans"],
@@ -1648,7 +1994,7 @@ yinka_embedding = ModelMeta(
1648
1994
  open_weights=True,
1649
1995
  public_training_code=None,
1650
1996
  public_training_data=None,
1651
- framework=["PyTorch", "Sentence Transformers"],
1997
+ framework=["PyTorch", "Sentence Transformers", "Transformers"],
1652
1998
  reference="https://huggingface.co/Classical/Yinka",
1653
1999
  similarity_fn_name=ScoringFunction.COSINE,
1654
2000
  use_instructions=None,
@@ -1658,6 +2004,7 @@ yinka_embedding = ModelMeta(
1658
2004
  )
1659
2005
  conan_embedding = ModelMeta(
1660
2006
  name="TencentBAC/Conan-embedding-v1",
2007
+ model_type=["dense"],
1661
2008
  revision="bb9749a57d4f02fd71722386f8d0f5a9398d7eeb",
1662
2009
  release_date="2024-08-22",
1663
2010
  languages=["zho-Hans"],
@@ -1670,18 +2017,28 @@ conan_embedding = ModelMeta(
1670
2017
  open_weights=True,
1671
2018
  public_training_code=None,
1672
2019
  public_training_data=None,
1673
- framework=["PyTorch", "Sentence Transformers"],
2020
+ framework=["PyTorch", "Sentence Transformers", "safetensors"],
1674
2021
  reference="https://huggingface.co/Classical/Yinka",
1675
2022
  similarity_fn_name=ScoringFunction.COSINE,
1676
2023
  use_instructions=None,
1677
2024
  # source: https://arxiv.org/pdf/2408.15710
1678
2025
  training_datasets=None, # They "scraped" things from the internet, we don't know, could be leakage
1679
2026
  superseded_by=None,
2027
+ citation="""@misc{li2024conanembeddinggeneraltextembedding,
2028
+ title={Conan-embedding: General Text Embedding with More and Better Negative Samples},
2029
+ author={Shiyu Li and Yang Tang and Shizhe Chen and Xi Chen},
2030
+ year={2024},
2031
+ eprint={2408.15710},
2032
+ archivePrefix={arXiv},
2033
+ primaryClass={cs.CL},
2034
+ url={https://arxiv.org/abs/2408.15710},
2035
+ }""",
1680
2036
  )
1681
2037
 
1682
2038
  ember_v1 = ModelMeta(
1683
2039
  loader=sentence_transformers_loader,
1684
2040
  name="llmrails/ember-v1",
2041
+ model_type=["dense"],
1685
2042
  revision="5e5ce5904901f6ce1c353a95020f17f09e5d021d",
1686
2043
  release_date="2023-10-10",
1687
2044
  languages=["eng-Latn"],
@@ -1693,10 +2050,15 @@ ember_v1 = ModelMeta(
1693
2050
  open_weights=True,
1694
2051
  public_training_code=None,
1695
2052
  public_training_data=None,
1696
- framework=["PyTorch", "Sentence Transformers"],
2053
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
1697
2054
  reference="https://huggingface.co/llmrails/ember-v1",
1698
2055
  similarity_fn_name=ScoringFunction.COSINE,
1699
2056
  use_instructions=None,
1700
2057
  training_datasets=None,
1701
2058
  superseded_by=None,
2059
+ citation="""@misc{nur2024emberv1,
2060
+ title={ember-v1: SOTA embedding model},
2061
+ author={Enrike Nur and Anar Aliyev},
2062
+ year={2023},
2063
+ }""",
1702
2064
  )