mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (527) hide show
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
6
6
  parsbert = ModelMeta(
7
7
  loader=sentence_transformers_loader,
8
8
  name="HooshvareLab/bert-base-parsbert-uncased",
9
+ model_type=["dense"],
9
10
  languages=["fas-Arab"],
10
11
  open_weights=True,
11
12
  revision="d73a0e2c7492c33bd5819bcdb23eba207404dd19",
@@ -17,7 +18,7 @@ parsbert = ModelMeta(
17
18
  max_tokens=512,
18
19
  reference="https://huggingface.co/HooshvareLab/bert-base-parsbert-uncased",
19
20
  similarity_fn_name=ScoringFunction.COSINE,
20
- framework=["Sentence Transformers", "PyTorch"],
21
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
21
22
  use_instructions=False,
22
23
  public_training_code=None,
23
24
  public_training_data=None,
@@ -41,6 +42,7 @@ parsbert = ModelMeta(
41
42
  bert_zwnj = ModelMeta(
42
43
  loader=sentence_transformers_loader,
43
44
  name="m3hrdadfi/bert-zwnj-wnli-mean-tokens",
45
+ model_type=["dense"],
44
46
  languages=["fas-Arab"],
45
47
  open_weights=True,
46
48
  revision="b9506ddc579ac8c398ae6dae680401ae0a1a5b23",
@@ -52,7 +54,7 @@ bert_zwnj = ModelMeta(
52
54
  max_tokens=512,
53
55
  reference="https://huggingface.co/m3hrdadfi/bert-zwnj-wnli-mean-tokens",
54
56
  similarity_fn_name=ScoringFunction.COSINE,
55
- framework=["Sentence Transformers", "PyTorch"],
57
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
56
58
  use_instructions=False,
57
59
  public_training_code=None,
58
60
  public_training_data=None,
@@ -66,6 +68,7 @@ bert_zwnj = ModelMeta(
66
68
  roberta_zwnj = ModelMeta(
67
69
  loader=sentence_transformers_loader,
68
70
  name="m3hrdadfi/roberta-zwnj-wnli-mean-tokens",
71
+ model_type=["dense"],
69
72
  languages=["fas-Arab"],
70
73
  open_weights=True,
71
74
  revision="36f912ac44e22250aee16ea533a4ff8cd848c1a1",
@@ -77,7 +80,7 @@ roberta_zwnj = ModelMeta(
77
80
  max_tokens=514,
78
81
  reference="https://huggingface.co/m3hrdadfi/roberta-zwnj-wnli-mean-tokens",
79
82
  similarity_fn_name=ScoringFunction.COSINE,
80
- framework=["Sentence Transformers", "PyTorch"],
83
+ framework=["Sentence Transformers", "PyTorch", "Transformers"],
81
84
  use_instructions=False,
82
85
  public_training_code=None,
83
86
  public_training_data=None,
@@ -90,6 +93,7 @@ roberta_zwnj = ModelMeta(
90
93
  sentence_transformer_parsbert = ModelMeta(
91
94
  loader=sentence_transformers_loader,
92
95
  name="myrkur/sentence-transformer-parsbert-fa",
96
+ model_type=["dense"],
93
97
  languages=["fas-Arab"],
94
98
  open_weights=True,
95
99
  revision="72bd0a3557622f0ae08a092f4643609e0b950cdd",
@@ -101,7 +105,7 @@ sentence_transformer_parsbert = ModelMeta(
101
105
  max_tokens=512,
102
106
  reference="https://huggingface.co/myrkur/sentence-transformer-parsbert-fa",
103
107
  similarity_fn_name=ScoringFunction.COSINE,
104
- framework=["Sentence Transformers", "PyTorch"],
108
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
105
109
  use_instructions=False,
106
110
  public_training_code=None,
107
111
  public_training_data=None,
@@ -125,7 +129,7 @@ tooka_bert_base = ModelMeta(
125
129
  max_tokens=512,
126
130
  reference="https://huggingface.co/PartAI/TookaBERT-Base",
127
131
  similarity_fn_name=ScoringFunction.COSINE,
128
- framework=["Sentence Transformers", "PyTorch"],
132
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
129
133
  use_instructions=False,
130
134
  public_training_code=None,
131
135
  public_training_data=None,
@@ -140,6 +144,7 @@ tooka_bert_base = ModelMeta(
140
144
  tooka_sbert = ModelMeta(
141
145
  loader=sentence_transformers_loader,
142
146
  name="PartAI/Tooka-SBERT",
147
+ model_type=["dense"],
143
148
  languages=["fas-Arab"],
144
149
  open_weights=True,
145
150
  revision="5d07f0c543aca654373b931ae07cd197769110fd",
@@ -151,16 +156,26 @@ tooka_sbert = ModelMeta(
151
156
  max_tokens=512,
152
157
  reference="https://huggingface.co/PartAI/Tooka-SBERT",
153
158
  similarity_fn_name=ScoringFunction.COSINE,
154
- framework=["Sentence Transformers", "PyTorch"],
159
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
155
160
  use_instructions=False,
156
161
  public_training_code=None,
157
162
  public_training_data=None,
158
163
  training_datasets=None,
164
+ citation="""@inproceedings{reimers-2019-sentence-bert,
165
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
166
+ author = "Reimers, Nils and Gurevych, Iryna",
167
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
168
+ month = "11",
169
+ year = "2019",
170
+ publisher = "Association for Computational Linguistics",
171
+ url = "https://arxiv.org/abs/1908.10084",
172
+ }""",
159
173
  )
160
174
 
161
175
  fa_bert = ModelMeta(
162
176
  loader=sentence_transformers_loader,
163
177
  name="sbunlp/fabert",
178
+ model_type=["dense"],
164
179
  languages=["fas-Arab"],
165
180
  open_weights=True,
166
181
  revision="a0e3973064c97768e121b9b95f21adc94e0ca3fb",
@@ -172,7 +187,7 @@ fa_bert = ModelMeta(
172
187
  max_tokens=512,
173
188
  reference="https://huggingface.co/sbunlp/fabert",
174
189
  similarity_fn_name=ScoringFunction.COSINE,
175
- framework=["Sentence Transformers", "PyTorch"],
190
+ framework=["Sentence Transformers", "PyTorch", "Transformers", "safetensors"],
176
191
  use_instructions=False,
177
192
  public_training_code=None,
178
193
  public_training_data=None,
@@ -180,11 +195,35 @@ fa_bert = ModelMeta(
180
195
  # It's just a base model
181
196
  # https://huggingface.co/datasets/sbunlp/hmblogs-v3
182
197
  ),
198
+ citation="""@inproceedings{masumi-etal-2025-fabert,
199
+ title = "{F}a{BERT}: Pre-training {BERT} on {P}ersian Blogs",
200
+ author = "Masumi, Mostafa and
201
+ Majd, Seyed Soroush and
202
+ Shamsfard, Mehrnoush and
203
+ Beigy, Hamid",
204
+ editor = "Bak, JinYeong and
205
+ Goot, Rob van der and
206
+ Jang, Hyeju and
207
+ Buaphet, Weerayut and
208
+ Ramponi, Alan and
209
+ Xu, Wei and
210
+ Ritter, Alan",
211
+ booktitle = "Proceedings of the Tenth Workshop on Noisy and User-generated Text",
212
+ month = may,
213
+ year = "2025",
214
+ address = "Albuquerque, New Mexico, USA",
215
+ publisher = "Association for Computational Linguistics",
216
+ url = "https://aclanthology.org/2025.wnut-1.10/",
217
+ doi = "10.18653/v1/2025.wnut-1.10",
218
+ pages = "85--96",
219
+ ISBN = "979-8-89176-232-9",
220
+ }""",
183
221
  )
184
222
 
185
223
  tooka_sbert_v2_small = ModelMeta(
186
224
  loader=sentence_transformers_loader,
187
225
  name="PartAI/Tooka-SBERT-V2-Small",
226
+ model_type=["dense"],
188
227
  languages=["fas-Arab"],
189
228
  open_weights=True,
190
229
  revision="8bbed87e36669387f71437c061430ba56d1b496f",
@@ -196,16 +235,26 @@ tooka_sbert_v2_small = ModelMeta(
196
235
  max_tokens=512,
197
236
  reference="https://huggingface.co/PartAI/Tooka-SBERT-V2-Small",
198
237
  similarity_fn_name="cosine",
199
- framework=["Sentence Transformers", "PyTorch"],
238
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
200
239
  use_instructions=False,
201
240
  public_training_code=None,
202
241
  public_training_data=None,
203
242
  training_datasets=None,
243
+ citation="""@inproceedings{reimers-2019-sentence-bert,
244
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
245
+ author = "Reimers, Nils and Gurevych, Iryna",
246
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
247
+ month = "11",
248
+ year = "2019",
249
+ publisher = "Association for Computational Linguistics",
250
+ url = "https://arxiv.org/abs/1908.10084",
251
+ }""",
204
252
  )
205
253
 
206
254
  tooka_sbert_v2_large = ModelMeta(
207
255
  loader=sentence_transformers_loader,
208
256
  name="PartAI/Tooka-SBERT-V2-Large",
257
+ model_type=["dense"],
209
258
  languages=["fas-Arab"],
210
259
  open_weights=True,
211
260
  revision="b59682efa961122cc0e4408296d5852870c82eae",
@@ -217,9 +266,18 @@ tooka_sbert_v2_large = ModelMeta(
217
266
  max_tokens=512,
218
267
  reference="https://huggingface.co/PartAI/Tooka-SBERT-V2-Large",
219
268
  similarity_fn_name="cosine",
220
- framework=["Sentence Transformers", "PyTorch"],
269
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
221
270
  use_instructions=False,
222
271
  public_training_code=None,
223
272
  public_training_data=None,
224
273
  training_datasets=None,
274
+ citation="""@inproceedings{reimers-2019-sentence-bert,
275
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
276
+ author = "Reimers, Nils and Gurevych, Iryna",
277
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
278
+ month = "11",
279
+ year = "2019",
280
+ publisher = "Association for Computational Linguistics",
281
+ url = "https://arxiv.org/abs/1908.10084",
282
+ }""",
225
283
  )
@@ -0,0 +1,205 @@
1
+ from mteb.models import sentence_transformers_loader
2
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
+
4
+ XLMR_LANGUAGES = [
5
+ "afr-Latn",
6
+ "amh-Latn",
7
+ "ara-Latn",
8
+ "asm-Latn",
9
+ "aze-Latn",
10
+ "bel-Latn",
11
+ "bul-Latn",
12
+ "ben-Latn",
13
+ "ben-Beng",
14
+ "bre-Latn",
15
+ "bos-Latn",
16
+ "cat-Latn",
17
+ "ces-Latn",
18
+ "cym-Latn",
19
+ "dan-Latn",
20
+ "deu-Latn",
21
+ "ell-Latn",
22
+ "eng-Latn",
23
+ "epo-Latn",
24
+ "spa-Latn",
25
+ "est-Latn",
26
+ "eus-Latn",
27
+ "fas-Latn",
28
+ "fin-Latn",
29
+ "fra-Latn",
30
+ "fry-Latn",
31
+ "gle-Latn",
32
+ "gla-Latn",
33
+ "glg-Latn",
34
+ "guj-Latn",
35
+ "hau-Latn",
36
+ "heb-Latn",
37
+ "hin-Latn",
38
+ "hin-Deva",
39
+ "hrv-Latn",
40
+ "hun-Latn",
41
+ "hye-Latn",
42
+ "ind-Latn",
43
+ "isl-Latn",
44
+ "ita-Latn",
45
+ "jpn-Latn",
46
+ "jav-Latn",
47
+ "kat-Latn",
48
+ "kaz-Latn",
49
+ "khm-Latn",
50
+ "kan-Latn",
51
+ "kor-Latn",
52
+ "kur-Latn",
53
+ "kir-Latn",
54
+ "lat-Latn",
55
+ "lao-Latn",
56
+ "lit-Latn",
57
+ "lav-Latn",
58
+ "mlg-Latn",
59
+ "mkd-Latn",
60
+ "mal-Latn",
61
+ "mon-Latn",
62
+ "mar-Latn",
63
+ "msa-Latn",
64
+ "mya-Latn",
65
+ "nep-Latn",
66
+ "nld-Latn",
67
+ "nob-Latn",
68
+ "orm-Latn",
69
+ "ori-Latn",
70
+ "pan-Latn",
71
+ "pol-Latn",
72
+ "pus-Latn",
73
+ "por-Latn",
74
+ "ron-Latn",
75
+ "rus-Latn",
76
+ "san-Latn",
77
+ "snd-Latn",
78
+ "sin-Latn",
79
+ "slk-Latn",
80
+ "slv-Latn",
81
+ "som-Latn",
82
+ "sqi-Latn",
83
+ "srp-Latn",
84
+ "sun-Latn",
85
+ "swe-Latn",
86
+ "swa-Latn",
87
+ "tam-Latn",
88
+ "tam-Taml",
89
+ "tel-Latn",
90
+ "tel-Telu",
91
+ "tha-Latn",
92
+ "tgl-Latn",
93
+ "tur-Latn",
94
+ "uig-Latn",
95
+ "ukr-Latn",
96
+ "urd-Latn",
97
+ "urd-Arab",
98
+ "uzb-Latn",
99
+ "vie-Latn",
100
+ "xho-Latn",
101
+ "yid-Latn",
102
+ "zho-Hant",
103
+ "zho-Hans",
104
+ ]
105
+
106
+
107
+ xlmr_base = ModelMeta(
108
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
109
+ name="FacebookAI/xlm-roberta-base",
110
+ model_type=["dense"],
111
+ languages=XLMR_LANGUAGES,
112
+ open_weights=True,
113
+ revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
114
+ release_date="2019-11-05", # arxiv paper release
115
+ n_parameters=278043648,
116
+ memory_usage_mb=1064,
117
+ embed_dim=768,
118
+ license="mit",
119
+ max_tokens=512,
120
+ reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
121
+ similarity_fn_name=ScoringFunction.COSINE,
122
+ framework=[
123
+ "Sentence Transformers",
124
+ "PyTorch",
125
+ "Transformers",
126
+ "ONNX",
127
+ "safetensors",
128
+ ],
129
+ use_instructions=False,
130
+ public_training_code=None,
131
+ public_training_data=None,
132
+ training_datasets=set(),
133
+ citation="""@article{DBLP:journals/corr/abs-1911-02116,
134
+ author = {Alexis Conneau and
135
+ Kartikay Khandelwal and
136
+ Naman Goyal and
137
+ Vishrav Chaudhary and
138
+ Guillaume Wenzek and
139
+ Francisco Guzm{\'{a}}n and
140
+ Edouard Grave and
141
+ Myle Ott and
142
+ Luke Zettlemoyer and
143
+ Veselin Stoyanov},
144
+ title = {Unsupervised Cross-lingual Representation Learning at Scale},
145
+ journal = {CoRR},
146
+ volume = {abs/1911.02116},
147
+ year = {2019},
148
+ url = {http://arxiv.org/abs/1911.02116},
149
+ eprinttype = {arXiv},
150
+ eprint = {1911.02116},
151
+ timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
152
+ biburl = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
153
+ bibsource = {dblp computer science bibliography, https://dblp.org}
154
+ }""",
155
+ )
156
+
157
+ xlmr_large = ModelMeta(
158
+ loader=sentence_transformers_loader, # type: ignore[arg-type]
159
+ name="FacebookAI/xlm-roberta-large",
160
+ model_type=["dense"],
161
+ languages=XLMR_LANGUAGES,
162
+ open_weights=True,
163
+ revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
164
+ release_date="2019-11-05", # arxiv paper release
165
+ n_parameters=559890432,
166
+ memory_usage_mb=2141,
167
+ embed_dim=1024,
168
+ license="mit",
169
+ max_tokens=512,
170
+ reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
171
+ similarity_fn_name=ScoringFunction.COSINE,
172
+ framework=[
173
+ "Sentence Transformers",
174
+ "PyTorch",
175
+ "Transformers",
176
+ "ONNX",
177
+ "safetensors",
178
+ ],
179
+ use_instructions=False,
180
+ public_training_code=None,
181
+ public_training_data=None,
182
+ training_datasets=set(),
183
+ citation="""@article{DBLP:journals/corr/abs-1911-02116,
184
+ author = {Alexis Conneau and
185
+ Kartikay Khandelwal and
186
+ Naman Goyal and
187
+ Vishrav Chaudhary and
188
+ Guillaume Wenzek and
189
+ Francisco Guzm{\'{a}}n and
190
+ Edouard Grave and
191
+ Myle Ott and
192
+ Luke Zettlemoyer and
193
+ Veselin Stoyanov},
194
+ title = {Unsupervised Cross-lingual Representation Learning at Scale},
195
+ journal = {CoRR},
196
+ volume = {abs/1911.02116},
197
+ year = {2019},
198
+ url = {http://arxiv.org/abs/1911.02116},
199
+ eprinttype = {arXiv},
200
+ eprint = {1911.02116},
201
+ timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
202
+ biburl = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
203
+ bibsource = {dblp computer science bibliography, https://dblp.org}
204
+ }""",
205
+ )
@@ -7,6 +7,7 @@ from mteb.models.model_meta import ModelMeta
7
7
 
8
8
  geoembedding = ModelMeta(
9
9
  name="GeoGPT-Research-Project/GeoEmbedding",
10
+ model_type=["dense"],
10
11
  languages=["eng-Latn"],
11
12
  open_weights=True,
12
13
  revision="29803c28ea7ef6871194a8ebc85ad7bfe174928e",
@@ -25,7 +26,7 @@ geoembedding = ModelMeta(
25
26
  max_tokens=32768,
26
27
  reference="https://huggingface.co/GeoGPT-Research-Project/GeoEmbedding",
27
28
  similarity_fn_name="cosine",
28
- framework=["Sentence Transformers", "PyTorch"],
29
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
29
30
  use_instructions=True,
30
31
  public_training_code=None,
31
32
  public_training_data=None,
@@ -1,9 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import math
3
- from typing import Any
5
+ import warnings
6
+ from typing import TYPE_CHECKING, Any
4
7
 
5
8
  import torch
6
- from PIL import Image
7
9
  from torch.utils.data import DataLoader
8
10
  from tqdm.autonotebook import tqdm
9
11
 
@@ -12,6 +14,9 @@ from mteb.models.abs_encoder import AbsEncoder
12
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
13
15
  from mteb.types import Array, BatchedInput, PromptType
14
16
 
17
+ if TYPE_CHECKING:
18
+ from PIL import Image
19
+
15
20
  logger = logging.getLogger(__name__)
16
21
 
17
22
  GME_CITATION = """@misc{zhang2024gme,
@@ -257,9 +262,9 @@ def smart_resize(
257
262
  w_bar = ceil_by_factor(width * beta, factor)
258
263
 
259
264
  if max(h_bar, w_bar) / min(h_bar, w_bar) > MAX_RATIO:
260
- logger.warning(
261
- f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
262
- )
265
+ msg = f"Absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(h_bar, w_bar) / min(h_bar, w_bar)}"
266
+ logger.warning(msg)
267
+ warnings.warn(msg)
263
268
  if h_bar > w_bar:
264
269
  h_bar = w_bar * MAX_RATIO
265
270
  else:
@@ -267,9 +272,9 @@ def smart_resize(
267
272
  return h_bar, w_bar
268
273
 
269
274
 
270
- def fetch_image(
271
- image: str | Image.Image, size_factor: int = IMAGE_FACTOR
272
- ) -> Image.Image:
275
+ def fetch_image(image: Image.Image, size_factor: int = IMAGE_FACTOR) -> Image.Image:
276
+ from PIL import Image
277
+
273
278
  image_obj = None
274
279
  if isinstance(image, Image.Image):
275
280
  image_obj = image
@@ -342,6 +347,7 @@ training_data = {
342
347
  gme_qwen2vl_2b = ModelMeta(
343
348
  loader=GmeQwen2VL,
344
349
  name="Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
350
+ model_type=["dense"],
345
351
  languages=["eng-Latn", "cmn-Hans"],
346
352
  open_weights=True,
347
353
  revision="ce765ae71b8cdb208203cd8fb64a170b1b84293a",
@@ -354,7 +360,7 @@ gme_qwen2vl_2b = ModelMeta(
354
360
  max_tokens=32768,
355
361
  reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
356
362
  similarity_fn_name=ScoringFunction.COSINE,
357
- framework=["PyTorch"],
363
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
358
364
  use_instructions=True,
359
365
  public_training_code=None,
360
366
  public_training_data=None,
@@ -365,6 +371,7 @@ gme_qwen2vl_2b = ModelMeta(
365
371
  gme_qwen2vl_7b = ModelMeta(
366
372
  loader=GmeQwen2VL,
367
373
  name="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
374
+ model_type=["dense"],
368
375
  languages=["eng-Latn", "cmn-Hans"],
369
376
  open_weights=True,
370
377
  revision="477027a6480f8630363be77751f169cc3434b673",
@@ -377,7 +384,7 @@ gme_qwen2vl_7b = ModelMeta(
377
384
  max_tokens=32768,
378
385
  reference="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
379
386
  similarity_fn_name=ScoringFunction.COSINE,
380
- framework=["PyTorch"],
387
+ framework=["PyTorch", "Sentence Transformers", "safetensors", "Transformers"],
381
388
  use_instructions=True,
382
389
  public_training_code=None,
383
390
  public_training_data=None,
@@ -147,10 +147,10 @@ class GoogleTextEmbeddingModel(AbsEncoder):
147
147
  google_text_emb_004 = ModelMeta(
148
148
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
149
149
  loader_kwargs=dict(
150
- model_name="text-embedding-004",
151
150
  model_prompts=MODEL_PROMPTS,
152
151
  ),
153
152
  name="google/text-embedding-004",
153
+ model_type=["dense"],
154
154
  languages=["eng-Latn"],
155
155
  open_weights=False,
156
156
  revision="1", # revision is intended for implementation
@@ -172,10 +172,10 @@ google_text_emb_004 = ModelMeta(
172
172
  google_text_emb_005 = ModelMeta(
173
173
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
174
174
  loader_kwargs=dict(
175
- model_name="text-embedding-005",
176
175
  model_prompts=MODEL_PROMPTS,
177
176
  ),
178
177
  name="google/text-embedding-005",
178
+ model_type=["dense"],
179
179
  languages=["eng-Latn"],
180
180
  open_weights=False,
181
181
  revision="1", # revision is intended for implementation
@@ -197,10 +197,10 @@ google_text_emb_005 = ModelMeta(
197
197
  google_text_multilingual_emb_002 = ModelMeta(
198
198
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
199
199
  loader_kwargs=dict(
200
- model_name="text-embedding-002",
201
200
  model_prompts=MODEL_PROMPTS,
202
201
  ),
203
202
  name="google/text-multilingual-embedding-002",
203
+ model_type=["dense"],
204
204
  languages=MULTILINGUAL_EVALUATED_LANGUAGES, # From the list of evaluated languages in https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#supported_text_languages
205
205
  open_weights=False,
206
206
  revision="1",
@@ -222,10 +222,10 @@ google_text_multilingual_emb_002 = ModelMeta(
222
222
  google_gemini_embedding_001 = ModelMeta(
223
223
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
224
224
  loader_kwargs=dict(
225
- model_name="gemini-embedding-001",
226
225
  model_prompts=MODEL_PROMPTS,
227
226
  ),
228
227
  name="google/gemini-embedding-001",
228
+ model_type=["dense"],
229
229
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
230
230
  open_weights=False,
231
231
  revision="1",
@@ -260,6 +260,7 @@ def gemma_embedding_loader(model_name: str, revision: str, **kwargs):
260
260
  embedding_gemma_300m = ModelMeta(
261
261
  loader=gemma_embedding_loader,
262
262
  name="google/embeddinggemma-300m",
263
+ model_type=["dense"],
263
264
  languages=MULTILINGUAL_EVALUATED_LANGUAGES,
264
265
  open_weights=True,
265
266
  revision="64614b0b8b64f0c6c1e52b07e4e9a4e8fe4d2da2",
@@ -269,11 +270,21 @@ embedding_gemma_300m = ModelMeta(
269
270
  max_tokens=2048,
270
271
  license="gemma",
271
272
  reference="https://ai.google.dev/gemma/docs/embeddinggemma/model_card",
272
- framework=["Sentence Transformers", "PyTorch"],
273
+ framework=["Sentence Transformers", "PyTorch", "safetensors"],
273
274
  use_instructions=True,
274
275
  public_training_code=None,
275
276
  public_training_data=None,
276
277
  training_datasets=GECKO_TRAINING_DATA,
277
278
  similarity_fn_name="cosine",
278
- memory_usage_mb=578,
279
+ memory_usage_mb=1155,
280
+ citation="""
281
+ @misc{vera2025embeddinggemmapowerfullightweighttext,
282
+ title={EmbeddingGemma: Powerful and Lightweight Text Representations},
283
+ author={Henrique Schechter Vera and Sahil Dua and Biao Zhang and Daniel Salz and Ryan Mullins and Sindhu Raghuram Panyam and Sara Smoot and Iftekhar Naim and Joe Zou and Feiyang Chen and Daniel Cer and Alice Lisak and Min Choi and Lucas Gonzalez and Omar Sanseviero and Glenn Cameron and Ian Ballantyne and Kat Black and Kaifeng Chen and Weiyi Wang and Zhe Li and Gus Martins and Jinhyuk Lee and Mark Sherwood and Juyeong Ji and Renjie Wu and Jingxiao Zheng and Jyotinder Singh and Abheesht Sharma and Divyashree Sreepathihalli and Aashi Jain and Adham Elarabawy and AJ Co and Andreas Doumanoglou and Babak Samari and Ben Hora and Brian Potetz and Dahun Kim and Enrique Alfonseca and Fedor Moiseev and Feng Han and Frank Palma Gomez and Gustavo Hernández Ábrego and Hesen Zhang and Hui Hui and Jay Han and Karan Gill and Ke Chen and Koert Chen and Madhuri Shanbhogue and Michael Boratko and Paul Suganthan and Sai Meher Karthik Duddu and Sandeep Mariserla and Setareh Ariafar and Shanfeng Zhang and Shijie Zhang and Simon Baumgartner and Sonam Goenka and Steve Qiu and Tanmaya Dabral and Trevor Walker and Vikram Rao and Waleed Khawaja and Wenlei Zhou and Xiaoqi Ren and Ye Xia and Yichang Chen and Yi-Ting Chen and Zhe Dong and Zhongli Ding and Francesco Visin and Gaël Liu and Jiageng Zhang and Kathleen Kenealy and Michelle Casbon and Ravin Kumar and Thomas Mesnard and Zach Gleicher and Cormac Brick and Olivier Lacombe and Adam Roberts and Qin Yin and Yunhsuan Sung and Raphael Hoffmann and Tris Warkentin and Armand Joulin and Tom Duerig and Mojtaba Seyedhosseini},
284
+ year={2025},
285
+ eprint={2509.20354},
286
+ archivePrefix={arXiv},
287
+ primaryClass={cs.CL},
288
+ url={https://arxiv.org/abs/2509.20354},
289
+ }""",
279
290
  )
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from PIL import Image
6
7
  from torch.utils.data import DataLoader
7
8
  from tqdm.auto import tqdm
8
9
 
@@ -15,6 +16,9 @@ from mteb.types import Array, BatchedInput, PromptType
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
19
+ if TYPE_CHECKING:
20
+ from PIL import Image
21
+
18
22
 
19
23
  class GraniteVisionEmbeddingWrapper:
20
24
  def __init__(
@@ -162,6 +166,7 @@ granite_vision_embedding = ModelMeta(
162
166
  torch_dtype=torch.float16,
163
167
  ),
164
168
  name="ibm-granite/granite-vision-3.3-2b-embedding",
169
+ model_type=["dense"],
165
170
  languages=["eng-Latn"],
166
171
  revision="cee615db64d89d1552a4ee39c50f25c0fc5c66ca",
167
172
  release_date="2025-06-11",
@@ -174,7 +179,7 @@ granite_vision_embedding = ModelMeta(
174
179
  open_weights=True,
175
180
  public_training_code=None,
176
181
  public_training_data=None,
177
- framework=["PyTorch"],
182
+ framework=["PyTorch", "Transformers", "safetensors"],
178
183
  reference="https://huggingface.co/ibm-granite/granite-vision-3.3-2b-embedding",
179
184
  similarity_fn_name="MaxSim",
180
185
  use_instructions=True,
@@ -38,6 +38,7 @@ gritlm7b = ModelMeta(
38
38
  torch_dtype="auto",
39
39
  ),
40
40
  name="GritLM/GritLM-7B",
41
+ model_type=["dense"],
41
42
  languages=["eng-Latn", "fra-Latn", "deu-Latn", "ita-Latn", "spa-Latn"],
42
43
  open_weights=True,
43
44
  revision="13f00a0e36500c80ce12870ea513846a066004af",
@@ -49,7 +50,7 @@ gritlm7b = ModelMeta(
49
50
  max_tokens=32768,
50
51
  reference="https://huggingface.co/GritLM/GritLM-7B",
51
52
  similarity_fn_name=ScoringFunction.COSINE,
52
- framework=["GritLM", "PyTorch"],
53
+ framework=["GritLM", "PyTorch", "Transformers", "safetensors"],
53
54
  use_instructions=True,
54
55
  training_datasets=GRIT_LM_TRAINING_DATA,
55
56
  # section 3.1 "We finetune our final models from Mistral 7B [68] and Mixtral 8x7B [69] using adaptations of E5 [160] and the Tülu 2 data
@@ -66,6 +67,7 @@ gritlm8x7b = ModelMeta(
66
67
  torch_dtype="auto",
67
68
  ),
68
69
  name="GritLM/GritLM-8x7B",
70
+ model_type=["dense"],
69
71
  languages=["eng-Latn", "fra-Latn", "deu-Latn", "ita-Latn", "spa-Latn"],
70
72
  open_weights=True,
71
73
  revision="7f089b13e3345510281733ca1e6ff871b5b4bc76",
@@ -77,7 +79,7 @@ gritlm8x7b = ModelMeta(
77
79
  max_tokens=32768,
78
80
  reference="https://huggingface.co/GritLM/GritLM-8x7B",
79
81
  similarity_fn_name=ScoringFunction.COSINE,
80
- framework=["GritLM", "PyTorch"],
82
+ framework=["GritLM", "PyTorch", "Transformers", "safetensors"],
81
83
  use_instructions=True,
82
84
  training_datasets=GRIT_LM_TRAINING_DATA,
83
85
  citation=GRITLM_CITATION,