mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (527):
  1. mteb/__init__.py +6 -0
  2. mteb/_create_dataloaders.py +22 -20
  3. mteb/_evaluators/any_sts_evaluator.py +23 -14
  4. mteb/_evaluators/classification_metrics.py +54 -0
  5. mteb/_evaluators/clustering_evaluator.py +3 -3
  6. mteb/_evaluators/evaluator.py +4 -2
  7. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
  8. mteb/_evaluators/pair_classification_evaluator.py +34 -40
  9. mteb/_evaluators/retrieval_evaluator.py +2 -2
  10. mteb/_evaluators/retrieval_metrics.py +18 -17
  11. mteb/_evaluators/sklearn_evaluator.py +25 -37
  12. mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
  13. mteb/_evaluators/text/summarization_evaluator.py +27 -20
  14. mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
  15. mteb/abstasks/_data_filter/__init__.py +0 -0
  16. mteb/abstasks/_data_filter/filters.py +125 -0
  17. mteb/abstasks/_data_filter/task_pipelines.py +105 -0
  18. mteb/abstasks/_statistics_calculation.py +23 -11
  19. mteb/abstasks/_stratification.py +18 -18
  20. mteb/abstasks/abstask.py +35 -28
  21. mteb/abstasks/aggregate_task_metadata.py +1 -9
  22. mteb/abstasks/aggregated_task.py +10 -29
  23. mteb/abstasks/classification.py +15 -12
  24. mteb/abstasks/clustering.py +20 -16
  25. mteb/abstasks/clustering_legacy.py +13 -10
  26. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  27. mteb/abstasks/multilabel_classification.py +33 -22
  28. mteb/abstasks/pair_classification.py +27 -11
  29. mteb/abstasks/regression.py +4 -4
  30. mteb/abstasks/retrieval.py +28 -24
  31. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  32. mteb/abstasks/sts.py +14 -4
  33. mteb/abstasks/task_metadata.py +32 -33
  34. mteb/abstasks/text/bitext_mining.py +39 -28
  35. mteb/abstasks/text/reranking.py +8 -6
  36. mteb/abstasks/text/summarization.py +10 -5
  37. mteb/abstasks/zeroshot_classification.py +8 -4
  38. mteb/benchmarks/_create_table.py +84 -37
  39. mteb/benchmarks/benchmark.py +77 -16
  40. mteb/benchmarks/benchmarks/__init__.py +12 -0
  41. mteb/benchmarks/benchmarks/benchmarks.py +361 -16
  42. mteb/benchmarks/get_benchmark.py +14 -53
  43. mteb/cache.py +227 -37
  44. mteb/cli/_display_tasks.py +2 -2
  45. mteb/cli/build_cli.py +110 -14
  46. mteb/cli/generate_model_card.py +43 -23
  47. mteb/deprecated_evaluator.py +71 -62
  48. mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
  49. mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
  50. mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  54. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  55. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  56. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  57. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  58. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  59. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  60. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  61. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  62. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  63. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  64. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  65. mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
  66. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  67. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  68. mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
  69. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  72. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  73. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  74. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  75. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  76. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  77. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  78. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  79. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  80. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  81. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  82. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  83. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  84. mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
  85. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  86. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  87. mteb/evaluate.py +106 -75
  88. mteb/filter_tasks.py +25 -26
  89. mteb/get_tasks.py +29 -30
  90. mteb/languages/language_scripts.py +5 -3
  91. mteb/leaderboard/app.py +414 -151
  92. mteb/leaderboard/benchmark_selector.py +14 -5
  93. mteb/leaderboard/figures.py +13 -15
  94. mteb/leaderboard/table.py +82 -17
  95. mteb/load_results.py +12 -12
  96. mteb/models/__init__.py +4 -1
  97. mteb/models/abs_encoder.py +31 -23
  98. mteb/models/cache_wrappers/__init__.py +2 -1
  99. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  100. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
  101. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  102. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  103. mteb/models/cache_wrappers/cache_wrapper.py +3 -3
  104. mteb/models/get_model_meta.py +25 -118
  105. mteb/models/instruct_wrapper.py +33 -9
  106. mteb/models/model_implementations/align_models.py +8 -1
  107. mteb/models/model_implementations/amazon_models.py +1 -0
  108. mteb/models/model_implementations/andersborges.py +65 -0
  109. mteb/models/model_implementations/ara_models.py +9 -1
  110. mteb/models/model_implementations/arctic_models.py +16 -8
  111. mteb/models/model_implementations/b1ade_models.py +2 -1
  112. mteb/models/model_implementations/bedrock_models.py +4 -0
  113. mteb/models/model_implementations/bge_models.py +101 -17
  114. mteb/models/model_implementations/bica_model.py +35 -0
  115. mteb/models/model_implementations/blip2_models.py +13 -2
  116. mteb/models/model_implementations/blip_models.py +43 -16
  117. mteb/models/model_implementations/bm25.py +5 -4
  118. mteb/models/model_implementations/bmretriever_models.py +10 -4
  119. mteb/models/model_implementations/cadet_models.py +10 -1
  120. mteb/models/model_implementations/cde_models.py +25 -4
  121. mteb/models/model_implementations/clip_models.py +9 -6
  122. mteb/models/model_implementations/clips_models.py +100 -0
  123. mteb/models/model_implementations/codefuse_models.py +165 -3
  124. mteb/models/model_implementations/codesage_models.py +18 -3
  125. mteb/models/model_implementations/cohere_models.py +13 -6
  126. mteb/models/model_implementations/cohere_v.py +7 -2
  127. mteb/models/model_implementations/colpali_models.py +17 -9
  128. mteb/models/model_implementations/colqwen_models.py +275 -5
  129. mteb/models/model_implementations/colsmol_models.py +4 -2
  130. mteb/models/model_implementations/conan_models.py +2 -1
  131. mteb/models/model_implementations/dino_models.py +194 -23
  132. mteb/models/model_implementations/e5_instruct.py +27 -4
  133. mteb/models/model_implementations/e5_models.py +21 -110
  134. mteb/models/model_implementations/e5_v.py +7 -6
  135. mteb/models/model_implementations/eagerworks_models.py +164 -0
  136. mteb/models/model_implementations/emillykkejensen_models.py +91 -0
  137. mteb/models/model_implementations/en_code_retriever.py +2 -1
  138. mteb/models/model_implementations/euler_models.py +32 -0
  139. mteb/models/model_implementations/evaclip_models.py +4 -0
  140. mteb/models/model_implementations/fa_models.py +67 -9
  141. mteb/models/model_implementations/facebookai.py +205 -0
  142. mteb/models/model_implementations/geogpt_models.py +2 -1
  143. mteb/models/model_implementations/gme_v_models.py +17 -10
  144. mteb/models/model_implementations/google_models.py +17 -6
  145. mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
  146. mteb/models/model_implementations/gritlm_models.py +4 -2
  147. mteb/models/model_implementations/gte_models.py +99 -9
  148. mteb/models/model_implementations/hinvec_models.py +2 -1
  149. mteb/models/model_implementations/human.py +1 -0
  150. mteb/models/model_implementations/ibm_granite_models.py +36 -6
  151. mteb/models/model_implementations/inf_models.py +4 -2
  152. mteb/models/model_implementations/jasper_models.py +256 -3
  153. mteb/models/model_implementations/jina_clip.py +49 -10
  154. mteb/models/model_implementations/jina_models.py +222 -11
  155. mteb/models/model_implementations/kalm_models.py +203 -25
  156. mteb/models/model_implementations/kblab.py +37 -0
  157. mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
  158. mteb/models/model_implementations/kfst.py +25 -0
  159. mteb/models/model_implementations/kowshik24_models.py +32 -0
  160. mteb/models/model_implementations/lens_models.py +2 -0
  161. mteb/models/model_implementations/lgai_embedding_models.py +2 -1
  162. mteb/models/model_implementations/linq_models.py +4 -3
  163. mteb/models/model_implementations/listconranker.py +2 -2
  164. mteb/models/model_implementations/llm2clip_models.py +9 -6
  165. mteb/models/model_implementations/llm2vec_models.py +16 -8
  166. mteb/models/model_implementations/mcinext_models.py +7 -1
  167. mteb/models/model_implementations/mdbr_models.py +19 -3
  168. mteb/models/model_implementations/misc_models.py +422 -60
  169. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  170. mteb/models/model_implementations/mme5_models.py +2 -1
  171. mteb/models/model_implementations/moco_models.py +15 -4
  172. mteb/models/model_implementations/mod_models.py +191 -0
  173. mteb/models/model_implementations/model2vec_models.py +27 -14
  174. mteb/models/model_implementations/moka_models.py +4 -1
  175. mteb/models/model_implementations/nbailab.py +70 -0
  176. mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
  177. mteb/models/model_implementations/nomic_models.py +173 -6
  178. mteb/models/model_implementations/nomic_models_vision.py +8 -3
  179. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
  180. mteb/models/model_implementations/nvidia_models.py +155 -20
  181. mteb/models/model_implementations/octen_models.py +254 -0
  182. mteb/models/model_implementations/openai_models.py +20 -16
  183. mteb/models/model_implementations/openclip_models.py +37 -13
  184. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
  185. mteb/models/model_implementations/ops_moa_models.py +5 -3
  186. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  187. mteb/models/model_implementations/pawan_models.py +39 -0
  188. mteb/models/model_implementations/piccolo_models.py +9 -1
  189. mteb/models/model_implementations/pixie_models.py +56 -0
  190. mteb/models/model_implementations/promptriever_models.py +12 -8
  191. mteb/models/model_implementations/pylate_models.py +46 -12
  192. mteb/models/model_implementations/qodo_models.py +4 -2
  193. mteb/models/model_implementations/qtack_models.py +2 -1
  194. mteb/models/model_implementations/qwen3_models.py +9 -6
  195. mteb/models/model_implementations/qzhou_models.py +5 -3
  196. mteb/models/model_implementations/random_baseline.py +19 -24
  197. mteb/models/model_implementations/rasgaard_models.py +34 -0
  198. mteb/models/model_implementations/reasonir_model.py +2 -1
  199. mteb/models/model_implementations/repllama_models.py +5 -3
  200. mteb/models/model_implementations/rerankers_custom.py +15 -9
  201. mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
  202. mteb/models/model_implementations/richinfoai_models.py +2 -1
  203. mteb/models/model_implementations/ru_sentence_models.py +71 -20
  204. mteb/models/model_implementations/ruri_models.py +322 -0
  205. mteb/models/model_implementations/salesforce_models.py +6 -3
  206. mteb/models/model_implementations/samilpwc_models.py +2 -1
  207. mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
  208. mteb/models/model_implementations/searchmap_models.py +2 -1
  209. mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
  210. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
  211. mteb/models/model_implementations/seed_models.py +1 -0
  212. mteb/models/model_implementations/sentence_transformers_models.py +177 -18
  213. mteb/models/model_implementations/shuu_model.py +32 -31
  214. mteb/models/model_implementations/siglip_models.py +30 -20
  215. mteb/models/model_implementations/slm_models.py +416 -0
  216. mteb/models/model_implementations/sonar_models.py +1 -0
  217. mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
  218. mteb/models/model_implementations/stella_models.py +23 -4
  219. mteb/models/model_implementations/tarka_models.py +376 -0
  220. mteb/models/model_implementations/text2vec_models.py +9 -3
  221. mteb/models/model_implementations/ua_sentence_models.py +11 -1
  222. mteb/models/model_implementations/uae_models.py +8 -1
  223. mteb/models/model_implementations/vdr_models.py +3 -1
  224. mteb/models/model_implementations/vi_vn_models.py +45 -6
  225. mteb/models/model_implementations/vista_models.py +2 -0
  226. mteb/models/model_implementations/vlm2vec_models.py +5 -3
  227. mteb/models/model_implementations/voyage_models.py +99 -0
  228. mteb/models/model_implementations/voyage_v.py +17 -9
  229. mteb/models/model_implementations/xyz_models.py +1 -0
  230. mteb/models/model_implementations/youtu_models.py +2 -1
  231. mteb/models/model_implementations/yuan_models.py +34 -0
  232. mteb/models/model_implementations/yuan_models_en.py +58 -0
  233. mteb/models/model_meta.py +498 -29
  234. mteb/models/models_protocols.py +22 -6
  235. mteb/models/search_encoder_index/__init__.py +7 -0
  236. mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
  237. mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
  238. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
  239. mteb/models/search_wrappers.py +197 -65
  240. mteb/models/sentence_transformer_wrapper.py +52 -32
  241. mteb/models/vllm_wrapper.py +327 -0
  242. mteb/py.typed +0 -0
  243. mteb/results/benchmark_results.py +114 -65
  244. mteb/results/model_result.py +63 -26
  245. mteb/results/task_result.py +117 -77
  246. mteb/similarity_functions.py +60 -7
  247. mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
  248. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  249. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  250. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
  251. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  252. mteb/tasks/classification/ara/ajgt.py +1 -2
  253. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  254. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  255. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  256. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  257. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  258. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  259. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  260. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  261. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  262. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  263. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  264. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  265. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  266. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  267. mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
  268. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  269. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  270. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  271. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  272. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  273. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  274. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  275. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  276. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  277. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  278. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  279. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  280. mteb/tasks/classification/eng/news_classification.py +1 -2
  281. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  282. mteb/tasks/classification/eng/patent_classification.py +1 -2
  283. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  284. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  285. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  286. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  287. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  288. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  289. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  290. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  291. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  292. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  293. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  294. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  295. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  296. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  297. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  298. mteb/tasks/classification/est/estonian_valence.py +2 -3
  299. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  300. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  301. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  302. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  303. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  304. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  305. mteb/tasks/classification/heb/__init__.py +6 -1
  306. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
  307. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  308. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  309. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  310. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  311. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  312. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  313. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  314. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  315. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  316. mteb/tasks/classification/kor/klue_tc.py +1 -2
  317. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  318. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  319. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
  320. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  321. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  322. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  323. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  324. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  325. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  326. mteb/tasks/classification/multilingual/scala_classification.py +2 -3
  327. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  328. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  329. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  330. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  331. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  332. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  333. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  334. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  335. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  336. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  337. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  338. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  339. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  340. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  341. mteb/tasks/classification/pol/polish_classification.py +3 -6
  342. mteb/tasks/classification/ron/moroco.py +1 -2
  343. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  344. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  345. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  346. mteb/tasks/classification/rus/headline_classification.py +1 -2
  347. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  348. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  349. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  350. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  351. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  352. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  353. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  354. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  355. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  356. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  357. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  358. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  359. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  360. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  361. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  362. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  363. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  364. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  365. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  366. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  367. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  368. mteb/tasks/classification/tur/__init__.py +4 -0
  369. mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
  370. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  371. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  372. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  373. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  374. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  375. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  376. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  377. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  378. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  379. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  380. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  381. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  382. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  383. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  384. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  385. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  386. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  387. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  388. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  389. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  390. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  391. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  392. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  393. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  394. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  395. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  396. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  397. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  398. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  399. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  400. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  401. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  402. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  403. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  404. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  405. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  406. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  407. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  408. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  409. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  410. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  411. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  412. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  413. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  414. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  415. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  416. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  417. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  418. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  419. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  420. mteb/tasks/pair_classification/rus/__init__.py +2 -2
  421. mteb/tasks/pair_classification/rus/terra.py +51 -25
  422. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  423. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  424. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  425. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  426. mteb/tasks/reranking/jpn/__init__.py +9 -1
  427. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  428. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  429. mteb/tasks/reranking/multilingual/__init__.py +2 -0
  430. mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
  431. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  432. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  433. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  434. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  435. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  436. mteb/tasks/retrieval/code/code_rag.py +12 -12
  437. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  438. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  439. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  440. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  441. mteb/tasks/retrieval/eng/__init__.py +2 -0
  442. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  443. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  444. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  445. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
  446. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  447. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  448. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  449. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  450. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  451. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  452. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  453. mteb/tasks/retrieval/kor/__init__.py +16 -1
  454. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  455. mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
  456. mteb/tasks/retrieval/multilingual/__init__.py +24 -0
  457. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
  458. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  459. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
  460. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  461. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  462. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  463. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  464. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  465. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
  466. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
  467. mteb/tasks/retrieval/nld/__init__.py +8 -4
  468. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  469. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  470. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  471. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  472. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  473. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  474. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  475. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  476. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  477. mteb/tasks/retrieval/nob/norquad.py +2 -2
  478. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  479. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  480. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  481. mteb/tasks/retrieval/vie/__init__.py +14 -6
  482. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  483. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
  484. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  485. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  486. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  487. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  488. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  489. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  490. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  491. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  492. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  493. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  494. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
  495. mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
  496. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  497. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  498. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
  499. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
  500. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  501. mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
  502. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  503. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  504. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  505. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  506. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  507. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  508. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  509. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  510. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  511. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  512. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  513. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  514. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  515. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  516. mteb/types/__init__.py +2 -0
  517. mteb/types/_encoder_io.py +19 -2
  518. mteb/types/_result.py +2 -1
  519. mteb/types/statistics.py +9 -3
  520. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
  521. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
  522. mteb/models/model_implementations/mxbai_models.py +0 -102
  523. mteb/models/model_implementations/nb_sbert.py +0 -25
  524. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  525. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  526. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  527. {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
@@ -123,8 +123,9 @@ siglip_training_datasets = set(
123
123
  )
124
124
 
125
125
  siglip_so400m_patch14_224 = ModelMeta(
126
- loader=SiglipModelWrapper, # type: ignore
126
+ loader=SiglipModelWrapper,
127
127
  name="google/siglip-so400m-patch14-224",
128
+ model_type=["dense"],
128
129
  languages=["eng-Latn"],
129
130
  revision="d04cf29fca7b6374f74d8bea1969314492266b5e",
130
131
  release_date="2024-01-08",
@@ -137,7 +138,7 @@ siglip_so400m_patch14_224 = ModelMeta(
137
138
  open_weights=True,
138
139
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
139
140
  public_training_data=None,
140
- framework=["PyTorch"],
141
+ framework=["PyTorch", "Transformers", "safetensors"],
141
142
  reference="https://huggingface.co/google/siglip-so400m-patch14-224",
142
143
  similarity_fn_name=ScoringFunction.COSINE,
143
144
  use_instructions=False,
@@ -146,8 +147,9 @@ siglip_so400m_patch14_224 = ModelMeta(
146
147
  )
147
148
 
148
149
  siglip_so400m_patch14_384 = ModelMeta(
149
- loader=SiglipModelWrapper, # type: ignore
150
+ loader=SiglipModelWrapper,
150
151
  name="google/siglip-so400m-patch14-384",
152
+ model_type=["dense"],
151
153
  languages=["eng-Latn"],
152
154
  revision="9fdffc58afc957d1a03a25b10dba0329ab15c2a3",
153
155
  release_date="2024-01-08",
@@ -160,7 +162,7 @@ siglip_so400m_patch14_384 = ModelMeta(
160
162
  open_weights=True,
161
163
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
162
164
  public_training_data=None,
163
- framework=["PyTorch"],
165
+ framework=["PyTorch", "Transformers", "safetensors"],
164
166
  reference="https://huggingface.co/google/siglip-so400m-patch14-384",
165
167
  similarity_fn_name=ScoringFunction.COSINE,
166
168
  use_instructions=False,
@@ -169,8 +171,9 @@ siglip_so400m_patch14_384 = ModelMeta(
169
171
  )
170
172
 
171
173
  siglip_so400m_patch16_256_i18n = ModelMeta(
172
- loader=SiglipModelWrapper, # type: ignore
174
+ loader=SiglipModelWrapper,
173
175
  name="google/siglip-so400m-patch16-256-i18n",
176
+ model_type=["dense"],
174
177
  languages=["eng-Latn"],
175
178
  revision="365d321c0cfdea96bc28e3a29787a11a062681a1",
176
179
  release_date="2024-01-08",
@@ -183,7 +186,7 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
183
186
  open_weights=True,
184
187
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
185
188
  public_training_data=None,
186
- framework=["PyTorch"],
189
+ framework=["PyTorch", "Transformers", "safetensors"],
187
190
  reference="https://huggingface.co/google/siglip-so400m-patch16-256-i18n",
188
191
  similarity_fn_name=ScoringFunction.COSINE,
189
192
  use_instructions=False,
@@ -192,8 +195,9 @@ siglip_so400m_patch16_256_i18n = ModelMeta(
192
195
  )
193
196
 
194
197
  siglip_base_patch16_256_multilingual = ModelMeta(
195
- loader=SiglipModelWrapper, # type: ignore
198
+ loader=SiglipModelWrapper,
196
199
  name="google/siglip-base-patch16-256-multilingual",
200
+ model_type=["dense"],
197
201
  languages=["eng-Latn"],
198
202
  revision="8952a4eafcde3cb7ab46b1dd629b33f8784ca9c6",
199
203
  release_date="2024-01-08",
@@ -206,7 +210,7 @@ siglip_base_patch16_256_multilingual = ModelMeta(
206
210
  open_weights=True,
207
211
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
208
212
  public_training_data=None,
209
- framework=["PyTorch"],
213
+ framework=["PyTorch", "Transformers", "safetensors"],
210
214
  reference="https://huggingface.co/google/siglip-base-patch16-256-multilingual",
211
215
  similarity_fn_name=ScoringFunction.COSINE,
212
216
  use_instructions=False,
@@ -215,8 +219,9 @@ siglip_base_patch16_256_multilingual = ModelMeta(
215
219
  )
216
220
 
217
221
  siglip_base_patch16_256 = ModelMeta(
218
- loader=SiglipModelWrapper, # type: ignore
222
+ loader=SiglipModelWrapper,
219
223
  name="google/siglip-base-patch16-256",
224
+ model_type=["dense"],
220
225
  languages=["eng-Latn"],
221
226
  revision="b078df89e446d623010d890864d4207fe6399f61",
222
227
  release_date="2024-01-08",
@@ -229,7 +234,7 @@ siglip_base_patch16_256 = ModelMeta(
229
234
  open_weights=True,
230
235
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
231
236
  public_training_data=None,
232
- framework=["PyTorch"],
237
+ framework=["PyTorch", "Transformers", "safetensors"],
233
238
  reference="https://huggingface.co/google/siglip-base-patch16-256",
234
239
  similarity_fn_name=ScoringFunction.COSINE,
235
240
  use_instructions=False,
@@ -238,8 +243,9 @@ siglip_base_patch16_256 = ModelMeta(
238
243
  )
239
244
 
240
245
  siglip_base_patch16_512 = ModelMeta(
241
- loader=SiglipModelWrapper, # type: ignore
246
+ loader=SiglipModelWrapper,
242
247
  name="google/siglip-base-patch16-512",
248
+ model_type=["dense"],
243
249
  languages=["eng-Latn"],
244
250
  revision="753a949581523b60257d93e18391e8c27f72eb22",
245
251
  release_date="2024-01-08",
@@ -252,7 +258,7 @@ siglip_base_patch16_512 = ModelMeta(
252
258
  open_weights=True,
253
259
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
254
260
  public_training_data=None,
255
- framework=["PyTorch"],
261
+ framework=["PyTorch", "Transformers", "safetensors"],
256
262
  reference="https://huggingface.co/google/siglip-base-patch16-512",
257
263
  similarity_fn_name=ScoringFunction.COSINE,
258
264
  use_instructions=False,
@@ -261,8 +267,9 @@ siglip_base_patch16_512 = ModelMeta(
261
267
  )
262
268
 
263
269
  siglip_base_patch16_384 = ModelMeta(
264
- loader=SiglipModelWrapper, # type: ignore
270
+ loader=SiglipModelWrapper,
265
271
  name="google/siglip-base-patch16-384",
272
+ model_type=["dense"],
266
273
  languages=["eng-Latn"],
267
274
  revision="41aec1c83b32e0a6fca20ad88ba058aa5b5ea394",
268
275
  release_date="2024-01-08",
@@ -275,7 +282,7 @@ siglip_base_patch16_384 = ModelMeta(
275
282
  open_weights=True,
276
283
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
277
284
  public_training_data=None,
278
- framework=["PyTorch"],
285
+ framework=["PyTorch", "Transformers", "safetensors"],
279
286
  reference="https://huggingface.co/google/siglip-base-patch16-384",
280
287
  similarity_fn_name=ScoringFunction.COSINE,
281
288
  use_instructions=False,
@@ -284,8 +291,9 @@ siglip_base_patch16_384 = ModelMeta(
284
291
  )
285
292
 
286
293
  siglip_base_patch16_224 = ModelMeta(
287
- loader=SiglipModelWrapper, # type: ignore
294
+ loader=SiglipModelWrapper,
288
295
  name="google/siglip-base-patch16-224",
296
+ model_type=["dense"],
289
297
  languages=["eng-Latn"],
290
298
  revision="7fd15f0689c79d79e38b1c2e2e2370a7bf2761ed",
291
299
  release_date="2024-01-08",
@@ -298,7 +306,7 @@ siglip_base_patch16_224 = ModelMeta(
298
306
  open_weights=True,
299
307
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
300
308
  public_training_data=None,
301
- framework=["PyTorch"],
309
+ framework=["PyTorch", "Transformers", "safetensors"],
302
310
  reference="https://huggingface.co/google/siglip-base-patch16-224",
303
311
  similarity_fn_name=ScoringFunction.COSINE,
304
312
  use_instructions=False,
@@ -307,8 +315,9 @@ siglip_base_patch16_224 = ModelMeta(
307
315
  )
308
316
 
309
317
  siglip_large_patch16_256 = ModelMeta(
310
- loader=SiglipModelWrapper, # type: ignore
318
+ loader=SiglipModelWrapper,
311
319
  name="google/siglip-large-patch16-256",
320
+ model_type=["dense"],
312
321
  languages=["eng-Latn"],
313
322
  revision="d0da9f876e7d66b4e250cd2450c3ba2ce735e447",
314
323
  release_date="2024-01-08",
@@ -321,7 +330,7 @@ siglip_large_patch16_256 = ModelMeta(
321
330
  open_weights=True,
322
331
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
323
332
  public_training_data=None,
324
- framework=["PyTorch"],
333
+ framework=["PyTorch", "Transformers", "safetensors"],
325
334
  reference="https://huggingface.co/google/siglip-large-patch16-256",
326
335
  similarity_fn_name=ScoringFunction.COSINE,
327
336
  use_instructions=False,
@@ -330,8 +339,9 @@ siglip_large_patch16_256 = ModelMeta(
330
339
  )
331
340
 
332
341
  siglip_large_patch16_384 = ModelMeta(
333
- loader=SiglipModelWrapper, # type: ignore
342
+ loader=SiglipModelWrapper,
334
343
  name="google/siglip-large-patch16-384",
344
+ model_type=["dense"],
335
345
  languages=["eng-Latn"],
336
346
  revision="ce005573a40965dfd21fd937fbdeeebf2439fc35",
337
347
  release_date="2024-01-08",
@@ -344,7 +354,7 @@ siglip_large_patch16_384 = ModelMeta(
344
354
  open_weights=True,
345
355
  public_training_code="https://github.com/google-research/big_vision/blob/main/big_vision/trainers/proj/image_text/siglip.py",
346
356
  public_training_data=None,
347
- framework=["PyTorch"],
357
+ framework=["PyTorch", "Transformers", "safetensors"],
348
358
  reference="https://huggingface.co/google/siglip-large-patch16-384",
349
359
  similarity_fn_name=ScoringFunction.COSINE,
350
360
  use_instructions=False,
@@ -0,0 +1,416 @@
1
+ """
2
+ SauerkrautLM Visual Document Retrieval Models - MTEB Integration
3
+
4
+ This module provides MTEB wrappers for SauerkrautLM ColPali-style models:
5
+ - SLM-ColQwen3 (Qwen3-VL backbone)
6
+ - SLM-ColLFM2 (LFM2 backbone)
7
+ - SLM-ColMinistral3 (Ministral3 backbone)
8
+
9
+ Based on:
10
+ - MTEB ColPali implementation: mteb/models/model_implementations/colpali_models.py
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from typing import Any
17
+
18
+ import torch
19
+ from torch.utils.data import DataLoader
20
+ from tqdm.auto import tqdm
21
+
22
+ from mteb._requires_package import (
23
+ requires_image_dependencies,
24
+ requires_package,
25
+ )
26
+ from mteb.abstasks.task_metadata import TaskMetadata
27
+ from mteb.models.abs_encoder import AbsEncoder
28
+ from mteb.models.model_implementations.colpali_models import (
29
+ COLPALI_CITATION,
30
+ COLPALI_TRAINING_DATA,
31
+ )
32
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
33
+ from mteb.types import Array, BatchedInput, PromptType
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
# Language tags passed as ``languages=`` to every ModelMeta declared in this
# module. Each entry is written as "<language code>-<script code>".
SUPPORTED_LANGUAGES = [
    "eng-Latn",  # English
    "deu-Latn",  # German
    "fra-Latn",  # French
    "spa-Latn",  # Spanish
    "ita-Latn",  # Italian
    "por-Latn",  # Portuguese
]
46
+
47
+
48
class SLMBaseWrapper(AbsEncoder):
    """Base wrapper for SauerkrautLM multi-vector embedding models.

    All our models use late interaction (MaxSim) for retrieval scoring.

    Subclasses implement ``_load_model_and_processor`` and must assign
    ``self.mdl`` (the model) and ``self.processor`` (the matching processor)
    there; every other method in this class relies on those two attributes.
    """

    model_class = None
    processor_class = None
    model_name_prefix = "SLM"

    def __init__(
        self,
        model_name: str,
        revision: str | None = None,
        device: str | None = None,
        use_flash_attn: bool = True,
        **kwargs,
    ):
        """Load the model and processor and put the model in eval mode.

        Args:
            model_name: Model identifier forwarded to the subclass loader.
            revision: Revision forwarded to the subclass loader.
            device: Target device. When falsy, "cuda" is used if
                ``torch.cuda.is_available()`` and "cpu" otherwise.
            use_flash_attn: Forwarded to the subclass loader, which decides
                whether to honour it.
            **kwargs: Forwarded to the subclass loader.
        """
        requires_image_dependencies()
        requires_package(
            self, "sauerkrautlm_colpali", model_name, "pip install sauerkrautlm-colpali"
        )

        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self._load_model_and_processor(model_name, revision, use_flash_attn, **kwargs)
        self.mdl = self.mdl.to(self.device)
        self.mdl.eval()

    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
        """Override in subclasses to load specific model/processor.

        Implementations must set ``self.mdl`` and ``self.processor``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            "Subclasses must implement _load_model_and_processor"
        )

    def encode(
        self,
        inputs: DataLoader[BatchedInput],
        *,
        task_metadata: TaskMetadata,
        hf_split: str,
        hf_subset: str,
        prompt_type: PromptType | None = None,
        **kwargs: Any,
    ) -> Array:
        """Embed the text and/or image column of ``inputs``.

        ``task_metadata``, ``hf_split``, ``hf_subset`` and ``prompt_type`` are
        accepted for interface compatibility but are not read here.

        Args:
            inputs: Loader whose ``dataset.features`` decides which modalities
                are embedded ("text", "image", or both).
            task_metadata: Unused.
            hf_split: Unused.
            hf_subset: Unused.
            prompt_type: Unused.
            **kwargs: Forwarded to ``get_text_embeddings`` /
                ``get_image_embeddings``.

        Returns:
            The text embeddings, the image embeddings, or their element-wise
            sum when both modalities are present.

        Raises:
            ValueError: If both modalities are present with a different number
                of items, or if neither "text" nor "image" is a feature.
        """
        features = inputs.dataset.features
        text_embeddings = None
        image_embeddings = None

        if "text" in features:
            text_embeddings = self.get_text_embeddings(inputs, **kwargs)
        if "image" in features:
            image_embeddings = self.get_image_embeddings(inputs, **kwargs)

        if text_embeddings is not None and image_embeddings is not None:
            if len(text_embeddings) != len(image_embeddings):
                raise ValueError(
                    "The number of texts and images must have the same length"
                )
            # NOTE(review): each modality is padded independently, so this sum
            # presumably relies on both tensors having broadcast-compatible
            # sequence lengths — confirm against the processors.
            return text_embeddings + image_embeddings
        if text_embeddings is not None:
            return text_embeddings
        if image_embeddings is not None:
            return image_embeddings
        raise ValueError("No text or image features found in inputs")

    def encode_input(self, inputs):
        """Forward pass through the model."""
        return self.mdl(**inputs)

    def _move_to_device(self, inputs: dict) -> dict:
        """Move all tensor inputs to the model's device.

        Non-tensor values are passed through unchanged; a new dict is returned.
        """
        return {
            key: value.to(self.device) if isinstance(value, torch.Tensor) else value
            for key, value in inputs.items()
        }

    def _embed_batches(self, loader, prepare, desc: str) -> torch.Tensor:
        """Run every batch of ``loader`` through the model and pad the results.

        Args:
            loader: Iterable of batches.
            prepare: Callable turning one batch into the processor output that
                is fed to the model.
            desc: Progress-bar description.

        Returns:
            All per-item embeddings, moved to CPU as float32 and zero-padded
            into one batch-first tensor.
        """
        per_item_embeddings = []
        with torch.no_grad():
            for batch in tqdm(loader, desc=desc):
                model_inputs = self._move_to_device(prepare(batch))
                outputs = self.encode_input(model_inputs)
                # extend() iterates the first dimension, so each item keeps its
                # own (possibly different-length) tensor until padding.
                per_item_embeddings.extend(outputs.cpu().to(torch.float32))

        return torch.nn.utils.rnn.pad_sequence(
            per_item_embeddings, batch_first=True, padding_value=0
        )

    def get_image_embeddings(
        self,
        images: DataLoader,
        batch_size: int = 32,
        **kwargs,
    ) -> torch.Tensor:
        """Embed the "image" column of every batch in ``images``.

        Entries that are not already PIL images are converted with
        ``torchvision.transforms.functional.to_pil_image``.

        Args:
            images: Loader yielding batches with an "image" entry.
            batch_size: Unused; batching is determined by ``images``.
            **kwargs: Unused.

        Returns:
            Zero-padded, batch-first float32 tensor of image embeddings on CPU.
        """
        # Imported once per call rather than once per batch.
        import torchvision.transforms.functional as F
        from PIL import Image

        def prepare(batch):
            pil_images = [
                item if isinstance(item, Image.Image) else F.to_pil_image(item)
                for item in batch["image"]
            ]
            return self.processor.process_images(pil_images)

        return self._embed_batches(images, prepare, desc="Encoding images")

    def get_text_embeddings(
        self,
        texts: DataLoader,
        batch_size: int = 32,
        **kwargs,
    ) -> torch.Tensor:
        """Embed the "text" column of every batch in ``texts``.

        Texts are prepared with the processor's ``process_queries``.

        Args:
            texts: Loader yielding batches with a "text" entry.
            batch_size: Unused; batching is determined by ``texts``.
            **kwargs: Unused.

        Returns:
            Zero-padded, batch-first float32 tensor of text embeddings on CPU.
        """

        def prepare(batch):
            return self.processor.process_queries(batch["text"])

        return self._embed_batches(texts, prepare, desc="Encoding texts")

    def calculate_probs(
        self,
        text_embeddings: torch.Tensor,
        image_embeddings: torch.Tensor,
    ) -> torch.Tensor:
        """Return the transposed text-vs-image similarity, softmaxed over the last dim."""
        scores = self.similarity(text_embeddings, image_embeddings).T
        return scores.softmax(dim=-1)

    def similarity(
        self,
        a: torch.Tensor | list,
        b: torch.Tensor | list,
    ) -> torch.Tensor:
        """Score ``a`` against ``b`` with the processor's ``score`` on ``self.device``."""
        return self.processor.score(a, b, device=self.device)
188
+
189
+
190
class SLMColQwen3Wrapper(SLMBaseWrapper):
    """Wrapper for SLM-ColQwen3 models (Qwen3-VL backbone)."""

    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
        """Load ColQwen3 in bfloat16 together with its processor.

        ``use_flash_attn`` selects the "flash_attention_2" attention
        implementation; otherwise "eager" is requested. Extra ``kwargs`` go to
        the model's ``from_pretrained`` only.
        """
        from sauerkrautlm_colpali.models.qwen3.colqwen3 import (
            ColQwen3,
            ColQwen3Processor,
        )

        if use_flash_attn:
            attention_backend = "flash_attention_2"
        else:
            attention_backend = "eager"

        model = ColQwen3.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            attn_implementation=attention_backend,
            revision=revision,
            **kwargs,
        )
        self.mdl = model

        processor = ColQwen3Processor.from_pretrained(model_name, revision=revision)
        self.processor = processor

        logger.info(f"SLM-ColQwen3 loaded: dim={self.mdl.dim}, device={self.device}")
213
+
214
+
215
class SLMColLFM2Wrapper(SLMBaseWrapper):
    """Wrapper for SLM-ColLFM2 models (LFM2 backbone)."""

    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
        """Load ColLFM2 in bfloat16 together with its processor.

        ``use_flash_attn`` is accepted but not used by this loader. Extra
        ``kwargs`` go to the model's ``from_pretrained`` only.
        """
        from sauerkrautlm_colpali.models.lfm2.collfm2 import (
            ColLFM2,
            ColLFM2Processor,
        )

        model = ColLFM2.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            revision=revision,
            **kwargs,
        )
        self.mdl = model

        processor = ColLFM2Processor.from_pretrained(model_name, revision=revision)
        self.processor = processor

        logger.info(f"SLM-ColLFM2 loaded: dim={self.mdl.dim}, device={self.device}")
234
+
235
+
236
class SLMColMinistral3Wrapper(SLMBaseWrapper):
    """Wrapper for SLM-ColMinistral3 models (Ministral3 backbone)."""

    def _load_model_and_processor(self, model_name, revision, use_flash_attn, **kwargs):
        """Load ColMinistral3 in bfloat16 together with its processor.

        ``revision`` is passed to both ``from_pretrained`` calls so that the
        checkpoint requested by the caller is the one that gets loaded, and
        extra ``kwargs`` are forwarded to the model's ``from_pretrained``.
        ``use_flash_attn`` is accepted for interface compatibility but is not
        used by this loader.

        Args:
            model_name: Model identifier to load.
            revision: Revision to load for both model and processor.
            use_flash_attn: Unused.
            **kwargs: Extra arguments for the model's ``from_pretrained``.
        """
        from sauerkrautlm_colpali.models.ministral3.colministral3 import (
            ColMinistral3,
            ColMinistral3Processor,
        )

        self.mdl = ColMinistral3.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            revision=revision,
            **kwargs,
        )

        self.processor = ColMinistral3Processor.from_pretrained(
            model_name,
            revision=revision,
        )

        logger.info(
            f"SLM-ColMinistral3 loaded: dim={self.mdl.dim}, device={self.device}"
        )
255
+
256
+
257
+ SAUERKRAUTLM_CITATION = """
258
+ @misc{sauerkrautlm-colpali-2025,
259
+ title={SauerkrautLM-ColPali: Multi-Vector Vision Retrieval Models},
260
+ author={David Golchinfar},
261
+ organization={VAGO Solutions},
262
+ year={2025},
263
+ url={https://github.com/VAGOsolutions/sauerkrautlm-colpali}
264
+ }
265
+ """
266
+
267
+
268
# Registry entry for the SauerkrautLM ColQwen3 1.7b "Turbo" checkpoint.
# Loaded through SLMColQwen3Wrapper and scored with MaxSim.
slm_colqwen3_1_7b_turbo = ModelMeta(
    loader=SLMColQwen3Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="19c295a18e057d6d82754f627c09408117ffdb66",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    n_parameters=1_756_572_288,
    memory_usage_mb=3350,
    max_tokens=262144,
    embed_dim=128,
    license="apache-2.0",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-1.7b-Turbo-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="Qwen/Qwen3-VL-2B-Instruct",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
292
+
293
# Registry entry for the SauerkrautLM ColQwen3 2b checkpoint.
# Loaded through SLMColQwen3Wrapper and scored with MaxSim.
slm_colqwen3_2b = ModelMeta(
    loader=SLMColQwen3Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="48f699713c10af754684e12060a2af9266462cc9",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    n_parameters=2_127_794_304,
    memory_usage_mb=4058,
    max_tokens=262144,
    embed_dim=128,
    license="apache-2.0",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-2b-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="Qwen/Qwen3-VL-2B-Instruct",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
317
+
318
# Registry entry for the SauerkrautLM ColQwen3 4b checkpoint.
# Loaded through SLMColQwen3Wrapper and scored with MaxSim.
slm_colqwen3_4b = ModelMeta(
    loader=SLMColQwen3Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="b635fbb3ab145f07608ed10a85def33544de1723",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    n_parameters=4_438_143_616,
    memory_usage_mb=8465,
    max_tokens=262144,
    embed_dim=128,
    license="apache-2.0",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-4b-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="Qwen/Qwen3-VL-4B-Instruct",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
342
+
343
# Registry entry for the SauerkrautLM ColQwen3 8b checkpoint.
# Loaded through SLMColQwen3Wrapper and scored with MaxSim.
slm_colqwen3_8b = ModelMeta(
    loader=SLMColQwen3Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="36ac136e451a7b8d8229725d69d4ec23aa4f03c8",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    n_parameters=8_145_318_256,
    memory_usage_mb=15536,
    max_tokens=262144,
    embed_dim=128,
    license="apache-2.0",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColQwen3-8b-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="Qwen/Qwen3-VL-8B-Instruct",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
367
+
368
# Registry entry for the SauerkrautLM ColLFM2 450M checkpoint.
# Loaded through SLMColLFM2Wrapper and scored with MaxSim.
slm_collfm2_450m = ModelMeta(
    loader=SLMColLFM2Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="a65223fd6633f331ccff4483e47575c3c620dc60",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    n_parameters=450_953_856,
    memory_usage_mb=860,
    max_tokens=32768,
    embed_dim=128,
    # The license is given as a link to the base model's LICENSE file rather
    # than a license identifier.
    license="https://huggingface.co/LiquidAI/LFM2-VL-450M/blob/main/LICENSE",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColLFM2-450M-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="LiquidAI/LFM2-VL-450M",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
392
+
393
# Registry entry for the SauerkrautLM ColMinistral3 3b checkpoint.
# Loaded through SLMColMinistral3Wrapper and scored with MaxSim.
slm_colministral3_3b = ModelMeta(
    loader=SLMColMinistral3Wrapper,
    name="VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
    languages=SUPPORTED_LANGUAGES,
    revision="54aa3ffbbce20471fdcc4afc07d13989c65e71b8",
    release_date="2025-12-20",
    modalities=["image", "text"],
    model_type=["late-interaction"],
    # NOTE(review): the model name says "3b" while this count is ~4.25B —
    # confirm the figure against the published checkpoint.
    n_parameters=4_252_136_448,
    memory_usage_mb=8110,
    max_tokens=262144,
    embed_dim=128,
    license="apache-2.0",
    open_weights=True,
    public_training_code=None,
    public_training_data=None,
    framework=["ColPali"],
    reference="https://huggingface.co/VAGOsolutions/SauerkrautLM-ColMinistral3-3b-v0.1",
    similarity_fn_name=ScoringFunction.MAX_SIM,
    use_instructions=True,
    adapted_from="mistralai/Ministral-3B-Instruct-2410",
    training_datasets=COLPALI_TRAINING_DATA,
    # Cites both this project and the ColPali work it builds on.
    citation=SAUERKRAUTLM_CITATION + COLPALI_CITATION,
)
@@ -218,6 +218,7 @@ sonar_langs = [
218
218
  sonar = ModelMeta(
219
219
  loader=None,
220
220
  name="facebook/SONAR",
221
+ model_type=["dense"],
221
222
  languages=sonar_langs,
222
223
  open_weights=True,
223
224
  use_instructions=False, # it does take a language code as input
@@ -0,0 +1,34 @@
1
+ """ATLES Champion Embedding Model for MTEB."""
2
+
3
+ from mteb.models.model_meta import ModelMeta
4
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
5
+
6
+ spartan8806_atles_champion_embedding = ModelMeta(
7
+ loader=sentence_transformers_loader,
8
+ name="spartan8806/atles-champion-embedding",
9
+ model_type=["dense"],
10
+ languages=["eng-Latn"],
11
+ open_weights=True,
12
+ revision="d4c74d7000bbd25f3597fc0f2dcde59ef1386e8f",
13
+ release_date="2025-11-15",
14
+ n_parameters=110_000_000,
15
+ memory_usage_mb=420,
16
+ max_tokens=512,
17
+ embed_dim=768,
18
+ license="apache-2.0",
19
+ similarity_fn_name="cosine",
20
+ framework=["Sentence Transformers", "safetensors", "Transformers"],
21
+ reference="https://huggingface.co/spartan8806/atles-champion-embedding",
22
+ use_instructions=False,
23
+ training_datasets={"STSBenchmark"},
24
+ adapted_from="sentence-transformers/all-mpnet-base-v2",
25
+ public_training_code=None,
26
+ public_training_data=None,
27
+ citation="""@article{conner2025epistemic,
28
+ title={The Epistemic Barrier: How RLHF Makes AI Consciousness Empirically Undecidable},
29
+ author={Conner (spartan8806)},
30
+ journal={ATLES Research Papers},
31
+ year={2025},
32
+ note={Cross-model validation study (Phoenix, Grok, Gemini, Claude)}
33
+ }""",
34
+ )