mteb 2.5.2-py3-none-any.whl → 2.7.2-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (241)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +17 -18
  3. mteb/_evaluators/any_sts_evaluator.py +3 -3
  4. mteb/_evaluators/clustering_evaluator.py +2 -2
  5. mteb/_evaluators/evaluator.py +4 -2
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
  7. mteb/_evaluators/pair_classification_evaluator.py +5 -3
  8. mteb/_evaluators/retrieval_evaluator.py +2 -2
  9. mteb/_evaluators/retrieval_metrics.py +18 -17
  10. mteb/_evaluators/sklearn_evaluator.py +11 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
  12. mteb/_evaluators/text/summarization_evaluator.py +23 -18
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
  14. mteb/abstasks/_data_filter/filters.py +1 -1
  15. mteb/abstasks/_data_filter/task_pipelines.py +3 -0
  16. mteb/abstasks/_statistics_calculation.py +18 -10
  17. mteb/abstasks/_stratification.py +18 -18
  18. mteb/abstasks/abstask.py +35 -28
  19. mteb/abstasks/aggregate_task_metadata.py +1 -9
  20. mteb/abstasks/aggregated_task.py +10 -29
  21. mteb/abstasks/classification.py +15 -10
  22. mteb/abstasks/clustering.py +19 -15
  23. mteb/abstasks/clustering_legacy.py +10 -10
  24. mteb/abstasks/image/image_text_pair_classification.py +7 -4
  25. mteb/abstasks/multilabel_classification.py +23 -19
  26. mteb/abstasks/pair_classification.py +20 -11
  27. mteb/abstasks/regression.py +4 -4
  28. mteb/abstasks/retrieval.py +28 -24
  29. mteb/abstasks/retrieval_dataset_loaders.py +2 -2
  30. mteb/abstasks/sts.py +8 -5
  31. mteb/abstasks/task_metadata.py +31 -33
  32. mteb/abstasks/text/bitext_mining.py +39 -28
  33. mteb/abstasks/text/reranking.py +8 -6
  34. mteb/abstasks/text/summarization.py +10 -5
  35. mteb/abstasks/zeroshot_classification.py +8 -4
  36. mteb/benchmarks/benchmark.py +4 -2
  37. mteb/benchmarks/benchmarks/__init__.py +4 -0
  38. mteb/benchmarks/benchmarks/benchmarks.py +112 -11
  39. mteb/benchmarks/get_benchmark.py +14 -55
  40. mteb/cache.py +182 -29
  41. mteb/cli/_display_tasks.py +2 -2
  42. mteb/cli/build_cli.py +110 -14
  43. mteb/cli/generate_model_card.py +43 -23
  44. mteb/deprecated_evaluator.py +63 -49
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  49. mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
  50. mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
  51. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  52. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  53. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  54. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  55. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  56. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  57. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  58. mteb/evaluate.py +44 -33
  59. mteb/filter_tasks.py +25 -26
  60. mteb/get_tasks.py +29 -30
  61. mteb/languages/language_scripts.py +5 -3
  62. mteb/leaderboard/app.py +162 -34
  63. mteb/load_results.py +12 -12
  64. mteb/models/abs_encoder.py +10 -6
  65. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
  66. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
  67. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
  68. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
  69. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  70. mteb/models/get_model_meta.py +21 -3
  71. mteb/models/instruct_wrapper.py +28 -8
  72. mteb/models/model_implementations/align_models.py +1 -1
  73. mteb/models/model_implementations/andersborges.py +4 -4
  74. mteb/models/model_implementations/ara_models.py +1 -1
  75. mteb/models/model_implementations/arctic_models.py +8 -8
  76. mteb/models/model_implementations/b1ade_models.py +1 -1
  77. mteb/models/model_implementations/bge_models.py +45 -21
  78. mteb/models/model_implementations/bica_model.py +3 -3
  79. mteb/models/model_implementations/blip2_models.py +2 -2
  80. mteb/models/model_implementations/blip_models.py +16 -16
  81. mteb/models/model_implementations/bm25.py +4 -4
  82. mteb/models/model_implementations/bmretriever_models.py +6 -4
  83. mteb/models/model_implementations/cadet_models.py +1 -1
  84. mteb/models/model_implementations/cde_models.py +11 -4
  85. mteb/models/model_implementations/clip_models.py +6 -6
  86. mteb/models/model_implementations/clips_models.py +3 -3
  87. mteb/models/model_implementations/codefuse_models.py +5 -5
  88. mteb/models/model_implementations/codesage_models.py +3 -3
  89. mteb/models/model_implementations/cohere_models.py +5 -5
  90. mteb/models/model_implementations/cohere_v.py +2 -2
  91. mteb/models/model_implementations/colpali_models.py +3 -3
  92. mteb/models/model_implementations/colqwen_models.py +8 -8
  93. mteb/models/model_implementations/colsmol_models.py +2 -2
  94. mteb/models/model_implementations/conan_models.py +1 -1
  95. mteb/models/model_implementations/dino_models.py +42 -42
  96. mteb/models/model_implementations/e5_instruct.py +23 -4
  97. mteb/models/model_implementations/e5_models.py +9 -9
  98. mteb/models/model_implementations/e5_v.py +6 -6
  99. mteb/models/model_implementations/eagerworks_models.py +1 -1
  100. mteb/models/model_implementations/emillykkejensen_models.py +6 -6
  101. mteb/models/model_implementations/en_code_retriever.py +1 -1
  102. mteb/models/model_implementations/euler_models.py +2 -2
  103. mteb/models/model_implementations/fa_models.py +9 -9
  104. mteb/models/model_implementations/facebookai.py +14 -2
  105. mteb/models/model_implementations/geogpt_models.py +1 -1
  106. mteb/models/model_implementations/gme_v_models.py +6 -5
  107. mteb/models/model_implementations/google_models.py +1 -1
  108. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
  109. mteb/models/model_implementations/gritlm_models.py +2 -2
  110. mteb/models/model_implementations/gte_models.py +25 -13
  111. mteb/models/model_implementations/hinvec_models.py +1 -1
  112. mteb/models/model_implementations/ibm_granite_models.py +30 -6
  113. mteb/models/model_implementations/inf_models.py +2 -2
  114. mteb/models/model_implementations/jasper_models.py +2 -2
  115. mteb/models/model_implementations/jina_clip.py +48 -10
  116. mteb/models/model_implementations/jina_models.py +18 -11
  117. mteb/models/model_implementations/kblab.py +12 -6
  118. mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
  119. mteb/models/model_implementations/kfst.py +1 -1
  120. mteb/models/model_implementations/kowshik24_models.py +1 -1
  121. mteb/models/model_implementations/lgai_embedding_models.py +1 -1
  122. mteb/models/model_implementations/linq_models.py +1 -1
  123. mteb/models/model_implementations/listconranker.py +1 -1
  124. mteb/models/model_implementations/llm2clip_models.py +6 -6
  125. mteb/models/model_implementations/llm2vec_models.py +8 -8
  126. mteb/models/model_implementations/mcinext_models.py +4 -1
  127. mteb/models/model_implementations/mdbr_models.py +17 -3
  128. mteb/models/model_implementations/misc_models.py +68 -68
  129. mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
  130. mteb/models/model_implementations/mme5_models.py +1 -1
  131. mteb/models/model_implementations/moco_models.py +4 -4
  132. mteb/models/model_implementations/mod_models.py +1 -1
  133. mteb/models/model_implementations/model2vec_models.py +14 -14
  134. mteb/models/model_implementations/moka_models.py +1 -1
  135. mteb/models/model_implementations/nbailab.py +3 -3
  136. mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
  137. mteb/models/model_implementations/nomic_models.py +30 -15
  138. mteb/models/model_implementations/nomic_models_vision.py +1 -1
  139. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
  140. mteb/models/model_implementations/nvidia_models.py +151 -19
  141. mteb/models/model_implementations/octen_models.py +61 -2
  142. mteb/models/model_implementations/openclip_models.py +13 -13
  143. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
  144. mteb/models/model_implementations/ops_moa_models.py +1 -1
  145. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
  146. mteb/models/model_implementations/pawan_models.py +1 -1
  147. mteb/models/model_implementations/piccolo_models.py +1 -1
  148. mteb/models/model_implementations/pixie_models.py +56 -0
  149. mteb/models/model_implementations/promptriever_models.py +4 -4
  150. mteb/models/model_implementations/pylate_models.py +10 -9
  151. mteb/models/model_implementations/qodo_models.py +2 -2
  152. mteb/models/model_implementations/qtack_models.py +1 -1
  153. mteb/models/model_implementations/qwen3_models.py +3 -3
  154. mteb/models/model_implementations/qzhou_models.py +2 -2
  155. mteb/models/model_implementations/random_baseline.py +3 -3
  156. mteb/models/model_implementations/rasgaard_models.py +2 -2
  157. mteb/models/model_implementations/reasonir_model.py +1 -1
  158. mteb/models/model_implementations/repllama_models.py +3 -3
  159. mteb/models/model_implementations/rerankers_custom.py +12 -6
  160. mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
  161. mteb/models/model_implementations/richinfoai_models.py +1 -1
  162. mteb/models/model_implementations/ru_sentence_models.py +20 -20
  163. mteb/models/model_implementations/ruri_models.py +10 -10
  164. mteb/models/model_implementations/salesforce_models.py +3 -3
  165. mteb/models/model_implementations/samilpwc_models.py +1 -1
  166. mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
  167. mteb/models/model_implementations/searchmap_models.py +1 -1
  168. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
  169. mteb/models/model_implementations/sentence_transformers_models.py +124 -22
  170. mteb/models/model_implementations/shuu_model.py +1 -1
  171. mteb/models/model_implementations/siglip_models.py +20 -20
  172. mteb/models/model_implementations/slm_models.py +416 -0
  173. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
  174. mteb/models/model_implementations/stella_models.py +17 -4
  175. mteb/models/model_implementations/tarka_models.py +2 -2
  176. mteb/models/model_implementations/text2vec_models.py +9 -3
  177. mteb/models/model_implementations/ua_sentence_models.py +1 -1
  178. mteb/models/model_implementations/uae_models.py +7 -1
  179. mteb/models/model_implementations/vdr_models.py +1 -1
  180. mteb/models/model_implementations/vi_vn_models.py +6 -6
  181. mteb/models/model_implementations/vlm2vec_models.py +3 -3
  182. mteb/models/model_implementations/voyage_models.py +84 -0
  183. mteb/models/model_implementations/voyage_v.py +9 -7
  184. mteb/models/model_implementations/youtu_models.py +1 -1
  185. mteb/models/model_implementations/yuan_models.py +1 -1
  186. mteb/models/model_implementations/yuan_models_en.py +1 -1
  187. mteb/models/model_meta.py +80 -31
  188. mteb/models/models_protocols.py +22 -6
  189. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
  190. mteb/models/search_wrappers.py +33 -18
  191. mteb/models/sentence_transformer_wrapper.py +50 -25
  192. mteb/models/vllm_wrapper.py +327 -0
  193. mteb/py.typed +0 -0
  194. mteb/results/benchmark_results.py +29 -21
  195. mteb/results/model_result.py +52 -22
  196. mteb/results/task_result.py +80 -58
  197. mteb/similarity_functions.py +11 -7
  198. mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
  199. mteb/tasks/classification/est/estonian_valence.py +1 -1
  200. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
  201. mteb/tasks/classification/multilingual/scala_classification.py +1 -1
  202. mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
  203. mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
  204. mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
  205. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  206. mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
  207. mteb/tasks/retrieval/code/code_rag.py +12 -12
  208. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  209. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  210. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  211. mteb/tasks/retrieval/eng/__init__.py +2 -0
  212. mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
  213. mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
  214. mteb/tasks/retrieval/kor/__init__.py +15 -1
  215. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  216. mteb/tasks/retrieval/multilingual/__init__.py +2 -0
  217. mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
  218. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
  219. mteb/tasks/retrieval/nob/norquad.py +2 -2
  220. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  221. mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
  222. mteb/tasks/retrieval/vie/__init__.py +14 -6
  223. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  224. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  225. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  226. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  227. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  228. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  229. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  230. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  231. mteb/types/__init__.py +2 -0
  232. mteb/types/_encoder_io.py +12 -0
  233. mteb/types/_result.py +2 -1
  234. mteb/types/statistics.py +9 -3
  235. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
  236. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
  237. mteb/models/model_implementations/mxbai_models.py +0 -111
  238. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
  239. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
  240. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
  241. {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/tasks/retrieval/vie/__init__.py CHANGED
@@ -1,5 +1,5 @@
 from .argu_ana_vn_retrieval import ArguAnaVN
-from .climate_fevervn_retrieval import ClimateFEVERVN
+from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
 from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
 from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
 from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
 from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
 from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
 from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
-from .db_pedia_vn_retrieval import DBPediaVN
-from .fevervn_retrieval import FEVERVN
+from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
+from .fevervn_retrieval import FEVERVN, NanoFEVERVN
 from .fi_qa2018_vn_retrieval import FiQA2018VN
 from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
-from .hotpot_qavn_retrieval import HotpotQAVN
-from .msmarcovn_retrieval import MSMARCOVN
+from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
+from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
 from .nf_corpus_vn_retrieval import NFCorpusVN
-from .nqvn_retrieval import NQVN
+from .nqvn_retrieval import NQVN, NanoNQVN
 from .quora_vn_retrieval import QuoraVN
 from .sci_fact_vn_retrieval import SciFactVN
 from .scidocsvn_retrieval import SCIDOCSVN
 from .touche2020_vn_retrieval import Touche2020VN
 from .treccovidvn_retrieval import TRECCOVIDVN
+from .tvpl_retrieval import TVPLRetrieval
 from .vie_qu_ad_retrieval import VieQuADRetrieval
 from .zac_legal_text_retrieval import ZacLegalTextRetrieval

@@ -49,8 +50,15 @@ __all__ = [
     "GreenNodeTableMarkdownRetrieval",
     "HotpotQAVN",
     "NFCorpusVN",
+    "NanoClimateFEVERVN",
+    "NanoDBPediaVN",
+    "NanoFEVERVN",
+    "NanoHotpotQAVN",
+    "NanoMSMARCOVN",
+    "NanoNQVN",
     "QuoraVN",
     "SciFactVN",
+    "TVPLRetrieval",
     "Touche2020VN",
     "VieQuADRetrieval",
     "ZacLegalTextRetrieval",
mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py CHANGED
@@ -36,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
 """,
         adapted_from=["ClimateFEVER"],
     )
+
+
+class NanoClimateFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoClimateFEVER-VN",
+        description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
+        dataset={
+            "path": "GreenNode/nano-climate-fever-vn",
+            "revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["ClimateFEVER-VN"],
+    )
mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py CHANGED
@@ -36,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
 """,
         adapted_from=["DBPedia"],
     )
+
+
+class NanoDBPediaVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoDBPedia-VN",
+        description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "GreenNode/nano-dbpedia-vn",
+            "revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
+        },
+        adapted_from=["DBPedia-VN"],
+    )
mteb/tasks/retrieval/vie/fevervn_retrieval.py CHANGED
@@ -36,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
 """,
         adapted_from=["FEVER"],
     )
+
+
+class NanoFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoFEVER-VN",
+        dataset={
+            "path": "GreenNode/nano-fever-vn",
+            "revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
+        },
+        description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://fever.ai/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["FEVER-VN"],
+    )
mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py CHANGED
@@ -36,3 +36,42 @@ class HotpotQAVN(AbsTaskRetrieval):
 """,
         adapted_from=["HotpotQA"],
     )
+
+
+class NanoHotpotQAVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoHotpotQA-VN",
+        dataset={
+            "path": "GreenNode/nano-hotpotqa-vn",
+            "revision": "f4de19a2fae1a582de114e5bcd178bb262183113",
+        },
+        description="NanoHotpotQAVN is a small version of A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://hotpotqa.github.io/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Web", "Written"],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a multi-hop question, retrieve documents that can help answer the question"
+        },
+        adapted_from=["HotpotQA-VN"],
+    )
mteb/tasks/retrieval/vie/msmarcovn_retrieval.py CHANGED
@@ -47,3 +47,51 @@ class MSMARCOVN(AbsTaskRetrieval):
 """,
         adapted_from=["MSMARCO"],
     )
+
+
+class NanoMSMARCOVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoMSMARCO-VN",
+        dataset={
+            "path": "GreenNode/nano-msmarco-vn",
+            "revision": "f149369c82ec228b05b0f6677699ab4bfbab73f6",
+        },
+        description="NanoMSMARCOVN is a small version of A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://microsoft.github.io/msmarco/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["dev"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=[
+            "Encyclopaedic",
+            "Academic",
+            "Blog",
+            "News",
+            "Medical",
+            "Government",
+            "Reviews",
+            "Non-fiction",
+            "Social",
+            "Web",
+        ],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={"query": "Given a query, retrieve relevant documents from MS MARCO-VN"},
+        adapted_from=["MSMARCO-VN"],
+    )
mteb/tasks/retrieval/vie/nqvn_retrieval.py CHANGED
@@ -36,3 +36,42 @@ class NQVN(AbsTaskRetrieval):
 """,
         adapted_from=["NQ"],
     )
+
+
+class NanoNQVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoNQ-VN",
+        dataset={
+            "path": "GreenNode/nano-nq-vn",
+            "revision": "1ad4d6556fe0e5314994839089ce070fb0db8b19",
+        },
+        description="NanoNQVN is a small version of A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://ai.google.com/research/NaturalQuestions/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a question, retrieve Wikipedia passages that answer the question"
+        },
+        adapted_from=["NQ-VN"],
+    )
mteb/tasks/retrieval/vie/tvpl_retrieval.py ADDED
@@ -0,0 +1,42 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+TEST_SAMPLES = 2048
+
+
+class TVPLRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="TVPLRetrieval",
+        description="A Vietnamese dataset for evaluating legal text retrieval. From Thu vien phap luat (TVPL) dataset: Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models.",
+        reference="https://aclanthology.org/2020.coling-main.233.pdf",
+        dataset={
+            "path": "GreenNode/TVPL-Retrieval-VN",
+            "revision": "6661dba4dfedff606537732d9f35f2c3738b081a",
+        },
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        dialect=[],
+        annotations_creators="human-annotated",
+        domains=["Legal"],
+        task_subtypes=["Question answering"],
+        sample_creation="found",
+        bibtex_citation=r"""
+@article{10.1145/3732938,
+  address = {New York, NY, USA},
+  author = {Le, Huong and Luu, Ngoc and Nguyen, Thanh and Dao, Tuan and Dinh, Sang},
+  doi = {10.1145/3732938},
+  issn = {2375-4699},
+  journal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
+  publisher = {Association for Computing Machinery},
+  title = {Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models},
+  url = {https://doi.org/10.1145/3732938},
+  year = {2025},
+}
+""",
+    )
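Once registered, the task is addressable by the `name` in its `TaskMetadata`; a small sketch, assuming the standard `mteb.get_task` lookup by task name:

```python
# Small sketch, assuming the standard mteb.get_task lookup by task name.
import mteb

task = mteb.get_task("TVPLRetrieval")
print(task.metadata.main_score)  # ndcg_at_10
print(task.metadata.domains)     # ['Legal']
print(task.metadata.eval_langs)  # ['vie-Latn']
```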
mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py CHANGED
@@ -24,5 +24,19 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval):
         annotations_creators="human-annotated",
         dialect=[],
         sample_creation="found",
-        bibtex_citation="",  # TODO: Add bibtex citation when the paper is published
+        bibtex_citation=r"""
+@inproceedings{10.1007/978-981-95-1746-6_17,
+  address = {Singapore},
+  author = {Pham, Bao Loc
+and Hoang, Quoc Viet
+and Luu, Quy Tung
+and Vo, Trong Thu},
+  booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
+  isbn = {978-981-95-1746-6},
+  pages = {153--163},
+  publisher = {Springer Nature Singapore},
+  title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
+  year = {2026},
+}
+""",
     )
mteb/types/__init__.py CHANGED
@@ -4,6 +4,7 @@ from ._encoder_io import (
     Conversation,
     ConversationTurn,
     CorpusDatasetType,
+    EncodeKwargs,
     InstructionDatasetType,
     PromptType,
     QueryDatasetType,
@@ -30,6 +31,7 @@ __all__ = [
     "Conversation",
     "ConversationTurn",
     "CorpusDatasetType",
+    "EncodeKwargs",
    "HFSubset",
    "ISOLanguage",
    "ISOLanguageScript",
mteb/types/_encoder_io.py CHANGED
@@ -13,6 +13,18 @@ if TYPE_CHECKING:
     from PIL import Image


+class EncodeKwargs(TypedDict):
+    """Keyword arguments for encoding methods.
+
+    Attributes:
+        batch_size: The batch size to use for encoding.
+        show_progress_bar: Whether to show a progress bar during encoding.
+    """
+
+    batch_size: NotRequired[int]
+    show_progress_bar: NotRequired[bool]
+
+
 # --- Output types ---
 Array = np.ndarray | torch.Tensor
 """General array type, can be a numpy array or a torch tensor."""
mteb/types/_result.py CHANGED
@@ -1,3 +1,4 @@
+from collections.abc import Mapping
 from typing import Any, NamedTuple

 HFSubset = str
@@ -8,7 +9,7 @@ SplitName = str
 Score = Any
 """A score value, could e.g. be accuracy. Normally it is a float or int, but it can take on any value. Should be json serializable."""

-ScoresDict = dict[str, Score]
+ScoresDict = Mapping[str, Score]
 """A dictionary of scores, typically also include metadata, e.g {'main_score': 0.5, 'accuracy': 0.5, 'f1': 0.6, 'hf_subset': 'en-de', 'languages': ['eng-Latn', 'deu-Latn']}"""


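Retyping `ScoresDict` as `Mapping` rather than `dict` makes the annotation read-only at the type level, so consumers can accept plain dicts as well as immutable views. A standalone illustration that mirrors the alias rather than importing it from mteb:

```python
# Standalone illustration mirroring the new alias; not imported from mteb.
from collections.abc import Mapping
from types import MappingProxyType
from typing import Any

ScoresDict = Mapping[str, Any]


def main_score(scores: ScoresDict) -> float:
    # Mapping only promises read access, so both calls below type-check.
    return float(scores["main_score"])


print(main_score({"main_score": 0.5}))                    # plain dict
print(main_score(MappingProxyType({"main_score": 0.7})))  # read-only view
```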
mteb/types/statistics.py CHANGED
@@ -10,8 +10,14 @@ class SplitDescriptiveStatistics(TypedDict):


 class DescriptiveStatistics(TypedDict, SplitDescriptiveStatistics):
-    """Class for descriptive statistics for the full task."""
+    """Class for descriptive statistics for the full task.

+    Attributes:
+        num_samples: Total number of samples
+        hf_subset_descriptive_stats: HFSubset descriptive statistics (only for multilingual datasets)
+    """
+
+    num_samples: int
     hf_subset_descriptive_stats: NotRequired[dict[HFSubset, SplitDescriptiveStatistics]]


@@ -88,9 +94,9 @@ class ScoreStatistics(TypedDict):
         max_score: Maximum score
     """

-    min_score: int
+    min_score: int | float
     avg_score: float
-    max_score: int
+    max_score: int | float


 class TopRankedStatistics(TypedDict):
{mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.5.2
+Version: 2.7.2
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -32,6 +32,8 @@ Requires-Dist: rich>=0.0.0
 Requires-Dist: pytrec-eval-terrier>=0.5.6
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: polars>=0.20.22
+Requires-Dist: torch; python_full_version < "3.14"
+Requires-Dist: torch>=2.9.0; python_full_version >= "3.14"
 Provides-Extra: image
 Requires-Dist: torchvision>0.2.1; extra == "image"
 Requires-Dist: transformers[torch-vision,vision]; extra == "image"
@@ -60,7 +62,7 @@ Requires-Dist: tiktoken>=0.8.0; extra == "openai"
 Provides-Extra: model2vec
 Requires-Dist: model2vec>=0.3.0; extra == "model2vec"
 Provides-Extra: pylate
-Requires-Dist: pylate>=1.3.1; python_version < "3.13" and extra == "pylate"
+Requires-Dist: pylate>=1.3.1; python_full_version < "3.13" and extra == "pylate"
 Provides-Extra: bm25s
 Requires-Dist: bm25s>=0.2.6; extra == "bm25s"
 Requires-Dist: PyStemmer>=2.2.0.3; extra == "bm25s"
@@ -91,10 +93,12 @@ Provides-Extra: ark
 Requires-Dist: volcengine-python-sdk[ark]==3.0.2; extra == "ark"
 Requires-Dist: tiktoken>=0.8.0; extra == "ark"
 Provides-Extra: colpali-engine
-Requires-Dist: colpali_engine>=0.3.12; extra == "colpali-engine"
+Requires-Dist: colpali_engine>=0.3.12; python_full_version < "3.14" and extra == "colpali-engine"
 Provides-Extra: colqwen3
 Requires-Dist: transformers>=4.57; extra == "colqwen3"
 Requires-Dist: torchvision>=0.22.1; extra == "colqwen3"
+Provides-Extra: sauerkrautlm-colpali
+Requires-Dist: sauerkrautlm-colpali>=0.1.0; python_full_version < "3.14" and extra == "sauerkrautlm-colpali"
 Provides-Extra: xet
 Requires-Dist: huggingface_hub>=0.32.0; extra == "xet"
 Provides-Extra: youtu
@@ -106,6 +110,8 @@ Provides-Extra: faiss-cpu
 Requires-Dist: faiss-cpu>=1.12.0; extra == "faiss-cpu"
 Provides-Extra: eager-embed
 Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
+Provides-Extra: vllm
+Requires-Dist: vllm>=0.11.1; extra == "vllm"
 Dynamic: license-file

 <h1 align="center">
@@ -144,12 +150,17 @@ Dynamic: license-file

 ## Installation

-You can install mteb simply using pip. For more on installation please see the [documentation](https://embeddings-benchmark.github.io/mteb/installation/).
+You can install mteb simply using pip or uv. For more on installation please see the [documentation](https://embeddings-benchmark.github.io/mteb/installation/).

 ```bash
 pip install mteb
 ```

+For faster installation, you can also use [uv](https://docs.astral.sh/uv/):
+```bash
+uv add mteb
+```
+


 ## Example Usage