mteb 2.6.5__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
  2. mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
  3. mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
  4. mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
  5. mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
  6. mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
  7. mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
  8. mteb/models/model_implementations/jina_clip.py +46 -8
  9. mteb/models/model_implementations/nvidia_models.py +73 -5
  10. mteb/models/model_implementations/octen_models.py +1 -1
  11. mteb/models/model_implementations/sentence_transformers_models.py +66 -0
  12. mteb/tasks/retrieval/vie/__init__.py +14 -6
  13. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
  14. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
  15. mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
  16. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
  17. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
  18. mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
  19. mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
  20. mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
  21. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/METADATA +1 -3
  22. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/RECORD +26 -18
  23. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/WHEEL +0 -0
  24. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/entry_points.txt +0 -0
  25. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/licenses/LICENSE +0 -0
  26. {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 102198,
4
+ "number_of_characters": 47870352,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 47719757,
7
+ "min_text_length": 9,
8
+ "average_text_length": 472.01951591046225,
9
+ "max_text_length": 8686,
10
+ "unique_texts": 101097
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 150595,
15
+ "min_text_length": 30,
16
+ "average_text_length": 136.78019981834694,
17
+ "max_text_length": 404,
18
+ "unique_texts": 1099
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3401,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 3.089009990917348,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 1123
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 132137,
4
+ "number_of_characters": 43323279,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 43311486,
7
+ "min_text_length": 11,
8
+ "average_text_length": 328.5778249819823,
9
+ "max_text_length": 8576,
10
+ "unique_texts": 131814
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 11793,
15
+ "min_text_length": 6,
16
+ "average_text_length": 36.62422360248447,
17
+ "max_text_length": 100,
18
+ "unique_texts": 321
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 11620,
23
+ "min_relevant_docs_per_query": 31,
24
+ "average_relevant_docs_per_query": 36.08695652173913,
25
+ "max_relevant_docs_per_query": 1288,
26
+ "unique_relevant_docs": 32537
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 106558,
4
+ "number_of_characters": 48164581,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 47886101,
7
+ "min_text_length": 9,
8
+ "average_text_length": 472.6783768310499,
9
+ "max_text_length": 8689,
10
+ "unique_texts": 101308
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 278480,
15
+ "min_text_length": 11,
16
+ "average_text_length": 53.04380952380952,
17
+ "max_text_length": 196,
18
+ "unique_texts": 5124
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 6254,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1912380952380952,
25
+ "max_relevant_docs_per_query": 15,
26
+ "unique_relevant_docs": 1324
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 117974,
4
+ "number_of_characters": 35927363,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 35335613,
7
+ "min_text_length": 22,
8
+ "average_text_length": 316.47705838625023,
9
+ "max_text_length": 4105,
10
+ "unique_texts": 111651
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 591750,
15
+ "min_text_length": 21,
16
+ "average_text_length": 93.61651637399146,
17
+ "max_text_length": 280,
18
+ "unique_texts": 6321
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 12642,
23
+ "min_relevant_docs_per_query": 2,
24
+ "average_relevant_docs_per_query": 2.0,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 11874
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "dev": {
3
+ "num_samples": 107153,
4
+ "number_of_characters": 33316879,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 33200903,
7
+ "min_text_length": 2,
8
+ "average_text_length": 320.30199218561575,
9
+ "max_text_length": 1712,
10
+ "unique_texts": 103641
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 115976,
15
+ "min_text_length": 8,
16
+ "average_text_length": 33.15494568324757,
17
+ "max_text_length": 190,
18
+ "unique_texts": 3498
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3700,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0577472841623785,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 3698
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 104095,
4
+ "number_of_characters": 52312680,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 52220289,
7
+ "min_text_length": 10,
8
+ "average_text_length": 510.98673124908265,
9
+ "max_text_length": 10245,
10
+ "unique_texts": 102181
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 92391,
15
+ "min_text_length": 22,
16
+ "average_text_length": 48.62684210526316,
17
+ "max_text_length": 113,
18
+ "unique_texts": 1900
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2283,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.201578947368421,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 2283
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 20561,
4
+ "number_of_characters": 10832770,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 9929303,
7
+ "min_text_length": 9,
8
+ "average_text_length": 938.8524016641452,
9
+ "max_text_length": 6319,
10
+ "unique_texts": 10573
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 903467,
15
+ "min_text_length": 13,
16
+ "average_text_length": 90.48242363545317,
17
+ "max_text_length": 228,
18
+ "unique_texts": 9985
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 11158,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.1174762143214823,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 10576
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -7,6 +7,7 @@ from tqdm.auto import tqdm
7
7
  from mteb._requires_package import requires_image_dependencies
8
8
  from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
+ from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
10
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
12
  from mteb.types import Array, BatchedInput, PromptType
12
13
 
@@ -120,6 +121,15 @@ class JinaCLIPModel(AbsEncoder):
120
121
  raise ValueError
121
122
 
122
123
 
124
+ _JINA_CLIP_TRAIN_DATASETS_V1 = {
125
+ # LAION400M
126
+ # ShareGPT4V
127
+ "MSMARCO",
128
+ "NQ",
129
+ "HotpotQA",
130
+ # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
131
+ }
132
+
123
133
  jina_clip_v1 = ModelMeta(
124
134
  loader=JinaCLIPModel,
125
135
  name="jinaai/jina-clip-v1",
@@ -140,13 +150,41 @@ jina_clip_v1 = ModelMeta(
140
150
  reference="https://huggingface.co/jinaai/jina-clip-v1",
141
151
  similarity_fn_name=ScoringFunction.COSINE,
142
152
  use_instructions=True,
143
- training_datasets={
144
- # LAION400M
145
- # ShareGPT4V
146
- "MSMARCO",
147
- # NQ
148
- # HotpotQA
149
- # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
150
- },
153
+ training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1,
151
154
  citation=JINA_CLIP_CITATION,
155
+ superseded_by="jinaai/jina-clip-v2",
156
+ )
157
+
158
+ jina_clip_v2 = ModelMeta(
159
+ loader=JinaCLIPModel,
160
+ name="jinaai/jina-clip-v2",
161
+ revision="344d954da76eb8ad47a7aaff42d012e30c15b8fe",
162
+ release_date="2024-10-09",
163
+ languages=["eng-Latn"],
164
+ n_parameters=865278477,
165
+ memory_usage_mb=1650.0,
166
+ max_tokens=8192,
167
+ embed_dim=1024,
168
+ license="cc-by-nc-4.0",
169
+ open_weights=True,
170
+ public_training_code=None,
171
+ public_training_data=None,
172
+ framework=["PyTorch", "Sentence Transformers"],
173
+ reference="https://huggingface.co/jinaai/jina-clip-v2",
174
+ similarity_fn_name=ScoringFunction.COSINE,
175
+ use_instructions=False,
176
+ training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1 | COLPALI_TRAINING_DATA,
177
+ modalities=["text", "image"],
178
+ model_type=["dense"],
179
+ citation="""
180
+ @misc{koukounas2024jinaclipv2multilingualmultimodalembeddings,
181
+ title={jina-clip-v2: Multilingual Multimodal Embeddings for Text and Images},
182
+ author={Andreas Koukounas and Georgios Mastrapas and Bo Wang and Mohammad Kalim Akram and Sedigheh Eslami and Michael Günther and Isabelle Mohr and Saba Sturua and Scott Martens and Nan Wang and Han Xiao},
183
+ year={2024},
184
+ eprint={2412.08802},
185
+ archivePrefix={arXiv},
186
+ primaryClass={cs.CL},
187
+ url={https://arxiv.org/abs/2412.08802},
188
+ }
189
+ """,
152
190
  )
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from collections.abc import Callable
2
3
  from typing import Any
3
4
 
4
5
  import torch
@@ -29,7 +30,7 @@ NV_RETRIEVER_CITATION = """@misc{moreira2025nvretrieverimprovingtextembedding,
29
30
  }"""
30
31
 
31
32
 
32
- def instruction_template(
33
+ def _instruction_template(
33
34
  instruction: str, prompt_type: PromptType | None = None
34
35
  ) -> str:
35
36
  return f"Instruct: {instruction}\nQuery: " if instruction else ""
@@ -100,10 +101,77 @@ nvidia_training_datasets = {
100
101
  "MrTidyRetrieval",
101
102
  }
102
103
 
104
+
105
+ class _NVEmbedWrapper(InstructSentenceTransformerModel):
106
+ """Inherited, because nvembed requires `sbert==2`, but it doesn't have tokenizers kwargs"""
107
+
108
+ def __init__(
109
+ self,
110
+ model_name: str,
111
+ revision: str,
112
+ instruction_template: str
113
+ | Callable[[str, PromptType | None], str]
114
+ | None = None,
115
+ max_seq_length: int | None = None,
116
+ apply_instruction_to_passages: bool = True,
117
+ padding_side: str | None = None,
118
+ add_eos_token: bool = False,
119
+ prompts_dict: dict[str, str] | None = None,
120
+ **kwargs: Any,
121
+ ):
122
+ from sentence_transformers import __version__ as sbert_version
123
+
124
+ required_transformers_version = "4.42.4"
125
+ required_sbert_version = "2.7.0"
126
+
127
+ if Version(transformers_version) != Version(required_transformers_version):
128
+ raise RuntimeError(
129
+ f"transformers version {transformers_version} is not match with required "
130
+ f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`"
131
+ )
132
+
133
+ if Version(sbert_version) != Version(required_sbert_version):
134
+ raise RuntimeError(
135
+ f"sbert version {sbert_version} is not match with required "
136
+ f"install version {required_sbert_version} to run `nvidia/NV-Embed-v2`"
137
+ )
138
+
139
+ requires_package(
140
+ self, "flash_attn", model_name, "pip install 'mteb[flash_attention]'"
141
+ )
142
+
143
+ from sentence_transformers import SentenceTransformer
144
+
145
+ if (
146
+ isinstance(instruction_template, str)
147
+ and "{instruction}" not in instruction_template
148
+ ):
149
+ raise ValueError(
150
+ "Instruction template must contain the string '{instruction}'."
151
+ )
152
+ if instruction_template is None:
153
+ logger.warning(
154
+ "No instruction template provided. Instructions will be used as-is."
155
+ )
156
+
157
+ self.instruction_template = instruction_template
158
+
159
+ self.model_name = model_name
160
+ self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
161
+ self.model.tokenizer.padding_side = padding_side
162
+ self.model.tokenizer.add_eos_token = add_eos_token
163
+
164
+ if max_seq_length:
165
+ # https://github.com/huggingface/sentence-transformers/issues/3575
166
+ self.model.max_seq_length = max_seq_length
167
+ self.apply_instruction_to_passages = apply_instruction_to_passages
168
+ self.prompts_dict = prompts_dict
169
+
170
+
103
171
  NV_embed_v2 = ModelMeta(
104
- loader=InstructSentenceTransformerModel,
172
+ loader=_NVEmbedWrapper,
105
173
  loader_kwargs=dict(
106
- instruction_template=instruction_template,
174
+ instruction_template=_instruction_template,
107
175
  trust_remote_code=True,
108
176
  max_seq_length=32768,
109
177
  padding_side="right",
@@ -132,9 +200,9 @@ NV_embed_v2 = ModelMeta(
132
200
  )
133
201
 
134
202
  NV_embed_v1 = ModelMeta(
135
- loader=InstructSentenceTransformerModel,
203
+ loader=_NVEmbedWrapper,
136
204
  loader_kwargs=dict(
137
- instruction_template=instruction_template,
205
+ instruction_template=_instruction_template,
138
206
  trust_remote_code=True,
139
207
  max_seq_length=32768,
140
208
  padding_side="right",
@@ -205,7 +205,7 @@ Octen_Embedding_8B = ModelMeta(
205
205
  name="bflhc/Octen-Embedding-8B",
206
206
  languages=multilingual_langs,
207
207
  open_weights=True,
208
- revision="2030603c2926ab005fafd824fac5911e271be21f",
208
+ revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
209
209
  release_date="2025-12-23",
210
210
  n_parameters=7567295488,
211
211
  memory_usage_mb=14433,
@@ -1,5 +1,7 @@
1
1
  """Implementation of Sentence Transformers model validated in MTEB."""
2
2
 
3
+ import numpy as np
4
+
3
5
  from mteb.models.model_meta import ModelMeta, ScoringFunction
4
6
  from mteb.models.sentence_transformer_wrapper import (
5
7
  SentenceTransformerEncoderWrapper,
@@ -773,3 +775,67 @@ gtr_t5_base = ModelMeta(
773
775
  },
774
776
  citation=GTR_CITATION,
775
777
  )
778
+
779
+ static_retrieval_mrl_en_v1 = ModelMeta(
780
+ loader=sentence_transformers_loader,
781
+ name="sentence-transformers/static-retrieval-mrl-en-v1",
782
+ revision="f60985c706f192d45d218078e49e5a8b6f15283a",
783
+ release_date="2024-10-24",
784
+ languages=["eng-Latn"],
785
+ n_parameters=3_125_4528,
786
+ memory_usage_mb=119,
787
+ max_tokens=np.inf,
788
+ embed_dim=1024,
789
+ license="apache-2.0",
790
+ open_weights=True,
791
+ public_training_code="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1/blob/main/train.py",
792
+ public_training_data=None,
793
+ framework=["PyTorch", "Sentence Transformers"],
794
+ reference="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1",
795
+ similarity_fn_name=ScoringFunction.COSINE,
796
+ use_instructions=False,
797
+ training_datasets={
798
+ "MSMARCO",
799
+ # gooaq
800
+ # s2orc
801
+ # allnli
802
+ # paq
803
+ # trivia-qa
804
+ # swim-ir-monolingual
805
+ # PubMedQA
806
+ # swim
807
+ "MIRACLRetrieval",
808
+ "MultiLongDocRetrieval",
809
+ "MrTidyRetrieval",
810
+ },
811
+ modalities=["text"],
812
+ model_type=["dense"],
813
+ )
814
+
815
+ multi_qa_mpnet_base_dot_v1 = ModelMeta(
816
+ loader=sentence_transformers_loader,
817
+ name="sentence-transformers/multi-qa-mpnet-base-dot-v1",
818
+ revision="3af7c6da5b3e1bea796ef6c97fe237538cbe6e7f",
819
+ release_date="2021-08-23",
820
+ languages=["eng-Latn"],
821
+ n_parameters=109486978,
822
+ memory_usage_mb=418.0,
823
+ max_tokens=512,
824
+ embed_dim=768,
825
+ license=None,
826
+ open_weights=True,
827
+ public_training_code="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1/blob/main/train_script.py",
828
+ public_training_data=None,
829
+ framework=["PyTorch", "Sentence Transformers"],
830
+ reference="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1",
831
+ similarity_fn_name=ScoringFunction.DOT_PRODUCT,
832
+ use_instructions=False,
833
+ training_datasets={
834
+ "MSMARCO",
835
+ "YahooAnswersTopicsClassification",
836
+ "NQ",
837
+ },
838
+ adapted_from="microsoft/mpnet-base",
839
+ modalities=["text"],
840
+ model_type=["dense"],
841
+ )
@@ -1,5 +1,5 @@
1
1
  from .argu_ana_vn_retrieval import ArguAnaVN
2
- from .climate_fevervn_retrieval import ClimateFEVERVN
2
+ from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
3
3
  from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
4
4
  from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
5
5
  from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
10
10
  from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
11
11
  from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
12
12
  from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
13
- from .db_pedia_vn_retrieval import DBPediaVN
14
- from .fevervn_retrieval import FEVERVN
13
+ from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
14
+ from .fevervn_retrieval import FEVERVN, NanoFEVERVN
15
15
  from .fi_qa2018_vn_retrieval import FiQA2018VN
16
16
  from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
17
- from .hotpot_qavn_retrieval import HotpotQAVN
18
- from .msmarcovn_retrieval import MSMARCOVN
17
+ from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
18
+ from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
19
19
  from .nf_corpus_vn_retrieval import NFCorpusVN
20
- from .nqvn_retrieval import NQVN
20
+ from .nqvn_retrieval import NQVN, NanoNQVN
21
21
  from .quora_vn_retrieval import QuoraVN
22
22
  from .sci_fact_vn_retrieval import SciFactVN
23
23
  from .scidocsvn_retrieval import SCIDOCSVN
24
24
  from .touche2020_vn_retrieval import Touche2020VN
25
25
  from .treccovidvn_retrieval import TRECCOVIDVN
26
+ from .tvpl_retrieval import TVPLRetrieval
26
27
  from .vie_qu_ad_retrieval import VieQuADRetrieval
27
28
  from .zac_legal_text_retrieval import ZacLegalTextRetrieval
28
29
 
@@ -49,8 +50,15 @@ __all__ = [
49
50
  "GreenNodeTableMarkdownRetrieval",
50
51
  "HotpotQAVN",
51
52
  "NFCorpusVN",
53
+ "NanoClimateFEVERVN",
54
+ "NanoDBPediaVN",
55
+ "NanoFEVERVN",
56
+ "NanoHotpotQAVN",
57
+ "NanoMSMARCOVN",
58
+ "NanoNQVN",
52
59
  "QuoraVN",
53
60
  "SciFactVN",
61
+ "TVPLRetrieval",
54
62
  "Touche2020VN",
55
63
  "VieQuADRetrieval",
56
64
  "ZacLegalTextRetrieval",
@@ -36,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
36
36
  """,
37
37
  adapted_from=["ClimateFEVER"],
38
38
  )
39
+
40
+
41
+ class NanoClimateFEVERVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoClimateFEVER-VN",
44
+ description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
45
+ reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
46
+ dataset={
47
+ "path": "GreenNode/nano-climate-fever-vn",
48
+ "revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
49
+ },
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Encyclopaedic", "Written"],
61
+ task_subtypes=["Claim verification"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
75
+ },
76
+ adapted_from=["ClimateFEVER-VN"],
77
+ )
@@ -36,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
36
36
  """,
37
37
  adapted_from=["DBPedia"],
38
38
  )
39
+
40
+
41
+ class NanoDBPediaVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoDBPedia-VN",
44
+ description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
45
+ reference="https://github.com/iai-group/DBpedia-Entity/",
46
+ dataset={
47
+ "path": "GreenNode/nano-dbpedia-vn",
48
+ "revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
49
+ },
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Written", "Encyclopaedic"],
61
+ task_subtypes=[],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
75
+ },
76
+ adapted_from=["DBPedia-VN"],
77
+ )
@@ -36,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
36
36
  """,
37
37
  adapted_from=["FEVER"],
38
38
  )
39
+
40
+
41
+ class NanoFEVERVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoFEVER-VN",
44
+ dataset={
45
+ "path": "GreenNode/nano-fever-vn",
46
+ "revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
47
+ },
48
+ description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
49
+ reference="https://fever.ai/",
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Encyclopaedic", "Written"],
61
+ task_subtypes=["Claim verification"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a claim, retrieve documents that support or refute the claim"
75
+ },
76
+ adapted_from=["FEVER-VN"],
77
+ )
@@ -36,3 +36,42 @@ class HotpotQAVN(AbsTaskRetrieval):
36
36
  """,
37
37
  adapted_from=["HotpotQA"],
38
38
  )
39
+
40
+
41
+ class NanoHotpotQAVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoHotpotQA-VN",
44
+ dataset={
45
+ "path": "GreenNode/nano-hotpotqa-vn",
46
+ "revision": "f4de19a2fae1a582de114e5bcd178bb262183113",
47
+ },
48
+ description="NanoHotpotQAVN is a small version of A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
49
+ reference="https://hotpotqa.github.io/",
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Web", "Written"],
61
+ task_subtypes=["Question answering"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a multi-hop question, retrieve documents that can help answer the question"
75
+ },
76
+ adapted_from=["HotpotQA-VN"],
77
+ )
@@ -47,3 +47,51 @@ class MSMARCOVN(AbsTaskRetrieval):
47
47
  """,
48
48
  adapted_from=["MSMARCO"],
49
49
  )
50
+
51
+
52
+ class NanoMSMARCOVN(AbsTaskRetrieval):
53
+ metadata = TaskMetadata(
54
+ name="NanoMSMARCO-VN",
55
+ dataset={
56
+ "path": "GreenNode/nano-msmarco-vn",
57
+ "revision": "f149369c82ec228b05b0f6677699ab4bfbab73f6",
58
+ },
59
+ description="NanoMSMARCOVN is a small version of A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
60
+ reference="https://microsoft.github.io/msmarco/",
61
+ type="Retrieval",
62
+ category="t2t",
63
+ eval_splits=["dev"],
64
+ eval_langs=["vie-Latn"],
65
+ main_score="ndcg_at_10",
66
+ date=("2025-07-29", "2025-07-30"),
67
+ license="cc-by-sa-4.0",
68
+ annotations_creators="derived",
69
+ dialect=[],
70
+ sample_creation="machine-translated and LM verified",
71
+ domains=[
72
+ "Encyclopaedic",
73
+ "Academic",
74
+ "Blog",
75
+ "News",
76
+ "Medical",
77
+ "Government",
78
+ "Reviews",
79
+ "Non-fiction",
80
+ "Social",
81
+ "Web",
82
+ ],
83
+ task_subtypes=["Question answering"],
84
+ bibtex_citation=r"""
85
+ @misc{pham2025vnmtebvietnamesemassivetext,
86
+ archiveprefix = {arXiv},
87
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
88
+ eprint = {2507.21500},
89
+ primaryclass = {cs.CL},
90
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
91
+ url = {https://arxiv.org/abs/2507.21500},
92
+ year = {2025},
93
+ }
94
+ """,
95
+ prompt={"query": "Given a query, retrieve relevant documents from MS MARCO-VN"},
96
+ adapted_from=["MSMARCO-VN"],
97
+ )
@@ -36,3 +36,42 @@ class NQVN(AbsTaskRetrieval):
36
36
  """,
37
37
  adapted_from=["NQ"],
38
38
  )
39
+
40
+
41
+ class NanoNQVN(AbsTaskRetrieval):
42
+ metadata = TaskMetadata(
43
+ name="NanoNQ-VN",
44
+ dataset={
45
+ "path": "GreenNode/nano-nq-vn",
46
+ "revision": "1ad4d6556fe0e5314994839089ce070fb0db8b19",
47
+ },
48
+ description="NanoNQVN is a small version of A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
49
+ reference="https://ai.google.com/research/NaturalQuestions/",
50
+ type="Retrieval",
51
+ category="t2t",
52
+ eval_splits=["test"],
53
+ eval_langs=["vie-Latn"],
54
+ main_score="ndcg_at_10",
55
+ date=("2025-07-29", "2025-07-30"),
56
+ license="cc-by-sa-4.0",
57
+ annotations_creators="derived",
58
+ dialect=[],
59
+ sample_creation="machine-translated and LM verified",
60
+ domains=["Written", "Encyclopaedic"],
61
+ task_subtypes=["Question answering"],
62
+ bibtex_citation=r"""
63
+ @misc{pham2025vnmtebvietnamesemassivetext,
64
+ archiveprefix = {arXiv},
65
+ author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
66
+ eprint = {2507.21500},
67
+ primaryclass = {cs.CL},
68
+ title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
69
+ url = {https://arxiv.org/abs/2507.21500},
70
+ year = {2025},
71
+ }
72
+ """,
73
+ prompt={
74
+ "query": "Given a question, retrieve Wikipedia passages that answer the question"
75
+ },
76
+ adapted_from=["NQ-VN"],
77
+ )
@@ -0,0 +1,42 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+ TEST_SAMPLES = 2048
5
+
6
+
7
+ class TVPLRetrieval(AbsTaskRetrieval):
8
+ metadata = TaskMetadata(
9
+ name="TVPLRetrieval",
10
+ description="A Vietnamese dataset for evaluating legal text retrieval. From Thu vien phap luat (TVPL) dataset: Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models.",
11
+ reference="https://aclanthology.org/2020.coling-main.233.pdf",
12
+ dataset={
13
+ "path": "GreenNode/TVPL-Retrieval-VN",
14
+ "revision": "6661dba4dfedff606537732d9f35f2c3738b081a",
15
+ },
16
+ type="Retrieval",
17
+ category="t2t",
18
+ modalities=["text"],
19
+ eval_splits=["test"],
20
+ eval_langs=["vie-Latn"],
21
+ main_score="ndcg_at_10",
22
+ date=("2025-07-29", "2025-07-30"),
23
+ license="cc-by-sa-4.0",
24
+ dialect=[],
25
+ annotations_creators="human-annotated",
26
+ domains=["Legal"],
27
+ task_subtypes=["Question answering"],
28
+ sample_creation="found",
29
+ bibtex_citation=r"""
30
+ @article{10.1145/3732938,
31
+ address = {New York, NY, USA},
32
+ author = {Le, Huong and Luu, Ngoc and Nguyen, Thanh and Dao, Tuan and Dinh, Sang},
33
+ doi = {10.1145/3732938},
34
+ issn = {2375-4699},
35
+ journal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
36
+ publisher = {Association for Computing Machinery},
37
+ title = {Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models},
38
+ url = {https://doi.org/10.1145/3732938},
39
+ year = {2025},
40
+ }
41
+ """,
42
+ )
@@ -24,5 +24,19 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval):
24
24
  annotations_creators="human-annotated",
25
25
  dialect=[],
26
26
  sample_creation="found",
27
- bibtex_citation="", # TODO: Add bibtex citation when the paper is published
27
+ bibtex_citation=r"""
28
+ @inproceedings{10.1007/978-981-95-1746-6_17,
29
+ address = {Singapore},
30
+ author = {Pham, Bao Loc
31
+ and Hoang, Quoc Viet
32
+ and Luu, Quy Tung
33
+ and Vo, Trong Thu},
34
+ booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
35
+ isbn = {978-981-95-1746-6},
36
+ pages = {153--163},
37
+ publisher = {Springer Nature Singapore},
38
+ title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
39
+ year = {2026},
40
+ }
41
+ """,
28
42
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.6.5
3
+ Version: 2.6.7
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -32,8 +32,6 @@ Requires-Dist: rich>=0.0.0
32
32
  Requires-Dist: pytrec-eval-terrier>=0.5.6
33
33
  Requires-Dist: pydantic>=2.0.0
34
34
  Requires-Dist: polars>=0.20.22
35
- Requires-Dist: torch<2.9.0; python_full_version < "3.14"
36
- Requires-Dist: torch>=2.9.0; python_full_version >= "3.14"
37
35
  Provides-Extra: image
38
36
  Requires-Dist: torchvision>0.2.1; extra == "image"
39
37
  Requires-Dist: transformers[torch-vision,vision]; extra == "image"
@@ -1254,13 +1254,19 @@ mteb/descriptive_stats/Retrieval/NQ-VN.json,sha256=lz7Jb865vUqLOxZhd8StxxAmlyNg-
1254
1254
  mteb/descriptive_stats/Retrieval/NQ.json,sha256=ylIFn-uHev-jkcua8SUmiDCRanM9uCkvRElU-kIGIJg,1014
1255
1255
  mteb/descriptive_stats/Retrieval/NQHardNegatives.json,sha256=uPcQxhFQ9R7HGcEu8c9U4K1a5yYntN-mVK4anaRHtNo,986
1256
1256
  mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json,sha256=Qv5QaFK0wXUec-9rv6K71oTgwdeOWxPpGEA-gu0-BkI,976
1257
+ mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json,sha256=8isr2BOTbbFU4_Ivwof3-MTdxngG2SMl_GYrD_vbg3Q,1010
1257
1258
  mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json,sha256=CdBXQhfQhtKG9_64I6AXDV4giSRppmLZDB3S8M28TOA,973
1259
+ mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json,sha256=xhNOXfcG-shzlptKuHBu9dkRXQAbmlknQqu8vhxKb6g,1012
1258
1260
  mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json,sha256=eAZJSH9WPk6AVkonlshmX9RHqq-b6iLTPmzO3yJFesk,974
1261
+ mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json,sha256=vb5rzqP3SHVo3R85xRaS-nXUfH0b6KQMAbFSmK6U--o,1010
1259
1262
  mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json,sha256=ac505XP13F5NRmCaQPwKdH-v9JTESsieu-K1IEa4j-I,971
1260
1263
  mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json,sha256=eB00Q60zKfJmIY6HO083-eWIKo1STY8z4WdzRrKMI4I,973
1264
+ mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json,sha256=JVJUfPow4rwxuUuMNJ_ygusaYDm1s7tBJX5IzUSfXLQ,998
1261
1265
  mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json,sha256=mAgjR7ekGKqk0QtiZxK-iuPWJIFWi9yvAO6j9liz-iQ,972
1266
+ mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json,sha256=InXDVB08Q11Pb9IU2H2s7rZT_DXnbdpcs2duj66EdHI,1008
1262
1267
  mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json,sha256=_2ap0Bglk-hVK2rYJy3E4ECVm6Kf3yqhvWYQ99ZXruM,970
1263
1268
  mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json,sha256=BjRdljofdnrJqn8BQdRpoxoanU5-XdeSn48085N2o4Q,977
1269
+ mteb/descriptive_stats/Retrieval/NanoNQ-VN.json,sha256=l_49qFdL8DtxaZ9i9lX5dJcxG1KnjvaOO-eyuVWsUAM,1010
1264
1270
  mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json,sha256=q9TBcEqGV8fmoK4_32a-yDLhGN6FAj049XuN95Hhiiw,969
1265
1271
  mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json,sha256=sG9ZgROl8kqDk3n2Rmb7zMgUmu0S8LqILZvjdevf-rQ,967
1266
1272
  mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json,sha256=J0a84pa1TupKlHC5Oi9zqhOkmKz2TqlcxPPXt58zuBU,973
@@ -1346,6 +1352,7 @@ mteb/descriptive_stats/Retrieval/TRECCOVID.json,sha256=VMICXZ2lA7GfiUyudOxYGRnMm
1346
1352
  mteb/descriptive_stats/Retrieval/TRECDL2019.json,sha256=6BOe9qATrKaRz8_cCMbqwXiu-ZiZq--Cm37uwTqSvJs,1013
1347
1353
  mteb/descriptive_stats/Retrieval/TRECDL2020.json,sha256=0WFbaPL2dyp5FZ1Wf0yAOVhndUfxP11sep562RHMplA,1014
1348
1354
  mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json,sha256=_oDN_OfptM5ak-d4OXA-RU0hrtjvfg18jhir8CckxZ0,985
1355
+ mteb/descriptive_stats/Retrieval/TVPLRetrieval.json,sha256=m-t90SylbkuUUyu-MprOpB27h8xkoqjA2ebhvq5Vl98,1007
1349
1356
  mteb/descriptive_stats/Retrieval/TempReasonL1.json,sha256=-MpwGucuNT0aKOMWwGld9POo_vkSnjpnih8xIFnN5d4,975
1350
1357
  mteb/descriptive_stats/Retrieval/TempReasonL2Context.json,sha256=Gd2cVFAsdF1RHHWIbKI9hZLWgrbFzp8p0xoa6NU1uGM,996
1351
1358
  mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json,sha256=om5WmIGXJLeMI-b0Tp7-odKRH-S9kx6OHXlnAD62rLk,992
@@ -1507,7 +1514,7 @@ mteb/models/model_implementations/human.py,sha256=EtYa8G7Dc8fDcelBVw0xTpxGGx1YKK
1507
1514
  mteb/models/model_implementations/ibm_granite_models.py,sha256=ljHjuPuBkIwJvp5WZ3csjTOIb14nLh1h3OYkW-CEeHY,8464
1508
1515
  mteb/models/model_implementations/inf_models.py,sha256=SXXs3s9PWo08fzrxG_WOXGc_gvbpmkt-Blt7YoGcPRo,3020
1509
1516
  mteb/models/model_implementations/jasper_models.py,sha256=buJgllGIeyi7LsxDJY3UYJs_YzdDBkU3QpuQyU6VoTc,16293
1510
- mteb/models/model_implementations/jina_clip.py,sha256=QZUe7fm0otnnPHAIYnxcRwE1VHpNt3Xs-FGlUV6Itwc,5167
1517
+ mteb/models/model_implementations/jina_clip.py,sha256=0XhRSWTPR3ERAsOoVOxhB1yV6v1pEY8EQcTy1ChtSoU,6595
1511
1518
  mteb/models/model_implementations/jina_models.py,sha256=kFmkAWUFoJpq_1tRQIspk54lsik2vIoQcy5DS7YKgQ0,35198
1512
1519
  mteb/models/model_implementations/kalm_models.py,sha256=SHqkw5p7HzmQrb_bIFjRp1rsuv2v531nXIk390h_ojY,62115
1513
1520
  mteb/models/model_implementations/kblab.py,sha256=EisTJXijICN2pyfWT_89qUnNO7TH95t1LxCxjzJnzQo,1237
@@ -1534,8 +1541,8 @@ mteb/models/model_implementations/no_instruct_sentence_models.py,sha256=qLiMok_O
1534
1541
  mteb/models/model_implementations/nomic_models.py,sha256=dmQC_cWg6hAmiBHK7fXoXEiGBJnJvrq0RsnCcJ2qe1Q,15137
1535
1542
  mteb/models/model_implementations/nomic_models_vision.py,sha256=usCKfZCR7aEi_DnNmVAYjH-lXx_ipQkBVtUAmhJ90QI,6870
1536
1543
  mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=6dTGtK1GiaYdpJ4IQFgCCOkGyHQyuEUatKs-Uv-1YmE,6450
1537
- mteb/models/model_implementations/nvidia_models.py,sha256=_lLfFl4-uSKpZdj_SDpdKiI2Gb5C1GgPqWSS-QdlYMM,21768
1538
- mteb/models/model_implementations/octen_models.py,sha256=J_-eNARXLgN8H_v5fobOr01RXK-G3oWdv02hG4L_gWY,7511
1544
+ mteb/models/model_implementations/nvidia_models.py,sha256=JMy0x7EWGrAxZ9s63F2vSPdPS-9yF3RIS4uj3N2UrVI,24315
1545
+ mteb/models/model_implementations/octen_models.py,sha256=FwQAcB_z6bFohpFlNQK2ugLBEOQUu533auOhrNqMxaM,7511
1539
1546
  mteb/models/model_implementations/openai_models.py,sha256=905BajYi_XyOZgqU3AeKpwIttLoUitaAyc48sTWI6Jg,9482
1540
1547
  mteb/models/model_implementations/openclip_models.py,sha256=MyosgeYSrgBXGuGFtI2Tyxksxpb7bADFJVSYFCLweVA,11622
1541
1548
  mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=TnIHut_IHvplvovlcTZ-PWnEldTzcru5JdUIaTH-8Do,8636
@@ -1565,7 +1572,7 @@ mteb/models/model_implementations/searchmap_models.py,sha256=xVQPkO7aLp_kBFiMDAm
1565
1572
  mteb/models/model_implementations/seed_1_6_embedding_models.py,sha256=gcGKEY-n7DWGPlXYhO_kcNJ3lkBEnbw8NUxADNs3siM,18635
1566
1573
  mteb/models/model_implementations/seed_1_6_embedding_models_1215.py,sha256=OoTHcDRQGOuSzf08V62EXrSEdRsXhnMv2ZN9feJWs9s,36443
1567
1574
  mteb/models/model_implementations/seed_models.py,sha256=9UF2AQ0Uue8DD73SjYhHn2hLxey_7Iq9ii9TkRaA3CM,14168
1568
- mteb/models/model_implementations/sentence_transformers_models.py,sha256=WFWB7SPY9WS9b-SWiSAWSszQ7lJO-QGBxnIN8bU3kWE,23969
1575
+ mteb/models/model_implementations/sentence_transformers_models.py,sha256=6oULaf2mTyVe7vy9oS_QoKuxXXPaAqjQgSooMTG0xow,26071
1569
1576
  mteb/models/model_implementations/shuu_model.py,sha256=1jDFFPAfbfrSzC4vbHczO4yqy3Xh4tWiDAd3FS9-T6M,1177
1570
1577
  mteb/models/model_implementations/siglip_models.py,sha256=SOSyp-B7w6Vvqas_10D_1rvpJcKSQuJmXGy7Wdtsw7o,13012
1571
1578
  mteb/models/model_implementations/slm_models.py,sha256=JXjBio-9NFHLefU4Ny1Z-fFkyvvIz0U2kQ6t5s-PzlQ,13427
@@ -2474,9 +2481,9 @@ mteb/tasks/retrieval/swe/swe_faq_retrieval.py,sha256=s-o7IM_l7giuK4bJMdYkq2CtE0Q
2474
2481
  mteb/tasks/retrieval/swe/swedn_retrieval.py,sha256=RFcpp0u-EKIwSRXR37tJ0_haY6Jvlfj8DWCgrD-0tnU,1512
2475
2482
  mteb/tasks/retrieval/tur/__init__.py,sha256=tAKhhsTK6meiZwRMIvbx7_ye90JAAW3dlS8iI0r_vg8,84
2476
2483
  mteb/tasks/retrieval/tur/tur_hist_quad.py,sha256=s7S5RrdwPx-0aatUwbgFbuLtj8927yQUHp1SEODfAl0,3669
2477
- mteb/tasks/retrieval/vie/__init__.py,sha256=j69iltc-is1oqx0oIV1RVjjM46LLH-JJQzKnxm4cYvc,2142
2484
+ mteb/tasks/retrieval/vie/__init__.py,sha256=8k8aUndynSTP72j75e2tcU-8omMuGzOVZp3KxIAGaBg,2419
2478
2485
  mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py,sha256=wmE6syUs0sLs7xgIOxXQuiQzpxrskdsTc5sK46v1YEQ,1754
2479
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py,sha256=4GMO5qSYbP0pFtf1yklMZNqFgh8qi1Xo2IXQDl9t14s,1849
2486
+ mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py,sha256=eonoS9NWKw-okR9Eqe4B8YgzGSbw0t7FcNpt0JwxyKU,3788
2480
2487
  mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py,sha256=1c6s1C0j1x7kE92WMv9JB4I_rdsHboyP-QILU-18rQ4,1851
2481
2488
  mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py,sha256=h--L4OiLIalxHnSulEiUZjMo7JRxjia-mKOnnoaOkzI,1813
2482
2489
  mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py,sha256=Jm5-2YbfBObFW_Ygwu03PAnSNMcZkH_7SL8L18KVWvQ,1857
@@ -2487,21 +2494,22 @@ mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py,sha256=9EiLKJrpRXACmxZ
2487
2494
  mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py,sha256=7Mr2sZrAKzFDeMT_7eQQ_52OKzefGFAnkcHmO4lntIo,1824
2488
2495
  mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py,sha256=2zDcrsCfcTAcybUmTpGeJQxUxNpkY7Ha8Tf0xwfqTcQ,1810
2489
2496
  mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py,sha256=ppFPam-3AXXVLVp_DiXeHaSr16Va44_-eRkOH0m5ypo,1821
2490
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py,sha256=hOiwz2bcayDW6VrCvsIGeYh1TT7koByM76rZZwtp9KA,1754
2491
- mteb/tasks/retrieval/vie/fevervn_retrieval.py,sha256=xLGoXefGk1l1AFiOSf2Ja0fM_rAQp4tdaR8H6jJqYlI,1853
2497
+ mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py,sha256=9YEubKLDCMJhck_EjY4r3VzAFDu-P4SWR5CLnHdSkTQ,3571
2498
+ mteb/tasks/retrieval/vie/fevervn_retrieval.py,sha256=JLrpB90G5c7ZR2jM9GsYE2YQ51qTnn5FH-LDzO99Z1Q,3768
2492
2499
  mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py,sha256=FGfFuLzRCTuupRxZdjVbBiwCOSspb3vwvtNAKvyXjso,1714
2493
2500
  mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py,sha256=O7iIcuvqhrHjB7J1VxH9YJ3v6cuFFBQdrrnYwLgeRfE,2429
2494
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py,sha256=FYWj8EhnfwDuPRxZ8uTeGkfa2Q-jDU2bliTmp975Coc,1837
2495
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py,sha256=xtJ1-rjx4slwSR8p6NedqItTk-79ZzT2f9FlDOhbzkE,1958
2501
+ mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py,sha256=Vg_YI8YbZpXMmwZXS-2KLRutL2Nehw5tW231S2qShd4,3753
2502
+ mteb/tasks/retrieval/vie/msmarcovn_retrieval.py,sha256=syDFYmXL2xK3xCQrBAopGul8_3pDZzBdIjMpk2XbA1s,3951
2496
2503
  mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py,sha256=4S8IDJ-TVjKEy2teM8GOeDzHIZR8txkPvX0sGDYIyqs,1780
2497
- mteb/tasks/retrieval/vie/nqvn_retrieval.py,sha256=tQT2t6XcflVRM78t_5TujWD27e9uCMrsfN0DBjDBY0E,1744
2504
+ mteb/tasks/retrieval/vie/nqvn_retrieval.py,sha256=f8LmUGAmsMnCdn-ovfPcpX12X4rmdpXj3F-q6GwjBEc,3551
2498
2505
  mteb/tasks/retrieval/vie/quora_vn_retrieval.py,sha256=VkgKCFbDkOuZAsMl36lOr-MuvbhNfE8zUmmiySW9lSY,1837
2499
2506
  mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py,sha256=7F3wSU9N2BAj4Jmzw7sjbcxTyYDYs_3I1434X3riaZ4,1773
2500
2507
  mteb/tasks/retrieval/vie/scidocsvn_retrieval.py,sha256=WlcfDfF43jsNf9D_Bl3k02RiiPdedORID6CEEMAYTLc,1815
2501
2508
  mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py,sha256=DKcNwCCdANt7hNr3fLao9jkIJJjfxJ0jLLbD7_b-KnE,1752
2502
2509
  mteb/tasks/retrieval/vie/treccovidvn_retrieval.py,sha256=ZlFFL37Zd_sbKXaUZx41XTxps-nnOi3PnBNCy9KvlJU,1826
2510
+ mteb/tasks/retrieval/vie/tvpl_retrieval.py,sha256=CGwgT9spHONw9cOeuum_BS7khZbooqoNqJgVV6Utfic,1611
2503
2511
  mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py,sha256=eZh1rR43iXDHoylOGKjrUCopzEujE-1GSGTn2TMrkro,3621
2504
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py,sha256=Y93j0EwG6-bcc0DMLvHP9q3r9b_3xLXu6YBR0Q5HDho,985
2512
+ mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py,sha256=BI2GbbkOPnWQpbn9ul6ShHugAZ994iiS7hVi5v1K17Y,1386
2505
2513
  mteb/tasks/retrieval/zho/__init__.py,sha256=dIN-rPfrEjkCuUCha8SpQdlzWYY6IMO_HLxebcBhQxA,438
2506
2514
  mteb/tasks/retrieval/zho/cmteb_retrieval.py,sha256=DXNkvMQQZsKv1U5L_0boKEXGLDPn4RfauIlxwb0f-EQ,10789
2507
2515
  mteb/tasks/retrieval/zho/le_ca_r_dv2_retrieval.py,sha256=O7kNB_7rpgG7_KsKC0SUKG42dhx66Rakk77uy4Iufk0,1293
@@ -2604,9 +2612,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2604
2612
  mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
2605
2613
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2606
2614
  mteb/types/statistics.py,sha256=GwkBPmAr18Onu-vHtzHs0PFrhCozdOMiT13HwnWL4ZM,3961
2607
- mteb-2.6.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2608
- mteb-2.6.5.dist-info/METADATA,sha256=27kspNt-a7zJ0Ihl2nB5m4Ak1-hba5xQjBuqGnCFWcQ,14397
2609
- mteb-2.6.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2610
- mteb-2.6.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2611
- mteb-2.6.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2612
- mteb-2.6.5.dist-info/RECORD,,
2615
+ mteb-2.6.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2616
+ mteb-2.6.7.dist-info/METADATA,sha256=p99o5hSYjMeWfoMLwNljk7_mDzsRjVXBbwPzsobuyWA,14281
2617
+ mteb-2.6.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2618
+ mteb-2.6.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2619
+ mteb-2.6.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2620
+ mteb-2.6.7.dist-info/RECORD,,
File without changes