mteb 2.6.5__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/models/model_implementations/jina_clip.py +46 -8
- mteb/models/model_implementations/nvidia_models.py +73 -5
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/sentence_transformers_models.py +66 -0
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/METADATA +1 -3
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/RECORD +26 -18
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/WHEEL +0 -0
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/top_level.txt +0 -0
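
The substantive additions in 2.6.7 are seven Vietnamese retrieval tasks (the Nano*-VN family plus TVPLRetrieval) and new model metadata (jinaai/jina-clip-v2, sentence-transformers/static-retrieval-mrl-en-v1, and sentence-transformers/multi-qa-mpnet-base-dot-v1). As a quick orientation, a minimal sketch of running one of the new tasks; this assumes mteb's public get_tasks/get_model/evaluate API, which is not itself part of this diff:

import mteb

# Task names follow TaskMetadata.name ("NanoMSMARCO-VN", "TVPLRetrieval"),
# not the Python class names (NanoMSMARCOVN) or the module file names.
tasks = mteb.get_tasks(tasks=["NanoMSMARCO-VN", "TVPLRetrieval"])

# Any registered embedding model works; this multilingual model is an
# arbitrary small choice for a smoke test.
model = mteb.get_model("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

results = mteb.evaluate(model, tasks)
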
mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 102198,
+        "number_of_characters": 47870352,
+        "documents_text_statistics": {
+            "total_text_length": 47719757,
+            "min_text_length": 9,
+            "average_text_length": 472.01951591046225,
+            "max_text_length": 8686,
+            "unique_texts": 101097
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 150595,
+            "min_text_length": 30,
+            "average_text_length": 136.78019981834694,
+            "max_text_length": 404,
+            "unique_texts": 1099
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 3401,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 3.089009990917348,
+            "max_relevant_docs_per_query": 5,
+            "unique_relevant_docs": 1123
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 132137,
+        "number_of_characters": 43323279,
+        "documents_text_statistics": {
+            "total_text_length": 43311486,
+            "min_text_length": 11,
+            "average_text_length": 328.5778249819823,
+            "max_text_length": 8576,
+            "unique_texts": 131814
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 11793,
+            "min_text_length": 6,
+            "average_text_length": 36.62422360248447,
+            "max_text_length": 100,
+            "unique_texts": 321
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 11620,
+            "min_relevant_docs_per_query": 31,
+            "average_relevant_docs_per_query": 36.08695652173913,
+            "max_relevant_docs_per_query": 1288,
+            "unique_relevant_docs": 32537
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 106558,
+        "number_of_characters": 48164581,
+        "documents_text_statistics": {
+            "total_text_length": 47886101,
+            "min_text_length": 9,
+            "average_text_length": 472.6783768310499,
+            "max_text_length": 8689,
+            "unique_texts": 101308
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 278480,
+            "min_text_length": 11,
+            "average_text_length": 53.04380952380952,
+            "max_text_length": 196,
+            "unique_texts": 5124
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 6254,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 1.1912380952380952,
+            "max_relevant_docs_per_query": 15,
+            "unique_relevant_docs": 1324
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 117974,
+        "number_of_characters": 35927363,
+        "documents_text_statistics": {
+            "total_text_length": 35335613,
+            "min_text_length": 22,
+            "average_text_length": 316.47705838625023,
+            "max_text_length": 4105,
+            "unique_texts": 111651
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 591750,
+            "min_text_length": 21,
+            "average_text_length": 93.61651637399146,
+            "max_text_length": 280,
+            "unique_texts": 6321
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 12642,
+            "min_relevant_docs_per_query": 2,
+            "average_relevant_docs_per_query": 2.0,
+            "max_relevant_docs_per_query": 2,
+            "unique_relevant_docs": 11874
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json
@@ -0,0 +1,30 @@
+{
+    "dev": {
+        "num_samples": 107153,
+        "number_of_characters": 33316879,
+        "documents_text_statistics": {
+            "total_text_length": 33200903,
+            "min_text_length": 2,
+            "average_text_length": 320.30199218561575,
+            "max_text_length": 1712,
+            "unique_texts": 103641
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 115976,
+            "min_text_length": 8,
+            "average_text_length": 33.15494568324757,
+            "max_text_length": 190,
+            "unique_texts": 3498
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 3700,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 1.0577472841623785,
+            "max_relevant_docs_per_query": 4,
+            "unique_relevant_docs": 3698
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/NanoNQ-VN.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 104095,
+        "number_of_characters": 52312680,
+        "documents_text_statistics": {
+            "total_text_length": 52220289,
+            "min_text_length": 10,
+            "average_text_length": 510.98673124908265,
+            "max_text_length": 10245,
+            "unique_texts": 102181
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 92391,
+            "min_text_length": 22,
+            "average_text_length": 48.62684210526316,
+            "max_text_length": 113,
+            "unique_texts": 1900
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 2283,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 1.201578947368421,
+            "max_relevant_docs_per_query": 4,
+            "unique_relevant_docs": 2283
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/descriptive_stats/Retrieval/TVPLRetrieval.json
@@ -0,0 +1,30 @@
+{
+    "test": {
+        "num_samples": 20561,
+        "number_of_characters": 10832770,
+        "documents_text_statistics": {
+            "total_text_length": 9929303,
+            "min_text_length": 9,
+            "average_text_length": 938.8524016641452,
+            "max_text_length": 6319,
+            "unique_texts": 10573
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+            "total_text_length": 903467,
+            "min_text_length": 13,
+            "average_text_length": 90.48242363545317,
+            "max_text_length": 228,
+            "unique_texts": 9985
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+            "num_relevant_docs": 11158,
+            "min_relevant_docs_per_query": 1,
+            "average_relevant_docs_per_query": 1.1174762143214823,
+            "max_relevant_docs_per_query": 8,
+            "unique_relevant_docs": 10576
+        },
+        "top_ranked_statistics": null
+    }
+}

mteb/models/model_implementations/jina_clip.py
@@ -7,6 +7,7 @@ from tqdm.auto import tqdm
 from mteb._requires_package import requires_image_dependencies
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
 
@@ -120,6 +121,15 @@ class JinaCLIPModel(AbsEncoder):
             raise ValueError
 
 
+_JINA_CLIP_TRAIN_DATASETS_V1 = {
+    # LAION400M
+    # ShareGPT4V
+    "MSMARCO",
+    "NQ",
+    "HotpotQA",
+    # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
+}
+
 jina_clip_v1 = ModelMeta(
     loader=JinaCLIPModel,
     name="jinaai/jina-clip-v1",
@@ -140,13 +150,41 @@ jina_clip_v1 = ModelMeta(
     reference="https://huggingface.co/jinaai/jina-clip-v1",
     similarity_fn_name=ScoringFunction.COSINE,
     use_instructions=True,
-    training_datasets={
-        # LAION400M
-        # ShareGPT4V
-        "MSMARCO",
-        # NQ
-        # HotpotQA
-        # Natural Language Inference (NLI) dataset (Bowman et al., 2015)
-    },
+    training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1,
     citation=JINA_CLIP_CITATION,
+    superseded_by="jinaai/jina-clip-v2",
+)
+
+jina_clip_v2 = ModelMeta(
+    loader=JinaCLIPModel,
+    name="jinaai/jina-clip-v2",
+    revision="344d954da76eb8ad47a7aaff42d012e30c15b8fe",
+    release_date="2024-10-09",
+    languages=["eng-Latn"],
+    n_parameters=865278477,
+    memory_usage_mb=1650.0,
+    max_tokens=8192,
+    embed_dim=1024,
+    license="cc-by-nc-4.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/jinaai/jina-clip-v2",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=False,
+    training_datasets=_JINA_CLIP_TRAIN_DATASETS_V1 | COLPALI_TRAINING_DATA,
+    modalities=["text", "image"],
+    model_type=["dense"],
+    citation="""
+@misc{koukounas2024jinaclipv2multilingualmultimodalembeddings,
+    title={jina-clip-v2: Multilingual Multimodal Embeddings for Text and Images},
+    author={Andreas Koukounas and Georgios Mastrapas and Bo Wang and Mohammad Kalim Akram and Sedigheh Eslami and Michael Günther and Isabelle Mohr and Saba Sturua and Scott Martens and Nan Wang and Han Xiao},
+    year={2024},
+    eprint={2412.08802},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL},
+    url={https://arxiv.org/abs/2412.08802},
+}
+""",
 )

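jina-clip-v2 reuses the v1 training-dataset collection and merges in COLPALI_TRAINING_DATA with the | operator, which performs a union for sets and a right-biased merge for dicts on Python 3.9+. A small illustration with hypothetical stand-in values (the real constants live in mteb's model implementation modules):

# Stand-in values for illustration only.
v1_data = {"MSMARCO", "NQ", "HotpotQA"}  # mirrors _JINA_CLIP_TRAIN_DATASETS_V1
colpali_data = {"DocVQA", "VQAv2"}  # hypothetical COLPALI_TRAINING_DATA members

merged = v1_data | colpali_data  # set union; duplicates collapse
print(sorted(merged))  # ['DocVQA', 'HotpotQA', 'MSMARCO', 'NQ', 'VQAv2']
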
mteb/models/model_implementations/nvidia_models.py
@@ -1,4 +1,5 @@
 import logging
+from collections.abc import Callable
 from typing import Any
 
 import torch
@@ -29,7 +30,7 @@ NV_RETRIEVER_CITATION = """@misc{moreira2025nvretrieverimprovingtextembedding,
 }"""
 
 
-def
+def _instruction_template(
     instruction: str, prompt_type: PromptType | None = None
 ) -> str:
     return f"Instruct: {instruction}\nQuery: " if instruction else ""
@@ -100,10 +101,77 @@ nvidia_training_datasets = {
     "MrTidyRetrieval",
 }
 
+
+class _NVEmbedWrapper(InstructSentenceTransformerModel):
+    """Inherited, because nvembed requires `sbert==2`, but it doesn't have tokenizers kwargs"""
+
+    def __init__(
+        self,
+        model_name: str,
+        revision: str,
+        instruction_template: str
+        | Callable[[str, PromptType | None], str]
+        | None = None,
+        max_seq_length: int | None = None,
+        apply_instruction_to_passages: bool = True,
+        padding_side: str | None = None,
+        add_eos_token: bool = False,
+        prompts_dict: dict[str, str] | None = None,
+        **kwargs: Any,
+    ):
+        from sentence_transformers import __version__ as sbert_version
+
+        required_transformers_version = "4.42.4"
+        required_sbert_version = "2.7.0"
+
+        if Version(transformers_version) != Version(required_transformers_version):
+            raise RuntimeError(
+                f"transformers version {transformers_version} is not match with required "
+                f"install version {required_transformers_version} to run `nvidia/NV-Embed-v2`"
+            )
+
+        if Version(sbert_version) != Version(required_sbert_version):
+            raise RuntimeError(
+                f"sbert version {sbert_version} is not match with required "
+                f"install version {required_sbert_version} to run `nvidia/NV-Embed-v2`"
+            )
+
+        requires_package(
+            self, "flash_attn", model_name, "pip install 'mteb[flash_attention]'"
+        )
+
+        from sentence_transformers import SentenceTransformer
+
+        if (
+            isinstance(instruction_template, str)
+            and "{instruction}" not in instruction_template
+        ):
+            raise ValueError(
+                "Instruction template must contain the string '{instruction}'."
+            )
+        if instruction_template is None:
+            logger.warning(
+                "No instruction template provided. Instructions will be used as-is."
+            )
+
+        self.instruction_template = instruction_template
+
+        self.model_name = model_name
+        self.model = SentenceTransformer(model_name, revision=revision, **kwargs)
+        self.model.tokenizer.padding_side = padding_side
+        self.model.tokenizer.add_eos_token = add_eos_token
+
+        if max_seq_length:
+            # https://github.com/huggingface/sentence-transformers/issues/3575
+            self.model.max_seq_length = max_seq_length
+        self.apply_instruction_to_passages = apply_instruction_to_passages
+        self.prompts_dict = prompts_dict
+
+
 NV_embed_v2 = ModelMeta(
-    loader=
+    loader=_NVEmbedWrapper,
     loader_kwargs=dict(
-        instruction_template=
+        instruction_template=_instruction_template,
         trust_remote_code=True,
         max_seq_length=32768,
         padding_side="right",
@@ -132,9 +200,9 @@ NV_embed_v2 = ModelMeta(
 )
 
 NV_embed_v1 = ModelMeta(
-    loader=
+    loader=_NVEmbedWrapper,
     loader_kwargs=dict(
-        instruction_template=
+        instruction_template=_instruction_template,
         trust_remote_code=True,
         max_seq_length=32768,
         padding_side="right",

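The new _NVEmbedWrapper pins its dependencies with equality checks on version objects (Version, transformers_version, requires_package, and logger appear without imports above because the hunk only shows changed lines; they presumably already exist elsewhere in nvidia_models.py). Equality pinning rejects every other release, including patch bumps. A short sketch of that comparison behavior, assuming the packaging library:

from packaging.version import Version

# Exact-equality pinning, as in _NVEmbedWrapper's checks.
print(Version("4.42.4") == Version("4.42.4"))    # True  -> the wrapper proceeds
print(Version("4.42.5") == Version("4.42.4"))    # False -> the wrapper raises RuntimeError
print(Version("4.42.4.0") == Version("4.42.4"))  # True  -> PEP 440 normalizes trailing zeros
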
mteb/models/model_implementations/octen_models.py
@@ -205,7 +205,7 @@ Octen_Embedding_8B = ModelMeta(
     name="bflhc/Octen-Embedding-8B",
     languages=multilingual_langs,
     open_weights=True,
-    revision="
+    revision="f7db178d5a82fb841f606a6a67c423cead2fdbba",
     release_date="2025-12-23",
     n_parameters=7567295488,
     memory_usage_mb=14433,

mteb/models/model_implementations/sentence_transformers_models.py
@@ -1,5 +1,7 @@
 """Implementation of Sentence Transformers model validated in MTEB."""
 
+import numpy as np
+
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.models.sentence_transformer_wrapper import (
     SentenceTransformerEncoderWrapper,
@@ -773,3 +775,67 @@ gtr_t5_base = ModelMeta(
     },
     citation=GTR_CITATION,
 )
+
+static_retrieval_mrl_en_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="sentence-transformers/static-retrieval-mrl-en-v1",
+    revision="f60985c706f192d45d218078e49e5a8b6f15283a",
+    release_date="2024-10-24",
+    languages=["eng-Latn"],
+    n_parameters=3_125_4528,
+    memory_usage_mb=119,
+    max_tokens=np.inf,
+    embed_dim=1024,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1/blob/main/train.py",
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=False,
+    training_datasets={
+        "MSMARCO",
+        # gooaq
+        # s2orc
+        # allnli
+        # paq
+        # trivia-qa
+        # swim-ir-monolingual
+        # PubMedQA
+        # swim
+        "MIRACLRetrieval",
+        "MultiLongDocRetrieval",
+        "MrTidyRetrieval",
+    },
+    modalities=["text"],
+    model_type=["dense"],
+)
+
+multi_qa_mpnet_base_dot_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="sentence-transformers/multi-qa-mpnet-base-dot-v1",
+    revision="3af7c6da5b3e1bea796ef6c97fe237538cbe6e7f",
+    release_date="2021-08-23",
+    languages=["eng-Latn"],
+    n_parameters=109486978,
+    memory_usage_mb=418.0,
+    max_tokens=512,
+    embed_dim=768,
+    license=None,
+    open_weights=True,
+    public_training_code="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1/blob/main/train_script.py",
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1",
+    similarity_fn_name=ScoringFunction.DOT_PRODUCT,
+    use_instructions=False,
+    training_datasets={
+        "MSMARCO",
+        "YahooAnswersTopicsClassification",
+        "NQ",
+    },
+    adapted_from="microsoft/mpnet-base",
+    modalities=["text"],
+    model_type=["dense"],
+)

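The new numpy import exists so static-retrieval-mrl-en-v1 can declare max_tokens=np.inf: a static embedding model has no attention window, so there is no sequence-length ceiling to report. A minimal sketch, assuming the model loads through the standard sentence-transformers API:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")

# No 512-token ceiling: a static model pools per-token vectors, so long
# inputs are accepted (cost grows linearly with length).
emb = model.encode(["a very long document " * 5_000])
print(emb.shape)  # (1, 1024)
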
mteb/tasks/retrieval/vie/__init__.py
@@ -1,5 +1,5 @@
 from .argu_ana_vn_retrieval import ArguAnaVN
-from .climate_fevervn_retrieval import ClimateFEVERVN
+from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
 from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
 from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
 from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
 from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
 from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
 from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
-from .db_pedia_vn_retrieval import DBPediaVN
-from .fevervn_retrieval import FEVERVN
+from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
+from .fevervn_retrieval import FEVERVN, NanoFEVERVN
 from .fi_qa2018_vn_retrieval import FiQA2018VN
 from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
-from .hotpot_qavn_retrieval import HotpotQAVN
-from .msmarcovn_retrieval import MSMARCOVN
+from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
+from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
 from .nf_corpus_vn_retrieval import NFCorpusVN
-from .nqvn_retrieval import NQVN
+from .nqvn_retrieval import NQVN, NanoNQVN
 from .quora_vn_retrieval import QuoraVN
 from .sci_fact_vn_retrieval import SciFactVN
 from .scidocsvn_retrieval import SCIDOCSVN
 from .touche2020_vn_retrieval import Touche2020VN
 from .treccovidvn_retrieval import TRECCOVIDVN
+from .tvpl_retrieval import TVPLRetrieval
 from .vie_qu_ad_retrieval import VieQuADRetrieval
 from .zac_legal_text_retrieval import ZacLegalTextRetrieval
 
@@ -49,8 +50,15 @@ __all__ = [
     "GreenNodeTableMarkdownRetrieval",
     "HotpotQAVN",
     "NFCorpusVN",
+    "NanoClimateFEVERVN",
+    "NanoDBPediaVN",
+    "NanoFEVERVN",
+    "NanoHotpotQAVN",
+    "NanoMSMARCOVN",
+    "NanoNQVN",
     "QuoraVN",
     "SciFactVN",
+    "TVPLRetrieval",
     "Touche2020VN",
     "VieQuADRetrieval",
     "ZacLegalTextRetrieval",

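A quick way to confirm the new exports after this __init__ change; note that the class names (NanoNQVN) differ from the registered task names ("NanoNQ-VN") defined in each TaskMetadata below:

from mteb.tasks.retrieval.vie import (
    NanoClimateFEVERVN,
    NanoDBPediaVN,
    NanoFEVERVN,
    NanoHotpotQAVN,
    NanoMSMARCOVN,
    NanoNQVN,
    TVPLRetrieval,
)

print(NanoNQVN.metadata.name)  # "NanoNQ-VN"
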
mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py
@@ -36,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
 """,
         adapted_from=["ClimateFEVER"],
     )
+
+
+class NanoClimateFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoClimateFEVER-VN",
+        description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
+        dataset={
+            "path": "GreenNode/nano-climate-fever-vn",
+            "revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["ClimateFEVER-VN"],
+    )

mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py
@@ -36,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
 """,
         adapted_from=["DBPedia"],
     )
+
+
+class NanoDBPediaVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoDBPedia-VN",
+        description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "GreenNode/nano-dbpedia-vn",
+            "revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
+        },
+        adapted_from=["DBPedia-VN"],
+    )

mteb/tasks/retrieval/vie/fevervn_retrieval.py
@@ -36,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
 """,
         adapted_from=["FEVER"],
     )
+
+
+class NanoFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoFEVER-VN",
+        dataset={
+            "path": "GreenNode/nano-fever-vn",
+            "revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
+        },
+        description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://fever.ai/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["FEVER-VN"],
+    )

mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py
@@ -36,3 +36,42 @@ class HotpotQAVN(AbsTaskRetrieval):
 """,
         adapted_from=["HotpotQA"],
     )
+
+
+class NanoHotpotQAVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoHotpotQA-VN",
+        dataset={
+            "path": "GreenNode/nano-hotpotqa-vn",
+            "revision": "f4de19a2fae1a582de114e5bcd178bb262183113",
+        },
+        description="NanoHotpotQAVN is a small version of A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://hotpotqa.github.io/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Web", "Written"],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a multi-hop question, retrieve documents that can help answer the question"
+        },
+        adapted_from=["HotpotQA-VN"],
+    )

mteb/tasks/retrieval/vie/msmarcovn_retrieval.py
@@ -47,3 +47,51 @@ class MSMARCOVN(AbsTaskRetrieval):
 """,
         adapted_from=["MSMARCO"],
     )
+
+
+class NanoMSMARCOVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoMSMARCO-VN",
+        dataset={
+            "path": "GreenNode/nano-msmarco-vn",
+            "revision": "f149369c82ec228b05b0f6677699ab4bfbab73f6",
+        },
+        description="NanoMSMARCOVN is a small version of A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://microsoft.github.io/msmarco/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["dev"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=[
+            "Encyclopaedic",
+            "Academic",
+            "Blog",
+            "News",
+            "Medical",
+            "Government",
+            "Reviews",
+            "Non-fiction",
+            "Social",
+            "Web",
+        ],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={"query": "Given a query, retrieve relevant documents from MS MARCO-VN"},
+        adapted_from=["MSMARCO-VN"],
+    )

mteb/tasks/retrieval/vie/nqvn_retrieval.py
@@ -36,3 +36,42 @@ class NQVN(AbsTaskRetrieval):
 """,
         adapted_from=["NQ"],
     )
+
+
+class NanoNQVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoNQ-VN",
+        dataset={
+            "path": "GreenNode/nano-nq-vn",
+            "revision": "1ad4d6556fe0e5314994839089ce070fb0db8b19",
+        },
+        description="NanoNQVN is a small version of A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://ai.google.com/research/NaturalQuestions/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=["Question answering"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a question, retrieve Wikipedia passages that answer the question"
+        },
+        adapted_from=["NQ-VN"],
+    )

mteb/tasks/retrieval/vie/tvpl_retrieval.py
@@ -0,0 +1,42 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+TEST_SAMPLES = 2048
+
+
+class TVPLRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="TVPLRetrieval",
+        description="A Vietnamese dataset for evaluating legal text retrieval. From Thu vien phap luat (TVPL) dataset: Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models.",
+        reference="https://aclanthology.org/2020.coling-main.233.pdf",
+        dataset={
+            "path": "GreenNode/TVPL-Retrieval-VN",
+            "revision": "6661dba4dfedff606537732d9f35f2c3738b081a",
+        },
+        type="Retrieval",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        dialect=[],
+        annotations_creators="human-annotated",
+        domains=["Legal"],
+        task_subtypes=["Question answering"],
+        sample_creation="found",
+        bibtex_citation=r"""
+@article{10.1145/3732938,
+  address = {New York, NY, USA},
+  author = {Le, Huong and Luu, Ngoc and Nguyen, Thanh and Dao, Tuan and Dinh, Sang},
+  doi = {10.1145/3732938},
+  issn = {2375-4699},
+  journal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
+  publisher = {Association for Computing Machinery},
+  title = {Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models},
+  url = {https://doi.org/10.1145/3732938},
+  year = {2025},
+}
+""",
+    )

mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py
@@ -24,5 +24,19 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval):
         annotations_creators="human-annotated",
         dialect=[],
         sample_creation="found",
-        bibtex_citation=""
+        bibtex_citation=r"""
+@inproceedings{10.1007/978-981-95-1746-6_17,
+  address = {Singapore},
+  author = {Pham, Bao Loc
+and Hoang, Quoc Viet
+and Luu, Quy Tung
+and Vo, Trong Thu},
+  booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
+  isbn = {978-981-95-1746-6},
+  pages = {153--163},
+  publisher = {Springer Nature Singapore},
+  title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
+  year = {2026},
+}
+""",
     )

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.6.5
+Version: 2.6.7
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -32,8 +32,6 @@ Requires-Dist: rich>=0.0.0
 Requires-Dist: pytrec-eval-terrier>=0.5.6
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: polars>=0.20.22
-Requires-Dist: torch<2.9.0; python_full_version < "3.14"
-Requires-Dist: torch>=2.9.0; python_full_version >= "3.14"
 Provides-Extra: image
 Requires-Dist: torchvision>0.2.1; extra == "image"
 Requires-Dist: transformers[torch-vision,vision]; extra == "image"

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/RECORD
@@ -1254,13 +1254,19 @@ mteb/descriptive_stats/Retrieval/NQ-VN.json,sha256=lz7Jb865vUqLOxZhd8StxxAmlyNg-
 mteb/descriptive_stats/Retrieval/NQ.json,sha256=ylIFn-uHev-jkcua8SUmiDCRanM9uCkvRElU-kIGIJg,1014
 mteb/descriptive_stats/Retrieval/NQHardNegatives.json,sha256=uPcQxhFQ9R7HGcEu8c9U4K1a5yYntN-mVK4anaRHtNo,986
 mteb/descriptive_stats/Retrieval/NanoArguAnaRetrieval.json,sha256=Qv5QaFK0wXUec-9rv6K71oTgwdeOWxPpGEA-gu0-BkI,976
+mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json,sha256=8isr2BOTbbFU4_Ivwof3-MTdxngG2SMl_GYrD_vbg3Q,1010
 mteb/descriptive_stats/Retrieval/NanoClimateFeverRetrieval.json,sha256=CdBXQhfQhtKG9_64I6AXDV4giSRppmLZDB3S8M28TOA,973
+mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json,sha256=xhNOXfcG-shzlptKuHBu9dkRXQAbmlknQqu8vhxKb6g,1012
 mteb/descriptive_stats/Retrieval/NanoDBPediaRetrieval.json,sha256=eAZJSH9WPk6AVkonlshmX9RHqq-b6iLTPmzO3yJFesk,974
+mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json,sha256=vb5rzqP3SHVo3R85xRaS-nXUfH0b6KQMAbFSmK6U--o,1010
 mteb/descriptive_stats/Retrieval/NanoFEVERRetrieval.json,sha256=ac505XP13F5NRmCaQPwKdH-v9JTESsieu-K1IEa4j-I,971
 mteb/descriptive_stats/Retrieval/NanoFiQA2018Retrieval.json,sha256=eB00Q60zKfJmIY6HO083-eWIKo1STY8z4WdzRrKMI4I,973
+mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json,sha256=JVJUfPow4rwxuUuMNJ_ygusaYDm1s7tBJX5IzUSfXLQ,998
 mteb/descriptive_stats/Retrieval/NanoHotpotQARetrieval.json,sha256=mAgjR7ekGKqk0QtiZxK-iuPWJIFWi9yvAO6j9liz-iQ,972
+mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json,sha256=InXDVB08Q11Pb9IU2H2s7rZT_DXnbdpcs2duj66EdHI,1008
 mteb/descriptive_stats/Retrieval/NanoMSMARCORetrieval.json,sha256=_2ap0Bglk-hVK2rYJy3E4ECVm6Kf3yqhvWYQ99ZXruM,970
 mteb/descriptive_stats/Retrieval/NanoNFCorpusRetrieval.json,sha256=BjRdljofdnrJqn8BQdRpoxoanU5-XdeSn48085N2o4Q,977
+mteb/descriptive_stats/Retrieval/NanoNQ-VN.json,sha256=l_49qFdL8DtxaZ9i9lX5dJcxG1KnjvaOO-eyuVWsUAM,1010
 mteb/descriptive_stats/Retrieval/NanoNQRetrieval.json,sha256=q9TBcEqGV8fmoK4_32a-yDLhGN6FAj049XuN95Hhiiw,969
 mteb/descriptive_stats/Retrieval/NanoQuoraRetrieval.json,sha256=sG9ZgROl8kqDk3n2Rmb7zMgUmu0S8LqILZvjdevf-rQ,967
 mteb/descriptive_stats/Retrieval/NanoSCIDOCSRetrieval.json,sha256=J0a84pa1TupKlHC5Oi9zqhOkmKz2TqlcxPPXt58zuBU,973
@@ -1346,6 +1352,7 @@ mteb/descriptive_stats/Retrieval/TRECCOVID.json,sha256=VMICXZ2lA7GfiUyudOxYGRnMm
 mteb/descriptive_stats/Retrieval/TRECDL2019.json,sha256=6BOe9qATrKaRz8_cCMbqwXiu-ZiZq--Cm37uwTqSvJs,1013
 mteb/descriptive_stats/Retrieval/TRECDL2020.json,sha256=0WFbaPL2dyp5FZ1Wf0yAOVhndUfxP11sep562RHMplA,1014
 mteb/descriptive_stats/Retrieval/TV2Nordretrieval.json,sha256=_oDN_OfptM5ak-d4OXA-RU0hrtjvfg18jhir8CckxZ0,985
+mteb/descriptive_stats/Retrieval/TVPLRetrieval.json,sha256=m-t90SylbkuUUyu-MprOpB27h8xkoqjA2ebhvq5Vl98,1007
 mteb/descriptive_stats/Retrieval/TempReasonL1.json,sha256=-MpwGucuNT0aKOMWwGld9POo_vkSnjpnih8xIFnN5d4,975
 mteb/descriptive_stats/Retrieval/TempReasonL2Context.json,sha256=Gd2cVFAsdF1RHHWIbKI9hZLWgrbFzp8p0xoa6NU1uGM,996
 mteb/descriptive_stats/Retrieval/TempReasonL2Fact.json,sha256=om5WmIGXJLeMI-b0Tp7-odKRH-S9kx6OHXlnAD62rLk,992
@@ -1507,7 +1514,7 @@ mteb/models/model_implementations/human.py,sha256=EtYa8G7Dc8fDcelBVw0xTpxGGx1YKK
 mteb/models/model_implementations/ibm_granite_models.py,sha256=ljHjuPuBkIwJvp5WZ3csjTOIb14nLh1h3OYkW-CEeHY,8464
 mteb/models/model_implementations/inf_models.py,sha256=SXXs3s9PWo08fzrxG_WOXGc_gvbpmkt-Blt7YoGcPRo,3020
 mteb/models/model_implementations/jasper_models.py,sha256=buJgllGIeyi7LsxDJY3UYJs_YzdDBkU3QpuQyU6VoTc,16293
-mteb/models/model_implementations/jina_clip.py,sha256=
+mteb/models/model_implementations/jina_clip.py,sha256=0XhRSWTPR3ERAsOoVOxhB1yV6v1pEY8EQcTy1ChtSoU,6595
 mteb/models/model_implementations/jina_models.py,sha256=kFmkAWUFoJpq_1tRQIspk54lsik2vIoQcy5DS7YKgQ0,35198
 mteb/models/model_implementations/kalm_models.py,sha256=SHqkw5p7HzmQrb_bIFjRp1rsuv2v531nXIk390h_ojY,62115
 mteb/models/model_implementations/kblab.py,sha256=EisTJXijICN2pyfWT_89qUnNO7TH95t1LxCxjzJnzQo,1237
@@ -1534,8 +1541,8 @@ mteb/models/model_implementations/no_instruct_sentence_models.py,sha256=qLiMok_O
 mteb/models/model_implementations/nomic_models.py,sha256=dmQC_cWg6hAmiBHK7fXoXEiGBJnJvrq0RsnCcJ2qe1Q,15137
 mteb/models/model_implementations/nomic_models_vision.py,sha256=usCKfZCR7aEi_DnNmVAYjH-lXx_ipQkBVtUAmhJ90QI,6870
 mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=6dTGtK1GiaYdpJ4IQFgCCOkGyHQyuEUatKs-Uv-1YmE,6450
-mteb/models/model_implementations/nvidia_models.py,sha256=
-mteb/models/model_implementations/octen_models.py,sha256=
+mteb/models/model_implementations/nvidia_models.py,sha256=JMy0x7EWGrAxZ9s63F2vSPdPS-9yF3RIS4uj3N2UrVI,24315
+mteb/models/model_implementations/octen_models.py,sha256=FwQAcB_z6bFohpFlNQK2ugLBEOQUu533auOhrNqMxaM,7511
 mteb/models/model_implementations/openai_models.py,sha256=905BajYi_XyOZgqU3AeKpwIttLoUitaAyc48sTWI6Jg,9482
 mteb/models/model_implementations/openclip_models.py,sha256=MyosgeYSrgBXGuGFtI2Tyxksxpb7bADFJVSYFCLweVA,11622
 mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=TnIHut_IHvplvovlcTZ-PWnEldTzcru5JdUIaTH-8Do,8636
@@ -1565,7 +1572,7 @@ mteb/models/model_implementations/searchmap_models.py,sha256=xVQPkO7aLp_kBFiMDAm
 mteb/models/model_implementations/seed_1_6_embedding_models.py,sha256=gcGKEY-n7DWGPlXYhO_kcNJ3lkBEnbw8NUxADNs3siM,18635
 mteb/models/model_implementations/seed_1_6_embedding_models_1215.py,sha256=OoTHcDRQGOuSzf08V62EXrSEdRsXhnMv2ZN9feJWs9s,36443
 mteb/models/model_implementations/seed_models.py,sha256=9UF2AQ0Uue8DD73SjYhHn2hLxey_7Iq9ii9TkRaA3CM,14168
-mteb/models/model_implementations/sentence_transformers_models.py,sha256=
+mteb/models/model_implementations/sentence_transformers_models.py,sha256=6oULaf2mTyVe7vy9oS_QoKuxXXPaAqjQgSooMTG0xow,26071
 mteb/models/model_implementations/shuu_model.py,sha256=1jDFFPAfbfrSzC4vbHczO4yqy3Xh4tWiDAd3FS9-T6M,1177
 mteb/models/model_implementations/siglip_models.py,sha256=SOSyp-B7w6Vvqas_10D_1rvpJcKSQuJmXGy7Wdtsw7o,13012
 mteb/models/model_implementations/slm_models.py,sha256=JXjBio-9NFHLefU4Ny1Z-fFkyvvIz0U2kQ6t5s-PzlQ,13427
@@ -2474,9 +2481,9 @@ mteb/tasks/retrieval/swe/swe_faq_retrieval.py,sha256=s-o7IM_l7giuK4bJMdYkq2CtE0Q
 mteb/tasks/retrieval/swe/swedn_retrieval.py,sha256=RFcpp0u-EKIwSRXR37tJ0_haY6Jvlfj8DWCgrD-0tnU,1512
 mteb/tasks/retrieval/tur/__init__.py,sha256=tAKhhsTK6meiZwRMIvbx7_ye90JAAW3dlS8iI0r_vg8,84
 mteb/tasks/retrieval/tur/tur_hist_quad.py,sha256=s7S5RrdwPx-0aatUwbgFbuLtj8927yQUHp1SEODfAl0,3669
-mteb/tasks/retrieval/vie/__init__.py,sha256=
+mteb/tasks/retrieval/vie/__init__.py,sha256=8k8aUndynSTP72j75e2tcU-8omMuGzOVZp3KxIAGaBg,2419
 mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py,sha256=wmE6syUs0sLs7xgIOxXQuiQzpxrskdsTc5sK46v1YEQ,1754
-mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py,sha256=
+mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py,sha256=eonoS9NWKw-okR9Eqe4B8YgzGSbw0t7FcNpt0JwxyKU,3788
 mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py,sha256=1c6s1C0j1x7kE92WMv9JB4I_rdsHboyP-QILU-18rQ4,1851
 mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py,sha256=h--L4OiLIalxHnSulEiUZjMo7JRxjia-mKOnnoaOkzI,1813
 mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py,sha256=Jm5-2YbfBObFW_Ygwu03PAnSNMcZkH_7SL8L18KVWvQ,1857
@@ -2487,21 +2494,22 @@ mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py,sha256=9EiLKJrpRXACmxZ
 mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py,sha256=7Mr2sZrAKzFDeMT_7eQQ_52OKzefGFAnkcHmO4lntIo,1824
 mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py,sha256=2zDcrsCfcTAcybUmTpGeJQxUxNpkY7Ha8Tf0xwfqTcQ,1810
 mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py,sha256=ppFPam-3AXXVLVp_DiXeHaSr16Va44_-eRkOH0m5ypo,1821
-mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py,sha256=
-mteb/tasks/retrieval/vie/fevervn_retrieval.py,sha256=
+mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py,sha256=9YEubKLDCMJhck_EjY4r3VzAFDu-P4SWR5CLnHdSkTQ,3571
+mteb/tasks/retrieval/vie/fevervn_retrieval.py,sha256=JLrpB90G5c7ZR2jM9GsYE2YQ51qTnn5FH-LDzO99Z1Q,3768
 mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py,sha256=FGfFuLzRCTuupRxZdjVbBiwCOSspb3vwvtNAKvyXjso,1714
 mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py,sha256=O7iIcuvqhrHjB7J1VxH9YJ3v6cuFFBQdrrnYwLgeRfE,2429
-mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py,sha256=
-mteb/tasks/retrieval/vie/msmarcovn_retrieval.py,sha256=
+mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py,sha256=Vg_YI8YbZpXMmwZXS-2KLRutL2Nehw5tW231S2qShd4,3753
+mteb/tasks/retrieval/vie/msmarcovn_retrieval.py,sha256=syDFYmXL2xK3xCQrBAopGul8_3pDZzBdIjMpk2XbA1s,3951
 mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py,sha256=4S8IDJ-TVjKEy2teM8GOeDzHIZR8txkPvX0sGDYIyqs,1780
-mteb/tasks/retrieval/vie/nqvn_retrieval.py,sha256=
+mteb/tasks/retrieval/vie/nqvn_retrieval.py,sha256=f8LmUGAmsMnCdn-ovfPcpX12X4rmdpXj3F-q6GwjBEc,3551
 mteb/tasks/retrieval/vie/quora_vn_retrieval.py,sha256=VkgKCFbDkOuZAsMl36lOr-MuvbhNfE8zUmmiySW9lSY,1837
 mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py,sha256=7F3wSU9N2BAj4Jmzw7sjbcxTyYDYs_3I1434X3riaZ4,1773
 mteb/tasks/retrieval/vie/scidocsvn_retrieval.py,sha256=WlcfDfF43jsNf9D_Bl3k02RiiPdedORID6CEEMAYTLc,1815
 mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py,sha256=DKcNwCCdANt7hNr3fLao9jkIJJjfxJ0jLLbD7_b-KnE,1752
 mteb/tasks/retrieval/vie/treccovidvn_retrieval.py,sha256=ZlFFL37Zd_sbKXaUZx41XTxps-nnOi3PnBNCy9KvlJU,1826
+mteb/tasks/retrieval/vie/tvpl_retrieval.py,sha256=CGwgT9spHONw9cOeuum_BS7khZbooqoNqJgVV6Utfic,1611
 mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py,sha256=eZh1rR43iXDHoylOGKjrUCopzEujE-1GSGTn2TMrkro,3621
-mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py,sha256=
+mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py,sha256=BI2GbbkOPnWQpbn9ul6ShHugAZ994iiS7hVi5v1K17Y,1386
 mteb/tasks/retrieval/zho/__init__.py,sha256=dIN-rPfrEjkCuUCha8SpQdlzWYY6IMO_HLxebcBhQxA,438
 mteb/tasks/retrieval/zho/cmteb_retrieval.py,sha256=DXNkvMQQZsKv1U5L_0boKEXGLDPn4RfauIlxwb0f-EQ,10789
 mteb/tasks/retrieval/zho/le_ca_r_dv2_retrieval.py,sha256=O7kNB_7rpgG7_KsKC0SUKG42dhx66Rakk77uy4Iufk0,1293
@@ -2604,9 +2612,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=GwkBPmAr18Onu-vHtzHs0PFrhCozdOMiT13HwnWL4ZM,3961
-mteb-2.6.
-mteb-2.6.
-mteb-2.6.
-mteb-2.6.
-mteb-2.6.
-mteb-2.6.
+mteb-2.6.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.6.7.dist-info/METADATA,sha256=p99o5hSYjMeWfoMLwNljk7_mDzsRjVXBbwPzsobuyWA,14281
+mteb-2.6.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.6.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.6.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.6.7.dist-info/RECORD,,

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/WHEEL
File without changes

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/entry_points.txt
File without changes

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/licenses/LICENSE
File without changes

{mteb-2.6.5.dist-info → mteb-2.6.7.dist-info}/top_level.txt
File without changes