mteb 2.3.0__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/evaluate.py +38 -7
- mteb/models/model_implementations/colpali_models.py +4 -4
- mteb/models/model_implementations/colqwen_models.py +206 -2
- mteb/models/model_implementations/euler_models.py +25 -0
- mteb/models/model_implementations/jina_models.py +203 -5
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +8 -9
- mteb/models/model_implementations/vdr_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +57 -0
- mteb/results/model_result.py +2 -1
- mteb/results/task_result.py +12 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/METADATA +4 -1
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/RECORD +21 -17
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/WHEEL +0 -0
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.3.0.dist-info → mteb-2.3.1.dist-info}/top_level.txt +0 -0
mteb/models/model_implementations/colqwen_models.py CHANGED

@@ -1,11 +1,19 @@
 import logging
+from typing import Any

 import torch
+from PIL import Image
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm

 from mteb._requires_package import (
+    requires_image_dependencies,
     requires_package,
 )
-from mteb.
+from mteb.abstasks.task_metadata import TaskMetadata
+from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+from mteb.types import Array, BatchedInput, PromptType

 from .colpali_models import (
     COLPALI_CITATION,
@@ -73,6 +81,132 @@ class ColQwen2_5Wrapper(ColPaliEngineWrapper): # noqa: N801
     )


+class ColQwen3Wrapper(AbsEncoder):
+    """Wrapper for the ColQwen3 vision-language retrieval model."""
+
+    def __init__(
+        self,
+        model_name: str,
+        *,
+        revision: str | None = None,
+        device: str | None = None,
+        dtype: torch.dtype | str | None = torch.bfloat16,
+        **kwargs: Any,
+    ):
+        requires_image_dependencies()
+        requires_package(self, "transformers", model_name, "pip install mteb[colqwen3]")
+        from transformers import AutoModel, AutoProcessor
+
+        self.device = device or (
+            "cuda"
+            if torch.cuda.is_available()
+            else "mps"
+            if torch.backends.mps.is_available()
+            else "cpu"
+        )
+        self.model = AutoModel.from_pretrained(
+            model_name,
+            revision=revision,
+            dtype=dtype,
+            trust_remote_code=True,
+            **kwargs,
+        ).to(self.device)
+        self.model.eval()
+
+        self.processor = AutoProcessor.from_pretrained(
+            model_name,
+            revision=revision,
+            trust_remote_code=True,
+            max_num_visual_tokens=1280,
+        )
+
+    def encode(
+        self,
+        inputs: DataLoader[BatchedInput],
+        *,
+        task_metadata: TaskMetadata,
+        hf_split: str,
+        hf_subset: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> Array:
+        if (
+            "text" not in inputs.dataset.features
+            and "image" not in inputs.dataset.features
+        ):
+            raise ValueError("No text or image features found in inputs.")
+        return self.get_fused_embeddings(inputs, **kwargs)
+
+    def _encode_inputs(self, encoded_inputs: dict[str, torch.Tensor]) -> torch.Tensor:
+        outputs = self.model(**encoded_inputs)
+        # Avoid boolean casting of tensors when checking for custom attributes.
+        embeddings = getattr(outputs, "embeddings", None)
+        if embeddings is None:
+            embeddings = outputs[0]
+        return embeddings
+
+    def get_fused_embeddings(
+        self,
+        image_texts_pairs: DataLoader[BatchedInput] | None = None,
+        batch_size: int = 32,
+        show_progress_bar: bool = True,
+        fusion_mode="concat",
+        **kwargs: Any,
+    ):
+        import torchvision.transforms.functional as F
+
+        contains_image = "image" in image_texts_pairs.dataset.features
+        contains_text = "text" in image_texts_pairs.dataset.features
+        contains_both = contains_image and contains_text
+
+        if contains_both:
+            progress_desc = "Encoding images+texts"
+        elif contains_image:
+            progress_desc = "Encoding images"
+        elif contains_text:
+            progress_desc = "Encoding texts"
+        else:
+            raise ValueError("No text or image features found in inputs.")
+
+        all_embeds: list[torch.Tensor] = []
+        with torch.no_grad():
+            for batch in tqdm(
+                image_texts_pairs,
+                disable=not show_progress_bar,
+                desc=progress_desc,
+            ):
+                if contains_image:
+                    imgs = [
+                        F.to_pil_image(b.to(self.device))
+                        if not isinstance(b, Image.Image)
+                        else b
+                        for b in batch["image"]
+                    ]
+                else:
+                    imgs = None
+                if contains_text:
+                    texts = batch["text"]
+                else:
+                    texts = None
+                if contains_both:
+                    assert len(imgs) == len(texts), (
+                        f"The number of texts and images must have the same length, got {len(imgs)} and {len(texts)}"
+                    )
+
+                inputs = self.processor(images=imgs, text=texts)
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                outs = self._encode_inputs(inputs)
+                all_embeds.extend(outs.cpu().to(torch.float32))
+
+        padded = torch.nn.utils.rnn.pad_sequence(
+            all_embeds, batch_first=True, padding_value=0
+        )
+        return padded
+
+    def similarity(self, a, b):
+        return self.processor.score_multi_vector(a, b, device=self.device)
+
+
 colqwen2 = ModelMeta(
     loader=ColQwen2Wrapper,
     loader_kwargs=dict(
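Note: the wrapper above delegates scoring to processor.score_multi_vector under ScoringFunction.MAX_SIM. As a reference, here is a minimal late-interaction (MaxSim) sketch in plain PyTorch over padded multi-vector embeddings; the maxsim_scores helper is illustrative only (it is not part of mteb and ignores padding masks).

import torch

def maxsim_scores(queries: torch.Tensor, docs: torch.Tensor) -> torch.Tensor:
    """queries: (Q, Nq, D), docs: (P, Nd, D); returns a (Q, P) score matrix."""
    # Token-level dot products for every query/document pair: (Q, P, Nq, Nd).
    sim = torch.einsum("qnd,pmd->qpnm", queries, docs)
    # MaxSim: best-matching document token per query token, summed over query tokens.
    return sim.max(dim=-1).values.sum(dim=-1)

q = torch.randn(2, 5, 320)  # 2 queries, 5 tokens each, embed_dim=320
d = torch.randn(3, 7, 320)  # 3 documents, 7 tokens each
print(maxsim_scores(q, d).shape)  # torch.Size([2, 3])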
@@ -125,6 +259,72 @@ colqwen2_5 = ModelMeta(
     citation=COLPALI_CITATION,
 )

+TOMORO_TRAINING_DATA = {
+    "VDRMultilingualRetrieval",
+    # from https://huggingface.co/datasets/vidore/colpali_train_set
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
+    "VisRAG-Ret-Train-Synthetic-data",
+    "VisRAG-Ret-Train-In-domain-data",
+}
+
+TOMORO_CITATION = """
+@misc{huang2025tomoro_colqwen3_embed,
+  title={TomoroAI/tomoro-colqwen3-embed},
+  author={Xin Huang and Kye Min Tan and Albert Phelps},
+  year={2025},
+  url={https://huggingface.co/TomoroAI/tomoro-colqwen3-embed-8b}
+}
+"""
+
+colqwen3_8b = ModelMeta(
+    loader=ColQwen3Wrapper,
+    name="TomoroAI/tomoro-colqwen3-embed-8b",
+    languages=["eng-Latn"],
+    revision="0b9fe28142910e209bbac15b1efe85507c27644f",
+    release_date="2025-11-26",
+    modalities=["image", "text"],
+    n_parameters=8_000_000_000,
+    memory_usage_mb=16724,
+    max_tokens=262144,
+    embed_dim=320,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://github.com/illuin-tech/colpali",
+    public_training_data=None,
+    framework=["PyTorch"],
+    reference="https://huggingface.co/TomoroAI/tomoro-colqwen3-embed-8b",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=TOMORO_TRAINING_DATA,
+    citation=TOMORO_CITATION,
+)
+
+colqwen3_4b = ModelMeta(
+    loader=ColQwen3Wrapper,
+    name="TomoroAI/tomoro-colqwen3-embed-4b",
+    languages=["eng-Latn"],
+    revision="6a32fb68598730bf5620fbf18d832c784235c59c",
+    release_date="2025-11-26",
+    modalities=["image", "text"],
+    n_parameters=4_000_000_000,
+    memory_usage_mb=8466,
+    max_tokens=262144,
+    embed_dim=320,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://github.com/illuin-tech/colpali",
+    public_training_data=None,
+    framework=["PyTorch"],
+    reference="https://huggingface.co/TomoroAI/tomoro-colqwen3-embed-4b",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=TOMORO_TRAINING_DATA,
+    citation=TOMORO_CITATION,
+)
+
 colnomic_7b = ModelMeta(
     loader=ColQwen2_5Wrapper,
     loader_kwargs=dict(
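Note: a minimal usage sketch for the newly registered ColQwen3 entries, assuming the standard mteb.get_model/get_tasks entry points; the task name and the mteb.evaluate call signature below are illustrative assumptions, not confirmed by this diff.

import mteb

# Hypothetical usage: load one of the ColQwen3 models registered above and
# run it on a ViDoRe-style retrieval task (task choice is illustrative).
model = mteb.get_model("TomoroAI/tomoro-colqwen3-embed-8b")
tasks = mteb.get_tasks(tasks=["VidoreArxivQARetrieval"])
results = mteb.evaluate(model, tasks=tasks)  # assumed mteb 2.x entry point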
@@ -223,7 +423,11 @@ colnomic_7b = ModelMeta(


 EVOQWEN_TRAINING_DATA = {
-    "colpali_train_set",
+    # "colpali_train_set",
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
     "VisRAG-Ret-Train-Synthetic-data",
     "VisRAG-Ret-Train-In-domain-data",
 }
mteb/models/model_implementations/euler_models.py ADDED

@@ -0,0 +1,25 @@
+from mteb.models.model_meta import ModelMeta
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+Euler_Legal_Embedding_V1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="Mira190/Euler-Legal-Embedding-V1",
+    revision="df607ed9e25e569514a99c27cdaaab16e76b6dd4",
+    release_date="2025-11-06",
+    languages=["eng-Latn"],
+    n_parameters=8000000000,
+    memory_usage_mb=15618,
+    max_tokens=1536,
+    embed_dim=4096,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/Mira190/Euler-Legal-Embedding-V1",
+    similarity_fn_name="cosine",
+    use_instructions=False,
+    training_datasets=set(),  # final-data-new-anonymized-grok4-filtered
+    adapted_from="Qwen/Qwen3-Embedding-8B",
+    superseded_by=None,
+)
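Note: since this entry uses the plain sentence_transformers_loader, a minimal direct-usage sketch via Sentence Transformers, assuming the checkpoint loads with its default configuration; the query text is illustrative.

from sentence_transformers import SentenceTransformer

# Revision pinning mirrors the ModelMeta above.
model = SentenceTransformer(
    "Mira190/Euler-Legal-Embedding-V1",
    revision="df607ed9e25e569514a99c27cdaaab16e76b6dd4",
)
embeddings = model.encode(["What constitutes a breach of contract?"])
print(embeddings.shape)  # expected (1, 4096) given embed_dim above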
mteb/models/model_implementations/jina_models.py CHANGED

@@ -1,8 +1,10 @@
 import logging
+from collections import defaultdict
 from typing import Any, ClassVar

 import numpy as np
 import torch
+from sentence_transformers import CrossEncoder
 from torch.utils.data import DataLoader

 from mteb._requires_package import requires_package
@@ -10,13 +12,92 @@ from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.languages import PROGRAMMING_LANGS
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.sentence_transformer_wrapper import
+from mteb.models.sentence_transformer_wrapper import (
+    CrossEncoderWrapper,
+    SentenceTransformerEncoderWrapper,
+)
 from mteb.types import Array, BatchedInput, PromptType

 logger = logging.getLogger(__name__)

 MIN_SENTENCE_TRANSFORMERS_VERSION = (3, 1, 0)

+multilingual_langs = [
+    "afr-Latn",
+    "ara-Arab",
+    "aze-Latn",
+    "bel-Cyrl",
+    "bul-Cyrl",
+    "ben-Beng",
+    "cat-Latn",
+    "ceb-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Grek",
+    "eng-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Arab",
+    "fin-Latn",
+    "fra-Latn",
+    "glg-Latn",
+    "guj-Gujr",
+    "heb-Hebr",
+    "hin-Deva",
+    "hrv-Latn",
+    "hat-Latn",
+    "hun-Latn",
+    "hye-Armn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Jpan",
+    "jav-Latn",
+    "kat-Geor",
+    "kaz-Cyrl",
+    "khm-Khmr",
+    "kan-Knda",
+    "kor-Hang",
+    "kir-Cyrl",
+    "lao-Laoo",
+    "lit-Latn",
+    "lav-Latn",
+    "mkd-Cyrl",
+    "mal-Mlym",
+    "mon-Cyrl",
+    "mar-Deva",
+    "msa-Latn",
+    "mya-Mymr",
+    "nep-Deva",
+    "nld-Latn",
+    "nor-Latn",
+    "nob-Latn",
+    "nno-Latn",
+    "pan-Guru",
+    "pol-Latn",
+    "por-Latn",
+    "que-Latn",
+    "ron-Latn",
+    "rus-Cyrl",
+    "sin-Sinh",
+    "slk-Latn",
+    "slv-Latn",
+    "swa-Latn",
+    "tam-Taml",
+    "tel-Telu",
+    "tha-Thai",
+    "tgl-Latn",
+    "tur-Latn",
+    "ukr-Cyrl",
+    "urd-Arab",
+    "vie-Latn",
+    "yor-Latn",
+    "zho-Hans",
+]
+
 XLMR_LANGUAGES = [
     "afr-Latn",
     "amh-Latn",
@@ -119,6 +200,28 @@ XLMR_LANGUAGES = [
     "zho-Hans",
 ]

+JINARerankerV3_TRAINING_DATA = {
+    "MIRACLRetrieval",
+    "MIRACLRetrievalHardNegatives",
+    "MIRACLReranking",
+    "CMedQAv1-reranking",
+    "CMedQAv2-reranking",
+    "MrTidyRetrieval",
+    "T2Reranking",
+    "MSMARCO",
+    "MSMARCOHardNegatives",
+    "NQ",
+    "NQHardNegatives",
+    "HotpotQA",
+    "HotpotQAHardNegatives",
+    "T2Retrieval",
+    "DuRetrieval",
+    "MMarcoReranking",
+    "CornStack",
+    "MultiLongDocRetrieval",
+    "StackOverflowQA",
+}
+
 JinaV4_TRAINING_DATA = {
     "MSMARCO",
     "MSMARCOHardNegatives",
@@ -139,14 +242,72 @@ JinaV4_TRAINING_DATA = {
     "CornStack",
     "VDRMultilingualRetrieval",
     # from https://huggingface.co/datasets/vidore/colpali_train_set
-    "
-    "
-    "
-    "
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
     # "other", # inhouse dataset including synthetic datasets
 }


+class JinaRerankerV3Wrapper(CrossEncoderWrapper):
+    """Wrapper integration for MTEB."""
+
+    def __init__(
+        self,
+        model: CrossEncoder | str,
+        revision: str | None = None,
+        trust_remote_code: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        from sentence_transformers.util import get_device_name
+        from transformers import AutoModel
+
+        self.model = AutoModel.from_pretrained(
+            model, trust_remote_code=trust_remote_code, dtype="auto"
+        )
+
+        device = kwargs.get("device", None)
+        if device is None:
+            device = get_device_name()
+            logger.info(f"Use pytorch device: {device}")
+
+        self.model.to(device)
+        self.model.eval()
+
+    def predict(
+        self,
+        inputs1: DataLoader[BatchedInput],
+        inputs2: DataLoader[BatchedInput],
+        *,
+        task_metadata: TaskMetadata,
+        hf_split: str,
+        hf_subset: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> Array:
+        all_corpus = [text for batch in inputs2 for text in batch["text"]]
+        all_queries = [text for batch in inputs1 for text in batch["text"]]
+
+        sentences_count = len(all_corpus)
+        query_groups: dict[str, list[tuple[int, str]]] = defaultdict(list)
+        for idx, (query, doc) in enumerate(zip(all_queries, all_corpus)):
+            query_groups[query].append((idx, doc))
+
+        results = np.zeros(sentences_count, dtype=np.float32)
+        for query, doc_infos in query_groups.items():
+            original_indices, docs = zip(*doc_infos)
+
+            scores = self.model.rerank(
+                query, list(docs), max_query_length=3072, max_doc_length=2048
+            )
+            for scr in scores:
+                original_idx = original_indices[scr["index"]]
+                results[original_idx] = float(scr["relevance_score"])
+
+        return results
+
+
 class JinaWrapper(SentenceTransformerEncoderWrapper):
     """following the hf model card documentation."""

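Note: a small self-contained illustration of the query-grouping used in JinaRerankerV3Wrapper.predict: parallel (query, document) pairs are grouped per unique query, and per-query reranker scores are scattered back to their original positions. The mocked scores stand in for model.rerank output, which the wrapper reads as dicts with "index" and "relevance_score" keys.

from collections import defaultdict

import numpy as np

all_queries = ["q1", "q1", "q2"]
all_corpus = ["doc a", "doc b", "doc c"]

query_groups: dict[str, list[tuple[int, str]]] = defaultdict(list)
for idx, (query, doc) in enumerate(zip(all_queries, all_corpus)):
    query_groups[query].append((idx, doc))

results = np.zeros(len(all_corpus), dtype=np.float32)
for query, doc_infos in query_groups.items():
    original_indices, docs = zip(*doc_infos)
    # Mocked reranker output in the format the wrapper expects.
    scores = [{"index": i, "relevance_score": 1.0 / (i + 1)} for i in range(len(docs))]
    for scr in scores:
        results[original_indices[scr["index"]]] = float(scr["relevance_score"])

print(results)  # approximately [1.0, 0.5, 1.0]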
@@ -553,6 +714,43 @@ def get_programming_task_override(
     return current_task_name


+jina_reranker_v3 = ModelMeta(
+    loader=JinaRerankerV3Wrapper,
+    loader_kwargs=dict(
+        trust_remote_code=True,
+    ),
+    name="jinaai/jina-reranker-v3",
+    languages=multilingual_langs,
+    open_weights=True,
+    revision="050e171c4f75dfec5b648ed8470a2475e5a30f30",
+    release_date="2025-09-18",  # official release date
+    modalities=["text"],
+    n_parameters=int(0.6 * 1e9),
+    memory_usage_mb=1138,
+    max_tokens=131072,
+    embed_dim=None,
+    license="cc-by-nc-4.0",
+    similarity_fn_name=None,
+    framework=["PyTorch"],
+    use_instructions=None,
+    reference="https://huggingface.co/jinaai/jina-reranker-v3",
+    is_cross_encoder=True,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=JINARerankerV3_TRAINING_DATA,
+    adapted_from="Qwen/Qwen3-0.6B",
+    citation="""@misc{wang2025jinarerankerv3lateinteractionlistwise,
+      title={jina-reranker-v3: Last but Not Late Interaction for Listwise Document Reranking},
+      author={Feng Wang and Yuqing Li and Han Xiao},
+      year={2025},
+      eprint={2509.25085},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2509.25085},}
+""",
+)
+
+
 jina_embeddings_v4 = ModelMeta(
     loader=JinaV4Wrapper,
     loader_kwargs=dict(
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py CHANGED

@@ -117,19 +117,18 @@ class LlamaNemoretrieverColembed(AbsEncoder):

 TRAINING_DATA = {
     # from https://huggingface.co/datasets/vidore/colpali_train_set
-    "
-    "
-    "
-    "
-    "
-    "
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
+    "HotpotQA",
+    "MIRACLRetrieval",
     "NQ",
-    "
+    "StackExchangeClustering",
     "SQuAD",
     "WebInstructSub",
     "docmatix-ir",
-    "
-    "colpali_train_set", # as it contains PDFs
+    "VDRMultilingualRetrieval",
     "VisRAG-Ret-Train-Synthetic-data",
     "VisRAG-Ret-Train-In-domain-data",
     "wiki-ss-nq",
mteb/models/model_implementations/yuan_models_en.py ADDED

@@ -0,0 +1,57 @@
+from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ModelMeta
+from mteb.models.models_protocols import PromptType
+
+
+def instruction_template(
+    instruction: str, prompt_type: PromptType | None = None
+) -> str:
+    if not instruction or prompt_type == PromptType.document:
+        return ""
+    if isinstance(instruction, dict):
+        if prompt_type is None:
+            instruction = next(iter(instruction.values()))  # TODO
+        else:
+            instruction = instruction[prompt_type]
+    return f"Instruct: {instruction}\nQuery:"
+
+
+training_data = {
+    "T2Retrieval",
+    "DuRetrieval",
+    "MMarcoReranking",
+    "CMedQAv2-reranking",
+    "NQ",
+    "MSMARCO",
+    "HotpotQA",
+    "MrTidyRetrieval",
+    "MIRACLRetrieval",
+    "CodeSearchNet",
+}
+
+
+yuan_embedding_2_en = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=dict(
+        instruction_template=instruction_template,
+        apply_instruction_to_passages=False,
+    ),
+    name="IEITYuan/Yuan-embedding-2.0-en",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="b2fd15da3bcae3473c8529593825c15068f09fce",
+    release_date="2025-11-27",
+    n_parameters=595776512,
+    memory_usage_mb=2272,
+    embed_dim=1024,
+    max_tokens=2048,
+    license="apache-2.0",
+    reference="https://huggingface.co/IEITYuan/Yuan-embedding-2.0-en",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=training_data,
+    adapted_from="Qwen/Qwen3-Embedding-0.6B",
+)
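Note: to make the instruction handling above concrete, a usage sketch of the instruction_template defined in the new module; it assumes the module path added above is importable and that PromptType exposes query/document members, as the code implies. The instruction string is illustrative.

from mteb.models.model_implementations.yuan_models_en import instruction_template
from mteb.models.models_protocols import PromptType

print(instruction_template("Given a web search query, retrieve relevant passages", PromptType.query))
# -> Instruct: Given a web search query, retrieve relevant passages
#    Query:
print(repr(instruction_template("any instruction", PromptType.document)))
# -> '' (documents are encoded without an instruction prefix)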
mteb/results/model_result.py CHANGED

@@ -22,7 +22,7 @@ from mteb.types import (
     SplitName,
 )

-from .task_result import TaskResult
+from .task_result import TaskError, TaskResult

 logger = logging.getLogger(__name__)

@@ -82,6 +82,7 @@ class ModelResult(BaseModel):
             protected_namespaces=(),
         )
     )
+    exceptions: list[TaskError] | None = None

     def __repr__(self) -> str:
         n_entries = len(self.task_results)
mteb/results/task_result.py CHANGED

@@ -840,3 +840,15 @@ class TaskResult(BaseModel):
             )
         )
         return results
+
+
+class TaskError(BaseModel):
+    """A class to represent an error that occurred during the evaluation of a task.
+
+    Attributes:
+        task_name: The name of the MTEB task.
+        exception: The error message that occurred during the evaluation.
+    """
+
+    task_name: str
+    exception: str
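Note: a minimal sketch of the new error-reporting hook: a TaskError records a failed task, and ModelResult.exceptions (added above) can carry a list of them alongside successful task_results. The construction below is hypothetical; only the two fields shown in the diff are used.

from mteb.results.task_result import TaskError

err = TaskError(
    task_name="MultiLongDocReranking",
    exception="CUDA out of memory while encoding the corpus",
)
print(err.task_name, "->", err.exception)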
mteb/tasks/reranking/multilingual/__init__.py CHANGED

@@ -1,6 +1,7 @@
 from .esci_reranking import ESCIReranking
 from .hume_wikipedia_reranking_multilingual import HUMEWikipediaRerankingMultilingual
 from .miracl_reranking import MIRACLReranking
+from .multi_long_doc_reranking import MultiLongDocReranking
 from .wikipedia_reranking_multilingual import WikipediaRerankingMultilingual
 from .x_glue_wpr_reranking import XGlueWPRReranking

@@ -8,6 +9,7 @@ __all__ = [
     "ESCIReranking",
     "HUMEWikipediaRerankingMultilingual",
     "MIRACLReranking",
+    "MultiLongDocReranking",
     "WikipediaRerankingMultilingual",
     "XGlueWPRReranking",
 ]
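Note: with the export above, the new task should be resolvable by name through the usual task registry; a minimal sketch, assuming mteb.get_tasks accepts task names.

import mteb

tasks = mteb.get_tasks(tasks=["MultiLongDocReranking"])
print(tasks)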