mteb 2.7.12__py3-none-any.whl → 2.7.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mteb/abstasks/pair_classification.py CHANGED
@@ -25,6 +25,8 @@ from mteb.types.statistics import (
 if TYPE_CHECKING:
     from pathlib import Path
 
+    from numpy.typing import NDArray
+
     from mteb._evaluators.pair_classification_evaluator import (
         PairClassificationDistances,
     )
@@ -36,7 +38,6 @@ if TYPE_CHECKING:
         TextStatistics,
     )
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -138,7 +139,7 @@ class AbsTaskPairClassification(AbsTask):
         self, similarity_scores: PairClassificationDistances, labels: list[int]
     ) -> dict[str, float]:
         logger.info("Computing metrics...")
-        np_labels = np.asarray(labels)
+        np_labels: NDArray[np.int64] = np.asarray(labels, dtype=np.int64)
         output_scores = {}
         max_scores = defaultdict(list)
         for short_name, scores, reverse in [
@@ -281,7 +282,10 @@ class AbsTaskPairClassification(AbsTask):
         )
 
     def _compute_metrics_values(
-        self, scores: list[float], labels: np.ndarray, high_score_more_similar: bool
+        self,
+        scores: list[float],
+        labels: NDArray[np.int64],
+        high_score_more_similar: bool,
     ) -> dict[str, float]:
         """Compute the metrics for the given scores and labels.
 
@@ -315,7 +319,10 @@ class AbsTaskPairClassification(AbsTask):
         )
 
     def _find_best_acc_and_threshold(
-        self, scores: list[float], labels: np.ndarray, high_score_more_similar: bool
+        self,
+        scores: list[float],
+        labels: NDArray[np.int64],
+        high_score_more_similar: bool,
     ) -> tuple[float, float]:
         rows = list(zip(scores, labels))
         rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)
@@ -323,7 +330,7 @@ class AbsTaskPairClassification(AbsTask):
         max_acc = 0
         best_threshold = -1.0
         positive_so_far = 0
-        remaining_negatives = sum(np.array(labels) == 0)
+        remaining_negatives = sum(labels == 0)
 
         for i in range(len(rows) - 1):
             score, label = rows[i]
@@ -339,10 +346,9 @@ class AbsTaskPairClassification(AbsTask):
         return max_acc, best_threshold
 
     def _find_best_f1_and_threshold(
-        self, scores, labels, high_score_more_similar: bool
+        self, scores, labels: NDArray[np.int64], high_score_more_similar: bool
    ) -> tuple[float, float, float, float]:
        scores = np.asarray(scores)
-        labels = np.asarray(labels)
 
        rows = list(zip(scores, labels))
 
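Note on the hunks above: giving the labels an explicit int64 dtype up front is what lets `_find_best_acc_and_threshold` drop the redundant `np.array(labels)` copy inside `sum(labels == 0)`. A minimal standalone sketch, not part of the diff:

    import numpy as np

    labels = np.asarray([1, 0, 1, 0, 0], dtype=np.int64)
    # Comparing a typed integer array yields a boolean array whose sum
    # is the count of matches; no intermediate copy is needed.
    remaining_negatives = int((labels == 0).sum())
    assert remaining_negatives == 3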
mteb/models/get_model_meta.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import difflib
 import logging
+import warnings
 from typing import TYPE_CHECKING, Any
 
 from mteb.models import (
@@ -122,6 +123,11 @@ def get_model(
     return model
 
 
+_MODEL_RENAMES: dict[str, str] = {
+    "bm25s": "baseline/bm25s",
+}
+
+
 def get_model_meta(
     model_name: str,
     revision: str | None = None,
@@ -139,6 +145,12 @@ def get_model_meta(
     Returns:
         A model metadata object
     """
+    if model_name in _MODEL_RENAMES:
+        new_name = _MODEL_RENAMES[model_name]
+        msg = f"The model '{model_name}' has been renamed to '{new_name}'. To prevent this warning use the new name."
+        warnings.warn(msg, DeprecationWarning, stacklevel=2)
+        model_name = new_name
+
     if model_name in MODEL_REGISTRY:
         model_meta = MODEL_REGISTRY[model_name]
 
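Note: together with the renames in bm25.py and human.py below, this namespaces the two baseline models under "baseline/" while keeping old lookups working. A sketch of the caller-side behaviour, assuming `get_model_meta` is re-exported at the package top level as in mteb 2.x:

    import warnings

    import mteb

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        meta = mteb.get_model_meta("bm25s")  # old name still resolves

    assert meta.name == "baseline/bm25s"
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)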
mteb/models/model_implementations/bm25.py CHANGED
@@ -129,7 +129,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
 
 bm25_s = ModelMeta(
     loader=bm25_loader,
-    name="bm25s",
+    name="baseline/bm25s",
     model_type=["dense"],
     languages=["eng-Latn"],
     open_weights=True,
mteb/models/model_implementations/human.py CHANGED
@@ -2,7 +2,7 @@ from mteb.models import ModelMeta
 
 human = ModelMeta(
     loader=None,
-    name="Human",
+    name="baseline/Human",
     model_type=["dense"],
     languages=["eng-Latn", "ara-Arab", "rus-Cyrl", "dan-Latn", "nob-Latn"],
     open_weights=True,
mteb/models/model_implementations/nomic_models.py CHANGED
@@ -7,6 +7,7 @@ import torch
 import torch.nn.functional as F
 from packaging.version import Version
 
+from mteb.models import sentence_transformers_loader
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
 from mteb.types import PromptType
@@ -509,3 +510,42 @@ nomic_embed_text_v2_moe = ModelMeta(
     url={https://arxiv.org/abs/2502.07972},
 }""",
 )
+
+nomic_embed_code = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs={
+        "trust_remote_code": True,
+        "model_prompts": model_prompts,
+    },
+    name="nomic-ai/nomic-embed-code",
+    revision="11114029805cee545ef111d5144b623787462a52",
+    release_date="2025-03-24",
+    languages=["eng-Latn"],
+    n_parameters=7_070_619_136,
+    n_embedding_parameters=None,
+    memory_usage_mb=26972.0,
+    max_tokens=32768,
+    embed_dim=3584,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://github.com/gangiswag/cornstack/",
+    public_training_data="https://huggingface.co/collections/nomic-ai/cornstack",
+    framework=["PyTorch", "Sentence Transformers", "safetensors"],
+    reference="https://huggingface.co/nomic-ai/nomic-embed-code",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    training_datasets={"CoRNStack"},
+    adapted_from=None,
+    superseded_by=None,
+    modalities=["text"],
+    model_type=["dense"],
+    citation="""@misc{suresh2025cornstackhighqualitycontrastivedata,
+    title={CoRNStack: High-Quality Contrastive Data for Better Code Retrieval and Reranking},
+    author={Tarun Suresh and Revanth Gangi Reddy and Yifei Xu and Zach Nussbaum and Andriy Mulyar and Brandon Duderstadt and Heng Ji},
+    year={2025},
+    eprint={2412.01007},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL},
+    url={https://arxiv.org/abs/2412.01007},
+}""",
+)
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 
 import torch
-from packaging.version import Version
+from packaging.specifiers import SpecifierSet
 from torch.utils.data import DataLoader
 from transformers import __version__ as transformers_version
 
@@ -31,18 +31,20 @@ class LlamaNemoretrieverColembed(AbsEncoder):
         model_name_or_path: str,
         revision: str,
         trust_remote_code: bool,
+        transformers_version_constraint: str | None = None,
         device_map="cuda",
         torch_dtype=torch.bfloat16,
         attn_implementation="flash_attention_2",
         **kwargs,
     ):
-        required_transformers_version = "4.49.0"
-
-        if Version(transformers_version) != Version(required_transformers_version):
-            raise RuntimeError(
-                f"transformers version {transformers_version} is not match with required "
-                f"install version {required_transformers_version} to run `nvidia/llama-nemoretriever-colembed`"
-            )
+        if transformers_version_constraint is not None:
+            spec = SpecifierSet(transformers_version_constraint)
+            if transformers_version not in spec:
+                raise RuntimeError(
+                    f"Model `{model_name_or_path}` requires transformers{transformers_version_constraint}, "
+                    f"but {transformers_version} is installed. "
+                    f"Run: pip install 'transformers{transformers_version_constraint}'"
+                )
 
         from transformers import AutoModel
 
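Note: the hard-coded exact-version check is replaced by a per-model `SpecifierSet` constraint, so each ModelMeta below can declare its own pin ("==4.49.0" for the v1 models, "==5.0.0rc0" for the new v2 ones). For reference, `packaging` evaluates membership directly on version strings:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("==4.49.0")
    assert "4.49.0" in spec
    assert "4.50.0" not in spec
    # Looser ranges work the same way, e.g. SpecifierSet(">=4.49,<5").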
@@ -150,10 +152,24 @@ TRAINING_DATA = {
     "wiki-ss-nq",
 }
 
+
+TRAINING_DATA_v2 = {
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
+    "docmatix-ir",
+    "VDRMultilingualRetrieval",
+    "VisRAG-Ret-Train-Synthetic-data",
+    "VisRAG-Ret-Train-In-domain-data",
+    "wiki-ss-nq",
+}
+
 llama_nemoretriever_colembed_1b_v1 = ModelMeta(
     loader=LlamaNemoretrieverColembed,
     loader_kwargs=dict(
         trust_remote_code=True,
+        transformers_version_constraint="==4.49.0",
     ),
     name="nvidia/llama-nemoretriever-colembed-1b-v1",
     model_type=["late-interaction"],
@@ -168,7 +184,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
     embed_dim=2048,
     license="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1/blob/main/LICENSE",
     open_weights=True,
-    public_training_code="Proprietary Code",
+    public_training_code=None,
     public_training_data="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1#training-dataset",
     framework=["PyTorch", "Transformers", "safetensors"],
     reference="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1",
@@ -182,6 +198,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
     loader=LlamaNemoretrieverColembed,
     loader_kwargs=dict(
         trust_remote_code=True,
+        transformers_version_constraint="==4.49.0",
     ),
     name="nvidia/llama-nemoretriever-colembed-3b-v1",
     model_type=["late-interaction"],
@@ -196,7 +213,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
     embed_dim=3072,
     license="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1/blob/main/LICENSE",
     open_weights=True,
-    public_training_code="Proprietary Code",
+    public_training_code=None,
     public_training_data="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1#training-dataset",
     framework=["PyTorch", "Transformers", "safetensors"],
     reference="https://huggingface.co/nvidia/llama-nemoretriever-colembed-3b-v1",
@@ -205,3 +222,86 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
     training_datasets=TRAINING_DATA,
     citation=LLAMA_NEMORETRIEVER_CITATION,
 )
+
+llama_nemotron_colembed_vl_3b_v2 = ModelMeta(
+    loader=LlamaNemoretrieverColembed,
+    loader_kwargs=dict(
+        trust_remote_code=True,
+        transformers_version_constraint="==4.49.0",
+    ),
+    name="nvidia/llama-nemotron-colembed-vl-3b-v2",
+    model_type=["late-interaction"],
+    languages=["eng-Latn"],
+    revision="75f03c712cb3a252e062295f9a0966e5d95d6156",
+    release_date="2026-01-21",
+    modalities=["image", "text"],
+    n_parameters=4_407_000_000,
+    memory_usage_mb=8403,
+    max_tokens=8192,
+    embed_dim=3072,
+    license="https://huggingface.co/nvidia/llama-nemotron-colembed-vl-3b-v2/blob/main/LICENSE",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/nvidia/llama-nemotron-colembed-vl-3b-v2#training-dataset",
+    framework=["PyTorch", "Transformers", "safetensors"],
+    reference="https://huggingface.co/nvidia/llama-nemotron-colembed-vl-3b-v2",
+    similarity_fn_name="MaxSim",
+    use_instructions=True,
+    training_datasets=TRAINING_DATA,
+    citation=LLAMA_NEMORETRIEVER_CITATION,
+)
+
+nemotron_colembed_vl_4b_v2 = ModelMeta(
+    loader=LlamaNemoretrieverColembed,
+    loader_kwargs=dict(
+        trust_remote_code=True,
+        transformers_version_constraint="==5.0.0rc0",
+    ),
+    name="nvidia/nemotron-colembed-vl-4b-v2",
+    revision="823b1625c15fe3da73fa094205e538a7a2301a2a",
+    languages=["eng-Latn"],
+    release_date="2026-01-07",
+    modalities=["image", "text"],
+    n_parameters=4_800_000_000,
+    memory_usage_mb=9206,
+    max_tokens=262144,
+    embed_dim=2560,
+    license="https://huggingface.co/nvidia/nemotron-colembed-vl-4b-v2/blob/main/LICENSE",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/nvidia/nemotron-colembed-vl-4b-v2#training-dataset",
+    framework=["PyTorch", "Transformers"],
+    reference="https://huggingface.co/nvidia/nemotron-colembed-vl-4b-v2",
+    similarity_fn_name="MaxSim",
+    use_instructions=True,
+    training_datasets=TRAINING_DATA_v2,
+    citation=LLAMA_NEMORETRIEVER_CITATION,
+)
+
+
+nemotron_colembed_vl_8b_v2 = ModelMeta(
+    loader=LlamaNemoretrieverColembed,
+    loader_kwargs=dict(
+        trust_remote_code=True,
+        transformers_version_constraint="==5.0.0rc0",
+    ),
+    name="nvidia/nemotron-colembed-vl-8b-v2",
+    revision="6cbe43579dda6237768fc373768ad372cc5cdfec",
+    languages=["eng-Latn"],
+    release_date="2026-01-07",
+    modalities=["image", "text"],
+    n_parameters=8_700_000_000,
+    memory_usage_mb=16722,
+    max_tokens=262144,
+    embed_dim=4096,
+    license="https://huggingface.co/nvidia/nemotron-colembed-vl-8b-v2/blob/main/LICENSE",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/nvidia/nemotron-colembed-vl-8b-v2#training-dataset",
+    framework=["PyTorch", "Transformers"],
+    reference="https://huggingface.co/nvidia/nemotron-colembed-vl-8b-v2",
+    similarity_fn_name="MaxSim",
+    use_instructions=True,
+    training_datasets=TRAINING_DATA_v2,
+    citation=LLAMA_NEMORETRIEVER_CITATION,
+)
mteb/models/model_implementations/opensearch_neural_sparse_models.py CHANGED
@@ -134,7 +134,7 @@ class SparseEncoderWrapper(AbsEncoder):
 
 opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
     name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-gte",
-    model_type=["dense"],
+    model_type=["sparse"],
     languages=["eng-Latn"],
     open_weights=True,
     revision="a8abaa916125ee512a7a8f4d706d07eb0128a8e6",
@@ -161,7 +161,7 @@ opensearch_neural_sparse_encoding_doc_v3_gte = ModelMeta(
 
 opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
     name="opensearch-project/opensearch-neural-sparse-encoding-doc-v3-distill",
-    model_type=["dense"],
+    model_type=["sparse"],
     languages=["eng-Latn"],
     open_weights=True,
     revision="babf71f3c48695e2e53a978208e8aba48335e3c0",
@@ -184,7 +184,7 @@ opensearch_neural_sparse_encoding_doc_v3_distill = ModelMeta(
 
 opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
     name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill",
-    model_type=["dense"],
+    model_type=["sparse"],
     languages=["eng-Latn"],
     open_weights=True,
     revision="8921a26c78b8559d6604eb1f5c0b74c079bee38f",
@@ -208,7 +208,7 @@ opensearch_neural_sparse_encoding_doc_v2_distill = ModelMeta(
 
 opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
     name="opensearch-project/opensearch-neural-sparse-encoding-doc-v2-mini",
-    model_type=["dense"],
+    model_type=["sparse"],
     languages=["eng-Latn"],
     open_weights=True,
     revision="4af867a426867dfdd744097531046f4289a32fdd",
@@ -231,7 +231,7 @@ opensearch_neural_sparse_encoding_doc_v2_mini = ModelMeta(
 
 opensearch_neural_sparse_encoding_doc_v1 = ModelMeta(
     name="opensearch-project/opensearch-neural-sparse-encoding-doc-v1",
-    model_type=["dense"],
+    model_type=["sparse"],
     languages=["eng-Latn"],
     open_weights=True,
     revision="98cdcbd72867c547f72f2b7b7bed9cdf9f09922d",
mteb/models/model_implementations/ops_colqwen3_models.py ADDED
@@ -0,0 +1,267 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+import torch
+from tqdm.auto import tqdm
+from transformers import AutoModel, AutoProcessor
+
+from mteb._requires_package import requires_image_dependencies
+from mteb.models.abs_encoder import AbsEncoder
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
+
+class OpsColQwen3Wrapper(AbsEncoder):
+    """Wrapper for OpsColQwen3 model."""
+
+    def __init__(
+        self,
+        model_name: str = "OpenSearch-AI/Ops-Colqwen3-4B",
+        revision: str | None = None,
+        device: str | None = None,
+        attn_implementation: str | None = None,
+        **kwargs,
+    ):
+        requires_image_dependencies()
+        from transformers.utils.import_utils import is_flash_attn_2_available
+
+        if attn_implementation is None:
+            attn_implementation = (
+                "flash_attention_2" if is_flash_attn_2_available() else None
+            )
+
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+
+        self.mdl = AutoModel.from_pretrained(
+            model_name,
+            device_map=self.device,
+            attn_implementation=attn_implementation,
+            trust_remote_code=True,
+            revision=revision,
+            **kwargs,
+        )
+        self.mdl.eval()
+
+        self.processor = AutoProcessor.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+        )
+
+    def encode(
+        self,
+        inputs: DataLoader[BatchedInput],
+        *,
+        task_metadata: TaskMetadata,
+        hf_split: str,
+        hf_subset: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> Array:
+        text_embeddings = None
+        image_embeddings = None
+
+        if "text" in inputs.dataset.features:
+            text_embeddings = self.get_text_embeddings(inputs, **kwargs)
+        if "image" in inputs.dataset.features:
+            image_embeddings = self.get_image_embeddings(inputs, **kwargs)
+
+        if text_embeddings is not None and image_embeddings is not None:
+            if len(text_embeddings) != len(image_embeddings):
+                raise ValueError(
+                    "The number of texts and images must have the same length"
+                )
+            fused_embeddings = text_embeddings + image_embeddings
+            return fused_embeddings
+        elif text_embeddings is not None:
+            return text_embeddings
+        elif image_embeddings is not None:
+            return image_embeddings
+        raise ValueError("No text or image inputs found")
+
+    def encode_input(self, inputs):
+        return self.mdl(**inputs)
+
+    def get_image_embeddings(
+        self,
+        images: DataLoader,
+        batch_size: int = 32,
+        **kwargs,
+    ) -> torch.Tensor:
+        import torchvision.transforms.functional as F
+        from PIL import Image
+
+        all_embeds = []
+
+        with torch.no_grad():
+            for batch in tqdm(images, desc="Encoding images"):
+                # batch may be list of tensors or PIL
+                imgs = [
+                    F.to_pil_image(b.to(self.device))
+                    if not isinstance(b, Image.Image)
+                    else b
+                    for b in batch["image"]
+                ]
+                inputs = self.processor.process_images(imgs)
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                outs = self.encode_input(inputs)
+                all_embeds.extend(outs.cpu().to(torch.float32))
+
+        padded = torch.nn.utils.rnn.pad_sequence(
+            all_embeds, batch_first=True, padding_value=0
+        )
+        return padded
+
+    def get_text_embeddings(
+        self,
+        texts: DataLoader,
+        batch_size: int = 32,
+        **kwargs,
+    ) -> torch.Tensor:
+        all_embeds = []
+
+        with torch.no_grad():
+            for batch in tqdm(texts, desc="Encoding texts"):
+                batch_texts = batch["text"]
+                inputs = self.processor.process_queries(batch_texts)
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+                outs = self.encode_input(inputs)
+                all_embeds.extend(outs.cpu().to(torch.float32))
+
+        padded = torch.nn.utils.rnn.pad_sequence(
+            all_embeds, batch_first=True, padding_value=0
+        )
+        return padded
+
+    def similarity(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
+        return self.processor.score_multi_vector(a, b, device=self.device)
+
+
+OPS_COLQWEN3_TRAINING_DATA = {
+    "VDRMultilingualRetrieval",
+    # from https://huggingface.co/datasets/vidore/colpali_train_set
+    "VidoreDocVQARetrieval",
+    "VidoreInfoVQARetrieval",
+    "VidoreTatdqaRetrieval",
+    "VidoreArxivQARetrieval",
+    "docmatix-ir",
+    "HotpotQA",
+    "FEVER",
+    "NQ",
+    "MIRACLRetrieval",
+    "WebInstructSub",  # MathStackExchange and ScienceStackExchange only
+    "MrTyDi",
+}
+
+multilingual_langs = [
+    "afr-Latn",
+    "ara-Arab",
+    "aze-Latn",
+    "bel-Cyrl",
+    "bul-Cyrl",
+    "ben-Beng",
+    "cat-Latn",
+    "ceb-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Grek",
+    "eng-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Arab",
+    "fin-Latn",
+    "fra-Latn",
+    "glg-Latn",
+    "guj-Gujr",
+    "heb-Hebr",
+    "hin-Deva",
+    "hrv-Latn",
+    "hat-Latn",
+    "hun-Latn",
+    "hye-Armn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Jpan",
+    "jav-Latn",
+    "kat-Geor",
+    "kaz-Cyrl",
+    "khm-Khmr",
+    "kan-Knda",
+    "kor-Hang",
+    "kir-Cyrl",
+    "lao-Laoo",
+    "lit-Latn",
+    "lav-Latn",
+    "mkd-Cyrl",
+    "mal-Mlym",
+    "mon-Cyrl",
+    "mar-Deva",
+    "msa-Latn",
+    "mya-Mymr",
+    "nep-Deva",
+    "nld-Latn",
+    "nor-Latn",
+    "nob-Latn",
+    "nno-Latn",
+    "pan-Guru",
+    "pol-Latn",
+    "por-Latn",
+    "que-Latn",
+    "ron-Latn",
+    "rus-Cyrl",
+    "sin-Sinh",
+    "slk-Latn",
+    "slv-Latn",
+    "swa-Latn",
+    "tam-Taml",
+    "tel-Telu",
+    "tha-Thai",
+    "tgl-Latn",
+    "tur-Latn",
+    "ukr-Cyrl",
+    "urd-Arab",
+    "vie-Latn",
+    "yor-Latn",
+    "zho-Hans",
+]
+
+OPS_COLQWEN3_CITATION = """
+@misc{ops_colqwen3_4b,
+    author = {OpenSearch-AI},
+    title = {Ops-ColQwen3: State-of-the-Art Multimodal Embedding Model for Visual Document Retrieval},
+    year = {2026},
+    url = {https://huggingface.co/OpenSearch-AI/Ops-ColQwen3-4B},
+}"""
+
+ops_colqwen3_4b = ModelMeta(
+    loader=OpsColQwen3Wrapper,
+    name="OpenSearch-AI/Ops-Colqwen3-4B",
+    loader_kwargs=dict(dtype=torch.float16, trust_remote_code=True),
+    languages=multilingual_langs,
+    revision="4894b7d451ff33981650acc693bb482dbef302d3",
+    release_date="2026-01-24",
+    modalities=["image", "text"],
+    n_parameters=4_800_000_000,
+    memory_usage_mb=9206,
+    max_tokens=32768,
+    embed_dim=2560,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://github.com/illuin-tech/colpali",
+    public_training_data=None,
+    framework=["PyTorch"],
+    reference="https://huggingface.co/OpenSearch-AI/Ops-Colqwen3-4B",
+    similarity_fn_name=ScoringFunction.MAX_SIM,
+    use_instructions=True,
+    training_datasets=OPS_COLQWEN3_TRAINING_DATA,
+    citation=OPS_COLQWEN3_CITATION,
+)
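Note: the two embedding getters above return a variable-length bag of token vectors per input (a late-interaction model), so they zero-pad to a common length before stacking. A standalone sketch of that step, with hypothetical shapes:

    import torch

    embs = [torch.randn(5, 2560), torch.randn(3, 2560)]  # per-item multi-vectors
    padded = torch.nn.utils.rnn.pad_sequence(embs, batch_first=True, padding_value=0)
    print(padded.shape)  # torch.Size([2, 5, 2560]); the shorter item is zero-padded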
mteb/models/model_implementations/querit_models.py ADDED
@@ -0,0 +1,245 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+import torch
+from tqdm.auto import tqdm
+
+from mteb.models.model_meta import ModelMeta
+
+from .rerankers_custom import RerankerWrapper
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import BatchedInput, PromptType
+
+logger = logging.getLogger(__name__)
+
+
+class QueritWrapper(RerankerWrapper):
+    """
+    Multi-GPU / multi-process reranker wrapper for mteb.mteb evaluation.
+    Supports flattening all query-passage pairs without explicit grouping.
+    """
+
+    def __init__(
+        self,
+        model_name: str,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(model_name, **kwargs)
+        from transformers import AutoModel, AutoTokenizer
+
+        if not self.device:
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model_args = {}
+        if self.fp_options:
+            model_args["torch_dtype"] = self.fp_options
+        self.model = AutoModel.from_pretrained(
+            model_name, trust_remote_code=True, **model_args
+        )
+        logger.info(f"Using model {model_name}")
+
+        self.model.to(self.device)
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name, trust_remote_code=True
+        )
+        if "[CLS]" not in self.tokenizer.get_vocab():
+            raise ValueError("Tokenizer missing required special token '[CLS]'")
+        self.cls_token_id = self.tokenizer.convert_tokens_to_ids("[CLS]")
+        self.pad_token_id = self.tokenizer.pad_token_id or 0
+
+        self.max_length = (
+            min(kwargs.get("max_length", 4096), self.tokenizer.model_max_length) - 1
+        )  # sometimes it's a v large number/max int
+        logger.info(f"Using max_length of {self.max_length}, 1 token for [CLS]")
+        self.model.eval()
+
+    def process_inputs(
+        self,
+        pairs: list[str],
+    ) -> dict[str, torch.Tensor]:
+        """
+        Encode a batch of (query, document) pairs:
+        - Concatenate prompt + Query + Content
+        - Append [CLS] at the end
+        - Left-pad to max_length
+        - Generate custom attention mask based on block types
+        """
+        # Construct input texts
+        enc = self.tokenizer(
+            pairs,
+            add_special_tokens=False,
+            truncation=True,
+            max_length=self.max_length,
+            padding=False,
+        )
+
+        input_ids_list: list[list[int]] = []
+        attn_mask_list: list[torch.Tensor] = []
+
+        for ids in enc["input_ids"]:
+            # Append [CLS] token
+            ids = ids + [self.cls_token_id]
+            block_types = [1] * (len(ids) - 1) + [2]  # content + CLS
+
+            # Pad or truncate
+            if len(ids) < self.max_length:
+                pad_len = self.max_length - len(ids)
+                ids = [self.pad_token_id] * pad_len + ids
+                block_types = [0] * pad_len + block_types
+            else:
+                ids = ids[-self.max_length :]
+                block_types = block_types[-self.max_length :]
+
+            attn = self.compute_mask_content_cls(block_types)
+            input_ids_list.append(ids)
+            attn_mask_list.append(attn)
+
+        input_ids = torch.tensor(input_ids_list, dtype=torch.long, device=self.device)
+        attention_mask = torch.stack(attn_mask_list, dim=0).to(self.device)
+
+        return {"input_ids": input_ids, "attention_mask": attention_mask}
+
+    @torch.inference_mode()
+    def predict(
+        self,
+        inputs1: DataLoader[BatchedInput],
+        inputs2: DataLoader[BatchedInput],
+        *,
+        task_metadata: TaskMetadata,
+        hf_split: str,
+        hf_subset: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> list[float]:
+        """
+        Predict relevance scores for query-passage pairs.
+        Supports both single-process and multi-process/multi-GPU modes.
+        """
+        # Flatten all pairs from mteb.mteb DataLoaders
+        queries = [text for batch in inputs1 for text in batch["text"]]
+        passages = [text for batch in inputs2 for text in batch["text"]]
+
+        instructions = None
+        if "instruction" in inputs2.dataset.features:
+            instructions = [text for batch in inputs1 for text in batch["instruction"]]
+
+        num_pairs = len(queries)
+        if num_pairs == 0:
+            return []
+        final_scores: list[float] = []
+
+        batch_size = kwargs.get("batch_size", self.batch_size)
+        with tqdm(total=num_pairs, desc="Scoring", ncols=100) as pbar:
+            for start in range(0, num_pairs, batch_size):
+                end = min(start + batch_size, num_pairs)
+                batch_q = queries[start:end]
+                batch_d = passages[start:end]
+
+                batch_instructions = (
+                    instructions[start:end]
+                    if instructions is not None
+                    else [None] * len(batch_q)
+                )
+                pairs = [
+                    self.format_instruction(instr, query, doc)
+                    for instr, query, doc in zip(batch_instructions, batch_q, batch_d)
+                ]
+                enc = self.process_inputs(pairs)
+                out = self.model(**enc)
+                scores = out["score"].squeeze(-1).detach().float().cpu().tolist()
+
+                if not isinstance(scores, list):
+                    scores = [scores]
+
+                final_scores.extend(scores)
+                pbar.update(len(scores))
+
+        return final_scores
+
+    @staticmethod
+    def format_instruction(instruction: str | None, query: str, doc: str) -> str:
+        if instruction is None:
+            output = f"Judge whether the Content meets the requirements based on the Query. Query: {query}; Content: {doc}"
+        else:
+            output = f"{instruction} Query: {query}; Content: {doc}"
+        return output
+
+    @staticmethod
+    def compute_mask_content_cls(block_types: list[int]) -> torch.Tensor:
+        """
+        Create custom attention mask based on token block types:
+        - 0: padding → ignored
+        - 1: content → causal attention to previous content only
+        - 2: [CLS] → causal attention to all non-padding tokens
+
+        Args:
+            block_types: List of token types for one sequence
+
+        Returns:
+            [1, seq_len, seq_len] boolean attention mask (True = allowed to attend)
+        """
+        pos = torch.tensor(block_types, dtype=torch.long)
+        n = pos.shape[0]
+        if n == 0:
+            return torch.empty((0, 0), dtype=torch.bool, device=pos.device)
+
+        row_types = pos.view(n, 1)
+        col_types = pos.view(1, n)
+
+        row_idx = torch.arange(n, device=pos.device).view(n, 1)
+        col_idx = torch.arange(n, device=pos.device).view(1, n)
+        causal_mask = col_idx <= row_idx
+
+        # Content tokens only attend to previous content
+        mask_content = (row_types == 1) & (col_types == 1) & causal_mask
+
+        # [CLS] attends to all non-pad tokens (causal)
+        mask_cls = (row_types == 2) & (col_types != 0) & causal_mask
+
+        type_mask = mask_content | mask_cls
+        return type_mask.unsqueeze(0)
+
+
+querit_reranker_training_data = {
+    "MIRACLRanking",  # https://huggingface.co/datasets/mteb/MIRACLReranking
+    "MrTidyRetrieval",  # https://huggingface.co/datasets/mteb/mrtidy
+    "ruri-v3-dataset-reranker",  # https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-reranker
+    "MultiLongDocReranking",  # https://huggingface.co/datasets/Shitao/MLDR
+    "MindSmallReranking",  # https://huggingface.co/datasets/mteb/MindSmallReranking
+    "MSMARCO",  # https://huggingface.co/datasets/mteb/msmarco
+    "CQADupStack",  # https://huggingface.co/datasets/mteb/cqadupstack-*
+    "AskUbuntuDupQuestions",  # https://github.com/taolei87/askubuntu & The corpus and queries that overlap with mteb/askubuntudupquestions-reranking have been removed.
+    "T2Reranking",  # https://huggingface.co/datasets/THUIR/T2Ranking & The corpus and queries that overlap with mteb/T2Reranking have been removed.
+}
+
+model_meta = ModelMeta(
+    loader=QueritWrapper,
+    loader_kwargs={
+        "fp_options": "bfloat16",
+    },
+    name="Querit/Querit",
+    model_type=["cross-encoder"],
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="5ad2649cc4defb7e1361262260e9a781f14b08bc",
+    release_date="2026-01-24",
+    n_parameters=4919636992,
+    n_embedding_parameters=131907584,
+    embed_dim=1024,
+    memory_usage_mb=9383.0,
+    max_tokens=4096,
+    reference="https://huggingface.co/Querit/Querit",
+    similarity_fn_name=None,
+    training_datasets=querit_reranker_training_data,
+    license="apache-2.0",
+    framework=["PyTorch"],
+    use_instructions=None,
+    public_training_code=None,
+    public_training_data=None,
+    citation=None,
+)
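Note: `compute_mask_content_cls` is a pure static method, so the block-type semantics above are easy to check in isolation (import path taken from the RECORD section at the end of this diff):

    from mteb.models.model_implementations.querit_models import QueritWrapper

    # Block types: pad, content, content, [CLS]
    mask = QueritWrapper.compute_mask_content_cls([0, 1, 1, 2])
    print(mask.shape)  # torch.Size([1, 4, 4])
    print(mask[0, 2])  # content row: causal, content-only -> [False, True, True, False]
    print(mask[0, 3])  # [CLS] row: every non-pad token -> [False, True, True, True]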
mteb/models/model_meta.py CHANGED
@@ -71,7 +71,7 @@ FRAMEWORKS = Literal[
     "Transformers",
 ]
 
-MODEL_TYPES = Literal["dense", "cross-encoder", "late-interaction"]
+MODEL_TYPES = Literal["dense", "cross-encoder", "late-interaction", "sparse"]
 
 
 class ScoringFunction(HelpfulStrEnum):
@@ -266,7 +266,7 @@ class ModelMeta(BaseModel):
     @field_validator("name")
     @classmethod
     def _check_name(cls, v: str | None) -> str | None:
-        if v is None or v in ("bm25s", "Human"):
+        if v is None:
            return v
        if "/" not in v:
            raise ValueError(
@@ -302,6 +302,121 @@ class ModelMeta(BaseModel):
             raise ValueError("Model name is not set")
         return self.name.replace("/", "__").replace(" ", "_")
 
+    @classmethod
+    def _detect_cross_encoder_or_dense(
+        cls,
+        model_name: str,
+        revision: str | None,
+        sentence_transformers_loader: Callable[..., MTEBModels],
+        cross_encoder_loader: Callable[..., MTEBModels],
+    ) -> tuple[Callable[..., MTEBModels] | None, MODEL_TYPES]:
+        """Detect if model is CrossEncoder or default to dense."""
+        config = _get_json_from_hub(
+            model_name, "config.json", "model", revision=revision
+        )
+
+        if not config:
+            logger.warning(
+                f"Could not load config.json for {model_name}. "
+                "Defaulting to SentenceTransformer loader."
+            )
+            return sentence_transformers_loader, "dense"
+
+        architectures = config.get("architectures", [])
+
+        is_cross_encoder = any(
+            arch.endswith("ForSequenceClassification") for arch in architectures
+        )
+        if is_cross_encoder:
+            return cross_encoder_loader, "cross-encoder"
+
+        if cls._is_causal_lm_reranker(architectures, config, model_name):
+            return cross_encoder_loader, "cross-encoder"
+
+        logger.info(
+            f"Model {model_name} does not have modules.json or recognized architecture. "
+            "Defaulting to SentenceTransformer loader."
+        )
+        return sentence_transformers_loader, "dense"
+
+    @staticmethod
+    def _is_causal_lm_reranker(
+        architectures: list[str], config: dict[str, Any], model_name: str
+    ) -> bool:
+        """Check if model is a CausalLM-style reranker."""
+        is_causal_lm = any(arch.endswith("ForCausalLM") for arch in architectures)
+
+        if not is_causal_lm:
+            return False
+
+        num_labels = config.get("num_labels", 0)
+        model_name_lower = model_name.lower()
+
+        return (
+            num_labels > 0
+            or "rerank" in model_name_lower
+            or "cross-encoder" in model_name_lower
+        )
+
+    @classmethod
+    def _detect_model_type_and_loader(
+        cls,
+        model_name: str | None,
+        revision: str | None = None,
+    ) -> tuple[Callable[..., MTEBModels] | None, MODEL_TYPES]:
+        """Detect the model type and appropriate loader based on HuggingFace Hub configuration files.
+
+        This follows the Sentence Transformers architecture detection logic:
+        1. Check for modules.json - If present, model is a SentenceTransformer (dense encoder)
+        2. If no modules.json, check config.json for architecture:
+           - ForSequenceClassification → CrossEncoder
+           - CausalLM with reranking indicators → CrossEncoder
+        3. Default to dense (SentenceTransformer) if no clear indicators are found
+
+        Detection for CausalLM-style rerankers:
+        - Model has ForCausalLM architecture AND
+        - Has num_labels > 0 in config, OR
+        - Model name contains "rerank" or "cross-encoder"
+
+        Args:
+            model_name: The HuggingFace model name (can be None)
+            revision: The model revision
+
+        Returns:
+            A tuple of (loader_function, model_type) where:
+            - loader_function: A callable that returns MTEBModels, or None if model doesn't exist
+            - model_type: One of "dense", "cross-encoder", or "late-interaction"
+        """
+        from mteb.models import CrossEncoderWrapper, sentence_transformers_loader
+
+        if not model_name or not _repo_exists(model_name):
+            return sentence_transformers_loader, "dense"
+
+        try:
+            modules_config = _get_json_from_hub(
+                model_name, "modules.json", "model", revision=revision
+            )
+
+            if (
+                modules_config
+            ):  # SentenceTransformer/SparseEncoder (Not support for now)
+                return sentence_transformers_loader, "dense"
+            else:
+                return cls._detect_cross_encoder_or_dense(
+                    model_name,
+                    revision,
+                    sentence_transformers_loader,
+                    cross_encoder_loader=CrossEncoderWrapper,
+                )
+
+        except Exception as e:
+            logger.warning(
+                f"Error detecting model type for {model_name}: {e}. "
+                "Defaulting to SentenceTransformer loader."
+            )
+
+        return sentence_transformers_loader, "dense"
+
     @classmethod
     def _from_hub(
         cls,
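Note: the CausalLM-reranker heuristic added above is pure and can be exercised without touching the Hub; a quick sketch:

    from mteb.models.model_meta import ModelMeta

    # ForCausalLM architectures count as rerankers when the config sets
    # num_labels > 0 or the repo name signals reranking.
    assert ModelMeta._is_causal_lm_reranker(["Qwen2ForCausalLM"], {"num_labels": 1}, "org/model")
    assert ModelMeta._is_causal_lm_reranker(["LlamaForCausalLM"], {}, "org/my-reranker")
    assert not ModelMeta._is_causal_lm_reranker(["BertModel"], {}, "org/model")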
@@ -319,9 +434,11 @@ class ModelMeta(BaseModel):
         Returns:
             The generated ModelMeta.
         """
-        from mteb.models import sentence_transformers_loader
+        loader: Callable[..., MTEBModels] | None
+        model_type: MODEL_TYPES
+
+        loader, model_type = cls._detect_model_type_and_loader(model_name, revision)
 
-        loader = sentence_transformers_loader
         frameworks: list[FRAMEWORKS] = ["PyTorch"]
         model_license = None
         reference = None
@@ -363,6 +480,7 @@ class ModelMeta(BaseModel):
         return cls(
             loader=loader,
             name=model_name or "no_model_name/available",
+            model_type=[model_type],
             revision=revision or "no_revision_available",
             reference=reference,
             release_date=release_date,
mteb/results/model_result.py CHANGED
@@ -17,6 +17,7 @@ from .task_result import TaskError, TaskResult
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterable
+    from pathlib import Path
 
     from mteb.abstasks.abstask import AbsTask
     from mteb.abstasks.task_metadata import (
@@ -417,3 +418,25 @@ class ModelResult(BaseModel):
         if not mods:
             mods = self.default_modalities
         return list(set(mods))
+
+    def to_disk(self, path: Path) -> None:
+        """Save ModelResult to disk as JSON.
+
+        Args:
+            path: The path to the file to save.
+        """
+        with path.open("w") as f:
+            f.write(self.model_dump_json(indent=2))
+
+    @classmethod
+    def from_disk(cls, path: Path) -> ModelResult:
+        """Load ModelResult from disk.
+
+        Args:
+            path: The path to the JSON file to load.
+
+        Returns:
+            The loaded ModelResult object.
+        """
+        with path.open("r", encoding="utf-8") as f:
+            return cls.model_validate_json(f.read())
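Note: a hypothetical round-trip with the new helpers, assuming an existing `results: ModelResult` instance:

    from pathlib import Path

    results.to_disk(Path("model_result.json"))
    reloaded = ModelResult.from_disk(Path("model_result.json"))
    # from_disk delegates to pydantic's model_validate_json, so the JSON
    # written by to_disk parses back into an equivalent ModelResult.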
mteb/results/task_result.py CHANGED
@@ -337,16 +337,16 @@ class TaskResult(BaseModel):
             The loaded TaskResult object.
         """
         with path.open("r", encoding="utf-8") as f:
-            data = json.load(f)
+            json_str = f.read()
 
         if not load_historic_data:
             try:
-                return cls.model_validate(data)
+                return cls.model_validate_json(json_str)
             except Exception as e:
                 raise ValueError(
                     f"Error loading TaskResult from disk. You can try to load historic data by setting `load_historic_data=True`. Error: {e}"
                 )
-
+        data = json.loads(json_str)
         pre_1_11_load = (
             (
                 "mteb_version" in data
@@ -357,7 +357,7 @@
         ) # assume it is before 1.11.0 if the version is not present
 
         try:
-            obj: TaskResult = cls.model_validate(data)
+            obj: TaskResult = cls.model_validate_json(json_str)
         except Exception as e:
             if not pre_1_11_load:
                 raise e
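Note: `model_validate_json` is pydantic v2's one-pass counterpart of `model_validate(json.loads(...))`; the raw string is kept around so the historic-data fallback can still inspect the parsed dict. Roughly:

    import json

    from pydantic import BaseModel

    class M(BaseModel):
        x: int

    s = '{"x": 1}'
    assert M.model_validate_json(s) == M.model_validate(json.loads(s))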
mteb/tasks/pair_classification/fas/fars_tail.py CHANGED
@@ -1,5 +1,3 @@
-import datasets
-
 from mteb.abstasks.pair_classification import AbsTaskPairClassification
 from mteb.abstasks.task_metadata import TaskMetadata
 
@@ -8,8 +6,8 @@ class FarsTail(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="FarsTail",
         dataset={
-            "path": "azarijafari/FarsTail",
-            "revision": "7335288588f14e5a687d97fc979194c2abe6f4e7",
+            "path": "mteb/FarsTail",
+            "revision": "0fa0863dc160869b5a2d78803b4440ea3c671ff5",
         },
         description="This dataset, named FarsTail, includes 10,367 samples which are provided in both the Persian language as well as the indexed format to be useful for non-Persian researchers. The samples are generated from 3,539 multiple-choice questions with the least amount of annotator interventions in a way similar to the SciTail dataset",
         reference="https://link.springer.com/article/10.1007/s00500-023-08959-3",
@@ -37,33 +35,3 @@ class FarsTail(AbsTaskPairClassification):
         }
         """, # after removing neutral
     )
-
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
-        if self.data_loaded:
-            return
-        path = self.metadata.dataset["path"]
-        revision = self.metadata.dataset["revision"]
-        data_files = {
-            "test": f"https://huggingface.co/datasets/{path}/resolve/{revision}/data/Test-word.csv"
-        }
-        self.dataset = datasets.load_dataset(
-            "csv", data_files=data_files, delimiter="\t"
-        )
-        self.dataset_transform()
-        self.data_loaded = True
-
-    def dataset_transform(self, num_proc: int = 1):
-        _dataset = {}
-        self.dataset = self.dataset.filter(lambda x: x["label"] != "n")
-        self.dataset = self.dataset.map(
-            lambda example: {"label": 1 if example["label"] == "e" else 0}
-        )
-        for split in self.metadata.eval_splits:
-            _dataset[split] = [
-                {
-                    "sentence1": self.dataset[split]["premise"],
-                    "sentence2": self.dataset[split]["hypothesis"],
-                    "labels": self.dataset[split]["label"],
-                }
-            ]
-        self.dataset = _dataset
mteb-2.7.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.12
+Version: 2.7.14
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -32,8 +32,6 @@ Requires-Dist: rich>=0.0.0
 Requires-Dist: pytrec-eval-terrier>=0.5.6
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: polars>=0.20.22
-Requires-Dist: torch; python_full_version < "3.14"
-Requires-Dist: torch>=2.9.0; python_full_version >= "3.14"
 Provides-Extra: image
 Requires-Dist: torchvision>0.2.1; extra == "image"
 Requires-Dist: transformers[torch-vision,vision]; extra == "image"
mteb-2.7.14.dist-info/RECORD CHANGED
@@ -40,7 +40,7 @@ mteb/abstasks/clustering.py,sha256=I8vre2f2FJFagzJEYf6hKDo3Y28xU29J_O-MhfqWqSI,1
 mteb/abstasks/clustering_legacy.py,sha256=sbx8K6paccvzDPnmhgNE_UJE83orAJnQm3NGr-Ktjfs,9184
 mteb/abstasks/dataset_card_template.md,sha256=aD6l8qc3_jxwoIGJNYLzse-jpRa8hu92AxpnUtNgges,5122
 mteb/abstasks/multilabel_classification.py,sha256=rFa_Pw2OsUzqhZS-jh2zFD7I-TNl8bVNJ-DW7EpPapU,9708
-mteb/abstasks/pair_classification.py,sha256=MpjOeIcM_RMqSpkXc8PFDAwDgmb54gEqj6ZOUcOa1aQ,14122
+mteb/abstasks/pair_classification.py,sha256=RVV5WUjs18N5PbWpyxakDNEd1UlRc4ON9I0OjD26Z78,14231
 mteb/abstasks/regression.py,sha256=ZuMZfOwU3G4hr__eHsgdagKKdrbN4-wQMLz45jr9YUc,8946
 mteb/abstasks/retrieval.py,sha256=BPyRibStAD70JfR0Z1x-VVVfzJDRVSmbOS6uREfpmok,27743
 mteb/abstasks/retrieval_dataset_loaders.py,sha256=p0y1nrWlUrt_aeoR4ocDLEQMLuD_SlMH0gBiUsOwrww,9983
@@ -1479,9 +1479,9 @@ mteb/leaderboard/table.py,sha256=U5mWtrVUTk_6t8T4KAp5qlbFgKh1PD0iKICqNMfhsoY,104
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=We9HlwWP61P4cMyZ080gywvDErA1eVsU9t46PtcNrCM,16830
-mteb/models/get_model_meta.py,sha256=h6Z2Q3D_L4BeJbi1gPHTSgelbQjsQn1ALpI8uwNa0ac,6832
+mteb/models/get_model_meta.py,sha256=WRWnVIT1n7i63BYlBRB-8BpYNtHxn7KMJOm5mzlJ8xI,7211
 mteb/models/instruct_wrapper.py,sha256=XAvvbPnXiTxKhFbmusm2uS8E9BMq8QXRSzQQI1jqKzE,9781
-mteb/models/model_meta.py,sha256=5seQwce893SbgSywFsphLuZGQ9rRn1fLl9fj1SfKnXE,32887
+mteb/models/model_meta.py,sha256=E6mBB_inz9kMO8z3ixgGuB9QKWUYYzW44gSZwnY3ZbI,37316
 mteb/models/models_protocols.py,sha256=HTB4-SYa3SeJXMMSA8o05lHTiLBbq314VW60K_PfcZY,9509
 mteb/models/search_wrappers.py,sha256=PXE1VVDWUd0LgTPJ-FxqIbGpIDWLRKo5CjrwIuu5nzw,21567
 mteb/models/sentence_transformer_wrapper.py,sha256=RsOxj-b7qzeYcxUTVJyb-lZDY4bINl4jEAEkPvKYB10,13578
@@ -1505,7 +1505,7 @@ mteb/models/model_implementations/bge_models.py,sha256=JuO1FRWrsqlsM_jslQ96oVsD3
 mteb/models/model_implementations/bica_model.py,sha256=Yx3iZrXF6ZMJS9SH5lbzNHoUWGNH3dypRtZ7dX5o7rA,1305
 mteb/models/model_implementations/blip2_models.py,sha256=C6egwozJthHmv92I0SWID3-sQCPROPJP0TzfQVKNzlo,7898
 mteb/models/model_implementations/blip_models.py,sha256=D_9e7C8GXGST8k7dMJL20x984vMeqbITu36XASi-iUU,12149
-mteb/models/model_implementations/bm25.py,sha256=-xpeMqYi0_Ne5rL_Y1qAlYvloLlQzNDUav2ESVGHAhA,5217
+mteb/models/model_implementations/bm25.py,sha256=IAKU8syYesN7seRQLII-c1ACq6BRz5Ql6nEQEXYWLwQ,5226
 mteb/models/model_implementations/bmretriever_models.py,sha256=rijCIzX6nO5kNXqxEFbZrV7bsZtmKs8RIkMqa5cPWTk,7078
 mteb/models/model_implementations/cadet_models.py,sha256=gXIfW9MkGYFhOhsrq5a_tQcPuth13Dh1dO1KySwVxyo,2305
 mteb/models/model_implementations/cde_models.py,sha256=l4E6h1hcsNY1GTXoCgQDoeG5dRcEl7JTOiiWmp6FYqg,9373
@@ -1537,7 +1537,7 @@ mteb/models/model_implementations/granite_vision_embedding_models.py,sha256=jxyR
 mteb/models/model_implementations/gritlm_models.py,sha256=756vgZGADy5FhKlFuzuD6huevC_AYD5b88V1Y5yFht8,3241
 mteb/models/model_implementations/gte_models.py,sha256=-ASkoAuAiVytVtsYMtuKonUf39i0U69HSEnJy_-PwXA,14574
 mteb/models/model_implementations/hinvec_models.py,sha256=SYWGFr8XALmM7B9tIHEQnrqq9kZOZIBkW7m7QpzerHI,1756
-mteb/models/model_implementations/human.py,sha256=MTw560BrwuvUsHUb5_Gjq7ZRfrBmaT8dGnrubFFDB_o,591
+mteb/models/model_implementations/human.py,sha256=k7vN6WTcSWyWS9wnluzr6yCOjuMi5LupQnT-4cfzNOk,600
 mteb/models/model_implementations/ibm_granite_models.py,sha256=ipLRDBerTQiL5NaoaDho410Fzy7eNFlF3jB54hGZrwI,8687
 mteb/models/model_implementations/inf_models.py,sha256=q_hNNhzMjAxbnJnAT0N6KaNegX_3XZlmz-LXY5C891I,3093
 mteb/models/model_implementations/jasper_models.py,sha256=ourAMx1_L6b2AxX046wQcxDqvYzY1Mx3gaHww0WaMA8,16476
@@ -1565,14 +1565,15 @@ mteb/models/model_implementations/model2vec_models.py,sha256=qXcPhV0hGRFBsvRBrb8
 mteb/models/model_implementations/moka_models.py,sha256=4Esujv_fVJjHuX1nRH6sGtmrmF04A90F4Xo2uN0YTzs,5205
 mteb/models/model_implementations/nbailab.py,sha256=iv2xdqVM5HoTAlBR6e_UdzJu6rSPujqWXFYwyCv69hU,2684
 mteb/models/model_implementations/no_instruct_sentence_models.py,sha256=DTb-eHZYSY6lGJkkdkC0tZ_n0GHLQwVlUehVg59T5N4,4198
-mteb/models/model_implementations/nomic_models.py,sha256=oUaSabKEJXrg_cnmWdfczsdrqRDP7LZhX5vN1U5b-Xk,15469
+mteb/models/model_implementations/nomic_models.py,sha256=BO6XQbX4PFa5By0opAYkxz95CcHmjxbG5DYcklxJ1l8,16986
 mteb/models/model_implementations/nomic_models_vision.py,sha256=AzTCWbXBonUAVub0TTxWCsBtg4WYex3vPiLlz3ULdHc,6916
-mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=5jYzS0qc4x82T5_c1HFt4uy9kksTt9Fs24pdLioq4Oc,7033
+mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=-5_kd9jeDcgVv9gdwWuvX_-bNQdhAxInf1Mqo8_BdS8,10653
 mteb/models/model_implementations/nvidia_models.py,sha256=r-AW1dVQbteWjexjvZgFEt_90OHNRYer_5GLuqSXRS0,26924
 mteb/models/model_implementations/octen_models.py,sha256=5z-t2O-iIFiOOLdZ_AK9f7GrVRg-9_vx3JNAG9dJNPE,8562
 mteb/models/model_implementations/openai_models.py,sha256=y1wMknrrcu1L5CNwniG0mFThPVMON1c2Fj22jkKsw7Y,9730
 mteb/models/model_implementations/openclip_models.py,sha256=z2gQum16O0QhJPyxqKor3oO-_uWfnep6wSXqOFQQ2Q8,11969
-mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=O6ewgGKZtw-kpspbz8-dXArDf5tUWy_vY6AxFs7fsZ8,8936
+mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=J5FEvKWQUiBusL6PHcrRuRRJOQ-iMwOSu1fX0pblXhk,8941
+mteb/models/model_implementations/ops_colqwen3_models.py,sha256=5vg5d1_WfVGMgtIwkh6zf2-Paum6V35XcKEvLfRyRzs,7437
 mteb/models/model_implementations/ops_moa_models.py,sha256=Ah7L78mqC9pH8t6sf1OWXOLjouVUpAutt6lZ0np7eMM,2655
 mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=xv1ftJeMND4lpeKYC3RLQB4nhdiYy0wCxrzEjUj4gSg,1114
 mteb/models/model_implementations/pawan_models.py,sha256=iyzh6NSPZKU9znJYEDPjJNIqvkyuKPAol5TcILuq1Is,1225
@@ -1582,6 +1583,7 @@ mteb/models/model_implementations/promptriever_models.py,sha256=tDXysEKbvoDNtidV
 mteb/models/model_implementations/pylate_models.py,sha256=EwpDJf5yjONmmiv9zHSEbc3e7kMRJUHYv7K59QTrNAE,17525
 mteb/models/model_implementations/qodo_models.py,sha256=AwYRGctJbjEkcIh1pcSeeEXYiYiizAbfTyw6CaZBJfU,2224
 mteb/models/model_implementations/qtack_models.py,sha256=vw_2O4ZABR-_nYV4g1Ud8bW1DTP-wwtQS2eGqN70vT4,1304
+mteb/models/model_implementations/querit_models.py,sha256=P7lAw5IDe47DA_5srMwGPqxjMIFuvOW0BJ7xwB4GOro,8917
 mteb/models/model_implementations/qwen3_models.py,sha256=857UnUEil9o8xcw7vSr2fMRlEegyE2Q86e5yLeRL_mQ,5517
 mteb/models/model_implementations/qzhou_models.py,sha256=mfG70JrNJCo-s3MykRn6lg9gFPcKMeMI7Y8VrBhNo7I,3684
 mteb/models/model_implementations/random_baseline.py,sha256=YsITQoLbea_Iz2X84WNGBGkhlsQ3hB7yx1oJwXghimE,7561
@@ -1627,8 +1629,8 @@ mteb/models/search_encoder_index/search_indexes/__init__.py,sha256=Wm60_oUemUpFs
 mteb/models/search_encoder_index/search_indexes/faiss_search_index.py,sha256=jwC-3swhnILZnVHUrMR7Ts78TuYtVRxPusF02UV1g6E,5770
 mteb/results/__init__.py,sha256=EXQqK4Am5eIYzD52dpcGAFSdqnC38oE6JHN302oidHc,158
 mteb/results/benchmark_results.py,sha256=unBUBJ92ud0UXlkZJLn71WVcf-oUlF6XcITTccz5OBA,20318
-mteb/results/model_result.py,sha256=u1eUynaf_KVsdYdz29MACsCDR_48xODapkIGLG-lZa0,15321
-mteb/results/task_result.py,sha256=TL9V7P7cXqLljnCHdzzaQ9vZZSXh9qXYFnPRG049alw,34247
+mteb/results/model_result.py,sha256=h894O5-RSCOF8XNpXMuhBCqnj43T-1K5Y1el_fyrzP4,15954
+mteb/results/task_result.py,sha256=Wi5MRQBkb0Qddhc4nLzrrjm1nGlCh8aq4_VCQoxGmNg,34300
 mteb/tasks/__init__.py,sha256=izAxU0ip1F_YUwx0dFCuN35BaktdmePh6vlDiHC0kLo,503
 mteb/tasks/aggregated_tasks/__init__.py,sha256=Ufgbh1AirxCQkojO3AUhUFWM8zQG10cfdVTkj_PeyLI,104
 mteb/tasks/aggregated_tasks/eng/__init__.py,sha256=HgaSyAX8Is5CGE006RgJkLQQVxrx2FmMnm6NHQBDi-4,358
@@ -2111,7 +2113,7 @@ mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py,sha256=0YjKK4C47Uu
 mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py,sha256=M9B3JCFt4L6yEnd8S-o-W-FtCSMdl0h1KST3fqApEVA,1796
 mteb/tasks/pair_classification/fas/__init__.py,sha256=1Bbr5ZKSjpPuJb9zvk7OSd2Krdh1bpxJjVNLNPFT4Ck,440
 mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py,sha256=1i8phewQffpIxzWtBWQFUisHu3XhBpk9Sf-IkwM8jNg,10932
-mteb/tasks/pair_classification/fas/fars_tail.py,sha256=NPtN4mAQ6TcBJmX_GD7WZ4mE-_Bw3D9Aj-GEliZDIMo,2848
+mteb/tasks/pair_classification/fas/fars_tail.py,sha256=jb-6UW0Lk7YxdMMCZsMavY6CRiv3T6MFrbvlPd0vPPk,1676
 mteb/tasks/pair_classification/hye/__init__.py,sha256=hU4xSf6kyKhD4o4CuNMQNE1w9FKv8tkkqvYvhpMV5Kg,93
 mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py,sha256=Ezi604W-cHOLDm8O9j3yq9z-GzDt9OWI9jgyqVjY9M4,1437
 mteb/tasks/pair_classification/ind/__init__.py,sha256=iXGvZ6eNgGhyD2wgbkvV-bpPPCJNxlE5eq_qvF2Y_UI,53
@@ -2644,9 +2646,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
-mteb-2.7.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.7.12.dist-info/METADATA,sha256=qFfnF_lPdIQWnBIiunbaPAatQI_x21ouRUpgasLHbM0,14458
-mteb-2.7.12.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-mteb-2.7.12.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.7.12.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.7.12.dist-info/RECORD,,
+mteb-2.7.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.7.14.dist-info/METADATA,sha256=ZTD9D9Fuy9OCRxIXSZzh1bObP0PKSXUMqI4j3XVNR_c,14348
+mteb-2.7.14.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mteb-2.7.14.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.7.14.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.7.14.dist-info/RECORD,,