mteb 2.7.15__py3-none-any.whl → 2.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. mteb/_evaluators/classification_metrics.py +10 -1
  2. mteb/_evaluators/pair_classification_evaluator.py +2 -1
  3. mteb/_evaluators/retrieval_metrics.py +9 -7
  4. mteb/_evaluators/sklearn_evaluator.py +9 -4
  5. mteb/abstasks/_stratification.py +13 -8
  6. mteb/abstasks/classification.py +4 -2
  7. mteb/abstasks/multilabel_classification.py +6 -4
  8. mteb/abstasks/regression.py +3 -2
  9. mteb/benchmarks/benchmark.py +131 -3
  10. mteb/leaderboard/figures.py +2 -1
  11. mteb/leaderboard/table.py +10 -2
  12. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +3 -3
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +8 -3
  15. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  16. mteb/models/model_implementations/bedrock_models.py +4 -4
  17. mteb/models/model_implementations/mcinext_models.py +2 -2
  18. mteb/models/model_implementations/misc_models.py +0 -48
  19. mteb/models/model_implementations/openai_models.py +2 -1
  20. mteb/models/model_implementations/random_baseline.py +4 -3
  21. mteb/models/model_implementations/rerankers_custom.py +0 -87
  22. mteb/models/model_implementations/rerankers_monot5_based.py +0 -26
  23. mteb/models/model_implementations/seed_models.py +7 -2
  24. mteb/models/model_implementations/voyage_models.py +1 -1
  25. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +1 -0
  26. mteb/types/_encoder_io.py +3 -2
  27. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/METADATA +1 -1
  28. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/RECORD +32 -32
  29. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/WHEEL +0 -0
  30. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/entry_points.txt +0 -0
  31. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/licenses/LICENSE +0 -0
  32. {mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/top_level.txt +0 -0
mteb/_evaluators/classification_metrics.py CHANGED
@@ -1,7 +1,16 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
  import numpy as np
 
+ if TYPE_CHECKING:
+     from numpy.typing import NDArray
+
 
- def hamming_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+ def hamming_score(
+     y_true: NDArray[np.integer], y_pred: NDArray[np.integer | np.floating]
+ ) -> float:
      """Compute the Hamming score (a.k.a. label-based accuracy) for multilabel classification.
 
      The Hamming score is the fraction of labels that are correctly predicted for each sample,
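Note: the signature change above only tightens the type annotations; the metric itself is untouched. As a reference point, a minimal sketch of the label-based accuracy the docstring describes (illustrative only, not the mteb implementation) could look like this:

    import numpy as np
    from numpy.typing import NDArray


    def hamming_score_sketch(
        y_true: NDArray[np.integer], y_pred: NDArray[np.integer]
    ) -> float:
        # Per-sample |intersection| / |union| of the predicted label sets, averaged.
        per_sample = []
        for true_row, pred_row in zip(y_true, y_pred):
            true_set = set(np.flatnonzero(true_row))
            pred_set = set(np.flatnonzero(pred_row))
            union = true_set | pred_set
            per_sample.append(1.0 if not union else len(true_set & pred_set) / len(union))
        return float(np.mean(per_sample))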
mteb/_evaluators/pair_classification_evaluator.py CHANGED
@@ -16,6 +16,7 @@ from mteb.similarity_functions import compute_pairwise_similarity
 
  if TYPE_CHECKING:
      from datasets import Dataset
+     from numpy.typing import NDArray
 
      from mteb.abstasks.task_metadata import TaskMetadata
      from mteb.models import EncoderProtocol
@@ -155,7 +156,7 @@ class PairClassificationEvaluator(Evaluator):
          hf_split: str,
          hf_subset: str,
          **encode_kwargs: Any,
-     ) -> np.ndarray:
+     ) -> NDArray[np.floating]:
          index_map = {}
          all_unique_texts: list[str] = []
          all_texts_indexes = []
mteb/_evaluators/retrieval_metrics.py CHANGED
@@ -15,6 +15,8 @@ from mteb.types import RetrievalEvaluationResult
  if TYPE_CHECKING:
      from collections.abc import Mapping
 
+     from numpy.typing import NDArray
+
      from mteb.types import RelevantDocumentsType
 
  logger = logging.getLogger(__name__)
@@ -273,9 +275,9 @@ def confidence_scores(sim_scores: list[float]) -> dict[str, float]:
 
 
  def nauc(
-     conf_scores: np.ndarray,
-     metrics: np.ndarray,
-     abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
+     conf_scores: NDArray[np.floating],
+     metrics: NDArray[np.floating],
+     abstention_rates: NDArray[np.floating] = np.linspace(0, 1, 11)[:-1],
  ) -> float:
      """Computes normalized Area Under the Curve (nAUC) on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997
 
@@ -295,10 +297,10 @@ def nauc(
      """
 
      def abstention_curve(
-         conf_scores: np.ndarray,
-         metrics: np.ndarray,
-         abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
-     ) -> np.ndarray:
+         conf_scores: NDArray[np.floating],
+         metrics: NDArray[np.floating],
+         abstention_rates: NDArray[np.floating] = np.linspace(0, 1, 11)[:-1],
+     ) -> NDArray[np.floating]:
          """Computes the raw abstention curve for a given set of evaluated instances and corresponding confidence scores
 
          Args:
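The `nauc` change is annotation-only. For orientation, one common way to build such an abstention curve (a hedged sketch, assuming abstention means dropping the lowest-confidence fraction of instances; not necessarily the exact computation in retrieval_metrics.py) is:

    import numpy as np
    from numpy.typing import NDArray


    def abstention_curve_sketch(
        conf_scores: NDArray[np.floating],
        metrics: NDArray[np.floating],
        abstention_rates: NDArray[np.floating] = np.linspace(0, 1, 11)[:-1],
    ) -> NDArray[np.floating]:
        # Mean metric over the instances kept after abstaining on the least confident ones.
        order = np.argsort(conf_scores)  # ascending confidence
        curve = np.empty_like(abstention_rates)
        for i, rate in enumerate(abstention_rates):
            n_drop = int(round(rate * len(conf_scores)))
            curve[i] = metrics[order[n_drop:]].mean()
        return curve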
mteb/_evaluators/sklearn_evaluator.py CHANGED
@@ -10,6 +10,7 @@ from .evaluator import Evaluator
  if TYPE_CHECKING:
      import numpy as np
      from datasets import Dataset
+     from numpy.typing import NDArray
      from torch.utils.data import DataLoader
      from typing_extensions import Self
 
@@ -21,11 +22,15 @@ logger = logging.getLogger(__name__)
 
 
  class SklearnModelProtocol(Protocol):
-     def fit(self, X: Array, y: np.ndarray | list[int]) -> None: ...  # noqa: N803
-     def predict(self, X: Array) -> np.ndarray: ...  # noqa: N803
+     def fit(
+         self, X: Array, y: NDArray[np.integer | np.floating] | list[int | float]
+     ) -> None: ...
+     def predict(self, X: Array) -> NDArray[np.integer | np.floating]: ...
      def get_params(self) -> dict[str, Any]: ...
      def set_params(self, random_state: int, **kwargs: dict[str, Any]) -> Self: ...
-     def score(self, X: Array, y: np.ndarray | list[int]) -> float: ...  # noqa: N803
+     def score(
+         self, X: Array, y: NDArray[np.integer | np.floating] | list[int | float]
+     ) -> float: ...
 
 
  class SklearnEvaluator(Evaluator):
@@ -79,7 +84,7 @@ class SklearnEvaluator(Evaluator):
          encode_kwargs: EncodeKwargs,
          test_cache: Array | None = None,
          num_proc: int = 1,
-     ) -> tuple[np.ndarray, Array]:
+     ) -> tuple[NDArray[np.integer | np.floating], Array]:
          """Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.
 
          Args:
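Since `SklearnModelProtocol` is a structural `Protocol`, any estimator exposing `fit`, `predict`, `get_params`, `set_params`, and `score` satisfies it without inheriting from it. A small usage sketch (the estimator choice here is illustrative, not mteb's default classifier):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    # Toy embeddings and integer labels standing in for Array / NDArray[np.integer].
    X_train = np.random.rand(16, 8).astype(np.float32)
    y_train = np.tile([0, 1], 8)
    X_test = np.random.rand(4, 8).astype(np.float32)

    clf = LogisticRegression(max_iter=200)
    clf.set_params(random_state=42)  # matches SklearnModelProtocol.set_params
    clf.fit(X_train, y_train)        # y may be an int/float array or list
    y_pred = clf.predict(X_test)     # returns NDArray[np.integer]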
mteb/abstasks/_stratification.py CHANGED
@@ -38,21 +38,26 @@ Bibtex:
  }
  """
 
+ from __future__ import annotations
+
  import itertools
- from typing import Any
+ from typing import TYPE_CHECKING, Any
 
  import numpy as np
  import scipy.sparse as sp
  from sklearn.model_selection._split import _BaseKFold
  from sklearn.utils import check_random_state
 
+ if TYPE_CHECKING:
+     from numpy.typing import NDArray
+
 
  def _iterative_train_test_split(
-     X: np.ndarray,  # noqa: N803
-     y: np.ndarray,
+     X: NDArray[np.integer],
+     y: NDArray[np.integer],
      test_size: float,
      random_state: int | None = None,
- ) -> tuple[np.ndarray, np.ndarray]:
+ ) -> tuple[NDArray[np.integer], NDArray[np.integer]]:
      """Iteratively stratified train/test split
 
      Slighltly modified from:
@@ -79,8 +84,8 @@ def _iterative_train_test_split(
 
 
  def _fold_tie_break(
-     desired_samples_per_fold: np.ndarray,
-     M: np.ndarray,  # noqa: N803
+     desired_samples_per_fold: NDArray[np.floating],
+     M: NDArray[np.integer],  # noqa: N803
      random_state: np.random.RandomState,
  ):
      """Helper function to split a tie between folds with same desirability of a given sample
@@ -179,7 +184,7 @@ class IterativeStratification(_BaseKFold):
          ]
 
      def _prepare_stratification(
-         self, y: np.ndarray
+         self, y: NDArray[np.integer]
      ) -> tuple[
          list[list[int]],
          dict[int, bool],
@@ -301,7 +306,7 @@ class IterativeStratification(_BaseKFold):
          self.desired_samples_per_fold[fold_selected] -= 1
          folds[fold_selected].append(row)
 
-     def _iter_test_indices(self, X, y=None, groups=None):  # noqa: N803
+     def _iter_test_indices(self, X, y=None, groups=None):
          """Internal method for providing scikit-learn's split with folds
 
          Args:
mteb/abstasks/classification.py CHANGED
@@ -31,6 +31,8 @@ from .abstask import AbsTask
  if TYPE_CHECKING:
      from pathlib import Path
 
+     from numpy.typing import NDArray
+
      from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
      from mteb.models import MTEBModels
      from mteb.types import EncodeKwargs, HFSubset, ScoresDict
@@ -270,8 +272,8 @@ class AbsTaskClassification(AbsTask):
 
      def _calculate_scores(
          self,
-         y_test: np.ndarray | list[int],
-         y_pred: np.ndarray,
+         y_test: NDArray[np.integer] | list[int],
+         y_pred: NDArray[np.integer | np.floating] | list[int],
      ) -> ClassificationMetrics:
          scores = ClassificationMetrics(
              accuracy=accuracy_score(y_test, y_pred),
mteb/abstasks/multilabel_classification.py CHANGED
@@ -23,6 +23,8 @@ from .classification import AbsTaskClassification
  if TYPE_CHECKING:
      from pathlib import Path
 
+     from numpy.typing import NDArray
+
      from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
      from mteb.models import MTEBModels
      from mteb.types import Array, EncodeKwargs
@@ -32,10 +34,10 @@ logger = logging.getLogger(__name__)
 
 
  def _evaluate_classifier(
      embeddings_train: Array,
-     y_train: np.ndarray,
+     y_train: NDArray[np.integer],
      embeddings_test: Array,
      classifier: SklearnModelProtocol,
- ) -> tuple[np.ndarray, SklearnModelProtocol]:
+ ) -> tuple[NDArray[np.integer | np.floating], SklearnModelProtocol]:
      classifier_copy: SklearnModelProtocol = clone(classifier)
      classifier_copy.fit(embeddings_train, y_train)
      return classifier_copy.predict(embeddings_test), classifier_copy
@@ -208,8 +210,8 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
 
      def _calculate_scores(  # type: ignore[override]
          self,
-         y_test: np.ndarray,
-         y_pred: np.ndarray,
+         y_test: NDArray[np.integer],
+         y_pred: NDArray[np.integer | np.floating],
          x_test_embedding: Array,
          current_classifier: SklearnModelProtocol,
      ) -> MultilabelClassificationMetrics:
mteb/abstasks/regression.py CHANGED
@@ -24,6 +24,7 @@ from .classification import AbsTaskClassification
 
  if TYPE_CHECKING:
      from datasets import Dataset
+     from numpy.typing import NDArray
 
      from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
      from mteb.types.statistics import (
@@ -123,8 +124,8 @@ class AbsTaskRegression(AbsTaskClassification):
 
      def _calculate_scores(  # type: ignore[override]
          self,
-         y_test: np.ndarray | list[int],
-         y_pred: np.ndarray,
+         y_test: NDArray[np.floating] | list[float],
+         y_pred: NDArray[np.floating] | list[float],
      ) -> RegressionMetrics:
          mse = mean_squared_error(y_test, y_pred)
          return RegressionMetrics(
mteb/benchmarks/benchmark.py CHANGED
@@ -164,14 +164,142 @@ class MIEBBenchmark(Benchmark):
  class VidoreBenchmark(Benchmark):
      """Wrapper for Vidore3 benchmark."""
 
-     def _create_summary_table(
+     def _create_vidore_summary_table(
          self, benchmark_results: BenchmarkResults
      ) -> pd.DataFrame:
+         """Create summary table from BenchmarkResults.
+
+         Returns a DataFrame with one row per model containing summary statistics
+         and task type averages. Customized for Vidore benchmark.
+
+         Args:
+             benchmark_results: BenchmarkResults object containing model results
+
+         Returns:
+             DataFrame with model summaries, ready for styling in the leaderboard
+         """
+         import mteb
          from mteb.benchmarks._create_table import (
-             _create_summary_table_mean_public_private,
+             _format_max_tokens,
+             _format_n_parameters,
+             _get_means_per_types,
+             _split_on_capital,
+         )
+         from mteb.get_tasks import get_task
+
+         data = benchmark_results.to_dataframe(format="long")
+
+         if data.empty:
+             no_results_frame = pd.DataFrame(
+                 {"No results": ["You can try relaxing your criteria"]}
+             )
+             return no_results_frame
+         public_task_name = benchmark_results._filter_tasks(is_public=True).task_names
+         private_task_name = benchmark_results._filter_tasks(is_public=False).task_names
+         # Convert to DataFrame and pivot
+         per_task = data.pivot(index="model_name", columns="task_name", values="score")
+
+         # Remove models with no scores
+         to_remove = per_task.isna().all(axis="columns")
+         if to_remove.all():
+             no_results_frame = pd.DataFrame(
+                 {"No results": ["You can try relaxing your criteria"]}
+             )
+             return no_results_frame
+
+         models_to_remove = list(per_task[to_remove].index)
+         per_task = per_task.drop(models_to_remove, axis=0)
+
+         # Calculate means by task type
+         mean_per_type = _get_means_per_types(per_task)
+         mean_per_type = mean_per_type.pivot(
+             index="model_name", columns="task_type", values="score"
+         )
+         mean_per_type.columns = [
+             _split_on_capital(column) for column in mean_per_type.columns
+         ]
+
+         # Calculate overall means
+         public_mean = per_task[public_task_name].mean(skipna=False, axis=1)
+         private_mean = per_task[private_task_name].mean(skipna=False, axis=1)
+
+         # Build joint table
+         joint_table = mean_per_type.copy()
+         joint_table.insert(1, "mean(public)", public_mean)
+         joint_table.insert(2, "mean(private)", private_mean)
+         task_type = get_task(
+             per_task.columns[0]
+         ).metadata.type  # "DocumentUnderstanding"
+         joint_table = joint_table.sort_values(
+             [_split_on_capital(task_type), "mean(public)", "mean(private)"],
+             ascending=False,
+         )
+
+         joint_table = joint_table.reset_index()
+
+         # Add model metadata
+         model_metas = joint_table["model_name"].map(mteb.get_model_meta)
+         joint_table = joint_table[model_metas.notna()]
+         joint_table["model_link"] = model_metas.map(lambda m: m.reference)
+
+         # Insert model metadata columns
+         joint_table.insert(
+             1,
+             "Max Tokens",
+             model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
+         )
+         joint_table.insert(
+             1,
+             "Embedding Dimensions",
+             model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
+         )
+         joint_table.insert(
+             1,
+             "Number of Parameters (B)",
+             model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
+         )
+         joint_table.insert(
+             1,
+             "Memory Usage (MB)",
+             model_metas.map(
+                 lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
+             ),
+         )
+
+         # Clean up model names (remove HF organization)
+         joint_table["model_name"] = joint_table["model_name"].map(
+             lambda name: name.split("/")[-1]
+         )
+
+         # Add markdown links to model names
+         name_w_link = (
+             "[" + joint_table["model_name"] + "](" + joint_table["model_link"] + ")"
+         )
+         joint_table["model_name"] = joint_table["model_name"].mask(
+             joint_table["model_link"].notna(), name_w_link
+         )
+         joint_table = joint_table.drop(columns=["model_link"])
+
+         # Rename columns
+         rename_dict = {
+             "model_name": "Model",
+             "mean(public)": "Mean (Public)",
+             "mean(private)": "Mean (Private)",
+         }
+
+         joint_table = joint_table.rename(columns=rename_dict)
+
+         # Add Rank column
+         joint_table.insert(
+             0, "Rank (Mean Task)", [i + 1 for i in range(len(joint_table))]
          )
 
-         joint_table = _create_summary_table_mean_public_private(benchmark_results)
+         return joint_table
+
+     def _create_summary_table(
+         self, benchmark_results: BenchmarkResults
+     ) -> pd.DataFrame:
+         joint_table = self._create_vidore_summary_table(benchmark_results)
          # For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
          joint_table = joint_table.rename(
              columns={"Document Understanding": "Mean (Task)"}
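The new `_create_vidore_summary_table` averages public and private tasks separately with `skipna=False`, so a model missing any task in a group gets `NaN` for that group's mean. A toy illustration with made-up scores:

    import pandas as pd

    per_task = pd.DataFrame(
        {"task_a": [0.71, 0.65], "task_b": [0.80, None], "task_c": [0.55, 0.60]},
        index=["model_x", "model_y"],
    )
    public_tasks, private_tasks = ["task_a", "task_b"], ["task_c"]

    # skipna=False: model_y has no score on task_b, so its public mean becomes NaN.
    public_mean = per_task[public_tasks].mean(skipna=False, axis=1)
    private_mean = per_task[private_tasks].mean(skipna=False, axis=1)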
mteb/leaderboard/figures.py CHANGED
@@ -125,6 +125,7 @@ def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
      min_score, max_score = df["Mean (Task)"].min(), df["Mean (Task)"].max()
      df["sqrt(dim)"] = np.sqrt(df["Embedding Dimensions"])
      df["Max Tokens"] = df["Max Tokens"].apply(lambda x: _process_max_tokens(x))
+     rank_column = "Rank (Borda)" if "Rank (Borda)" in df.columns else "Rank (Mean Task)"
      fig = px.scatter(
          df,
          x="Number of Parameters",
@@ -141,7 +142,7 @@ def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
              "Embedding Dimensions": True,
              "Number of Parameters": True,
              "Mean (Task)": True,
-             "Rank (Borda)": True,
+             rank_column: True,
              "Log(Tokens)": False,
              "sqrt(dim)": False,
              "model_text": False,
mteb/leaderboard/table.py CHANGED
@@ -156,6 +156,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
      """
      excluded_columns = [
          "Rank (Borda)",
+         "Rank (Mean Task)",
          "Rank",
          "Model",
          "Number of Parameters (B)",
@@ -183,10 +184,17 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
      joint_table["Zero-shot"] = joint_table["Zero-shot"].apply(_format_zero_shot)
      joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
 
+     if "Rank (Borda)" in joint_table.columns:
+         rank_column = "Rank (Borda)"
+     elif "Rank (Mean Task)" in joint_table.columns:
+         rank_column = "Rank (Mean Task)"
+     else:
+         raise ValueError("No rank column found in the result table.")
+
      joint_table_style = joint_table.style.format(
          {
              **dict.fromkeys(score_columns, "{:.2f}"),
-             "Rank (Borda)": "{:.0f}",
+             rank_column: "{:.0f}",
              "Memory Usage (MB)": "{:.0f}",
              "Embedding Dimensions": "{:.0f}",
              "Max Tokens": "{:.0f}",
@@ -195,7 +203,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
          na_rep="",
      )
      joint_table_style = joint_table_style.highlight_min(
-         "Rank (Borda)", props="font-weight: bold"
+         rank_column, props="font-weight: bold"
      ).highlight_max(subset=score_columns, props="font-weight: bold")
 
      # Apply background gradients for each selected column
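The same rank-column fallback now appears in both figures.py and table.py. Distilled into a helper for clarity (a sketch of the pattern only; no such function exists in mteb):

    import pandas as pd


    def pick_rank_column(table: pd.DataFrame) -> str:
        # Prefer the Borda rank; fall back to the mean-task rank used by Vidore-style tables.
        if "Rank (Borda)" in table.columns:
            return "Rank (Borda)"
        if "Rank (Mean Task)" in table.columns:
            return "Rank (Mean Task)"
        raise ValueError("No rank column found in the result table.")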
mteb/models/cache_wrappers/cache_backend_protocol.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
  if TYPE_CHECKING:
      from pathlib import Path
 
-     import numpy as np
+     from mteb.types import Array
 
 
  @runtime_checkable
@@ -26,7 +26,7 @@ class CacheBackendProtocol(Protocol):
              **kwargs: Additional backend-specific arguments.
          """
 
-     def add(self, item: list[dict[str, Any]], vectors: np.ndarray) -> None:
+     def add(self, item: list[dict[str, Any]], vectors: Array) -> None:
          """Add a vector to the cache.
 
          Args:
@@ -34,7 +34,7 @@ class CacheBackendProtocol(Protocol):
              vectors: Embedding vector of shape (dim,) or (1, dim).
          """
 
-     def get_vector(self, item: dict[str, Any]) -> np.ndarray | None:
+     def get_vector(self, item: dict[str, Any]) -> Array | None:
          """Retrieve the cached vector for the given item.
 
          Args:
mteb/models/cache_wrappers/cache_backends/faiss_cache.py CHANGED
@@ -15,7 +15,7 @@ from ._hash_utils import _hash_item
  if TYPE_CHECKING:
      import faiss
 
-     from mteb.types import BatchedInput
+     from mteb.types import Array, BatchedInput
 
  logger = logging.getLogger(__name__)
 
@@ -43,7 +43,7 @@ class FaissCache:
          logger.info(f"Initialized FAISS VectorCacheMap in {self.directory}")
          self.load()
 
-     def add(self, items: list[dict[str, Any]], vectors: np.ndarray) -> None:
+     def add(self, items: list[dict[str, Any]], vectors: Array) -> None:
          """Add vector to FAISS index."""
          import faiss
 
@@ -67,7 +67,7 @@ class FaissCache:
          vectors_array = np.vstack(vectors_to_add).astype(np.float32)
          self.index.add(vectors_array)
 
-     def get_vector(self, item: BatchedInput) -> np.ndarray | None:
+     def get_vector(self, item: dict[str, Any]) -> Array | None:
          """Retrieve vector from index by hash."""
          if self.index is None:
              return None
mteb/models/cache_wrappers/cache_backends/numpy_cache.py CHANGED
@@ -1,13 +1,18 @@
+ from __future__ import annotations
+
  import json
  import logging
  import warnings
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any
 
  import numpy as np
 
  from ._hash_utils import _hash_item
 
+ if TYPE_CHECKING:
+     from mteb.types import Array
+
  logger = logging.getLogger(__name__)
 
 
@@ -27,7 +32,7 @@ class NumpyCache:
          logger.info(f"Initialized VectorCacheMap in directory: {self.directory}")
          self._initialize_vectors_file()
 
-     def add(self, items: list[dict[str, Any]], vectors: np.ndarray) -> None:
+     def add(self, items: list[dict[str, Any]], vectors: Array) -> None:
          """Add a vector to the cache."""
          try:
              if self.vector_dim is None:
@@ -178,7 +183,7 @@ class NumpyCache:
              logger.error(f"Error loading VectorCacheMap: {str(e)}")
              raise
 
-     def get_vector(self, item: dict[str, Any]) -> np.ndarray | None:
+     def get_vector(self, item: dict[str, Any]) -> Array | None:
          """Retrieve vector from index by hash."""
          if self.vectors is None:
              return None
mteb/models/cache_wrappers/cache_wrapper.py CHANGED
@@ -98,7 +98,7 @@ class CachedEmbeddingWrapper:
          uncached_items: list[dict[str, Any]] = []
          uncached_indices: list[int] = []
          all_items: Dataset = inputs.dataset
-         cached_vectors: dict[int, np.ndarray] = {}
+         cached_vectors: dict[int, Array] = {}
 
          for i, item in enumerate(all_items):
              vector = cache.get_vector(item)
@@ -108,7 +108,7 @@ class CachedEmbeddingWrapper:
              uncached_items.append(item)
              uncached_indices.append(i)
 
-         newly_encoded: dict[int, np.ndarray] = {}
+         newly_encoded: dict[int, Array] = {}
          if uncached_items:
              logger.info(f"Encoding {len(uncached_items)} new items")
              # Build a simple DataLoader with only uncached items
mteb/models/model_implementations/bedrock_models.py CHANGED
@@ -86,7 +86,7 @@ class BedrockModel(AbsEncoder):
 
      def _encode_amazon(
          self, sentences: list[str], show_progress_bar: bool = False
-     ) -> np.ndarray:
+     ) -> Array:
          from botocore.exceptions import ValidationError
 
          all_embeddings = []
@@ -125,7 +125,7 @@ class BedrockModel(AbsEncoder):
          sentences: list[str],
          cohere_task_type: str,
          show_progress_bar: bool = False,
-     ) -> np.ndarray:
+     ) -> Array:
          batches = [
              sentences[i : i + self._max_batch_size]
              for i in range(0, len(sentences), self._max_batch_size)
@@ -149,7 +149,7 @@ class BedrockModel(AbsEncoder):
 
          return np.array(all_embeddings)
 
-     def _embed_amazon(self, sentence: str) -> np.ndarray:
+     def _embed_amazon(self, sentence: str) -> Array:
          response = self._client.invoke_model(
              body=json.dumps({"inputText": sentence}),
              modelId=self._model_id,
@@ -158,7 +158,7 @@ class BedrockModel(AbsEncoder):
          )
          return self._to_numpy(response)
 
-     def _to_numpy(self, embedding_response) -> np.ndarray:
+     def _to_numpy(self, embedding_response) -> Array:
          response = json.loads(embedding_response.get("body").read())
          key = "embedding" if self._provider == "amazon" else "embeddings"
          return np.array(response[key])
mteb/models/model_implementations/mcinext_models.py CHANGED
@@ -13,7 +13,7 @@ from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
 
  if TYPE_CHECKING:
-     from mteb.types import PromptType
+     from mteb.types import Array, PromptType
  logger = logging.getLogger(__name__)
 
  HAKIM_CITATION = """@article{sarmadi2025hakim,
@@ -302,7 +302,7 @@ class HakimModelWrapper(AbsEncoder):
          prompt_type: PromptType | None = None,
          batch_size: int = 32,
          **kwargs: Any,
-     ) -> np.ndarray:
+     ) -> Array:
          """Encodes sentences using the API.
 
          Returns:
mteb/models/model_implementations/misc_models.py CHANGED
@@ -1007,54 +1007,6 @@ thenlper__gte_small = ModelMeta(
  year={2023}
  }""",
  )
- OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
-     name="OrlikB/KartonBERT-USE-base-v1",
-     model_type=["dense"],
-     revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
-     release_date="2024-09-30",
-     languages=["pol-Latn"],
-     loader=sentence_transformers_loader,
-     n_parameters=103705344,
-     n_embedding_parameters=None,
-     memory_usage_mb=396,
-     max_tokens=512.0,
-     embed_dim=768,
-     license="gpl-3.0",
-     open_weights=True,
-     public_training_code=None,
-     public_training_data=None,
-     framework=["PyTorch"],
-     reference="https://huggingface.co/OrlikB/KartonBERT-USE-base-v1",
-     similarity_fn_name=ScoringFunction.COSINE,
-     use_instructions=None,
-     training_datasets=None,
-     adapted_from="KartonBERT-USE-base-v1",
-     superseded_by=None,
- )
- OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
-     name="OrlikB/st-polish-kartonberta-base-alpha-v1",
-     model_type=["dense"],
-     revision="5590a0e2d7bb43674e44d7076b3ff157f7d4a1cb",
-     release_date="2023-11-12",
-     languages=["pol-Latn"],
-     loader=sentence_transformers_loader,
-     n_parameters=None,
-     n_embedding_parameters=None,
-     memory_usage_mb=None,
-     max_tokens=514.0,
-     embed_dim=768,
-     license="lgpl",
-     open_weights=True,
-     public_training_code=None,
-     public_training_data=None,
-     framework=["PyTorch"],
-     reference="https://huggingface.co/OrlikB/st-polish-kartonberta-base-alpha-v1",
-     similarity_fn_name=ScoringFunction.COSINE,
-     use_instructions=None,
-     training_datasets=None,
-     adapted_from="st-polish-kartonberta-base-alpha-v1",
-     superseded_by=None,
- )
  sdadas__mmlw_e5_base = ModelMeta(
      name="sdadas/mmlw-e5-base",
      model_type=["dense"],
mteb/models/model_implementations/openai_models.py CHANGED
@@ -11,6 +11,7 @@ from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
 
  if TYPE_CHECKING:
+     from numpy.typing import NDArray
      from torch.utils.data import DataLoader
 
      from mteb.abstasks.task_metadata import TaskMetadata
@@ -166,7 +167,7 @@ class OpenAIModel(AbsEncoder):
          all_embeddings[mask] = no_empty_embeddings
          return all_embeddings
 
-     def _to_numpy(self, embedding_response) -> np.ndarray:
+     def _to_numpy(self, embedding_response) -> NDArray[np.floating]:
          return np.array([e.embedding for e in embedding_response.data])
 
 
mteb/models/model_implementations/random_baseline.py CHANGED
@@ -13,6 +13,7 @@ from mteb.similarity_functions import (
  )
 
  if TYPE_CHECKING:
+     from numpy.typing import NDArray
      from PIL import Image
      from torch.utils.data import DataLoader
 
@@ -20,7 +21,7 @@ if TYPE_CHECKING:
      from mteb.types._encoder_io import Array, BatchedInput, PromptType
 
 
- def _string_to_vector(text: str | None, size: int) -> np.ndarray:
+ def _string_to_vector(text: str | None, size: int) -> NDArray[np.floating]:
      """Generate a deterministic random vector based on a string.
 
      Args:
@@ -39,7 +40,7 @@ def _string_to_vector(text: str | None, size: int) -> np.ndarray:
      return rng.random(size, dtype=np.float32)
 
 
- def _image_to_vector(image: Image.Image, size: int) -> np.ndarray:
+ def _image_to_vector(image: Image.Image, size: int) -> NDArray[np.floating]:
      """Generate a deterministic random vector based on image content.
 
      Args:
@@ -80,7 +81,7 @@ _common_mock_metadata = dict(
 
 
  def _batch_to_embeddings(
      inputs: DataLoader[BatchedInput], embedding_dim: int
- ) -> np.ndarray:
+ ) -> NDArray[np.floating]:
      """Convert batched text/image inputs into embeddings.
 
      Args:
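`_string_to_vector` and `_image_to_vector` in the random baseline stay deterministic while now returning `NDArray[np.floating]`. A hedged sketch of the idea (the exact seeding scheme is not visible in this hunk and is assumed here):

    import hashlib

    import numpy as np
    from numpy.typing import NDArray


    def string_to_vector_sketch(text: str | None, size: int) -> NDArray[np.floating]:
        # Derive a reproducible pseudo-random vector from the string content.
        digest = hashlib.sha256((text or "").encode("utf-8")).digest()
        rng = np.random.default_rng(int.from_bytes(digest[:8], "little"))
        return rng.random(size, dtype=np.float32)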
mteb/models/model_implementations/rerankers_custom.py CHANGED
@@ -103,68 +103,6 @@ class BGEReranker(RerankerWrapper):
          return scores
 
 
- class MonoBERTReranker(RerankerWrapper):
-     name: str = "MonoBERT"
-
-     def __init__(
-         self,
-         model_name_or_path="castorini/monobert-large-msmarco",
-         torch_compile=False,
-         **kwargs,
-     ):
-         from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-         super().__init__(model_name_or_path, **kwargs)
-         if not self.device:
-             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-         model_args = {}
-         if self.fp_options:
-             model_args["torch_dtype"] = self.fp_options
-         self.model = AutoModelForSequenceClassification.from_pretrained(
-             model_name_or_path,
-             **model_args,
-         )
-         self.model.to(self.device)
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
-         self.max_length = self.tokenizer.model_max_length
-         logger.info(f"Using max_length of {self.max_length}")
-
-         self.model.eval()
-
-     @torch.inference_mode()
-     def predict(
-         self,
-         inputs1: DataLoader[BatchedInput],
-         inputs2: DataLoader[BatchedInput],
-         *,
-         task_metadata: TaskMetadata,
-         hf_split: str,
-         hf_subset: str,
-         prompt_type: PromptType | None = None,
-         **kwargs: Any,
-     ) -> Array:
-         queries = [text for batch in inputs1 for text in batch["query"]]
-         instructions = None
-         if "instruction" in inputs2.dataset.features:
-             instructions = [text for batch in inputs1 for text in batch["instruction"]]
-         passages = [text for batch in inputs2 for text in batch["text"]]
-
-         if instructions is not None and instructions[0] is not None:
-             queries = [f"{q} {i}".strip() for i, q in zip(instructions, queries)]
-
-         tokens = self.tokenizer(
-             queries,
-             passages,
-             padding=True,
-             truncation="only_second",
-             return_tensors="pt",
-             max_length=self.max_length,
-         ).to(self.device)
-         output = self.model(**tokens)[0]
-         batch_scores = torch.nn.functional.log_softmax(output, dim=1)
-         return batch_scores[:, 1].exp()
-
-
  class JinaReranker(RerankerWrapper):
      name = "Jina"
 
@@ -219,31 +157,6 @@ class JinaReranker(RerankerWrapper):
          return scores
 
 
- monobert_large = ModelMeta(
-     loader=MonoBERTReranker,
-     loader_kwargs=dict(
-         fp_options="float16",
-     ),
-     name="castorini/monobert-large-msmarco",
-     model_type=["cross-encoder"],
-     languages=["eng-Latn"],
-     open_weights=True,
-     revision="0a97706f3827389da43b83348d5d18c9d53876fa",
-     release_date="2020-05-28",
-     n_parameters=None,
-     n_embedding_parameters=31_254_528,
-     memory_usage_mb=None,
-     max_tokens=None,
-     embed_dim=None,
-     license=None,
-     public_training_code=None,
-     public_training_data=None,
-     similarity_fn_name=None,
-     use_instructions=None,
-     training_datasets=None,
-     framework=["Sentence Transformers", "PyTorch", "Transformers"],
- )
-
  # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
  jina_reranker_multilingual = ModelMeta(
      loader=JinaReranker,
mteb/models/model_implementations/rerankers_monot5_based.py CHANGED
@@ -34,7 +34,6 @@ prediction_tokens = {
      "unicamp-dl/mt5-base-en-msmarco": ["▁no", "▁yes"],
      "unicamp-dl/mt5-base-mmarco-v2": ["▁no", "▁yes"],
      "unicamp-dl/mt5-base-mmarco-v1": ["▁no", "▁yes"],
-     "unicamp-dl/mt5-13b-mmarco-100k": ["▁", "▁true"],
  }
 
 
@@ -919,28 +918,3 @@ mt5_base_mmarco_v2 = ModelMeta(
      use_instructions=None,
      framework=["PyTorch", "Transformers"],
  )
-
- mt5_13b_mmarco_100k = ModelMeta(
-     loader=MonoT5Reranker,
-     loader_kwargs=dict(
-         fp_options="float16",
-     ),
-     name="unicamp-dl/mt5-13b-mmarco-100k",
-     model_type=["cross-encoder"],
-     languages=mt5_languages,
-     open_weights=True,
-     revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
-     release_date="2022-11-04",
-     n_parameters=None,
-     n_embedding_parameters=1_024_458_752,
-     memory_usage_mb=None,
-     max_tokens=None,
-     embed_dim=None,
-     license=None,
-     public_training_code=None,
-     public_training_data=None,
-     similarity_fn_name=None,
-     use_instructions=None,
-     training_datasets=None,
-     framework=["PyTorch", "Transformers"],
- )
mteb/models/model_implementations/seed_models.py CHANGED
@@ -1,6 +1,8 @@
+ from __future__ import annotations
+
  import logging
  import time
- from typing import Any
+ from typing import TYPE_CHECKING, Any
 
  import numpy as np
  import torch
@@ -14,6 +16,9 @@ from mteb.types import PromptType
  from .bge_models import bge_chinese_training_data
  from .nvidia_models import nvidia_training_datasets
 
+ if TYPE_CHECKING:
+     from mteb.types import Array
+
  logger = logging.getLogger(__name__)
 
 
@@ -110,7 +115,7 @@ class SeedTextEmbeddingModel(AbsEncoder):
          prompt_type: PromptType | None = None,
          retries: int = 5,
          **kwargs: Any,
-     ) -> np.ndarray:
+     ) -> Array:
          trimmed_sentences = []
          for sentence in sentences:
              encoded_sentence = self._encoding.encode(sentence)
mteb/models/model_implementations/voyage_models.py CHANGED
@@ -150,7 +150,7 @@ class VoyageModel(AbsEncoder):
          sentences: list[str],
          batch_size: int,
          input_type: Literal["query", "document"],
-     ) -> np.ndarray:
+     ) -> Array:
          embeddings, index = [], 0
 
          output_dtype = VOYAGE_DTYPE_TRANSLATION.get(
mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py CHANGED
@@ -68,6 +68,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
          license="cc-by-4.0",
          annotations_creators="derived",
          dialect=[],
+         modalities=["text", "image"],
          sample_creation="created and machine-translated",
          bibtex_citation=r"""
          @article{loison2026vidorev3comprehensiveevaluation,
mteb/types/_encoder_io.py CHANGED
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, TypedDict
  import numpy as np
  import torch
  from datasets import Dataset
+ from numpy.typing import NDArray
 
  if TYPE_CHECKING:
      from PIL import Image
@@ -26,8 +27,8 @@ class EncodeKwargs(TypedDict):
 
 
  # --- Output types ---
- Array = np.ndarray | torch.Tensor
- """General array type, can be a numpy array or a torch tensor."""
+ Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
+ """General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
 
 
  # --- Input types ---
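The widened `Array` alias spells out which NumPy scalar types are accepted alongside torch tensors. A minimal sketch of how downstream code can normalize such an `Array` (the helper is illustrative and not part of mteb; `np.bool` requires a NumPy version that exposes it, as the diff itself assumes):

    import numpy as np
    import torch
    from numpy.typing import NDArray

    # Mirrors the alias introduced above.
    Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor


    def to_float32(x: Array) -> NDArray[np.float32]:
        # Accept either a torch tensor or a NumPy array and return a float32 NumPy array.
        if isinstance(x, torch.Tensor):
            x = x.detach().cpu().numpy()
        return np.asarray(x, dtype=np.float32)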
{mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mteb
- Version: 2.7.15
+ Version: 2.7.17
  Summary: Massive Text Embedding Benchmark
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
{mteb-2.7.15.dist-info → mteb-2.7.17.dist-info}/RECORD CHANGED
@@ -16,13 +16,13 @@ mteb/similarity_functions.py,sha256=7ENHjq35EMSO1kT73IKLbQ0jMWGxMPSMM8SPmGHzaAM,
  mteb/_evaluators/__init__.py,sha256=Ag1_RWpxBGMpujzd3FZjI40gY_KQKIpY31tJPuk-hFg,1013
  mteb/_evaluators/_download.py,sha256=jntlcURbJxcxUjTmn2D9Tu6ZnWgDc9t5bY8p9CZCqv4,586
  mteb/_evaluators/any_sts_evaluator.py,sha256=aeK6ZJ_wuGR_8L6f5B4Xcl7Fo0dgApqevsgg7hdHQk0,3918
- mteb/_evaluators/classification_metrics.py,sha256=TI-cMPWrIpMqpsNhhwSBY4bZUu2yM469fbcu44zolW0,1926
+ mteb/_evaluators/classification_metrics.py,sha256=cWMU-2bCNXlk_UN0sZ7uRS5aFXESkYlJWg4OLMeFrRg,2090
  mteb/_evaluators/clustering_evaluator.py,sha256=COSG4tGz2hI3Ff3MNFxdeg9w1TPmisxhvF333zQx-ko,2226
  mteb/_evaluators/evaluator.py,sha256=8fEuBZW0sv-tpJCUT2X5lIvmF0Ji0Vuq7Z8AblipspA,1074
- mteb/_evaluators/pair_classification_evaluator.py,sha256=0_B_5LVPTgd7IsbehUoxOPfmETmTbv0DOZtwPmDLhWs,6624
+ mteb/_evaluators/pair_classification_evaluator.py,sha256=g7JiwzYa_c_Ql3mOavKho2-vXdAagoZfEhref-r1luM,6671
  mteb/_evaluators/retrieval_evaluator.py,sha256=UexQCsfGMzZq-JfrdNQ4PylHdoyS6Ef_zxXokNZiR5o,3250
- mteb/_evaluators/retrieval_metrics.py,sha256=XXeI7pXHuhKDvDjMKhGSU-Uv6J2itQKeD7lUH85IL8Q,23856
- mteb/_evaluators/sklearn_evaluator.py,sha256=yjnNtwOuEU38eKOKHI7uFsIZJJDU5jmTHGH7MX0rKtA,4122
+ mteb/_evaluators/retrieval_metrics.py,sha256=nO1StNLDaa_iBWTuVexo2rsDWkxYCgHZ98qgr9bydCs,23964
+ mteb/_evaluators/sklearn_evaluator.py,sha256=sCGn3YOhwT2OARTlSCJIg69IMijqfVyHoTVgK1TFuYQ,4253
  mteb/_evaluators/zeroshot_classification_evaluator.py,sha256=jCf6H0LwxZGIZ-2LGF9SPbjHTtCAk9v1HtxWcmutqks,2488
  mteb/_evaluators/image/__init__.py,sha256=CsQd7OMkeV2Phun7paPWjayZ5qRnvj8H0TYBFeqMxag,148
  mteb/_evaluators/image/imagetext_pairclassification_evaluator.py,sha256=w2vJrQXIbRsdG837x1yYQkGNcTJt8rdLofQ3Jo3nIn4,5227
@@ -31,17 +31,17 @@ mteb/_evaluators/text/bitext_mining_evaluator.py,sha256=We-BQZJQGBXz_vTUZz-3OBt4
  mteb/_evaluators/text/summarization_evaluator.py,sha256=ZHn3kIFGJ1XzgbI21jEeTnP5pdIChGHVTCuyz2MSKyg,10900
  mteb/abstasks/__init__.py,sha256=1iAwpYTWX7U-goak2KMmacPFCzxPchLQAmZ_uI0t-p0,1130
  mteb/abstasks/_statistics_calculation.py,sha256=4opttohaS6LV5K0zQIqfG2IGIzQAdKAaLTpSTQ6auBc,5988
- mteb/abstasks/_stratification.py,sha256=GnqYRtkFYsB-412EvMR2iMqIinFr98NCSmxHeCXctlw,14347
+ mteb/abstasks/_stratification.py,sha256=upxjHt4wjuEFGmb_vrONTh9ngZ8Oa1tY56tUdllFERQ,14490
  mteb/abstasks/abstask.py,sha256=9bpPnrwwGBWE--MGoTOb-J-RtOnoo0YUaAjAPv7JC1g,26831
  mteb/abstasks/aggregate_task_metadata.py,sha256=WXYY_DUU55s4PkxMVz7lwbdZarq6QznhbvJYdSTYZZI,5846
  mteb/abstasks/aggregated_task.py,sha256=8NY_vaqmMuYNxuB05YjU4W6aEipyKrF2iDFS3m-eXNc,6167
- mteb/abstasks/classification.py,sha256=zSA9nTplwspktPnZiN_RQrPvOgEKYxeQASm_Q1lb3ww,14052
+ mteb/abstasks/classification.py,sha256=9gNSPw2fVPyoTb1tV4kR_Fzku3Z6xutJzbyNpkktPzc,14134
  mteb/abstasks/clustering.py,sha256=I8vre2f2FJFagzJEYf6hKDo3Y28xU29J_O-MhfqWqSI,14944
  mteb/abstasks/clustering_legacy.py,sha256=sbx8K6paccvzDPnmhgNE_UJE83orAJnQm3NGr-Ktjfs,9184
  mteb/abstasks/dataset_card_template.md,sha256=aD6l8qc3_jxwoIGJNYLzse-jpRa8hu92AxpnUtNgges,5122
- mteb/abstasks/multilabel_classification.py,sha256=rFa_Pw2OsUzqhZS-jh2zFD7I-TNl8bVNJ-DW7EpPapU,9708
+ mteb/abstasks/multilabel_classification.py,sha256=olldnMq5a-elan2vZSQ-EFZbcS6Mt9h1nTOi8iFXjjE,9810
  mteb/abstasks/pair_classification.py,sha256=RVV5WUjs18N5PbWpyxakDNEd1UlRc4ON9I0OjD26Z78,14231
- mteb/abstasks/regression.py,sha256=ZuMZfOwU3G4hr__eHsgdagKKdrbN4-wQMLz45jr9YUc,8946
+ mteb/abstasks/regression.py,sha256=2aYJvktoENLi1wLYdND35TGFfhrN_BFCs-Yr2Ex-f5I,9019
  mteb/abstasks/retrieval.py,sha256=BPyRibStAD70JfR0Z1x-VVVfzJDRVSmbOS6uREfpmok,27743
  mteb/abstasks/retrieval_dataset_loaders.py,sha256=p0y1nrWlUrt_aeoR4ocDLEQMLuD_SlMH0gBiUsOwrww,9983
  mteb/abstasks/sts.py,sha256=Xta3KVQE7hHqkPTDptemvNVEG0CsZSVjA-Z52EIBvDE,9576
@@ -58,7 +58,7 @@ mteb/abstasks/text/reranking.py,sha256=mCzy0-TnZ46_GC9Czl4zWKAPnYK5ur0qtFbPt47m9
  mteb/abstasks/text/summarization.py,sha256=bSgb0XhUzJVuLV1Wjr3HYB_Tn7SjmCDMnkBIEWHO4EQ,7381
  mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
  mteb/benchmarks/_create_table.py,sha256=CJL8U0adUbaxr5G26trfYo1tGx8cU2IWWx_ZHU6q6do,22407
- mteb/benchmarks/benchmark.py,sha256=YCGIvJ5Vc6GdCAYSjzwrnfj2A8MkbzNLvvtPBLMSSp8,6327
+ mteb/benchmarks/benchmark.py,sha256=3AA-Zeh9Z4bTN6qVpRUfHWHxJfTei3KQBEOoVP2Mtic,10922
  mteb/benchmarks/get_benchmark.py,sha256=nzR6cu5yXu1kIJKhd4A2R62xp43Z62bluPbOpNXHMWQ,2545
  mteb/benchmarks/benchmarks/__init__.py,sha256=-o3EMWEfP0eQ8iZpWvTj5r4yuGOUuL9mHk8IgFcpPtk,2330
  mteb/benchmarks/benchmarks/benchmarks.py,sha256=IOU3Kk7TEZkhypJ7ScyfqHmYgx1nG_KPJkjXLKoSTmo,103931
@@ -1474,8 +1474,8 @@ mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZ
  mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
  mteb/leaderboard/app.py,sha256=Y3G93VJq6eZMD4_CNzLwSEEGnuNJDqYEYztmzYR85eA,42549
  mteb/leaderboard/benchmark_selector.py,sha256=qd-2L20RQ4ACke01UlytkhZok1dkWgfUlXzfET52kGc,7956
- mteb/leaderboard/figures.py,sha256=cfOK82rRf-7sCjyP7GBxh4ezhOIt0OhD0_86mKtzLrg,7530
- mteb/leaderboard/table.py,sha256=U5mWtrVUTk_6t8T4KAp5qlbFgKh1PD0iKICqNMfhsoY,10462
+ mteb/leaderboard/figures.py,sha256=9mHxxzL44OFE3RL0d5zvMFJwl2irnYL0YbMbfCugifs,7616
+ mteb/leaderboard/table.py,sha256=egBQYgOXqUZCZa7QXkjYq45RYhAuey7KCo6MeTcx4B8,10741
  mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
  mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
  mteb/models/abs_encoder.py,sha256=We9HlwWP61P4cMyZ080gywvDErA1eVsU9t46PtcNrCM,16830
@@ -1487,12 +1487,12 @@ mteb/models/search_wrappers.py,sha256=PXE1VVDWUd0LgTPJ-FxqIbGpIDWLRKo5CjrwIuu5nz
  mteb/models/sentence_transformer_wrapper.py,sha256=RsOxj-b7qzeYcxUTVJyb-lZDY4bINl4jEAEkPvKYB10,13578
  mteb/models/vllm_wrapper.py,sha256=rvE0mghcHRA1UwIN1mjJf1GKA6pTTcs8mZQ2UoGRJ0g,12287
  mteb/models/cache_wrappers/__init__.py,sha256=1w1TnMwulWJSzNkLXjbh5MY3sqgHWc6vUntYn49i9X8,169
- mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=d00imHSJAVU1jtvwLWv3mn06_SJn-6JQbOvb0N2nlAk,1676
- mteb/models/cache_wrappers/cache_wrapper.py,sha256=ZzbtW5rVGNZ-5wrqE06C0Uy6BHT5Lq3IysHe8hAoTz4,6703
+ mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=vDDXnXDwWhhTvw0e0fiGB7PhPQtGx__-i6jJPvZVoAU,1676
+ mteb/models/cache_wrappers/cache_wrapper.py,sha256=2hdQSFrISZB7Su2li743OIJDWUyp9z-spC1dnb8huTY,6693
  mteb/models/cache_wrappers/cache_backends/__init__.py,sha256=hN2Tq7cpTxoOYSCJ1Wnpvb8dEm-kQLfCCahT1N9Bacw,123
  mteb/models/cache_wrappers/cache_backends/_hash_utils.py,sha256=HRZ8FeXSJyK3m07gnEIFduR2hU2DSJLjo4QPs5DxDGs,620
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py,sha256=ejOONSD8wZACuGt455UXn_z5ZHGMpv64ORvteKNqfDw,3899
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py,sha256=V275IY-0lyh2REqZjIZOgJJ7SY05yiWdHNF2kiSdRfo,8071
+ mteb/models/cache_wrappers/cache_backends/faiss_cache.py,sha256=vEGoc74mONqiaO5Tmf5OMY9HWZDUJj92SObSvifKgu4,3898
+ mteb/models/cache_wrappers/cache_backends/numpy_cache.py,sha256=iWp2H63TWYH-e3sxFq-4E23RIXTdKM9GKgrbVNQDAXI,8164
  mteb/models/model_implementations/__init__.py,sha256=BZDdde6ajKv-yroy9mqE2YS3Hw1KBdKoxBPg8aPTZEs,1164
  mteb/models/model_implementations/align_models.py,sha256=82zOy5yyeaaVej81NGyjT49d1Lbg174Kpa4d8iaBi4Q,4676
  mteb/models/model_implementations/amazon_models.py,sha256=bEiOzDE2V2duMv-TCAYhMeqlgI--PXGMv6CqKLUQmp0,753
@@ -1500,7 +1500,7 @@ mteb/models/model_implementations/andersborges.py,sha256=au-947iRW4F6aq57QoYWZwQ
  mteb/models/model_implementations/ara_models.py,sha256=b-Qa5q3O8M5XbkauVm7I6D6aZSU1cd9XePT6ZVvSBtk,1517
  mteb/models/model_implementations/arctic_models.py,sha256=MfYvAkcGcb3FdbvieYmiekSvZREwu2pRJ_2sbbcUIPk,11051
  mteb/models/model_implementations/b1ade_models.py,sha256=-czgy_Ym5LHAX4-f-F7YaUGqTkfwsKmTL-tiCiihLnU,1705
- mteb/models/model_implementations/bedrock_models.py,sha256=tqfQofVHbKGY163x46CPtrLiyjpyHXf6JLNIjcmCXw4,9072
+ mteb/models/model_implementations/bedrock_models.py,sha256=oY6eLVUvMYlwfx7N1VvlPjPz6mCX0qISsF6VNCrMmVA,9052
  mteb/models/model_implementations/bge_models.py,sha256=JuO1FRWrsqlsM_jslQ96oVsD3FeWVD_uHBnMv8JJyNA,28033
  mteb/models/model_implementations/bica_model.py,sha256=Yx3iZrXF6ZMJS9SH5lbzNHoUWGNH3dypRtZ7dX5o7rA,1305
  mteb/models/model_implementations/blip2_models.py,sha256=C6egwozJthHmv92I0SWID3-sQCPROPJP0TzfQVKNzlo,7898
@@ -1554,9 +1554,9 @@ mteb/models/model_implementations/linq_models.py,sha256=alq0ylE8QDbpOGgIHUcs74fj
  mteb/models/model_implementations/listconranker.py,sha256=ojzBWxaCuMmfBaCZla8ECv5aq514IlABb6um5TGoYY8,4664
  mteb/models/model_implementations/llm2clip_models.py,sha256=X3W16uipaZ0t4Mco4lhhg4s9WC9lpVzg7Djq1wTQkyY,9522
  mteb/models/model_implementations/llm2vec_models.py,sha256=n86YQ8fAHU1gVtlY7tZcXq-1ab_ISxBmuk-X4MDnY4o,13348
- mteb/models/model_implementations/mcinext_models.py,sha256=T3vO9JQSmh3BICp6Y_q7j4anuA8P8LGZ4ZWnwGnF7cs,19299
+ mteb/models/model_implementations/mcinext_models.py,sha256=PHMjzPQV8haaOj25YDMntmcyhNeZLPwVa4D3nMNrWkg,19301
  mteb/models/model_implementations/mdbr_models.py,sha256=AqsRZ-IDekIjq-FDWu0zx7Nk9ySJxaWTdRb8YhUZeu4,2828
- mteb/models/model_implementations/misc_models.py,sha256=0FkvheqPYh3JwM65F4CDlQKBDQQdjyMyfJPUdP1X2Ns,74780
+ mteb/models/model_implementations/misc_models.py,sha256=JkJsyha-B5M8myLvHIwFUV14yo2lnSuBzHeO5fE9i74,73191
  mteb/models/model_implementations/mixedbread_ai_models.py,sha256=1-RD4M-16M-Rcf5CTD_R7LVoLv3cNFbmEjataQ__q94,10666
  mteb/models/model_implementations/mme5_models.py,sha256=V7BCGFkfZxkZ3ANJImvSFfP7in8OSfmkbqX-zXc_iF8,1574
  mteb/models/model_implementations/moco_models.py,sha256=6eEGpGTlI4StFRYsaNtXejhYE9GCqasUYCqB_SQy9cE,5714
@@ -1570,7 +1570,7 @@ mteb/models/model_implementations/nomic_models_vision.py,sha256=AzTCWbXBonUAVub0
  mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=-5_kd9jeDcgVv9gdwWuvX_-bNQdhAxInf1Mqo8_BdS8,10653
  mteb/models/model_implementations/nvidia_models.py,sha256=r-AW1dVQbteWjexjvZgFEt_90OHNRYer_5GLuqSXRS0,26924
  mteb/models/model_implementations/octen_models.py,sha256=5z-t2O-iIFiOOLdZ_AK9f7GrVRg-9_vx3JNAG9dJNPE,8562
- mteb/models/model_implementations/openai_models.py,sha256=y1wMknrrcu1L5CNwniG0mFThPVMON1c2Fj22jkKsw7Y,9730
+ mteb/models/model_implementations/openai_models.py,sha256=fE8SfSAcl20GccR8D8s-7MR9w_kO6LlN5Pm80Iwx82c,9777
  mteb/models/model_implementations/openclip_models.py,sha256=z2gQum16O0QhJPyxqKor3oO-_uWfnep6wSXqOFQQ2Q8,11969
  mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=J5FEvKWQUiBusL6PHcrRuRRJOQ-iMwOSu1fX0pblXhk,8941
  mteb/models/model_implementations/ops_colqwen3_models.py,sha256=5vg5d1_WfVGMgtIwkh6zf2-Paum6V35XcKEvLfRyRzs,7437
@@ -1586,12 +1586,12 @@ mteb/models/model_implementations/qtack_models.py,sha256=vw_2O4ZABR-_nYV4g1Ud8bW
  mteb/models/model_implementations/querit_models.py,sha256=P7lAw5IDe47DA_5srMwGPqxjMIFuvOW0BJ7xwB4GOro,8917
  mteb/models/model_implementations/qwen3_models.py,sha256=857UnUEil9o8xcw7vSr2fMRlEegyE2Q86e5yLeRL_mQ,5517
  mteb/models/model_implementations/qzhou_models.py,sha256=mfG70JrNJCo-s3MykRn6lg9gFPcKMeMI7Y8VrBhNo7I,3684
- mteb/models/model_implementations/random_baseline.py,sha256=YsITQoLbea_Iz2X84WNGBGkhlsQ3hB7yx1oJwXghimE,7561
+ mteb/models/model_implementations/random_baseline.py,sha256=SGJEjWQn6IlfHKMg3JU8iE-f7X1FuOgi3j_lx6ZEUYA,7628
  mteb/models/model_implementations/rasgaard_models.py,sha256=_uNYP_nqJcOyoKnHNcvfJnP9gRvsv7HCWhZX2LJzQ9s,1322
  mteb/models/model_implementations/reasonir_model.py,sha256=WNWGqa9wANBL9vTdcFx51TEFXz6yHq_ygK0rij3LCL8,5217
  mteb/models/model_implementations/repllama_models.py,sha256=k6BgN2Cn41p0gQ0F1FdOTQ9OXlmFgG-2RtdvzOcCSZg,7543
- mteb/models/model_implementations/rerankers_custom.py,sha256=Bjgg_UbeHarupzzCk2rdy_Dd0_W0ZsE-DCD5v1EshnI,10953
- mteb/models/model_implementations/rerankers_monot5_based.py,sha256=6por4DPCycS8gljqKRZWUNM093bjjSVvmyQ3dzj9H6U,35321
+ mteb/models/model_implementations/rerankers_custom.py,sha256=WBSA7kBRqxgb1549UwRYdtYzUovdwmW8C0PWzvGR54g,8087
+ mteb/models/model_implementations/rerankers_monot5_based.py,sha256=U9ChokUEDXtkoFno-o4GeT4fXEEoFtnZn2denIafxi8,34583
  mteb/models/model_implementations/richinfoai_models.py,sha256=FsXamY-bvR5LLagtKK8fP-I5oc6B_bKp_i6_xzUYL8Y,1069
  mteb/models/model_implementations/ru_sentence_models.py,sha256=W4R985LnThJ-9XFbPnTGKb3L1QnoS3i3VXBFq94DK_w,43034
  mteb/models/model_implementations/ruri_models.py,sha256=3zYOqacB3JEnGJkMGYHqFgVkbmLo4uceJs9kzV54ivU,10819
@@ -1601,7 +1601,7 @@ mteb/models/model_implementations/sarashina_embedding_models.py,sha256=vU6rPMfUb
  mteb/models/model_implementations/searchmap_models.py,sha256=XFJMAuSick-Dh0FCLTiVIjXo_kfP_VJp28Oi9WVjjlo,1994
  mteb/models/model_implementations/seed_1_6_embedding_models.py,sha256=u7L41utKM8EY-aTVVRrpvLB924FqivM3tjy83CqDPak,18699
  mteb/models/model_implementations/seed_1_6_embedding_models_1215.py,sha256=SDQUIyXIpODFjfgMMZF0rGI1_YPTvd3MpMjEvGhq0jc,36551
- mteb/models/model_implementations/seed_models.py,sha256=dHTS3NYacl83yV8Z4L_kjBUFpsDNRquMKCN2isXrMzw,14183
+ mteb/models/model_implementations/seed_models.py,sha256=DCfDfSebhGCOTrNKLAl7z7y1wuJjZ0unzFx8j46V5-I,14281
  mteb/models/model_implementations/sentence_transformers_models.py,sha256=u1GkT5Xxjw0bYZvuV1wK26-bRiqv8PPCRStbu_2hYeY,26757
  mteb/models/model_implementations/shuu_model.py,sha256=q4wZk5tYSJlnNErWpDeHGSypbAwHD5yAUHnEOuTURs8,1210
  mteb/models/model_implementations/siglip_models.py,sha256=IZj9GUlqIkvJr8_aLk7FCozWZSCXHTqaCqN44B5l0oY,13425
@@ -1617,7 +1617,7 @@ mteb/models/model_implementations/vdr_models.py,sha256=IGvpE2F42IWBN5QwKSWjsAehT
  mteb/models/model_implementations/vi_vn_models.py,sha256=7hot8CF5B1UeC4WJXnAAs1C1vbqK2lq7Bw338ztKFDE,6566
  mteb/models/model_implementations/vista_models.py,sha256=mcI0La6__LasuLd5P-nkc4Z-r9X_8sYhGFLdVPGPmkw,11033
  mteb/models/model_implementations/vlm2vec_models.py,sha256=1iq2i1ZbsPINE8nXoVZsX1Km-4dTTAd6St6J38I8Tew,11951
- mteb/models/model_implementations/voyage_models.py,sha256=g7WET4MibXN6eABrmhn8uTGXdjwf5Kk4ddqQmHA7v6A,23920
+ mteb/models/model_implementations/voyage_models.py,sha256=fNWs7DBS5dEMa0Hz4Ti52Pm1JrM-oPf58eRT-tlEJOc,23915
  mteb/models/model_implementations/voyage_v.py,sha256=_mJGhskJj9zeHYebEJFuYheLPb-YDyiu6Hny_5LQcAE,8280
  mteb/models/model_implementations/xyz_models.py,sha256=69JyOCQHVq19nAG3zQFi-UYYT6I7uHmvTcmRxHvjyc8,1361
  mteb/models/model_implementations/youtu_models.py,sha256=P5fh34UJZQObJAbz3Wuzqh9Nw5S7LraqxdtwAX3sDJ8,6028
@@ -2442,7 +2442,7 @@ mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py,sha256=In055XTnkJqZK
  mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py,sha256=7EBRQ173hECqHSjzL0CkRcOGeQ0IdkJfhfYfRLdTDL4,3825
  mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py,sha256=847aR3nw-Ba7ErgZlMnUI2DS6Wknxp55K6I-msOCAAw,4478
  mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py,sha256=xBUWdIf3O7Sz5WUpiQCCVdyf9WQ7VtA3_-LhtnRz19M,9509
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py,sha256=V3jtSlWhoKR1PCvHsH0HrONy-oFghomwqihBonQs_50,17414
+ mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py,sha256=gYNA2V7lKmQ_fCsDPe7D94Q3G4TOjf1zoFKZfUj5l0M,17452
  mteb/tasks/retrieval/multilingual/web_faq_retrieval.py,sha256=TM-Q98yXZny_PKHAFNEvw9o9ET_L6VM3aNis1NJ9DgM,2686
  mteb/tasks/retrieval/multilingual/wikipedia_retrieval_multilingual.py,sha256=zyqAt63bHXNU_I37jb891pwWUyGzZUGkXCyhWlRbed8,1569
  mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py,sha256=G5JZb7FkyNpeilYNoOM_vxAacpj3Y-AhXro5uUOxsbw,4388
@@ -2641,14 +2641,14 @@ mteb/tasks/zeroshot_classification/eng/sun397.py,sha256=Nls7tXM2Svu008MmAUjt-o_N
  mteb/tasks/zeroshot_classification/eng/ucf101.py,sha256=kwNRYks-_Oe4VE3GyoHIvN-2OJ6zhkwFr76WDNL9ymU,1884
  mteb/tasks/zeroshot_classification/eng/templates/__init__.py,sha256=da1PTClDMl-IBkrSvq6JC1lnS-K_BASzCvxVhNxN5Ls,13
  mteb/types/__init__.py,sha256=O26vXPolPReX7iVUBgUsyCkCo4w8KeLs7uueQDWp3fc,1142
- mteb/types/_encoder_io.py,sha256=jfwzUBmYNOZvYepQcW3KJeiJ7vmA-JTHUCyNAtJBXK0,5851
+ mteb/types/_encoder_io.py,sha256=V7m_t7ZXm3COJ4SoHP8bcr23WgjFBRCGa9AIaqAX8v4,5939
  mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
  mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
  mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
- mteb-2.7.15.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- mteb-2.7.15.dist-info/METADATA,sha256=EoUeroRRdre5jYbplBGCJuWs-6M7cZGpzwLqSQyJKgI,14348
- mteb-2.7.15.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- mteb-2.7.15.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
- mteb-2.7.15.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
- mteb-2.7.15.dist-info/RECORD,,
+ mteb-2.7.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ mteb-2.7.17.dist-info/METADATA,sha256=_PjxjEK4Txl-EuqMT4hDPwGIrWRp6IFqmTTGqryialU,14348
+ mteb-2.7.17.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ mteb-2.7.17.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+ mteb-2.7.17.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+ mteb-2.7.17.dist-info/RECORD,,