mteb: 2.6.7-py3-none-any.whl → 2.6.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. mteb/_create_dataloaders.py +7 -3
  2. mteb/_evaluators/any_sts_evaluator.py +6 -3
  3. mteb/_evaluators/clustering_evaluator.py +2 -2
  4. mteb/_evaluators/evaluator.py +2 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -5
  6. mteb/_evaluators/pair_classification_evaluator.py +2 -2
  7. mteb/_evaluators/retrieval_evaluator.py +2 -2
  8. mteb/_evaluators/sklearn_evaluator.py +3 -3
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +5 -3
  10. mteb/_evaluators/text/summarization_evaluator.py +3 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
  12. mteb/abstasks/abstask.py +3 -2
  13. mteb/abstasks/aggregated_task.py +3 -3
  14. mteb/abstasks/classification.py +3 -3
  15. mteb/abstasks/clustering.py +2 -2
  16. mteb/abstasks/clustering_legacy.py +2 -2
  17. mteb/abstasks/image/image_text_pair_classification.py +2 -1
  18. mteb/abstasks/multilabel_classification.py +2 -2
  19. mteb/abstasks/pair_classification.py +2 -2
  20. mteb/abstasks/retrieval.py +15 -14
  21. mteb/abstasks/sts.py +2 -2
  22. mteb/abstasks/text/bitext_mining.py +3 -3
  23. mteb/abstasks/text/summarization.py +2 -2
  24. mteb/abstasks/zeroshot_classification.py +3 -2
  25. mteb/benchmarks/benchmarks/__init__.py +2 -0
  26. mteb/benchmarks/benchmarks/benchmarks.py +24 -0
  27. mteb/cli/build_cli.py +2 -1
  28. mteb/deprecated_evaluator.py +3 -3
  29. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
  30. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
  31. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
  32. mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
  33. mteb/evaluate.py +5 -3
  34. mteb/models/abs_encoder.py +3 -1
  35. mteb/models/instruct_wrapper.py +1 -1
  36. mteb/models/model_implementations/bm25.py +3 -3
  37. mteb/models/model_implementations/mxbai_models.py +118 -1
  38. mteb/models/model_implementations/octen_models.py +30 -0
  39. mteb/models/model_implementations/pylate_models.py +5 -4
  40. mteb/models/models_protocols.py +6 -4
  41. mteb/models/search_wrappers.py +7 -6
  42. mteb/models/sentence_transformer_wrapper.py +5 -4
  43. mteb/tasks/retrieval/kor/__init__.py +15 -1
  44. mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
  45. mteb/types/__init__.py +2 -0
  46. mteb/types/_encoder_io.py +12 -0
  47. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/METADATA +1 -1
  48. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/RECORD +52 -47
  49. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/WHEEL +0 -0
  50. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/entry_points.txt +0 -0
  51. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/licenses/LICENSE +0 -0
  52. {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/top_level.txt +0 -0
mteb/_create_dataloaders.py CHANGED
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
 def _create_dataloader_from_texts(
     text: list[str],
     batch_size: int = 32,
-    **kwargs: dict[str, Any],
+    **kwargs: Any,
 ) -> DataLoader[TextInput]:
     """Create a dataloader from a list of text.
 
@@ -191,7 +191,8 @@ def _create_dataloader_for_queries_conversation(
     """
     return DataLoader(
         queries.map(
-            _convert_conv_history_to_query, desc="Converting conversations to queries"
+            _convert_conv_history_to_query,
+            desc="Converting conversations to queries",
         ),
         collate_fn=_custom_collate_fn,
         batch_size=batch_size,
@@ -361,6 +362,9 @@ def _create_document_dataloader(
         task_metadata: Metadata of the task to determine the document type.
         input_column: The column to use as input. If None, it will use the first column that matches the modality.
         batch_size: Batch size for the dataloader.
+
+    Returns:
+        A dataloader for the documents.
     """
     document_type = task_metadata.get_modalities(PromptType.document)
     if document_type == ["text"]:  # text only
@@ -383,7 +387,7 @@ def create_dataloader(
     prompt_type: PromptType | None = None,
     input_column: str | None = None,
     batch_size: int = 32,
-    **kwargs: dict[str, Any],
+    **kwargs: Any,
 ) -> DataLoader[BatchedInput]:
     """Create a dataloader from a dataset.
 
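The two `**kwargs` annotation fixes above correct a common mistake: in `**kwargs: T`, `T` annotates each individual keyword value, not the collected dict, so `dict[str, Any]` wrongly claimed that every extra keyword argument must itself be a dict. A minimal sketch of the difference (function names here are illustrative, not from mteb):

    from typing import Any

    def old_style(**kwargs: dict[str, Any]) -> None:
        # Type checkers read this as "every keyword value is a dict",
        # so old_style(batch_size=32) fails type checking.
        pass

    def new_style(**kwargs: Any) -> None:
        # Each keyword value may be anything, which is what was intended.
        pass

    new_style(batch_size=32, show_progress_bar=True)  # OK under the fixed annotation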
mteb/_evaluators/any_sts_evaluator.py CHANGED
@@ -1,5 +1,5 @@
 import logging
-from typing import Any, TypedDict
+from typing import TypedDict
 
 from datasets import Dataset
 from sklearn.metrics.pairwise import (
@@ -12,7 +12,7 @@ from mteb._create_dataloaders import create_dataloader
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
 from mteb.similarity_functions import compute_pairwise_similarity
-from mteb.types import PromptType
+from mteb.types import EncodeKwargs, PromptType
 
 from .evaluator import Evaluator
 
@@ -57,7 +57,10 @@ class AnySTSEvaluator(Evaluator):
         self.input2_prompt_type = input2_prompt_type
 
     def __call__(
-        self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
+        self,
+        model: EncoderProtocol,
+        *,
+        encode_kwargs: EncodeKwargs,
     ) -> STSEvaluatorScores:
         logger.info("Running semantic similarity - Encoding samples (1/2)")
         embeddings1 = model.encode(
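This is the first of many hunks replacing the loose `encode_kwargs: dict[str, Any]` with the new `EncodeKwargs` type exported from `mteb.types` (added in `mteb/types/_encoder_io.py`, per the file list above). The diff does not show its definition; a plausible shape, assuming a `TypedDict` with optional keys and purely hypothetical field names, is:

    from typing import TypedDict

    # Sketch only; the real definition in mteb/types/_encoder_io.py is not
    # shown in this diff, and both field names below are assumptions.
    class EncodeKwargs(TypedDict, total=False):
        batch_size: int            # assumed field
        show_progress_bar: bool    # assumed field

    def run(encode_kwargs: EncodeKwargs) -> None:
        # A TypedDict keeps existing dict call sites working while letting
        # type checkers flag misspelled or wrongly typed keys.
        print(encode_kwargs.get("batch_size", 32))

    run({"batch_size": 16})  # OK; run({"batch_sise": 16}) would be flagged statically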
mteb/_evaluators/clustering_evaluator.py CHANGED
@@ -1,5 +1,4 @@
 import logging
-from typing import Any
 
 from datasets import Dataset
 from sklearn import cluster
@@ -7,6 +6,7 @@ from sklearn import cluster
 from mteb._create_dataloaders import create_dataloader
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
+from mteb.types import EncodeKwargs
 
 from .evaluator import Evaluator
 
@@ -38,7 +38,7 @@ class ClusteringEvaluator(Evaluator):
         self,
         model: EncoderProtocol,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
     ) -> list[int]:
         data_loader = create_dataloader(
             self.dataset,
mteb/_evaluators/evaluator.py CHANGED
@@ -4,6 +4,7 @@ from typing import Any
 
 from mteb.abstasks.abstask import _set_seed
 from mteb.models import EncoderProtocol
+from mteb.types import EncodeKwargs
 
 
 class Evaluator(ABC):
@@ -18,7 +19,7 @@ class Evaluator(ABC):
 
     @abstractmethod
     def __call__(
-        self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
+        self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs
    ) -> Mapping[str, float] | Iterable[Any]:
         """This is called during training to evaluate the model.
 
mteb/_evaluators/image/imagetext_pairclassification_evaluator.py CHANGED
@@ -6,16 +6,17 @@ from typing import TYPE_CHECKING, Any
 
 import torch
 import torch.nn.functional as F
-from datasets import Dataset
 from torch.utils.data import DataLoader
 
 from mteb._create_dataloaders import (
+    _create_dataloader_from_texts,
     _transform_image_to_rgb,
 )
 from mteb._evaluators.evaluator import Evaluator
 from mteb._requires_package import requires_image_dependencies
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.models_protocols import EncoderProtocol
+from mteb.types import EncodeKwargs
 
 if TYPE_CHECKING:
     from PIL.Image import Image
@@ -84,7 +85,10 @@ class ImageTextPairClassificationEvaluator(Evaluator):
         self.hf_subset = hf_subset
 
     def __call__(  # type: ignore[override]
-        self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
+        self,
+        model: EncoderProtocol,
+        *,
+        encode_kwargs: EncodeKwargs,
     ) -> list[torch.Tensor]:
         images = []
         if isinstance(self.images_column_names, str):
@@ -105,8 +109,8 @@ class ImageTextPairClassificationEvaluator(Evaluator):
                     texts.append(row[col])
 
         text_embeddings = model.encode(
-            DataLoader(
-                Dataset.from_dict({"text": texts}),
+            _create_dataloader_from_texts(
+                texts,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
@@ -127,7 +131,6 @@ class ImageTextPairClassificationEvaluator(Evaluator):
             DataLoader(
                 CustomImageDataset(images),
                 collate_fn=lambda x: {"image": [item["image"] for item in x]},
-                **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
             hf_subset=self.hf_subset,
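Besides the signature change, this evaluator stops spreading `**encode_kwargs` into `DataLoader(...)` constructors: `torch.utils.data.DataLoader` rejects keyword arguments it does not define, so any encode-time option without a same-named DataLoader parameter would crash at construction. Texts now go through `_create_dataloader_from_texts`, which accepts arbitrary kwargs. A minimal sketch of the old failure mode (the option name is made up):

    from torch.utils.data import DataLoader

    encode_kwargs = {"show_progress_bar": True}  # hypothetical encode-time option

    try:
        # What the removed code effectively did: forward encode kwargs
        # straight into the DataLoader constructor.
        DataLoader([1, 2, 3], **encode_kwargs)
    except TypeError as err:
        print(err)  # "... got an unexpected keyword argument 'show_progress_bar'"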
mteb/_evaluators/pair_classification_evaluator.py CHANGED
@@ -14,7 +14,7 @@ from mteb._evaluators.evaluator import Evaluator
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
 from mteb.similarity_functions import compute_pairwise_similarity
-from mteb.types import PromptType
+from mteb.types import EncodeKwargs, PromptType
 
 logger = logging.getLogger(__name__)
 
@@ -85,7 +85,7 @@ class PairClassificationEvaluator(Evaluator):
     def __call__(
         self,
         model: EncoderProtocol,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
     ) -> PairClassificationDistances:
         logger.info("Running pair classification - Encoding samples (1/2)")
         embeddings1 = model.encode(
mteb/_evaluators/retrieval_evaluator.py CHANGED
@@ -1,11 +1,11 @@
 import logging
 from collections.abc import Sequence
-from typing import Any
 
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import SearchProtocol
 from mteb.types import (
     CorpusDatasetType,
+    EncodeKwargs,
     QueryDatasetType,
     RelevantDocumentsType,
     RetrievalEvaluationResult,
@@ -48,7 +48,7 @@ class RetrievalEvaluator(Evaluator):
     def __call__(  # type: ignore[override]
         self,
         search_model: SearchProtocol,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
     ) -> RetrievalOutputType:
         logger.info("Running retrieval task - Indexing corpus...")
         search_model.index(
mteb/_evaluators/sklearn_evaluator.py CHANGED
@@ -9,7 +9,7 @@ from typing_extensions import Self
 from mteb._create_dataloaders import create_dataloader
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
-from mteb.types import Array, BatchedInput
+from mteb.types import Array, BatchedInput, EncodeKwargs
 
 from .evaluator import Evaluator
 
@@ -50,7 +50,7 @@ class SklearnEvaluator(Evaluator):
         self.evaluator_model = evaluator_model
 
     def create_dataloaders(
-        self, encode_kwargs: dict[str, Any]
+        self, encode_kwargs: EncodeKwargs
     ) -> tuple[DataLoader[BatchedInput], DataLoader[BatchedInput]]:
         dataloader_train = create_dataloader(
             self.train_dataset,
@@ -70,7 +70,7 @@ class SklearnEvaluator(Evaluator):
         self,
         model: EncoderProtocol,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         test_cache: Array | None = None,
     ) -> tuple[np.ndarray, Array]:
         """Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.
mteb/_evaluators/text/bitext_mining_evaluator.py CHANGED
@@ -1,5 +1,4 @@
 import logging
-from typing import Any
 
 import torch
 from datasets import Dataset
@@ -9,7 +8,7 @@ from mteb._create_dataloaders import _create_dataloader_from_texts
 from mteb._evaluators.evaluator import Evaluator
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
-from mteb.types import Array
+from mteb.types import Array, EncodeKwargs
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +32,10 @@ class BitextMiningEvaluator(Evaluator):
         self.task_metadata = task_metadata
 
     def __call__(
-        self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
+        self,
+        model: EncoderProtocol,
+        *,
+        encode_kwargs: EncodeKwargs,
     ) -> dict[str, list[dict[str, float]]]:
         pair_elements = {p for pair in self.pairs for p in pair}
         if isinstance(self.sentences, Dataset):
mteb/_evaluators/text/summarization_evaluator.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 import sys
-from typing import Any, TypedDict
+from typing import TypedDict
 
 import numpy as np
 import torch
@@ -12,6 +12,7 @@ from mteb._evaluators.evaluator import Evaluator
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
 from mteb.similarity_functions import cos_sim, dot_score
+from mteb.types import EncodeKwargs
 
 # if later than python 3.13 use typing module
 if sys.version_info >= (3, 13):
@@ -94,7 +95,7 @@ class SummarizationEvaluator(Evaluator):
         self,
         model: EncoderProtocol,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
     ) -> SummarizationDistances:
         # Get the human & machine summaries for the text in one go for all
         human_lens = [len(human_summaries) for human_summaries in self.human_summaries]
mteb/_evaluators/zeroshot_classification_evaluator.py CHANGED
@@ -1,5 +1,4 @@
 import logging
-from typing import Any
 
 from datasets import Dataset
 
@@ -10,7 +9,7 @@ from mteb._create_dataloaders import (
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import EncoderProtocol
 from mteb.similarity_functions import similarity
-from mteb.types import Array
+from mteb.types import Array, EncodeKwargs
 
 from .evaluator import Evaluator
 
@@ -38,7 +37,10 @@ class ZeroShotClassificationEvaluator(Evaluator):
         self.hf_subset = hf_subset
 
     def __call__(
-        self, model: EncoderProtocol, *, encode_kwargs: dict[str, Any]
+        self,
+        model: EncoderProtocol,
+        *,
+        encode_kwargs: EncodeKwargs,
     ) -> Array:
         dataloader = create_dataloader(
             self.dataset,
mteb/abstasks/abstask.py CHANGED
@@ -23,6 +23,7 @@ from mteb.models import (
     SearchProtocol,
 )
 from mteb.types import HFSubset, Modalities, ScoresDict
+from mteb.types._encoder_io import EncodeKwargs
 from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
 
 logger = logging.getLogger(__name__)
@@ -121,7 +122,7 @@ class AbsTask(ABC):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> Mapping[HFSubset, ScoresDict]:
@@ -201,7 +202,7 @@ class AbsTask(ABC):
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> ScoresDict:
mteb/abstasks/aggregated_task.py CHANGED
@@ -9,7 +9,7 @@ from datasets import Dataset, DatasetDict
 
 from mteb.models.models_protocols import MTEBModels
 from mteb.results.task_result import TaskResult
-from mteb.types import HFSubset, ScoresDict
+from mteb.types import EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import DescriptiveStatistics
 
 from .abstask import AbsTask
@@ -127,7 +127,7 @@ class AbsTaskAggregate(AbsTask):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> dict[HFSubset, ScoresDict]:
@@ -141,7 +141,7 @@ class AbsTaskAggregate(AbsTask):
         self,
         model: MTEBModels,
         data_split: DatasetDict | Dataset,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         **kwargs: Any,
     ) -> ScoresDict:
         raise NotImplementedError(
mteb/abstasks/classification.py CHANGED
@@ -16,7 +16,7 @@ from sklearn.metrics import (
 
 from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import HFSubset, ScoresDict
+from mteb.types import EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -125,7 +125,7 @@ class AbsTaskClassification(AbsTask):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> dict[HFSubset, ScoresDict]:
@@ -184,7 +184,7 @@ class AbsTaskClassification(AbsTask):
         model: MTEBModels,
         data_split: DatasetDict,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/clustering.py CHANGED
@@ -12,7 +12,7 @@ from sklearn.metrics.cluster import v_measure_score
 
 from mteb._create_dataloaders import create_dataloader
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import Array, HFSubset, ScoresDict
+from mteb.types import Array, EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -156,7 +156,7 @@ class AbsTaskClustering(AbsTask):
         model: MTEBModels,
         data_split: Dataset,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/clustering_legacy.py CHANGED
@@ -9,7 +9,7 @@ from sklearn import metrics
 
 from mteb._evaluators import ClusteringEvaluator
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import ScoresDict
+from mteb.types import EncodeKwargs, ScoresDict
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -83,7 +83,7 @@ class AbsTaskClusteringLegacy(AbsTask):
         model: MTEBModels,
         data_split: Dataset,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/image/image_text_pair_classification.py CHANGED
@@ -13,6 +13,7 @@ from mteb.abstasks._statistics_calculation import (
 )
 from mteb.abstasks.abstask import AbsTask
 from mteb.models.models_protocols import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs
 from mteb.types.statistics import (
     ImageStatistics,
     SplitDescriptiveStatistics,
@@ -119,7 +120,7 @@ class AbsTaskImageTextPairClassification(AbsTask):
         model: MTEBModels,
         data_split: Dataset,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/multilabel_classification.py CHANGED
@@ -17,7 +17,7 @@ from mteb._create_dataloaders import create_dataloader
 from mteb._evaluators.classification_metrics import hamming_score
 from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import Array
+from mteb.types import Array, EncodeKwargs
 
 from .classification import AbsTaskClassification
 
@@ -83,7 +83,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
         model: MTEBModels,
         data_split: DatasetDict,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/pair_classification.py CHANGED
@@ -19,7 +19,7 @@ from mteb.abstasks._statistics_calculation import (
 from mteb.abstasks.abstask import AbsTask
 from mteb.models.model_meta import ScoringFunction
 from mteb.models.models_protocols import EncoderProtocol, MTEBModels
-from mteb.types import PromptType
+from mteb.types import EncodeKwargs, PromptType
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -84,7 +84,7 @@ class AbsTaskPairClassification(AbsTask):
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, str],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs,
     ) -> dict[str, float]:
mteb/abstasks/retrieval.py CHANGED
@@ -25,6 +25,7 @@ from mteb.models import (
     SearchProtocol,
 )
 from mteb.types import (
+    EncodeKwargs,
     HFSubset,
     QueryDatasetType,
     RelevantDocumentsType,
@@ -184,17 +185,17 @@ class AbsTaskRetrieval(AbsTask):
             return queries, corpus
 
         if self.metadata.is_multilingual:
-            for subset in self.queries:
-                for split in self.queries[subset]:
-                    queries = self.queries[subset][split]
-                    corpus = self.corpus[subset][split]
+            for subset in self.queries:  # type: ignore[attr-defined]
+                for split in self.queries[subset]:  # type: ignore[attr-defined]
+                    queries = self.queries[subset][split]  # type: ignore[attr-defined]
+                    corpus = self.corpus[subset][split]  # type: ignore[attr-defined]
 
                     (
                         self.dataset[subset][split]["queries"],
                         self.dataset[subset][split]["corpus"],
                     ) = _process_split(queries, corpus)
 
-                    self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[
+                    self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[  # type: ignore[attr-defined]
                         subset
                     ][split]
                     if hasattr(self, "instructions"):
@@ -211,15 +212,15 @@ class AbsTaskRetrieval(AbsTask):
                     ][split]
         else:
             subset = "default"
-            for split in self.queries:
-                queries = self.queries[split]
-                corpus = self.corpus[split]
+            for split in self.queries:  # type: ignore[attr-defined]
+                queries = self.queries[split]  # type: ignore[attr-defined]
+                corpus = self.corpus[split]  # type: ignore[attr-defined]
 
                 (
                     self.dataset[subset][split]["queries"],
                     self.dataset[subset][split]["corpus"],
                 ) = _process_split(queries, corpus)
 
-                self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[
+                self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[  # type: ignore[attr-defined]
                     split
                 ].copy()
                 if hasattr(self, "instructions"):
@@ -235,9 +236,9 @@ class AbsTaskRetrieval(AbsTask):
                     split
                 ].copy()
 
-        del self.queries
-        del self.corpus
-        del self.relevant_docs
+        del self.queries  # type: ignore[attr-defined]
+        del self.corpus  # type: ignore[attr-defined]
+        del self.relevant_docs  # type: ignore[attr-defined]
         if hasattr(self, "instructions"):
             del self.instructions
         if hasattr(self, "top_ranked"):
@@ -283,7 +284,7 @@ class AbsTaskRetrieval(AbsTask):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> Mapping[HFSubset, ScoresDict]:
@@ -320,7 +321,7 @@ class AbsTaskRetrieval(AbsTask):
         self,
         model: MTEBModels,
         data_split: RetrievalSplitData,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
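The new `# type: ignore[attr-defined]` comments acknowledge that `queries`, `corpus`, and `relevant_docs` are not declared on the class: they appear to be set dynamically by legacy loading code and deleted once migrated into `self.dataset`, so a type checker reports `attr-defined` at every use even though the runtime behavior is fine. A stripped-down sketch of the pattern (class and attribute names simplified, not mteb's actual code):

    class LegacyTask:
        # "queries" has no class-level declaration, so mypy flags each
        # access below with [attr-defined]; at runtime it works because
        # legacy loaders assign the attribute dynamically.
        def migrate(self) -> None:
            self.dataset = {"queries": self.queries}  # type: ignore[attr-defined]
            del self.queries  # type: ignore[attr-defined]

    task = LegacyTask()
    task.queries = ["q1", "q2"]  # assigned dynamically, as legacy code would
    task.migrate()
    print(task.dataset)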
mteb/abstasks/sts.py CHANGED
@@ -8,7 +8,7 @@ from scipy.stats import pearsonr, spearmanr
 from mteb._evaluators import AnySTSEvaluator
 from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import PromptType
+from mteb.types import EncodeKwargs, PromptType
 from mteb.types.statistics import (
     ImageStatistics,
     ScoreStatistics,
@@ -105,7 +105,7 @@ class AbsTaskSTS(AbsTask):
         self,
         model: MTEBModels,
         data_split: Dataset,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
mteb/abstasks/text/bitext_mining.py CHANGED
@@ -10,7 +10,7 @@ from mteb._evaluators import BitextMiningEvaluator
 from mteb.abstasks._statistics_calculation import calculate_text_statistics
 from mteb.abstasks.abstask import AbsTask
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import HFSubset, ScoresDict
+from mteb.types import EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import SplitDescriptiveStatistics, TextStatistics
 
 logger = logging.getLogger(__name__)
@@ -73,7 +73,7 @@ class AbsTaskBitextMining(AbsTask):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> dict[HFSubset, ScoresDict]:
@@ -142,7 +142,7 @@ class AbsTaskBitextMining(AbsTask):
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         parallel: bool = False,
         **kwargs,
mteb/abstasks/text/summarization.py CHANGED
@@ -1,6 +1,5 @@
 import logging
 from pathlib import Path
-from typing import Any
 
 import numpy as np
 from datasets import Dataset
@@ -13,6 +12,7 @@ from mteb.abstasks._statistics_calculation import (
 )
 from mteb.abstasks.abstask import AbsTask
 from mteb.models import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs
 from mteb.types.statistics import (
     ScoreStatistics,
     SplitDescriptiveStatistics,
@@ -82,7 +82,7 @@ class AbsTaskSummarization(AbsTask):
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs,
     ) -> SummarizationMetrics:
mteb/abstasks/zeroshot_classification.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Any, TypedDict
+from typing import TypedDict
 
 import torch
 from datasets import Dataset
@@ -8,6 +8,7 @@ from sklearn import metrics
 
 from mteb._evaluators import ZeroShotClassificationEvaluator
 from mteb.models import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -116,7 +117,7 @@ class AbsTaskZeroShotClassification(AbsTask):
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs,
     ) -> ZeroShotClassificationMetrics:
mteb/benchmarks/benchmarks/__init__.py CHANGED
@@ -14,6 +14,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     JINA_VDR,
     JMTEB_LITE_V1,
     JMTEB_V2,
+    KOVIDORE_V2,
     LONG_EMBED,
     MIEB_ENG,
     MIEB_IMG,
@@ -79,6 +80,7 @@ __all__ = [
     "JINA_VDR",
     "JMTEB_LITE_V1",
     "JMTEB_V2",
+    "KOVIDORE_V2",
     "LONG_EMBED",
     "MIEB_ENG",
     "MIEB_IMG",