mteb 2.6.7__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +7 -3
- mteb/_evaluators/any_sts_evaluator.py +6 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +2 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -5
- mteb/_evaluators/pair_classification_evaluator.py +2 -2
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/sklearn_evaluator.py +3 -3
- mteb/_evaluators/text/bitext_mining_evaluator.py +5 -3
- mteb/_evaluators/text/summarization_evaluator.py +3 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/abstask.py +3 -2
- mteb/abstasks/aggregated_task.py +3 -3
- mteb/abstasks/classification.py +3 -3
- mteb/abstasks/clustering.py +2 -2
- mteb/abstasks/clustering_legacy.py +2 -2
- mteb/abstasks/image/image_text_pair_classification.py +2 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/pair_classification.py +2 -2
- mteb/abstasks/retrieval.py +15 -14
- mteb/abstasks/sts.py +2 -2
- mteb/abstasks/text/bitext_mining.py +3 -3
- mteb/abstasks/text/summarization.py +2 -2
- mteb/abstasks/zeroshot_classification.py +3 -2
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +24 -0
- mteb/cli/build_cli.py +2 -1
- mteb/deprecated_evaluator.py +3 -3
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/evaluate.py +5 -3
- mteb/models/abs_encoder.py +3 -1
- mteb/models/instruct_wrapper.py +1 -1
- mteb/models/model_implementations/bm25.py +3 -3
- mteb/models/model_implementations/mxbai_models.py +118 -1
- mteb/models/model_implementations/octen_models.py +30 -0
- mteb/models/model_implementations/pylate_models.py +5 -4
- mteb/models/models_protocols.py +6 -4
- mteb/models/search_wrappers.py +7 -6
- mteb/models/sentence_transformer_wrapper.py +5 -4
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/METADATA +1 -1
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/RECORD +52 -47
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/WHEEL +0 -0
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.7.dist-info → mteb-2.6.8.dist-info}/top_level.txt +0 -0
mteb/benchmarks/benchmarks/benchmarks.py
CHANGED
|
@@ -2728,3 +2728,27 @@ JMTEB_LITE_V1 = Benchmark(
|
|
|
2728
2728
|
""",
|
|
2729
2729
|
contacts=["lsz05"],
|
|
2730
2730
|
)
|
|
2731
|
+
|
|
2732
|
+
KOVIDORE_V2 = Benchmark(
|
|
2733
|
+
name="KoViDoRe(v2)",
|
|
2734
|
+
display_name="KoViDoRe v2",
|
|
2735
|
+
tasks=get_tasks(
|
|
2736
|
+
tasks=[
|
|
2737
|
+
"KoVidore2CybersecurityRetrieval",
|
|
2738
|
+
"KoVidore2EconomicRetrieval",
|
|
2739
|
+
"KoVidore2EnergyRetrieval",
|
|
2740
|
+
"KoVidore2HrRetrieval",
|
|
2741
|
+
]
|
|
2742
|
+
),
|
|
2743
|
+
description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.",
|
|
2744
|
+
reference="https://github.com/whybe-choi/kovidore-data-generator",
|
|
2745
|
+
citation=r"""
|
|
2746
|
+
@misc{choi2026kovidorev2,
|
|
2747
|
+
author = {Yongbin Choi},
|
|
2748
|
+
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
|
|
2749
|
+
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
|
|
2750
|
+
url = {https://github.com/whybe-choi/kovidore-data-generator},
|
|
2751
|
+
year = {2026},
|
|
2752
|
+
}
|
|
2753
|
+
""",
|
|
2754
|
+
)
|
mteb/cli/build_cli.py
CHANGED
|
@@ -13,6 +13,7 @@ from mteb.cache import ResultCache
|
|
|
13
13
|
from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
|
|
14
14
|
from mteb.cli.generate_model_card import generate_model_card
|
|
15
15
|
from mteb.evaluate import OverwriteStrategy
|
|
16
|
+
from mteb.types._encoder_io import EncodeKwargs
|
|
16
17
|
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
@@ -64,7 +65,7 @@ def run(args: argparse.Namespace) -> None:
|
|
|
64
65
|
eval_splits=args.eval_splits,
|
|
65
66
|
)
|
|
66
67
|
|
|
67
|
-
encode_kwargs = {}
|
|
68
|
+
encode_kwargs: EncodeKwargs = {}
|
|
68
69
|
if args.batch_size is not None:
|
|
69
70
|
encode_kwargs["batch_size"] = args.batch_size
|
|
70
71
|
|
mteb/deprecated_evaluator.py
CHANGED
|
@@ -28,7 +28,7 @@ from mteb.models import (
|
|
|
28
28
|
SentenceTransformerEncoderWrapper,
|
|
29
29
|
)
|
|
30
30
|
from mteb.results import TaskResult
|
|
31
|
-
from mteb.types import ScoresDict
|
|
31
|
+
from mteb.types import EncodeKwargs, ScoresDict
|
|
32
32
|
|
|
33
33
|
if sys.version_info >= (3, 13):
|
|
34
34
|
from warnings import deprecated
|
|
@@ -174,7 +174,7 @@ class MTEB:
|
|
|
174
174
|
split: str,
|
|
175
175
|
subsets_to_run: list[str] | None = None,
|
|
176
176
|
*,
|
|
177
|
-
encode_kwargs:
|
|
177
|
+
encode_kwargs: EncodeKwargs,
|
|
178
178
|
**kwargs: Any,
|
|
179
179
|
):
|
|
180
180
|
tick = time()
|
|
@@ -263,7 +263,7 @@ class MTEB:
|
|
|
263
263
|
overwrite_results: bool = False,
|
|
264
264
|
raise_error: bool = True,
|
|
265
265
|
co2_tracker: bool = False,
|
|
266
|
-
encode_kwargs:
|
|
266
|
+
encode_kwargs: EncodeKwargs | None = None,
|
|
267
267
|
**kwargs,
|
|
268
268
|
) -> list[TaskResult]:
|
|
269
269
|
"""Run the evaluation pipeline on the selected tasks.
|
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 1299,
|
|
4
|
+
"number_of_characters": 9254,
|
|
5
|
+
"documents_text_statistics": null,
|
|
6
|
+
"documents_image_statistics": {
|
|
7
|
+
"min_image_width": 2245,
|
|
8
|
+
"average_image_width": 2370.324347826087,
|
|
9
|
+
"max_image_width": 3508,
|
|
10
|
+
"min_image_height": 2481,
|
|
11
|
+
"average_image_height": 3289.8060869565215,
|
|
12
|
+
"max_image_height": 3580,
|
|
13
|
+
"unique_images": 1132
|
|
14
|
+
},
|
|
15
|
+
"queries_text_statistics": {
|
|
16
|
+
"total_text_length": 9254,
|
|
17
|
+
"min_text_length": 15,
|
|
18
|
+
"average_text_length": 62.10738255033557,
|
|
19
|
+
"max_text_length": 108,
|
|
20
|
+
"unique_texts": 149
|
|
21
|
+
},
|
|
22
|
+
"queries_image_statistics": null,
|
|
23
|
+
"relevant_docs_statistics": {
|
|
24
|
+
"num_relevant_docs": 409,
|
|
25
|
+
"min_relevant_docs_per_query": 1,
|
|
26
|
+
"average_relevant_docs_per_query": 2.7449664429530203,
|
|
27
|
+
"max_relevant_docs_per_query": 7,
|
|
28
|
+
"unique_relevant_docs": 316
|
|
29
|
+
},
|
|
30
|
+
"top_ranked_statistics": null
|
|
31
|
+
}
|
|
32
|
+
}
|
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 1640,
|
|
4
|
+
"number_of_characters": 8331,
|
|
5
|
+
"documents_text_statistics": null,
|
|
6
|
+
"documents_image_statistics": {
|
|
7
|
+
"min_image_width": 2313,
|
|
8
|
+
"average_image_width": 2347.5321597833445,
|
|
9
|
+
"max_image_width": 2481,
|
|
10
|
+
"min_image_height": 3138,
|
|
11
|
+
"average_image_height": 3214.301963439404,
|
|
12
|
+
"max_image_height": 3508,
|
|
13
|
+
"unique_images": 1442
|
|
14
|
+
},
|
|
15
|
+
"queries_text_statistics": {
|
|
16
|
+
"total_text_length": 8331,
|
|
17
|
+
"min_text_length": 23,
|
|
18
|
+
"average_text_length": 51.11042944785276,
|
|
19
|
+
"max_text_length": 110,
|
|
20
|
+
"unique_texts": 163
|
|
21
|
+
},
|
|
22
|
+
"queries_image_statistics": null,
|
|
23
|
+
"relevant_docs_statistics": {
|
|
24
|
+
"num_relevant_docs": 413,
|
|
25
|
+
"min_relevant_docs_per_query": 1,
|
|
26
|
+
"average_relevant_docs_per_query": 2.5337423312883436,
|
|
27
|
+
"max_relevant_docs_per_query": 6,
|
|
28
|
+
"unique_relevant_docs": 349
|
|
29
|
+
},
|
|
30
|
+
"top_ranked_statistics": null
|
|
31
|
+
}
|
|
32
|
+
}
|
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 2166,
|
|
4
|
+
"number_of_characters": 9764,
|
|
5
|
+
"documents_text_statistics": null,
|
|
6
|
+
"documents_image_statistics": {
|
|
7
|
+
"min_image_width": 2221,
|
|
8
|
+
"average_image_width": 2339.4957350727545,
|
|
9
|
+
"max_image_width": 2480,
|
|
10
|
+
"min_image_height": 3036,
|
|
11
|
+
"average_image_height": 3242.8138484696437,
|
|
12
|
+
"max_image_height": 3508,
|
|
13
|
+
"unique_images": 1974
|
|
14
|
+
},
|
|
15
|
+
"queries_text_statistics": {
|
|
16
|
+
"total_text_length": 9764,
|
|
17
|
+
"min_text_length": 22,
|
|
18
|
+
"average_text_length": 56.4393063583815,
|
|
19
|
+
"max_text_length": 103,
|
|
20
|
+
"unique_texts": 173
|
|
21
|
+
},
|
|
22
|
+
"queries_image_statistics": null,
|
|
23
|
+
"relevant_docs_statistics": {
|
|
24
|
+
"num_relevant_docs": 525,
|
|
25
|
+
"min_relevant_docs_per_query": 1,
|
|
26
|
+
"average_relevant_docs_per_query": 3.0346820809248554,
|
|
27
|
+
"max_relevant_docs_per_query": 7,
|
|
28
|
+
"unique_relevant_docs": 442
|
|
29
|
+
},
|
|
30
|
+
"top_ranked_statistics": null
|
|
31
|
+
}
|
|
32
|
+
}
|
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 2330,
|
|
4
|
+
"number_of_characters": 13131,
|
|
5
|
+
"documents_text_statistics": null,
|
|
6
|
+
"documents_image_statistics": {
|
|
7
|
+
"min_image_width": 1949,
|
|
8
|
+
"average_image_width": 2430.1152204836417,
|
|
9
|
+
"max_image_width": 3505,
|
|
10
|
+
"min_image_height": 2480,
|
|
11
|
+
"average_image_height": 3350.3921289710765,
|
|
12
|
+
"max_image_height": 3626,
|
|
13
|
+
"unique_images": 2096
|
|
14
|
+
},
|
|
15
|
+
"queries_text_statistics": {
|
|
16
|
+
"total_text_length": 13131,
|
|
17
|
+
"min_text_length": 21,
|
|
18
|
+
"average_text_length": 59.41628959276018,
|
|
19
|
+
"max_text_length": 112,
|
|
20
|
+
"unique_texts": 221
|
|
21
|
+
},
|
|
22
|
+
"queries_image_statistics": null,
|
|
23
|
+
"relevant_docs_statistics": {
|
|
24
|
+
"num_relevant_docs": 726,
|
|
25
|
+
"min_relevant_docs_per_query": 1,
|
|
26
|
+
"average_relevant_docs_per_query": 3.2850678733031673,
|
|
27
|
+
"max_relevant_docs_per_query": 7,
|
|
28
|
+
"unique_relevant_docs": 575
|
|
29
|
+
},
|
|
30
|
+
"top_ranked_statistics": null
|
|
31
|
+
}
|
|
32
|
+
}
|
mteb/evaluate.py
CHANGED
|
@@ -5,7 +5,7 @@ import warnings
|
|
|
5
5
|
from collections.abc import Iterable
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from time import time
|
|
8
|
-
from typing import TYPE_CHECKING,
|
|
8
|
+
from typing import TYPE_CHECKING, cast
|
|
9
9
|
|
|
10
10
|
from datasets.exceptions import DatasetNotFoundError
|
|
11
11
|
from tqdm.auto import tqdm
|
|
@@ -27,6 +27,7 @@ from mteb.models.sentence_transformer_wrapper import (
|
|
|
27
27
|
from mteb.results import ModelResult, TaskResult
|
|
28
28
|
from mteb.results.task_result import TaskError
|
|
29
29
|
from mteb.types import HFSubset, PromptType, SplitName
|
|
30
|
+
from mteb.types._encoder_io import EncodeKwargs
|
|
30
31
|
from mteb.types._metadata import ModelName, Revision
|
|
31
32
|
|
|
32
33
|
if TYPE_CHECKING:
|
|
@@ -85,9 +86,10 @@ def _evaluate_task(
|
|
|
85
86
|
*,
|
|
86
87
|
splits: dict[SplitName, list[HFSubset]],
|
|
87
88
|
co2_tracker: bool | None,
|
|
88
|
-
encode_kwargs:
|
|
89
|
+
encode_kwargs: EncodeKwargs,
|
|
89
90
|
prediction_folder: Path | None,
|
|
90
91
|
public_only: bool | None,
|
|
92
|
+
num_proc: int = 1,
|
|
91
93
|
) -> TaskResult | TaskError:
|
|
92
94
|
"""The core logic to run a model on a given task. See `evaluate` for more details.
|
|
93
95
|
|
|
@@ -270,7 +272,7 @@ def evaluate(
|
|
|
270
272
|
*,
|
|
271
273
|
co2_tracker: bool | None = None,
|
|
272
274
|
raise_error: bool = True,
|
|
273
|
-
encode_kwargs:
|
|
275
|
+
encode_kwargs: EncodeKwargs | None = None,
|
|
274
276
|
cache: ResultCache | None = ResultCache(),
|
|
275
277
|
overwrite_strategy: str | OverwriteStrategy = "only-missing",
|
|
276
278
|
prediction_folder: Path | str | None = None,
|
mteb/models/abs_encoder.py
CHANGED
|
@@ -5,6 +5,7 @@ from collections.abc import Callable, Sequence
|
|
|
5
5
|
from typing import Any, Literal, cast, get_args, overload
|
|
6
6
|
|
|
7
7
|
from torch.utils.data import DataLoader
|
|
8
|
+
from typing_extensions import Unpack
|
|
8
9
|
|
|
9
10
|
import mteb
|
|
10
11
|
from mteb.abstasks.task_metadata import TaskMetadata, TaskType
|
|
@@ -19,6 +20,7 @@ from mteb.similarity_functions import (
|
|
|
19
20
|
from mteb.types import (
|
|
20
21
|
Array,
|
|
21
22
|
BatchedInput,
|
|
23
|
+
EncodeKwargs,
|
|
22
24
|
PromptType,
|
|
23
25
|
)
|
|
24
26
|
|
|
@@ -370,7 +372,7 @@ class AbsEncoder(ABC):
|
|
|
370
372
|
hf_split: str,
|
|
371
373
|
hf_subset: str,
|
|
372
374
|
prompt_type: PromptType | None = None,
|
|
373
|
-
**kwargs:
|
|
375
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
374
376
|
) -> Array:
|
|
375
377
|
"""Encodes the given sentences using the encoder.
|
|
376
378
|
|
mteb/models/instruct_wrapper.py
CHANGED
|
@@ -92,7 +92,7 @@ def instruct_wrapper(
|
|
|
92
92
|
logger.info(
|
|
93
93
|
f"Using instruction: '{instruction}' for task: '{task_metadata.name}'"
|
|
94
94
|
)
|
|
95
|
-
embeddings = super().encode( # type: ignore[safe-super]
|
|
95
|
+
embeddings = super().encode( # type: ignore[safe-super,call-arg]
|
|
96
96
|
_inputs, # type: ignore[arg-type]
|
|
97
97
|
instruction=instruction,
|
|
98
98
|
*args,
|
mteb/models/model_implementations/bm25.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any
|
|
3
2
|
|
|
4
3
|
from mteb._create_dataloaders import _create_text_queries_dataloader
|
|
5
4
|
from mteb._requires_package import requires_package
|
|
@@ -8,6 +7,7 @@ from mteb.models.model_meta import ModelMeta
|
|
|
8
7
|
from mteb.models.models_protocols import SearchProtocol
|
|
9
8
|
from mteb.types import (
|
|
10
9
|
CorpusDatasetType,
|
|
10
|
+
EncodeKwargs,
|
|
11
11
|
InstructionDatasetType,
|
|
12
12
|
QueryDatasetType,
|
|
13
13
|
RetrievalOutputType,
|
|
@@ -49,7 +49,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
|
|
|
49
49
|
task_metadata: TaskMetadata,
|
|
50
50
|
hf_split: str,
|
|
51
51
|
hf_subset: str,
|
|
52
|
-
encode_kwargs:
|
|
52
|
+
encode_kwargs: EncodeKwargs,
|
|
53
53
|
) -> None:
|
|
54
54
|
logger.info("Encoding Corpus...")
|
|
55
55
|
corpus_texts = [
|
|
@@ -74,7 +74,7 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
|
|
|
74
74
|
hf_split: str,
|
|
75
75
|
hf_subset: str,
|
|
76
76
|
top_k: int,
|
|
77
|
-
encode_kwargs:
|
|
77
|
+
encode_kwargs: EncodeKwargs,
|
|
78
78
|
instructions: InstructionDatasetType | None = None,
|
|
79
79
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
80
80
|
) -> RetrievalOutputType:
|
mteb/models/model_implementations/mxbai_models.py
CHANGED
|
@@ -2,7 +2,10 @@ from mteb.models.model_meta import (
|
|
|
2
2
|
ModelMeta,
|
|
3
3
|
ScoringFunction,
|
|
4
4
|
)
|
|
5
|
-
from mteb.models.sentence_transformer_wrapper import
|
|
5
|
+
from mteb.models.sentence_transformer_wrapper import (
|
|
6
|
+
CrossEncoderWrapper,
|
|
7
|
+
sentence_transformers_loader,
|
|
8
|
+
)
|
|
6
9
|
|
|
7
10
|
mixedbread_training_data = {
|
|
8
11
|
# from correspondence:
|
|
@@ -122,3 +125,117 @@ mxbai_embed_xsmall_v1 = ModelMeta(
|
|
|
122
125
|
url={https://www.mixedbread.ai/blog/mxbai-embed-xsmall-v1},
|
|
123
126
|
}""",
|
|
124
127
|
)
|
|
128
|
+
|
|
129
|
+
mxbai_rerank_xsmall_v1 = ModelMeta(
|
|
130
|
+
loader=CrossEncoderWrapper,
|
|
131
|
+
name="mixedbread-ai/mxbai-rerank-xsmall-v1",
|
|
132
|
+
revision="b5c6e9da73abc3711f593f705371cdbe9e0fe422",
|
|
133
|
+
release_date="2024-02-29",
|
|
134
|
+
languages=["eng-Latn"],
|
|
135
|
+
n_parameters=70830337,
|
|
136
|
+
memory_usage_mb=135.0,
|
|
137
|
+
max_tokens=512,
|
|
138
|
+
embed_dim=None,
|
|
139
|
+
license="apache-2.0",
|
|
140
|
+
open_weights=True,
|
|
141
|
+
public_training_code=None,
|
|
142
|
+
public_training_data=None,
|
|
143
|
+
framework=[
|
|
144
|
+
"PyTorch",
|
|
145
|
+
"Sentence Transformers",
|
|
146
|
+
"Transformers",
|
|
147
|
+
"ONNX",
|
|
148
|
+
"safetensors",
|
|
149
|
+
],
|
|
150
|
+
reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-xsmall-v1",
|
|
151
|
+
similarity_fn_name=None,
|
|
152
|
+
use_instructions=None,
|
|
153
|
+
training_datasets=None,
|
|
154
|
+
adapted_from=None,
|
|
155
|
+
superseded_by=None,
|
|
156
|
+
modalities=["text"],
|
|
157
|
+
model_type=["cross-encoder"],
|
|
158
|
+
citation="""@online{rerank2024mxbai,
|
|
159
|
+
title={Boost Your Search With The Crispy Mixedbread Rerank Models},
|
|
160
|
+
author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
|
|
161
|
+
year={2024},
|
|
162
|
+
url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
|
|
163
|
+
}""",
|
|
164
|
+
contacts=None,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
mxbai_rerank_base_v1 = ModelMeta(
|
|
168
|
+
loader=CrossEncoderWrapper,
|
|
169
|
+
name="mixedbread-ai/mxbai-rerank-base-v1",
|
|
170
|
+
revision="800f24c113213a187e65bde9db00c15a2bb12738",
|
|
171
|
+
release_date="2024-02-29",
|
|
172
|
+
languages=["eng-Latn"],
|
|
173
|
+
n_parameters=184422913,
|
|
174
|
+
memory_usage_mb=352.0,
|
|
175
|
+
max_tokens=512,
|
|
176
|
+
embed_dim=None,
|
|
177
|
+
license="apache-2.0",
|
|
178
|
+
open_weights=True,
|
|
179
|
+
public_training_code=None,
|
|
180
|
+
public_training_data=None,
|
|
181
|
+
framework=[
|
|
182
|
+
"PyTorch",
|
|
183
|
+
"Sentence Transformers",
|
|
184
|
+
"Transformers",
|
|
185
|
+
"ONNX",
|
|
186
|
+
"safetensors",
|
|
187
|
+
],
|
|
188
|
+
reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-base-v1",
|
|
189
|
+
similarity_fn_name=None,
|
|
190
|
+
use_instructions=None,
|
|
191
|
+
training_datasets=None,
|
|
192
|
+
adapted_from=None,
|
|
193
|
+
superseded_by=None,
|
|
194
|
+
modalities=["text"],
|
|
195
|
+
model_type=["cross-encoder"],
|
|
196
|
+
citation="""@online{rerank2024mxbai,
|
|
197
|
+
title={Boost Your Search With The Crispy Mixedbread Rerank Models},
|
|
198
|
+
author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
|
|
199
|
+
year={2024},
|
|
200
|
+
url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
|
|
201
|
+
}""",
|
|
202
|
+
contacts=None,
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
mxbai_rerank_large_v1 = ModelMeta(
|
|
206
|
+
loader=CrossEncoderWrapper,
|
|
207
|
+
name="mixedbread-ai/mxbai-rerank-large-v1",
|
|
208
|
+
revision="98f655841d5caf0b16eaff79c2b4ca109d920d17",
|
|
209
|
+
release_date="2024-02-29",
|
|
210
|
+
languages=["eng-Latn"],
|
|
211
|
+
n_parameters=435062785,
|
|
212
|
+
memory_usage_mb=830.0,
|
|
213
|
+
max_tokens=512,
|
|
214
|
+
embed_dim=None,
|
|
215
|
+
license="apache-2.0",
|
|
216
|
+
open_weights=True,
|
|
217
|
+
public_training_code=None,
|
|
218
|
+
public_training_data=None,
|
|
219
|
+
framework=[
|
|
220
|
+
"PyTorch",
|
|
221
|
+
"Sentence Transformers",
|
|
222
|
+
"Transformers",
|
|
223
|
+
"ONNX",
|
|
224
|
+
"safetensors",
|
|
225
|
+
],
|
|
226
|
+
reference="https://huggingface.co/mixedbread-ai/mxbai-rerank-large-v1",
|
|
227
|
+
similarity_fn_name=None,
|
|
228
|
+
use_instructions=None,
|
|
229
|
+
training_datasets=None,
|
|
230
|
+
adapted_from=None,
|
|
231
|
+
superseded_by=None,
|
|
232
|
+
modalities=["text"],
|
|
233
|
+
model_type=["cross-encoder"],
|
|
234
|
+
citation="""@online{rerank2024mxbai,
|
|
235
|
+
title={Boost Your Search With The Crispy Mixedbread Rerank Models},
|
|
236
|
+
author={Aamir Shakir and Darius Koenig and Julius Lipp and Sean Lee},
|
|
237
|
+
year={2024},
|
|
238
|
+
url={https://www.mixedbread.ai/blog/mxbai-rerank-v1},
|
|
239
|
+
}""",
|
|
240
|
+
contacts=None,
|
|
241
|
+
)
|
mteb/models/model_implementations/octen_models.py
CHANGED
|
@@ -163,6 +163,36 @@ _PREDEFINED_PROMPTS = {
|
|
|
163
163
|
"German1Retrieval": "Given a query, retrieve relevant passages",
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
+
Octen_Embedding_0B6 = ModelMeta(
|
|
167
|
+
loader=InstructSentenceTransformerModel,
|
|
168
|
+
loader_kwargs=dict(
|
|
169
|
+
instruction_template=instruction_template,
|
|
170
|
+
apply_instruction_to_passages=True,
|
|
171
|
+
prompts_dict=_PREDEFINED_PROMPTS,
|
|
172
|
+
max_seq_length=18480,
|
|
173
|
+
model_kwargs={"torch_dtype": "bfloat16"},
|
|
174
|
+
),
|
|
175
|
+
name="bflhc/Octen-Embedding-0.6B",
|
|
176
|
+
languages=multilingual_langs,
|
|
177
|
+
open_weights=True,
|
|
178
|
+
revision="1a00a4e837bd788f6f8d91bc43201a5e52cf8ef8",
|
|
179
|
+
release_date="2026-01-10",
|
|
180
|
+
n_parameters=595776512,
|
|
181
|
+
memory_usage_mb=1136,
|
|
182
|
+
embed_dim=1024,
|
|
183
|
+
max_tokens=32768,
|
|
184
|
+
license="apache-2.0",
|
|
185
|
+
reference="https://huggingface.co/bflhc/Octen-Embedding-0.6B",
|
|
186
|
+
similarity_fn_name="cosine",
|
|
187
|
+
framework=["Sentence Transformers", "PyTorch", "safetensors"],
|
|
188
|
+
use_instructions=True,
|
|
189
|
+
public_training_code=None,
|
|
190
|
+
public_training_data=None,
|
|
191
|
+
training_datasets=training_data,
|
|
192
|
+
citation=OCTEN_CITATION,
|
|
193
|
+
adapted_from="Qwen/Qwen3-Embedding-0.6B",
|
|
194
|
+
)
|
|
195
|
+
|
|
166
196
|
Octen_Embedding_4B = ModelMeta(
|
|
167
197
|
loader=InstructSentenceTransformerModel,
|
|
168
198
|
loader_kwargs=dict(
|
mteb/models/model_implementations/pylate_models.py
CHANGED
|
@@ -19,6 +19,7 @@ from mteb.types import (
|
|
|
19
19
|
Array,
|
|
20
20
|
BatchedInput,
|
|
21
21
|
CorpusDatasetType,
|
|
22
|
+
EncodeKwargs,
|
|
22
23
|
PromptType,
|
|
23
24
|
QueryDatasetType,
|
|
24
25
|
RetrievalOutputType,
|
|
@@ -45,7 +46,7 @@ class PylateSearchEncoder:
|
|
|
45
46
|
task_metadata: TaskMetadata,
|
|
46
47
|
hf_split: str,
|
|
47
48
|
hf_subset: str,
|
|
48
|
-
encode_kwargs:
|
|
49
|
+
encode_kwargs: EncodeKwargs,
|
|
49
50
|
) -> None:
|
|
50
51
|
"""Index the corpus for retrieval.
|
|
51
52
|
|
|
@@ -78,7 +79,7 @@ class PylateSearchEncoder:
|
|
|
78
79
|
hf_split: str,
|
|
79
80
|
hf_subset: str,
|
|
80
81
|
top_k: int,
|
|
81
|
-
encode_kwargs:
|
|
82
|
+
encode_kwargs: EncodeKwargs,
|
|
82
83
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
83
84
|
) -> RetrievalOutputType:
|
|
84
85
|
queries_dataloader = create_dataloader(
|
|
@@ -136,7 +137,7 @@ class PylateSearchEncoder:
|
|
|
136
137
|
hf_subset: str,
|
|
137
138
|
hf_split: str,
|
|
138
139
|
top_k: int,
|
|
139
|
-
encode_kwargs:
|
|
140
|
+
encode_kwargs: EncodeKwargs,
|
|
140
141
|
) -> dict[str, list[tuple[float, str]]]:
|
|
141
142
|
from pylate import indexes, retrieve
|
|
142
143
|
|
|
@@ -200,7 +201,7 @@ class PylateSearchEncoder:
|
|
|
200
201
|
task_metadata: TaskMetadata,
|
|
201
202
|
hf_subset: str,
|
|
202
203
|
hf_split: str,
|
|
203
|
-
encode_kwargs:
|
|
204
|
+
encode_kwargs: EncodeKwargs,
|
|
204
205
|
) -> dict[str, list[tuple[float, str]]]:
|
|
205
206
|
"""Rerank with PyLate's rank.rerank using per-query candidates.
|
|
206
207
|
|
mteb/models/models_protocols.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
2
2
|
|
|
3
3
|
from torch.utils.data import DataLoader
|
|
4
|
+
from typing_extensions import Unpack
|
|
4
5
|
|
|
5
6
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
6
7
|
from mteb.types import (
|
|
7
8
|
Array,
|
|
8
9
|
BatchedInput,
|
|
9
10
|
CorpusDatasetType,
|
|
11
|
+
EncodeKwargs,
|
|
10
12
|
PromptType,
|
|
11
13
|
QueryDatasetType,
|
|
12
14
|
RetrievalOutputType,
|
|
@@ -28,7 +30,7 @@ class SearchProtocol(Protocol):
|
|
|
28
30
|
task_metadata: TaskMetadata,
|
|
29
31
|
hf_split: str,
|
|
30
32
|
hf_subset: str,
|
|
31
|
-
encode_kwargs:
|
|
33
|
+
encode_kwargs: EncodeKwargs,
|
|
32
34
|
) -> None:
|
|
33
35
|
"""Index the corpus for retrieval.
|
|
34
36
|
|
|
@@ -49,7 +51,7 @@ class SearchProtocol(Protocol):
|
|
|
49
51
|
hf_split: str,
|
|
50
52
|
hf_subset: str,
|
|
51
53
|
top_k: int,
|
|
52
|
-
encode_kwargs:
|
|
54
|
+
encode_kwargs: EncodeKwargs,
|
|
53
55
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
54
56
|
) -> RetrievalOutputType:
|
|
55
57
|
"""Search the corpus using the given queries.
|
|
@@ -108,7 +110,7 @@ class EncoderProtocol(Protocol):
|
|
|
108
110
|
hf_split: str,
|
|
109
111
|
hf_subset: str,
|
|
110
112
|
prompt_type: PromptType | None = None,
|
|
111
|
-
**kwargs:
|
|
113
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
112
114
|
) -> Array:
|
|
113
115
|
"""Encodes the given sentences using the encoder.
|
|
114
116
|
|
|
@@ -214,7 +216,7 @@ class CrossEncoderProtocol(Protocol):
|
|
|
214
216
|
hf_split: str,
|
|
215
217
|
hf_subset: str,
|
|
216
218
|
prompt_type: PromptType | None = None,
|
|
217
|
-
**kwargs:
|
|
219
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
218
220
|
) -> Array:
|
|
219
221
|
"""Predicts relevance scores for pairs of inputs. Note that, unlike the encoder, the cross-encoder can compare across inputs.
|
|
220
222
|
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -14,6 +14,7 @@ from mteb.types import (
|
|
|
14
14
|
Array,
|
|
15
15
|
BatchedInput,
|
|
16
16
|
CorpusDatasetType,
|
|
17
|
+
EncodeKwargs,
|
|
17
18
|
PromptType,
|
|
18
19
|
QueryDatasetType,
|
|
19
20
|
RetrievalOutputType,
|
|
@@ -50,7 +51,7 @@ class SearchEncoderWrapper:
|
|
|
50
51
|
task_metadata: TaskMetadata,
|
|
51
52
|
hf_split: str,
|
|
52
53
|
hf_subset: str,
|
|
53
|
-
encode_kwargs:
|
|
54
|
+
encode_kwargs: EncodeKwargs,
|
|
54
55
|
) -> None:
|
|
55
56
|
"""Index the corpus for retrieval.
|
|
56
57
|
|
|
@@ -88,7 +89,7 @@ class SearchEncoderWrapper:
|
|
|
88
89
|
hf_split: str,
|
|
89
90
|
hf_subset: str,
|
|
90
91
|
top_k: int,
|
|
91
|
-
encode_kwargs:
|
|
92
|
+
encode_kwargs: EncodeKwargs,
|
|
92
93
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
93
94
|
) -> RetrievalOutputType:
|
|
94
95
|
"""Search the corpus for the given queries.
|
|
@@ -215,7 +216,7 @@ class SearchEncoderWrapper:
|
|
|
215
216
|
hf_subset: str,
|
|
216
217
|
hf_split: str,
|
|
217
218
|
top_k: int,
|
|
218
|
-
encode_kwargs:
|
|
219
|
+
encode_kwargs: EncodeKwargs,
|
|
219
220
|
) -> dict[str, list[tuple[float, str]]]:
|
|
220
221
|
logger.info("Encoding Corpus in batches (this might take a while)...")
|
|
221
222
|
if self.task_corpus is None:
|
|
@@ -318,7 +319,7 @@ class SearchEncoderWrapper:
|
|
|
318
319
|
task_metadata: TaskMetadata,
|
|
319
320
|
hf_subset: str,
|
|
320
321
|
hf_split: str,
|
|
321
|
-
encode_kwargs:
|
|
322
|
+
encode_kwargs: EncodeKwargs,
|
|
322
323
|
) -> dict[str, list[tuple[float, str]]]:
|
|
323
324
|
"""Rerank documents based on pre-ranked documents.
|
|
324
325
|
|
|
@@ -470,7 +471,7 @@ class SearchCrossEncoderWrapper:
|
|
|
470
471
|
task_metadata: TaskMetadata,
|
|
471
472
|
hf_split: str,
|
|
472
473
|
hf_subset: str,
|
|
473
|
-
encode_kwargs:
|
|
474
|
+
encode_kwargs: EncodeKwargs,
|
|
474
475
|
) -> None:
|
|
475
476
|
"""Index the corpus for retrieval.
|
|
476
477
|
|
|
@@ -491,7 +492,7 @@ class SearchCrossEncoderWrapper:
|
|
|
491
492
|
hf_split: str,
|
|
492
493
|
hf_subset: str,
|
|
493
494
|
top_k: int,
|
|
494
|
-
encode_kwargs:
|
|
495
|
+
encode_kwargs: EncodeKwargs,
|
|
495
496
|
top_ranked: TopRankedDocumentsType | None = None,
|
|
496
497
|
) -> RetrievalOutputType:
|
|
497
498
|
"""Search the corpus using the given queries.
|
mteb/models/sentence_transformer_wrapper.py
CHANGED
|
@@ -8,10 +8,11 @@ import numpy as np
|
|
|
8
8
|
import torch
|
|
9
9
|
from packaging.version import Version
|
|
10
10
|
from torch.utils.data import DataLoader
|
|
11
|
+
from typing_extensions import Unpack
|
|
11
12
|
|
|
12
13
|
from mteb._log_once import LogOnce
|
|
13
14
|
from mteb.models import ModelMeta
|
|
14
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
15
|
+
from mteb.types import Array, BatchedInput, EncodeKwargs, PromptType
|
|
15
16
|
|
|
16
17
|
from .abs_encoder import AbsEncoder
|
|
17
18
|
|
|
@@ -122,7 +123,7 @@ class SentenceTransformerEncoderWrapper(AbsEncoder):
|
|
|
122
123
|
hf_split: str,
|
|
123
124
|
hf_subset: str,
|
|
124
125
|
prompt_type: PromptType | None = None,
|
|
125
|
-
**kwargs:
|
|
126
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
126
127
|
) -> Array:
|
|
127
128
|
"""Encodes the given sentences using the encoder.
|
|
128
129
|
|
|
@@ -201,7 +202,7 @@ class SentenceTransformerMultimodalEncoderWrapper(SentenceTransformerEncoderWrap
|
|
|
201
202
|
hf_split: str,
|
|
202
203
|
hf_subset: str,
|
|
203
204
|
prompt_type: PromptType | None = None,
|
|
204
|
-
**kwargs:
|
|
205
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
205
206
|
) -> Array:
|
|
206
207
|
"""Encodes the given sentences using the encoder.
|
|
207
208
|
|
|
@@ -292,7 +293,7 @@ class CrossEncoderWrapper:
|
|
|
292
293
|
hf_split: str,
|
|
293
294
|
hf_subset: str,
|
|
294
295
|
prompt_type: PromptType | None = None,
|
|
295
|
-
**kwargs:
|
|
296
|
+
**kwargs: Unpack[EncodeKwargs],
|
|
296
297
|
) -> Array:
|
|
297
298
|
"""Predicts relevance scores for pairs of inputs. Note that, unlike the encoder, the cross-encoder can compare across inputs.
|
|
298
299
|
|