mteb 2.6.6__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +7 -3
- mteb/_evaluators/any_sts_evaluator.py +6 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +2 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -5
- mteb/_evaluators/pair_classification_evaluator.py +2 -2
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/sklearn_evaluator.py +3 -3
- mteb/_evaluators/text/bitext_mining_evaluator.py +5 -3
- mteb/_evaluators/text/summarization_evaluator.py +3 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/abstask.py +3 -2
- mteb/abstasks/aggregated_task.py +3 -3
- mteb/abstasks/classification.py +3 -3
- mteb/abstasks/clustering.py +2 -2
- mteb/abstasks/clustering_legacy.py +2 -2
- mteb/abstasks/image/image_text_pair_classification.py +2 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/pair_classification.py +2 -2
- mteb/abstasks/retrieval.py +15 -14
- mteb/abstasks/sts.py +2 -2
- mteb/abstasks/text/bitext_mining.py +3 -3
- mteb/abstasks/text/summarization.py +2 -2
- mteb/abstasks/zeroshot_classification.py +3 -2
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +24 -0
- mteb/cli/build_cli.py +2 -1
- mteb/deprecated_evaluator.py +3 -3
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/evaluate.py +5 -3
- mteb/models/abs_encoder.py +3 -1
- mteb/models/instruct_wrapper.py +1 -1
- mteb/models/model_implementations/bm25.py +3 -3
- mteb/models/model_implementations/jina_clip.py +46 -8
- mteb/models/model_implementations/mxbai_models.py +118 -1
- mteb/models/model_implementations/nvidia_models.py +73 -5
- mteb/models/model_implementations/octen_models.py +30 -0
- mteb/models/model_implementations/pylate_models.py +5 -4
- mteb/models/model_implementations/sentence_transformers_models.py +66 -0
- mteb/models/models_protocols.py +6 -4
- mteb/models/search_wrappers.py +7 -6
- mteb/models/sentence_transformer_wrapper.py +5 -4
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/METADATA +1 -1
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/RECORD +55 -50
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/WHEEL +0 -0
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/top_level.txt +0 -0

mteb/models/model_implementations/pylate_models.py
CHANGED

@@ -19,6 +19,7 @@ from mteb.types import (
     Array,
     BatchedInput,
     CorpusDatasetType,
+    EncodeKwargs,
     PromptType,
     QueryDatasetType,
     RetrievalOutputType,
@@ -45,7 +46,7 @@ class PylateSearchEncoder:
         task_metadata: TaskMetadata,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> None:
         """Index the corpus for retrieval.

@@ -78,7 +79,7 @@ class PylateSearchEncoder:
         hf_split: str,
         hf_subset: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
     ) -> RetrievalOutputType:
         queries_dataloader = create_dataloader(
@@ -136,7 +137,7 @@ class PylateSearchEncoder:
         hf_subset: str,
         hf_split: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> dict[str, list[tuple[float, str]]]:
         from pylate import indexes, retrieve

@@ -200,7 +201,7 @@ class PylateSearchEncoder:
         task_metadata: TaskMetadata,
         hf_subset: str,
         hf_split: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> dict[str, list[tuple[float, str]]]:
         """Rerank with PyLate's rank.rerank using per-query candidates.

mteb/models/model_implementations/sentence_transformers_models.py
CHANGED

@@ -1,5 +1,7 @@
 """Implementation of Sentence Transformers model validated in MTEB."""

+import numpy as np
+
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.models.sentence_transformer_wrapper import (
     SentenceTransformerEncoderWrapper,
@@ -773,3 +775,67 @@ gtr_t5_base = ModelMeta(
     },
     citation=GTR_CITATION,
 )
+
+static_retrieval_mrl_en_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="sentence-transformers/static-retrieval-mrl-en-v1",
+    revision="f60985c706f192d45d218078e49e5a8b6f15283a",
+    release_date="2024-10-24",
+    languages=["eng-Latn"],
+    n_parameters=3_125_4528,
+    memory_usage_mb=119,
+    max_tokens=np.inf,
+    embed_dim=1024,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1/blob/main/train.py",
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/sentence-transformers/static-retrieval-mrl-en-v1",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=False,
+    training_datasets={
+        "MSMARCO",
+        # gooaq
+        # s2orc
+        # allnli
+        # paq
+        # trivia-qa
+        # swim-ir-monolingual
+        # PubMedQA
+        # swim
+        "MIRACLRetrieval",
+        "MultiLongDocRetrieval",
+        "MrTidyRetrieval",
+    },
+    modalities=["text"],
+    model_type=["dense"],
+)
+
+multi_qa_mpnet_base_dot_v1 = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="sentence-transformers/multi-qa-mpnet-base-dot-v1",
+    revision="3af7c6da5b3e1bea796ef6c97fe237538cbe6e7f",
+    release_date="2021-08-23",
+    languages=["eng-Latn"],
+    n_parameters=109486978,
+    memory_usage_mb=418.0,
+    max_tokens=512,
+    embed_dim=768,
+    license=None,
+    open_weights=True,
+    public_training_code="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1/blob/main/train_script.py",
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1",
+    similarity_fn_name=ScoringFunction.DOT_PRODUCT,
+    use_instructions=False,
+    training_datasets={
+        "MSMARCO",
+        "YahooAnswersTopicsClassification",
+        "NQ",
+    },
+    adapted_from="microsoft/mpnet-base",
+    modalities=["text"],
+    model_type=["dense"],
+)
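The two ModelMeta registrations above only add metadata for checkpoints that already exist on the Hugging Face Hub; loading and evaluating them goes through mteb's normal entry points. A minimal sketch of how one of them could be run, assuming mteb's public get_model/get_tasks helpers ("NFCorpus" is just an example task name, and the exact evaluate call may vary between 2.x releases):

import mteb

# Resolve the newly registered checkpoint via its ModelMeta entry.
model = mteb.get_model("sentence-transformers/static-retrieval-mrl-en-v1")

# Pick any retrieval task and evaluate (mteb 2.x-style entry point; check your installed version).
tasks = mteb.get_tasks(tasks=["NFCorpus"])
results = mteb.evaluate(model, tasks=tasks)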
mteb/models/models_protocols.py
CHANGED

@@ -1,12 +1,14 @@
 from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

 from torch.utils.data import DataLoader
+from typing_extensions import Unpack

 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.types import (
     Array,
     BatchedInput,
     CorpusDatasetType,
+    EncodeKwargs,
     PromptType,
     QueryDatasetType,
     RetrievalOutputType,
@@ -28,7 +30,7 @@ class SearchProtocol(Protocol):
         task_metadata: TaskMetadata,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> None:
         """Index the corpus for retrieval.

@@ -49,7 +51,7 @@
         hf_split: str,
         hf_subset: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.
@@ -108,7 +110,7 @@ class EncoderProtocol(Protocol):
         hf_split: str,
         hf_subset: str,
         prompt_type: PromptType | None = None,
-        **kwargs:
+        **kwargs: Unpack[EncodeKwargs],
     ) -> Array:
         """Encodes the given sentences using the encoder.

@@ -214,7 +216,7 @@ class CrossEncoderProtocol(Protocol):
         hf_split: str,
         hf_subset: str,
         prompt_type: PromptType | None = None,
-        **kwargs:
+        **kwargs: Unpack[EncodeKwargs],
     ) -> Array:
         """Predicts relevance scores for pairs of inputs. Note that, unlike the encoder, the cross-encoder can compare across inputs.

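For downstream users, the practical effect of this typing change is that custom models implementing these protocols can annotate their **kwargs instead of leaving them untyped, and a type checker will then restrict callers to the keys defined by EncodeKwargs. A minimal sketch of a hypothetical encoder following the updated EncoderProtocol signature (the class, its dummy embedding logic, and the assumption that each batch is a mapping of equal-length columns are illustrative, not mteb code; parameter names not shown in the diff excerpt are assumptions):

import numpy as np
from torch.utils.data import DataLoader
from typing_extensions import Unpack

from mteb.abstasks.task_metadata import TaskMetadata
from mteb.types import Array, BatchedInput, EncodeKwargs, PromptType


class MyEncoder:
    """Hypothetical encoder intended to satisfy mteb's EncoderProtocol."""

    def encode(
        self,
        inputs: DataLoader[BatchedInput],
        task_metadata: TaskMetadata,
        hf_split: str,
        hf_subset: str,
        prompt_type: PromptType | None = None,
        **kwargs: Unpack[EncodeKwargs],  # only batch_size / show_progress_bar are accepted
    ) -> Array:
        batch_size = kwargs.get("batch_size", 32)  # key is known to the type checker
        if kwargs.get("show_progress_bar", False):
            print(f"encoding {hf_subset}/{hf_split} with batch_size={batch_size}")
        # Dummy embeddings: one 8-dim zero vector per input row (assumes dict-like batches).
        rows = sum(len(next(iter(batch.values()))) for batch in inputs)
        return np.zeros((rows, 8), dtype=np.float32)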
mteb/models/search_wrappers.py
CHANGED

@@ -14,6 +14,7 @@ from mteb.types import (
     Array,
     BatchedInput,
     CorpusDatasetType,
+    EncodeKwargs,
     PromptType,
     QueryDatasetType,
     RetrievalOutputType,
@@ -50,7 +51,7 @@ class SearchEncoderWrapper:
         task_metadata: TaskMetadata,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> None:
         """Index the corpus for retrieval.

@@ -88,7 +89,7 @@
         hf_split: str,
         hf_subset: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus for the given queries.
@@ -215,7 +216,7 @@
         hf_subset: str,
         hf_split: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> dict[str, list[tuple[float, str]]]:
         logger.info("Encoding Corpus in batches (this might take a while)...")
         if self.task_corpus is None:
@@ -318,7 +319,7 @@
         task_metadata: TaskMetadata,
         hf_subset: str,
         hf_split: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> dict[str, list[tuple[float, str]]]:
         """Rerank documents based on pre-ranked documents.

@@ -470,7 +471,7 @@ class SearchCrossEncoderWrapper:
         task_metadata: TaskMetadata,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
     ) -> None:
         """Index the corpus for retrieval.

@@ -491,7 +492,7 @@
         hf_split: str,
         hf_subset: str,
         top_k: int,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.

mteb/models/sentence_transformer_wrapper.py
CHANGED

@@ -8,10 +8,11 @@ import numpy as np
 import torch
 from packaging.version import Version
 from torch.utils.data import DataLoader
+from typing_extensions import Unpack

 from mteb._log_once import LogOnce
 from mteb.models import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import Array, BatchedInput, EncodeKwargs, PromptType

 from .abs_encoder import AbsEncoder

@@ -122,7 +123,7 @@ class SentenceTransformerEncoderWrapper(AbsEncoder):
         hf_split: str,
         hf_subset: str,
         prompt_type: PromptType | None = None,
-        **kwargs:
+        **kwargs: Unpack[EncodeKwargs],
     ) -> Array:
         """Encodes the given sentences using the encoder.

@@ -201,7 +202,7 @@ class SentenceTransformerMultimodalEncoderWrapper(SentenceTransformerEncoderWrap
         hf_split: str,
         hf_subset: str,
         prompt_type: PromptType | None = None,
-        **kwargs:
+        **kwargs: Unpack[EncodeKwargs],
     ) -> Array:
         """Encodes the given sentences using the encoder.

@@ -292,7 +293,7 @@ class CrossEncoderWrapper:
         hf_split: str,
         hf_subset: str,
         prompt_type: PromptType | None = None,
-        **kwargs:
+        **kwargs: Unpack[EncodeKwargs],
     ) -> Array:
         """Predicts relevance scores for pairs of inputs. Note that, unlike the encoder, the cross-encoder can compare across inputs.

mteb/tasks/retrieval/kor/__init__.py
CHANGED

@@ -1,5 +1,19 @@
 from .auto_rag_retrieval import AutoRAGRetrieval
 from .ko_strategy_qa import KoStrategyQA
+from .kovidore2_bench_retrieval import (
+    KoVidore2CybersecurityRetrieval,
+    KoVidore2EconomicRetrieval,
+    KoVidore2EnergyRetrieval,
+    KoVidore2HrRetrieval,
+)
 from .squad_kor_v1_retrieval import SQuADKorV1Retrieval

-__all__ = [
+__all__ = [
+    "AutoRAGRetrieval",
+    "KoStrategyQA",
+    "KoVidore2CybersecurityRetrieval",
+    "KoVidore2EconomicRetrieval",
+    "KoVidore2EnergyRetrieval",
+    "KoVidore2HrRetrieval",
+    "SQuADKorV1Retrieval",
+]
mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py
ADDED

@@ -0,0 +1,142 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2CybersecurityRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
+            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EconomicRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-economic-mteb",
+            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2EnergyRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-energy-mteb",
+            "revision": "8c09a3d22b1fa3a7f5e815e9521da9b048754211",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class KoVidore2HrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="KoVidore2HrRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in korea, intended for complex-document understanding tasks.",
+        reference="https://github.com/whybe-choi/kovidore-data-generator",
+        dataset={
+            "path": "whybe-choi/kovidore-v2-hr-mteb",
+            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=["kor-Hang"],
+        main_score="ndcg_at_10",
+        date=("2025-12-21", "2026-01-06"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created",
+        bibtex_citation="""
+@misc{choi2026kovidorev2,
+  author = {Yongbin Choi},
+  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
+  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
+  url = {https://github.com/whybe-choi/kovidore-data-generator},
+  year = {2026},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
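Because the four tasks register themselves under the names above, they can be selected like any other mteb task once this version is installed. A minimal sketch, assuming mteb's get_tasks helper:

import mteb

# Select the new Korean ViDoRe v2 document-understanding retrieval tasks by name.
tasks = mteb.get_tasks(
    tasks=[
        "KoVidore2CybersecurityRetrieval",
        "KoVidore2EconomicRetrieval",
        "KoVidore2EnergyRetrieval",
        "KoVidore2HrRetrieval",
    ]
)
for task in tasks:
    # Each task carries the TaskMetadata defined above (main_score is ndcg_at_10).
    print(task.metadata.name, task.metadata.main_score)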
mteb/types/__init__.py
CHANGED

@@ -4,6 +4,7 @@ from ._encoder_io import (
     Conversation,
     ConversationTurn,
     CorpusDatasetType,
+    EncodeKwargs,
     InstructionDatasetType,
     PromptType,
     QueryDatasetType,
@@ -30,6 +31,7 @@ __all__ = [
     "Conversation",
     "ConversationTurn",
     "CorpusDatasetType",
+    "EncodeKwargs",
     "HFSubset",
     "ISOLanguage",
     "ISOLanguageScript",
mteb/types/_encoder_io.py
CHANGED

@@ -13,6 +13,18 @@ if TYPE_CHECKING:
     from PIL import Image


+class EncodeKwargs(TypedDict):
+    """Keyword arguments for encoding methods.
+
+    Attributes:
+        batch_size: The batch size to use for encoding.
+        show_progress_bar: Whether to show a progress bar during encoding.
+    """
+
+    batch_size: NotRequired[int]
+    show_progress_bar: NotRequired[bool]
+
+
 # --- Output types ---
 Array = np.ndarray | torch.Tensor
 """General array type, can be a numpy array or a torch tensor."""
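The EncodeKwargs TypedDict introduced here is what the Unpack[EncodeKwargs] annotations elsewhere in this diff refer to: **kwargs: Unpack[EncodeKwargs] tells a type checker that only the optional batch_size and show_progress_bar keys may be passed as extra keyword arguments (PEP 692). A minimal, self-contained sketch of the pattern (encode_batch is an illustrative function, not part of mteb):

from typing_extensions import NotRequired, TypedDict, Unpack


class EncodeKwargs(TypedDict):
    """Optional keyword arguments for encode methods (mirrors mteb.types.EncodeKwargs)."""

    batch_size: NotRequired[int]
    show_progress_bar: NotRequired[bool]


def encode_batch(texts: list[str], **kwargs: Unpack[EncodeKwargs]) -> list[list[float]]:
    batch_size = kwargs.get("batch_size", 32)
    if kwargs.get("show_progress_bar", False):
        print(f"encoding {len(texts)} texts in batches of {batch_size}")
    return [[0.0] * 4 for _ in texts]  # placeholder embeddings


encode_batch(["hello"], batch_size=16)      # fine
# encode_batch(["hello"], normalize=True)   # rejected by a PEP 692-aware type checker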
{mteb-2.6.6.dist-info → mteb-2.6.8.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.6.6
+Version: 2.6.8
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>