mteb 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +2 -0
- mteb/_evaluators/retrieval_metrics.py +0 -9
- mteb/abstasks/_stratification.py +1 -1
- mteb/abstasks/abstask.py +6 -1
- mteb/abstasks/dataset_card_template.md +1 -1
- mteb/abstasks/retrieval.py +2 -1
- mteb/abstasks/retrieval_dataset_loaders.py +1 -1
- mteb/abstasks/task_metadata.py +1 -1
- mteb/benchmarks/benchmarks/benchmarks.py +9 -13
- mteb/benchmarks/get_benchmark.py +1 -1
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
- mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
- mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
- mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
- mteb/languages/check_language_code.py +11 -3
- mteb/languages/language_scripts.py +4 -0
- mteb/leaderboard/app.py +1 -1
- mteb/leaderboard/benchmark_selector.py +1 -0
- mteb/leaderboard/text_segments.py +1 -1
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +1 -3
- mteb/models/model_implementations/bmretriever_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/ibm_granite_models.py +1 -1
- mteb/models/model_implementations/inf_models.py +3 -3
- mteb/models/model_implementations/jina_models.py +12 -2
- mteb/models/model_implementations/llm2vec_models.py +1 -1
- mteb/models/model_implementations/misc_models.py +2 -2
- mteb/models/model_implementations/mxbai_models.py +1 -1
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/salesforce_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
- mteb/models/model_implementations/voyage_v.py +9 -9
- mteb/results/task_result.py +6 -8
- mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
- mteb/tasks/classification/mya/myanmar_news.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +18 -4
- mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
- mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
- mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
- mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/rus/__init__.py +11 -2
- mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
- mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/METADATA +5 -5
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/RECORD +86 -91
- mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
- mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
- mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
- mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/top_level.txt +0 -0
|
@@ -20,9 +20,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
|
|
|
20
20
|
corpus[split] = split_dataset.map(
|
|
21
21
|
lambda x, idx: {
|
|
22
22
|
"id": f"corpus-{split}-{idx}",
|
|
23
|
-
"text": None,
|
|
24
23
|
"modality": "image",
|
|
25
|
-
# "image": None,
|
|
26
24
|
},
|
|
27
25
|
with_indices=True,
|
|
28
26
|
remove_columns=[
|
|
@@ -37,9 +35,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
|
|
|
37
35
|
queries[split] = split_dataset.map(
|
|
38
36
|
lambda x, idx: {
|
|
39
37
|
"id": f"query-{split}-{idx}",
|
|
40
|
-
# "text": None,
|
|
41
38
|
"modality": "text",
|
|
42
|
-
"image": None,
|
|
43
39
|
},
|
|
44
40
|
with_indices=True,
|
|
45
41
|
remove_columns=[
|
|
@@ -24,7 +24,6 @@ def _load_data(
|
|
|
24
24
|
lambda x: {
|
|
25
25
|
"id": f"query-{split}-{x['query-id']}",
|
|
26
26
|
"text": x["query"],
|
|
27
|
-
"image": None,
|
|
28
27
|
"modality": "text",
|
|
29
28
|
},
|
|
30
29
|
remove_columns=["query-id", "query"],
|
|
@@ -40,7 +39,6 @@ def _load_data(
|
|
|
40
39
|
corpus_ds = corpus_ds.map(
|
|
41
40
|
lambda x: {
|
|
42
41
|
"id": f"corpus-{split}-{x['corpus-id']}",
|
|
43
|
-
"text": None,
|
|
44
42
|
"modality": "image",
|
|
45
43
|
},
|
|
46
44
|
remove_columns=["corpus-id"],
|
|
@@ -7,7 +7,7 @@ class JaGovFaqsRetrieval(AbsTaskRetrieval):
|
|
|
7
7
|
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="JaGovFaqsRetrieval",
|
|
10
|
-
description="JaGovFaqs is a dataset consisting of FAQs
|
|
10
|
+
description="JaGovFaqs is a dataset consisting of FAQs manually extracted from the website of Japanese bureaus. The dataset consists of 22k FAQs, where the queries (questions) and corpus (answers) have been shuffled, and the goal is to match the answer with the question.",
|
|
11
11
|
reference="https://github.com/sbintuitions/JMTEB",
|
|
12
12
|
dataset={
|
|
13
13
|
"path": "mteb/JaGovFaqsRetrieval",
|
|
@@ -132,7 +132,7 @@ _LANGUAGES = [
|
|
|
132
132
|
|
|
133
133
|
|
|
134
134
|
def get_lang_pairs() -> dict[str, list[str]]:
|
|
135
|
-
# add pairs with same
|
|
135
|
+
# add pairs with same language as the source and target
|
|
136
136
|
# add pairs with english as source or target
|
|
137
137
|
lang_pairs = {}
|
|
138
138
|
for x in _LANGUAGES:
|
|
@@ -72,7 +72,6 @@ def _load_single_language(
|
|
|
72
72
|
lambda x: {
|
|
73
73
|
"id": f"query-{split}-{x['query-id']}",
|
|
74
74
|
"text": x["query"],
|
|
75
|
-
"image": None,
|
|
76
75
|
"modality": "text",
|
|
77
76
|
},
|
|
78
77
|
remove_columns=["query-id", "query"],
|
|
@@ -87,7 +86,6 @@ def _load_single_language(
|
|
|
87
86
|
corpus_ds = corpus_ds.map(
|
|
88
87
|
lambda x: {
|
|
89
88
|
"id": f"corpus-{split}-{x['corpus-id']}",
|
|
90
|
-
"text": None,
|
|
91
89
|
"modality": "image",
|
|
92
90
|
},
|
|
93
91
|
remove_columns=["corpus-id"],
|
|
@@ -92,7 +92,7 @@ class MIRACLRetrievalHardNegativesV2(AbsTaskRetrieval):
|
|
|
92
92
|
"MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval "
|
|
93
93
|
"dataset that focuses on search across 18 different languages. The hard negative version has been "
|
|
94
94
|
"created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
|
|
95
|
-
"V2 uses a more appropriate prompt rather than the default prompt for retrieval."
|
|
95
|
+
"V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
|
|
96
96
|
),
|
|
97
97
|
dataset={
|
|
98
98
|
"path": "mteb/MIRACLRetrievalHardNegatives",
|
|
@@ -30,7 +30,7 @@ _LANGUAGES = {
|
|
|
30
30
|
def _load_miracl_data(
|
|
31
31
|
path: str,
|
|
32
32
|
langs: list,
|
|
33
|
-
splits: str,
|
|
33
|
+
splits: list[str],
|
|
34
34
|
revision: str | None = None,
|
|
35
35
|
):
|
|
36
36
|
corpus = {lang: dict.fromkeys(splits) for lang in langs}
|
|
@@ -65,9 +65,7 @@ def _load_miracl_data(
|
|
|
65
65
|
images_data = images_data.map(
|
|
66
66
|
lambda x: {
|
|
67
67
|
"id": imgid2docid[str(x["file_name"])],
|
|
68
|
-
# "modality": "text",
|
|
69
68
|
"modality": "image",
|
|
70
|
-
"text": None,
|
|
71
69
|
},
|
|
72
70
|
remove_columns=["file_name"],
|
|
73
71
|
)
|
|
@@ -86,7 +84,6 @@ def _load_miracl_data(
|
|
|
86
84
|
"id": str(x["_id"]),
|
|
87
85
|
"text": x["text"],
|
|
88
86
|
"modality": "text",
|
|
89
|
-
"image": None,
|
|
90
87
|
},
|
|
91
88
|
remove_columns=["_id"],
|
|
92
89
|
)
|
|
@@ -108,10 +105,6 @@ def _load_miracl_data(
|
|
|
108
105
|
relevant_docs[lang][split][query_id] = {}
|
|
109
106
|
relevant_docs[lang][split][query_id][doc_id] = score
|
|
110
107
|
|
|
111
|
-
corpus = datasets.DatasetDict(corpus)
|
|
112
|
-
queries = datasets.DatasetDict(queries)
|
|
113
|
-
relevant_docs = datasets.DatasetDict(relevant_docs)
|
|
114
|
-
|
|
115
108
|
return corpus, queries, relevant_docs
|
|
116
109
|
|
|
117
110
|
|
|
@@ -156,7 +149,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
|
|
|
156
149
|
|
|
157
150
|
self.corpus, self.queries, self.relevant_docs = _load_miracl_data(
|
|
158
151
|
path=self.metadata.dataset["path"],
|
|
159
|
-
splits=self.metadata.eval_splits
|
|
152
|
+
splits=self.metadata.eval_splits,
|
|
160
153
|
langs=self.hf_subsets,
|
|
161
154
|
revision=self.metadata.dataset["revision"],
|
|
162
155
|
)
|
|
@@ -37,7 +37,6 @@ def _load_data(
|
|
|
37
37
|
lambda x: {
|
|
38
38
|
"id": f"query-{split}-{x['query-id']}",
|
|
39
39
|
"text": x["query"],
|
|
40
|
-
"image": None,
|
|
41
40
|
"modality": "text",
|
|
42
41
|
},
|
|
43
42
|
remove_columns=["query-id", "query"],
|
|
@@ -52,7 +51,6 @@ def _load_data(
|
|
|
52
51
|
corpus_ds = corpus_ds.map(
|
|
53
52
|
lambda x: {
|
|
54
53
|
"id": f"corpus-{split}-{x['corpus-id']}",
|
|
55
|
-
"text": None,
|
|
56
54
|
"modality": "image",
|
|
57
55
|
},
|
|
58
56
|
remove_columns=["corpus-id"],
|
|
@@ -34,7 +34,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
|
|
|
34
34
|
lang_corpus = lang_data.map(
|
|
35
35
|
lambda x: {
|
|
36
36
|
"id": "corpus-" + x["image_id"],
|
|
37
|
-
"text": None,
|
|
38
37
|
"modality": "image",
|
|
39
38
|
"image": x["image"],
|
|
40
39
|
},
|
|
@@ -60,7 +59,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
|
|
|
60
59
|
"id": query_id,
|
|
61
60
|
"text": caption,
|
|
62
61
|
"modality": "text",
|
|
63
|
-
"image": None,
|
|
64
62
|
}
|
|
65
63
|
)
|
|
66
64
|
if query_id not in relevant_docs[lang][split]:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from datasets import DatasetDict, load_dataset
|
|
1
|
+
from datasets import DatasetDict, Image, load_dataset
|
|
2
2
|
|
|
3
3
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
4
4
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
@@ -16,7 +16,7 @@ _LANGUAGES = {
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _load_xflickrco_data(
|
|
19
|
-
path: str, langs: list, splits: str, revision: str | None = None
|
|
19
|
+
path: str, langs: list, splits: list[str], revision: str | None = None
|
|
20
20
|
):
|
|
21
21
|
corpus = {lang: dict.fromkeys(splits) for lang in langs}
|
|
22
22
|
queries = {lang: dict.fromkeys(splits) for lang in langs}
|
|
@@ -32,22 +32,23 @@ def _load_xflickrco_data(
|
|
|
32
32
|
lang_corpus = lang_data.map(
|
|
33
33
|
lambda x: {
|
|
34
34
|
"id": "corpus-" + x["id"],
|
|
35
|
-
"text": None,
|
|
36
35
|
"modality": "image",
|
|
37
|
-
"image": x["image"]
|
|
36
|
+
"image": x["image"],
|
|
38
37
|
},
|
|
39
38
|
remove_columns=["sentences"],
|
|
40
39
|
)
|
|
40
|
+
lang_corpus = lang_corpus.cast_column("image", Image())
|
|
41
41
|
|
|
42
42
|
lang_queries = lang_data.map(
|
|
43
43
|
lambda x: {
|
|
44
44
|
"id": "query-" + x["id"],
|
|
45
45
|
"text": x["sentences"],
|
|
46
46
|
"modality": "text",
|
|
47
|
-
"image": None,
|
|
48
47
|
},
|
|
49
48
|
remove_columns=["sentences"],
|
|
50
49
|
)
|
|
50
|
+
# None values
|
|
51
|
+
lang_queries = lang_queries.remove_columns(["image"])
|
|
51
52
|
|
|
52
53
|
relevant_docs[lang][split] = {}
|
|
53
54
|
for row in lang_data:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from datasets import Dataset, DatasetDict, load_dataset
|
|
1
|
+
from datasets import Dataset, DatasetDict, Image, load_dataset
|
|
2
2
|
|
|
3
3
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
4
4
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
@@ -61,9 +61,8 @@ def _load_xm3600_data(
|
|
|
61
61
|
lang_corpus = lang_data.map(
|
|
62
62
|
lambda x: {
|
|
63
63
|
"id": "corpus-" + x["image_id"],
|
|
64
|
-
"text": None,
|
|
65
64
|
"modality": "image",
|
|
66
|
-
"image": x["image"]
|
|
65
|
+
"image": x["image"],
|
|
67
66
|
},
|
|
68
67
|
remove_columns=[
|
|
69
68
|
"captions",
|
|
@@ -73,6 +72,7 @@ def _load_xm3600_data(
|
|
|
73
72
|
"image_id",
|
|
74
73
|
],
|
|
75
74
|
)
|
|
75
|
+
lang_corpus = lang_corpus.cast_column("image", Image())
|
|
76
76
|
|
|
77
77
|
corpus[lang][split] = lang_corpus
|
|
78
78
|
|
|
@@ -90,7 +90,6 @@ def _load_xm3600_data(
|
|
|
90
90
|
"id": query_id,
|
|
91
91
|
"text": caption,
|
|
92
92
|
"modality": "text",
|
|
93
|
-
"image": None,
|
|
94
93
|
}
|
|
95
94
|
)
|
|
96
95
|
if query_id not in relevant_docs[lang][split]:
|
|
@@ -59,9 +59,9 @@ Fishel, Mark},
|
|
|
59
59
|
self.data_loaded = True
|
|
60
60
|
|
|
61
61
|
def dataset_transform(self) -> None:
|
|
62
|
-
"""And transform to a retrieval
|
|
62
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
63
63
|
|
|
64
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
64
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
65
65
|
self.queries = dict[query_id, str] #id => query
|
|
66
66
|
self.relevant_docs = dict[query_id, dict[[doc_id, score]]
|
|
67
67
|
"""
|
|
@@ -46,9 +46,9 @@ class SNLRetrieval(AbsTaskRetrieval):
|
|
|
46
46
|
self.data_loaded = True
|
|
47
47
|
|
|
48
48
|
def dataset_transform(self) -> None:
|
|
49
|
-
"""And transform to a retrieval
|
|
49
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
50
50
|
|
|
51
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
51
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
52
52
|
self.queries = dict[query_id, str] #id => query
|
|
53
53
|
self.relevant_docs = dict[query_id, dict[[doc_id, score]]
|
|
54
54
|
"""
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
from .ria_news_retrieval import
|
|
1
|
+
from .ria_news_retrieval import (
|
|
2
|
+
RiaNewsRetrieval,
|
|
3
|
+
RiaNewsRetrievalHardNegatives,
|
|
4
|
+
RiaNewsRetrievalHardNegativesV2,
|
|
5
|
+
)
|
|
2
6
|
from .ru_bq_retrieval import RuBQRetrieval
|
|
3
7
|
|
|
4
|
-
__all__ = [
|
|
8
|
+
__all__ = [
|
|
9
|
+
"RiaNewsRetrieval",
|
|
10
|
+
"RiaNewsRetrievalHardNegatives",
|
|
11
|
+
"RiaNewsRetrievalHardNegativesV2",
|
|
12
|
+
"RuBQRetrieval",
|
|
13
|
+
]
|
|
@@ -1,6 +1,31 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
+
_ria_news_metadata = dict(
|
|
5
|
+
reference="https://arxiv.org/abs/1901.07786",
|
|
6
|
+
type="Retrieval",
|
|
7
|
+
category="t2t",
|
|
8
|
+
modalities=["text"],
|
|
9
|
+
eval_splits=["test"],
|
|
10
|
+
eval_langs=["rus-Cyrl"],
|
|
11
|
+
main_score="ndcg_at_10",
|
|
12
|
+
date=("2010-01-01", "2014-12-31"),
|
|
13
|
+
domains=["News", "Written"],
|
|
14
|
+
task_subtypes=["Article retrieval"],
|
|
15
|
+
license="cc-by-nc-nd-4.0",
|
|
16
|
+
annotations_creators="derived",
|
|
17
|
+
dialect=[],
|
|
18
|
+
sample_creation="found",
|
|
19
|
+
bibtex_citation=r"""
|
|
20
|
+
@inproceedings{gavrilov2018self,
|
|
21
|
+
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
22
|
+
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
23
|
+
title = {Self-Attentive Model for Headline Generation},
|
|
24
|
+
year = {2019},
|
|
25
|
+
}
|
|
26
|
+
""",
|
|
27
|
+
)
|
|
28
|
+
|
|
4
29
|
|
|
5
30
|
class RiaNewsRetrieval(AbsTaskRetrieval):
|
|
6
31
|
ignore_identical_ids = True
|
|
@@ -12,29 +37,8 @@ class RiaNewsRetrieval(AbsTaskRetrieval):
|
|
|
12
37
|
"revision": "82374b0bbacda6114f39ff9c5b925fa1512ca5d7",
|
|
13
38
|
},
|
|
14
39
|
description="News article retrieval by headline. Based on Rossiya Segodnya dataset.",
|
|
15
|
-
reference="https://arxiv.org/abs/1901.07786",
|
|
16
|
-
type="Retrieval",
|
|
17
|
-
category="t2t",
|
|
18
|
-
modalities=["text"],
|
|
19
|
-
eval_splits=["test"],
|
|
20
|
-
eval_langs=["rus-Cyrl"],
|
|
21
|
-
main_score="ndcg_at_10",
|
|
22
|
-
date=("2010-01-01", "2014-12-31"),
|
|
23
|
-
domains=["News", "Written"],
|
|
24
|
-
task_subtypes=["Article retrieval"],
|
|
25
|
-
license="cc-by-nc-nd-4.0",
|
|
26
|
-
annotations_creators="derived",
|
|
27
|
-
dialect=[],
|
|
28
|
-
sample_creation="found",
|
|
29
|
-
bibtex_citation=r"""
|
|
30
|
-
@inproceedings{gavrilov2018self,
|
|
31
|
-
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
32
|
-
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
33
|
-
title = {Self-Attentive Model for Headline Generation},
|
|
34
|
-
year = {2019},
|
|
35
|
-
}
|
|
36
|
-
""",
|
|
37
40
|
prompt={"query": "Given a news title, retrieve relevant news article"},
|
|
41
|
+
**_ria_news_metadata,
|
|
38
42
|
)
|
|
39
43
|
|
|
40
44
|
|
|
@@ -48,27 +52,27 @@ class RiaNewsRetrievalHardNegatives(AbsTaskRetrieval):
|
|
|
48
52
|
"revision": "d42860a6c15f0a2c4485bda10c6e5b641fdfe479",
|
|
49
53
|
},
|
|
50
54
|
description="News article retrieval by headline. Based on Rossiya Segodnya dataset. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
|
|
51
|
-
reference="https://arxiv.org/abs/1901.07786",
|
|
52
|
-
type="Retrieval",
|
|
53
|
-
category="t2t",
|
|
54
|
-
modalities=["text"],
|
|
55
|
-
eval_splits=["test"],
|
|
56
|
-
eval_langs=["rus-Cyrl"],
|
|
57
|
-
main_score="ndcg_at_10",
|
|
58
|
-
date=("2010-01-01", "2014-12-31"),
|
|
59
|
-
domains=["News", "Written"],
|
|
60
|
-
task_subtypes=["Article retrieval"],
|
|
61
|
-
license="cc-by-nc-nd-4.0",
|
|
62
|
-
annotations_creators="derived",
|
|
63
|
-
dialect=[],
|
|
64
|
-
sample_creation="found",
|
|
65
|
-
bibtex_citation=r"""
|
|
66
|
-
@inproceedings{gavrilov2018self,
|
|
67
|
-
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
68
|
-
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
69
|
-
title = {Self-Attentive Model for Headline Generation},
|
|
70
|
-
year = {2019},
|
|
71
|
-
}
|
|
72
|
-
""",
|
|
73
55
|
adapted_from=["RiaNewsRetrieval"],
|
|
56
|
+
superseded_by="RiaNewsRetrievalHardNegatives.v2",
|
|
57
|
+
**_ria_news_metadata,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RiaNewsRetrievalHardNegativesV2(AbsTaskRetrieval):
|
|
62
|
+
ignore_identical_ids = True
|
|
63
|
+
|
|
64
|
+
metadata = TaskMetadata(
|
|
65
|
+
name="RiaNewsRetrievalHardNegatives.v2",
|
|
66
|
+
dataset={
|
|
67
|
+
"path": "mteb/RiaNewsRetrieval_test_top_250_only_w_correct-v2",
|
|
68
|
+
"revision": "d42860a6c15f0a2c4485bda10c6e5b641fdfe479",
|
|
69
|
+
},
|
|
70
|
+
description=(
|
|
71
|
+
"News article retrieval by headline. Based on Rossiya Segodnya dataset. "
|
|
72
|
+
"The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
|
|
73
|
+
"V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
|
|
74
|
+
),
|
|
75
|
+
adapted_from=["RiaNewsRetrieval"],
|
|
76
|
+
prompt={"query": "Given a news title, retrieve relevant news article"},
|
|
77
|
+
**_ria_news_metadata,
|
|
74
78
|
)
|
|
@@ -42,9 +42,9 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
|
|
|
42
42
|
)
|
|
43
43
|
|
|
44
44
|
def load_data(self, **kwargs) -> None:
|
|
45
|
-
"""And transform to a retrieval
|
|
45
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
46
46
|
|
|
47
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
47
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
48
48
|
self.queries = dict[query_id, str] #id => query
|
|
49
49
|
self.relevant_docs = dict[query_id, dict[[doc_id, score]]
|
|
50
50
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.1.
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -204,10 +204,10 @@ For more on how to use the CLI check out the [related documentation](https://emb
|
|
|
204
204
|
[Tasks]: https://embeddings-benchmark.github.io/mteb/overview/available_tasks/any2anymultilingualretrieval/
|
|
205
205
|
[Benchmarks]: https://embeddings-benchmark.github.io/mteb/overview/available_benchmarks/
|
|
206
206
|
[Models]: https://embeddings-benchmark.github.io/mteb/overview/available_models/text/
|
|
207
|
-
[Contributing]:
|
|
208
|
-
[Adding a model]:
|
|
209
|
-
[Adding a dataset]:
|
|
210
|
-
[Adding a benchmark]:
|
|
207
|
+
[Contributing]: https://embeddings-benchmark.github.io/mteb/CONTRIBUTING/
|
|
208
|
+
[Adding a model]: https://embeddings-benchmark.github.io/mteb/contributing/adding_a_model/
|
|
209
|
+
[Adding a dataset]: https://embeddings-benchmark.github.io/mteb/contributing/adding_a_dataset/
|
|
210
|
+
[Adding a benchmark]: https://embeddings-benchmark.github.io/mteb/contributing/adding_a_benchmark/
|
|
211
211
|
[Leaderboard]: https://huggingface.co/spaces/mteb/leaderboard
|
|
212
212
|
|
|
213
213
|
## Citing
|