mteb 1.39.5__py3-none-any.whl → 1.39.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/_create_table.py +129 -0
- mteb/benchmarks/benchmark.py +9 -0
- mteb/benchmarks/benchmarks/benchmarks.py +5 -5
- mteb/evaluation/evaluators/RegressionEvaluator.py +0 -6
- mteb/leaderboard/table.py +1 -0
- mteb/tasks/Classification/__init__.py +1 -1
- mteb/tasks/Classification/{svk → slk}/SlovakMovieReviewSentimentClassification.py +2 -2
- mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py +77 -60
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/METADATA +1 -1
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/RECORD +14 -15
- mteb/tasks/Classification/svk/__init__.py +0 -0
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/WHEEL +0 -0
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/entry_points.txt +0 -0
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/licenses/LICENSE +0 -0
- {mteb-1.39.5.dist-info → mteb-1.39.7.dist-info}/top_level.txt +0 -0
mteb/benchmarks/_create_table.py
CHANGED
|
@@ -500,3 +500,132 @@ def _create_summary_table_mean_subset(
|
|
|
500
500
|
joint_table.insert(0, "Rank (Borda)", joint_table.pop("borda_rank"))
|
|
501
501
|
|
|
502
502
|
return joint_table
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _create_summary_table_mean_task_type(
|
|
506
|
+
benchmark_results: BenchmarkResults,
|
|
507
|
+
) -> pd.DataFrame:
|
|
508
|
+
"""Create summary table from BenchmarkResults.
|
|
509
|
+
|
|
510
|
+
Returns a DataFrame with one row per model containing summary statistics
|
|
511
|
+
and task type averages.
|
|
512
|
+
|
|
513
|
+
Args:
|
|
514
|
+
benchmark_results: BenchmarkResults object containing model results
|
|
515
|
+
|
|
516
|
+
Returns:
|
|
517
|
+
DataFrame with model summaries, ready for styling in the leaderboard
|
|
518
|
+
"""
|
|
519
|
+
data = benchmark_results.to_dataframe(format="long")
|
|
520
|
+
|
|
521
|
+
if data.empty:
|
|
522
|
+
no_results_frame = pd.DataFrame(
|
|
523
|
+
{"No results": ["You can try relaxing your criteria"]}
|
|
524
|
+
)
|
|
525
|
+
return no_results_frame
|
|
526
|
+
|
|
527
|
+
# Convert to DataFrame and pivot
|
|
528
|
+
per_task = data.pivot(index="model_name", columns="task_name", values="score")
|
|
529
|
+
|
|
530
|
+
# Remove models with no scores
|
|
531
|
+
to_remove = per_task.isna().all(axis="columns")
|
|
532
|
+
if to_remove.all():
|
|
533
|
+
no_results_frame = pd.DataFrame(
|
|
534
|
+
{"No results": ["You can try relaxing your criteria"]}
|
|
535
|
+
)
|
|
536
|
+
return no_results_frame
|
|
537
|
+
|
|
538
|
+
models_to_remove = list(per_task[to_remove].index)
|
|
539
|
+
per_task = per_task.drop(models_to_remove, axis=0)
|
|
540
|
+
|
|
541
|
+
# Calculate means by task type
|
|
542
|
+
mean_per_type = _get_means_per_types(per_task)
|
|
543
|
+
mean_per_type = mean_per_type.pivot(
|
|
544
|
+
index="model_name", columns="task_type", values="score"
|
|
545
|
+
)
|
|
546
|
+
mean_per_type.columns = [
|
|
547
|
+
_split_on_capital(column) for column in mean_per_type.columns
|
|
548
|
+
]
|
|
549
|
+
|
|
550
|
+
# Calculate overall means
|
|
551
|
+
typed_mean = mean_per_type.mean(skipna=False, axis=1)
|
|
552
|
+
|
|
553
|
+
# Build joint table
|
|
554
|
+
joint_table = mean_per_type.copy()
|
|
555
|
+
joint_table = joint_table.drop(models_to_remove, axis=0)
|
|
556
|
+
joint_table.insert(0, "mean_by_task_type", typed_mean)
|
|
557
|
+
joint_table = joint_table.sort_values("mean_by_task_type", ascending=False)
|
|
558
|
+
joint_table["borda_rank"] = _get_borda_rank(per_task)
|
|
559
|
+
joint_table["rank"] = [i + 1 for i in range(len(joint_table))]
|
|
560
|
+
joint_table = joint_table.reset_index()
|
|
561
|
+
|
|
562
|
+
# Add model metadata
|
|
563
|
+
model_metas = joint_table["model_name"].map(_failsafe_get_model_meta)
|
|
564
|
+
joint_table = joint_table[model_metas.notna()]
|
|
565
|
+
joint_table["model_link"] = model_metas.map(lambda m: m.reference)
|
|
566
|
+
|
|
567
|
+
# Insert model metadata columns
|
|
568
|
+
joint_table.insert(
|
|
569
|
+
1,
|
|
570
|
+
"Max Tokens",
|
|
571
|
+
model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
|
|
572
|
+
)
|
|
573
|
+
joint_table.insert(
|
|
574
|
+
1,
|
|
575
|
+
"Embedding Dimensions",
|
|
576
|
+
model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
|
|
577
|
+
)
|
|
578
|
+
joint_table.insert(
|
|
579
|
+
1,
|
|
580
|
+
"Number of Parameters",
|
|
581
|
+
model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
|
|
582
|
+
)
|
|
583
|
+
joint_table.insert(
|
|
584
|
+
1,
|
|
585
|
+
"Memory Usage (MB)",
|
|
586
|
+
model_metas.map(
|
|
587
|
+
lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
|
|
588
|
+
),
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
# Add zero-shot percentage
|
|
592
|
+
tasks = get_tasks(tasks=list(data["task_name"].unique()))
|
|
593
|
+
joint_table.insert(
|
|
594
|
+
1, "Zero-shot", model_metas.map(lambda m: m.zero_shot_percentage(tasks))
|
|
595
|
+
)
|
|
596
|
+
joint_table["Zero-shot"] = joint_table["Zero-shot"].fillna(-1)
|
|
597
|
+
|
|
598
|
+
# Clean up model names (remove HF organization)
|
|
599
|
+
joint_table["model_name"] = joint_table["model_name"].map(
|
|
600
|
+
lambda name: name.split("/")[-1]
|
|
601
|
+
)
|
|
602
|
+
|
|
603
|
+
# Add markdown links to model names
|
|
604
|
+
name_w_link = (
|
|
605
|
+
"[" + joint_table["model_name"] + "](" + joint_table["model_link"] + ")"
|
|
606
|
+
)
|
|
607
|
+
joint_table["model_name"] = joint_table["model_name"].mask(
|
|
608
|
+
joint_table["model_link"].notna(), name_w_link
|
|
609
|
+
)
|
|
610
|
+
joint_table = joint_table.drop(columns=["model_link"])
|
|
611
|
+
|
|
612
|
+
# Rename columns
|
|
613
|
+
joint_table = joint_table.rename(
|
|
614
|
+
columns={
|
|
615
|
+
"model_name": "Model",
|
|
616
|
+
"mean_by_task_type": "Mean (TaskType)",
|
|
617
|
+
"borda_rank": "Rank (Borda)",
|
|
618
|
+
}
|
|
619
|
+
)
|
|
620
|
+
|
|
621
|
+
if "Any Any Multilingual Retrieval" in joint_table.columns:
|
|
622
|
+
joint_table = joint_table.rename(
|
|
623
|
+
columns={"Any Any Multilingual Retrieval": "Multilingual Retrieval"}
|
|
624
|
+
)
|
|
625
|
+
if "Any Any Retrieval" in joint_table.columns:
|
|
626
|
+
joint_table = joint_table.rename(columns={"Any Any Retrieval": "Retrieval"})
|
|
627
|
+
|
|
628
|
+
# Move borda rank to front
|
|
629
|
+
joint_table.insert(0, "Rank", joint_table.pop("rank"))
|
|
630
|
+
|
|
631
|
+
return joint_table
|
mteb/benchmarks/benchmark.py
CHANGED
|
@@ -12,6 +12,7 @@ from mteb.benchmarks._create_table import (
|
|
|
12
12
|
_create_summary_table_from_benchmark_results,
|
|
13
13
|
_create_summary_table_mean_public_private,
|
|
14
14
|
_create_summary_table_mean_subset,
|
|
15
|
+
_create_summary_table_mean_task_type,
|
|
15
16
|
)
|
|
16
17
|
from mteb.load_results.load_results import load_results
|
|
17
18
|
|
|
@@ -107,3 +108,11 @@ class HUMEBenchmark(Benchmark):
|
|
|
107
108
|
) -> pd.DataFrame:
|
|
108
109
|
"""Create summary table. Called by the leaderboard app."""
|
|
109
110
|
return _create_summary_table_mean_subset(benchmark_results)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class MIEBBenchmark(Benchmark):
|
|
114
|
+
def _create_summary_table(
|
|
115
|
+
self, benchmark_results: BenchmarkResults
|
|
116
|
+
) -> pd.DataFrame:
|
|
117
|
+
"""Create summary table. Called by the leaderboard app."""
|
|
118
|
+
return _create_summary_table_mean_task_type(benchmark_results)
|
|
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Annotated
|
|
|
4
4
|
|
|
5
5
|
from pydantic import AnyUrl, BeforeValidator, TypeAdapter
|
|
6
6
|
|
|
7
|
-
from mteb.benchmarks.benchmark import Benchmark, HUMEBenchmark
|
|
7
|
+
from mteb.benchmarks.benchmark import Benchmark, HUMEBenchmark, MIEBBenchmark
|
|
8
8
|
from mteb.overview import MTEBTasks, get_task, get_tasks
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
@@ -1754,7 +1754,7 @@ MIEB_common_tasks = [
|
|
|
1754
1754
|
"WebQAT2TRetrieval",
|
|
1755
1755
|
]
|
|
1756
1756
|
|
|
1757
|
-
MIEB_ENG = Benchmark(
|
|
1757
|
+
MIEB_ENG = MIEBBenchmark(
|
|
1758
1758
|
name="MIEB(eng)",
|
|
1759
1759
|
display_name="Image-Text, English",
|
|
1760
1760
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-picture.svg",
|
|
@@ -1783,7 +1783,7 @@ MIEB_ENG = Benchmark(
|
|
|
1783
1783
|
""",
|
|
1784
1784
|
)
|
|
1785
1785
|
|
|
1786
|
-
MIEB_MULTILINGUAL = Benchmark(
|
|
1786
|
+
MIEB_MULTILINGUAL = MIEBBenchmark(
|
|
1787
1787
|
name="MIEB(Multilingual)",
|
|
1788
1788
|
display_name="Image-Text, Multilingual",
|
|
1789
1789
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-pictures.svg",
|
|
@@ -1818,7 +1818,7 @@ MIEB_MULTILINGUAL = Benchmark(
|
|
|
1818
1818
|
""",
|
|
1819
1819
|
)
|
|
1820
1820
|
|
|
1821
|
-
MIEB_LITE = Benchmark(
|
|
1821
|
+
MIEB_LITE = MIEBBenchmark(
|
|
1822
1822
|
name="MIEB(lite)",
|
|
1823
1823
|
display_name="Image-Text, Lite",
|
|
1824
1824
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-landscape.svg",
|
|
@@ -1902,7 +1902,7 @@ MIEB_LITE = Benchmark(
|
|
|
1902
1902
|
""",
|
|
1903
1903
|
)
|
|
1904
1904
|
|
|
1905
|
-
MIEB_IMG = Benchmark(
|
|
1905
|
+
MIEB_IMG = MIEBBenchmark(
|
|
1906
1906
|
name="MIEB(Img)",
|
|
1907
1907
|
display_name="Image only",
|
|
1908
1908
|
icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-pictures.svg",
|
|
@@ -53,19 +53,13 @@ class LinearRegressionEvaluator(Evaluator):
|
|
|
53
53
|
scores = {}
|
|
54
54
|
X_train = model.encode(
|
|
55
55
|
self.sentences_train,
|
|
56
|
-
model=model,
|
|
57
56
|
task_name=self.task_name,
|
|
58
|
-
hf_split="train",
|
|
59
|
-
hf_subset=self.hf_subset,
|
|
60
57
|
**encode_kwargs,
|
|
61
58
|
)
|
|
62
59
|
if test_cache is None:
|
|
63
60
|
X_test = model.encode(
|
|
64
61
|
self.sentences_test,
|
|
65
|
-
model=model,
|
|
66
62
|
task_name=self.task_name,
|
|
67
|
-
hf_split=self.hf_split,
|
|
68
|
-
hf_subset=self.hf_subset,
|
|
69
63
|
**encode_kwargs,
|
|
70
64
|
)
|
|
71
65
|
test_cache = X_test
|
mteb/leaderboard/table.py
CHANGED
|
@@ -150,11 +150,11 @@ from .sin.SinhalaNewsClassification import *
|
|
|
150
150
|
from .sin.SinhalaNewsSourceClassification import *
|
|
151
151
|
from .slk.CSFDSKMovieReviewSentimentClassification import *
|
|
152
152
|
from .slk.SlovakHateSpeechClassification import *
|
|
153
|
+
from .slk.SlovakMovieReviewSentimentClassification import *
|
|
153
154
|
from .slv.FrenkSlClassification import *
|
|
154
155
|
from .spa.SpanishNewsClassification import *
|
|
155
156
|
from .spa.SpanishSentimentClassification import *
|
|
156
157
|
from .ssw.SiswatiNewsClassification import *
|
|
157
|
-
from .svk.SlovakMovieReviewSentimentClassification import *
|
|
158
158
|
from .swa.SwahiliNewsClassification import *
|
|
159
159
|
from .swe.DalajClassification import *
|
|
160
160
|
from .swe.SwedishSentimentClassification import *
|
|
@@ -18,7 +18,7 @@ class SlovakMovieReviewSentimentClassification(AbsTaskClassification):
|
|
|
18
18
|
category="s2s",
|
|
19
19
|
modalities=["text"],
|
|
20
20
|
eval_splits=["test"],
|
|
21
|
-
eval_langs=["svk-Latn"],
|
|
21
|
+
eval_langs=["slk-Latn"],
|
|
22
22
|
main_score="accuracy",
|
|
23
23
|
date=("2002-05-21", "2020-03-05"),
|
|
24
24
|
dialect=[],
|
|
@@ -59,7 +59,7 @@ class SlovakMovieReviewSentimentClassificationV2(AbsTaskClassification):
|
|
|
59
59
|
category="s2s",
|
|
60
60
|
modalities=["text"],
|
|
61
61
|
eval_splits=["test"],
|
|
62
|
-
eval_langs=["svk-Latn"],
|
|
62
|
+
eval_langs=["slk-Latn"],
|
|
63
63
|
main_score="accuracy",
|
|
64
64
|
date=("2002-05-21", "2020-03-05"),
|
|
65
65
|
dialect=[],
|
|
@@ -30,6 +30,39 @@ _LANGUAGES = {
|
|
|
30
30
|
"zh": ["zho-Hans"],
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
_common_metadata = dict(
|
|
34
|
+
reference="http://miracl.ai",
|
|
35
|
+
type="Retrieval",
|
|
36
|
+
category="s2p",
|
|
37
|
+
modalities=["text"],
|
|
38
|
+
eval_splits=[_EVAL_SPLIT],
|
|
39
|
+
eval_langs=_LANGUAGES,
|
|
40
|
+
main_score="ndcg_at_10",
|
|
41
|
+
date=("2022-06-01", "2023-01-30"),
|
|
42
|
+
domains=["Encyclopaedic", "Written"],
|
|
43
|
+
task_subtypes=[],
|
|
44
|
+
license="cc-by-sa-4.0",
|
|
45
|
+
annotations_creators="expert-annotated",
|
|
46
|
+
dialect=[],
|
|
47
|
+
sample_creation="created",
|
|
48
|
+
bibtex_citation=r"""
|
|
49
|
+
@article{10.1162/tacl_a_00595,
|
|
50
|
+
abstract = {{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}},
|
|
51
|
+
author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy},
|
|
52
|
+
doi = {10.1162/tacl_a_00595},
|
|
53
|
+
eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf},
|
|
54
|
+
issn = {2307-387X},
|
|
55
|
+
journal = {Transactions of the Association for Computational Linguistics},
|
|
56
|
+
month = {09},
|
|
57
|
+
pages = {1114-1131},
|
|
58
|
+
title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}},
|
|
59
|
+
url = {https://doi.org/10.1162/tacl\_a\_00595},
|
|
60
|
+
volume = {11},
|
|
61
|
+
year = {2023},
|
|
62
|
+
}
|
|
63
|
+
""",
|
|
64
|
+
)
|
|
65
|
+
|
|
33
66
|
|
|
34
67
|
def _load_miracl_data(
|
|
35
68
|
path: str,
|
|
@@ -106,44 +139,15 @@ class MIRACLRetrieval(MultilingualTask, AbsTaskRetrieval):
|
|
|
106
139
|
metadata = TaskMetadata(
|
|
107
140
|
name="MIRACLRetrieval",
|
|
108
141
|
description="MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages.",
|
|
109
|
-
reference="http://miracl.ai",
|
|
110
142
|
dataset={
|
|
111
143
|
"path": "miracl/mmteb-miracl",
|
|
112
144
|
"revision": "main",
|
|
113
145
|
"trust_remote_code": True,
|
|
114
146
|
},
|
|
115
|
-
type="Retrieval",
|
|
116
|
-
category="s2p",
|
|
117
|
-
modalities=["text"],
|
|
118
|
-
eval_splits=[_EVAL_SPLIT],
|
|
119
|
-
eval_langs=_LANGUAGES,
|
|
120
|
-
main_score="ndcg_at_10",
|
|
121
|
-
date=("2022-06-01", "2023-01-30"),
|
|
122
|
-
domains=["Encyclopaedic", "Written"],
|
|
123
|
-
task_subtypes=[],
|
|
124
|
-
license="cc-by-sa-4.0",
|
|
125
|
-
annotations_creators="expert-annotated",
|
|
126
|
-
dialect=[],
|
|
127
|
-
sample_creation="created",
|
|
128
|
-
bibtex_citation=r"""
|
|
129
|
-
@article{10.1162/tacl_a_00595,
|
|
130
|
-
abstract = {{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}},
|
|
131
|
-
author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy},
|
|
132
|
-
doi = {10.1162/tacl_a_00595},
|
|
133
|
-
eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf},
|
|
134
|
-
issn = {2307-387X},
|
|
135
|
-
journal = {Transactions of the Association for Computational Linguistics},
|
|
136
|
-
month = {09},
|
|
137
|
-
pages = {1114-1131},
|
|
138
|
-
title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}},
|
|
139
|
-
url = {https://doi.org/10.1162/tacl\_a\_00595},
|
|
140
|
-
volume = {11},
|
|
141
|
-
year = {2023},
|
|
142
|
-
}
|
|
143
|
-
""",
|
|
144
147
|
prompt={
|
|
145
148
|
"query": "Given a question, retrieve Wikipedia passages that answer the question"
|
|
146
149
|
},
|
|
150
|
+
**_common_metadata,
|
|
147
151
|
)
|
|
148
152
|
|
|
149
153
|
def load_data(self, **kwargs):
|
|
@@ -300,45 +304,58 @@ def _load_miracl_data_hard_negatives(
|
|
|
300
304
|
|
|
301
305
|
|
|
302
306
|
class MIRACLRetrievalHardNegatives(MultilingualTask, AbsTaskRetrieval):
|
|
307
|
+
# in current version prompt for instruction models different from original task
|
|
308
|
+
superseded_by = "MIRACLRetrievalHardNegatives.v2"
|
|
309
|
+
|
|
303
310
|
metadata = TaskMetadata(
|
|
304
311
|
name="MIRACLRetrievalHardNegatives",
|
|
305
312
|
description="MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval dataset that focuses on search across 18 different languages. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
|
|
306
|
-
reference="http://miracl.ai",
|
|
307
313
|
dataset={
|
|
308
314
|
"path": "mteb/miracl-hard-negatives",
|
|
309
315
|
"revision": "95c8db7d4a6e9c1d8a60601afd63d553ae20a2eb",
|
|
310
316
|
"trust_remote_code": True,
|
|
311
317
|
},
|
|
312
|
-
type="Retrieval",
|
|
313
|
-
category="s2p",
|
|
314
|
-
modalities=["text"],
|
|
315
|
-
eval_splits=[_EVAL_SPLIT],
|
|
316
|
-
eval_langs=_LANGUAGES,
|
|
317
|
-
main_score="ndcg_at_10",
|
|
318
|
-
date=("2022-06-01", "2023-01-30"),
|
|
319
|
-
domains=["Encyclopaedic", "Written"],
|
|
320
|
-
task_subtypes=[],
|
|
321
|
-
license="cc-by-sa-4.0",
|
|
322
|
-
annotations_creators="expert-annotated",
|
|
323
|
-
dialect=[],
|
|
324
|
-
sample_creation="created",
|
|
325
|
-
bibtex_citation=r"""
|
|
326
|
-
@article{10.1162/tacl_a_00595,
|
|
327
|
-
abstract = {{MIRACL is a multilingual dataset for ad hoc retrieval across 18 languages that collectively encompass over three billion native speakers around the world. This resource is designed to support monolingual retrieval tasks, where the queries and the corpora are in the same language. In total, we have gathered over 726k high-quality relevance judgments for 78k queries over Wikipedia in these languages, where all annotations have been performed by native speakers hired by our team. MIRACL covers languages that are both typologically close as well as distant from 10 language families and 13 sub-families, associated with varying amounts of publicly available resources. Extensive automatic heuristic verification and manual assessments were performed during the annotation process to control data quality. In total, MIRACL represents an investment of around five person-years of human annotator effort. Our goal is to spur research on improving retrieval across a continuum of languages, thus enhancing information access capabilities for diverse populations around the world, particularly those that have traditionally been underserved. MIRACL is available at http://miracl.ai/.}},
|
|
328
|
-
author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy},
|
|
329
|
-
doi = {10.1162/tacl_a_00595},
|
|
330
|
-
eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00595/2157340/tacl\_a\_00595.pdf},
|
|
331
|
-
issn = {2307-387X},
|
|
332
|
-
journal = {Transactions of the Association for Computational Linguistics},
|
|
333
|
-
month = {09},
|
|
334
|
-
pages = {1114-1131},
|
|
335
|
-
title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}},
|
|
336
|
-
url = {https://doi.org/10.1162/tacl\_a\_00595},
|
|
337
|
-
volume = {11},
|
|
338
|
-
year = {2023},
|
|
339
|
-
}
|
|
340
|
-
""",
|
|
341
318
|
adapted_from=["MIRACLRetrieval"],
|
|
319
|
+
**_common_metadata,
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
def load_data(self, **kwargs):
|
|
323
|
+
if self.data_loaded:
|
|
324
|
+
return
|
|
325
|
+
|
|
326
|
+
self.corpus, self.queries, self.relevant_docs = (
|
|
327
|
+
_load_miracl_data_hard_negatives(
|
|
328
|
+
path=self.metadata.dataset["path"],
|
|
329
|
+
langs=self.hf_subsets,
|
|
330
|
+
splits=self.metadata_dict["eval_splits"],
|
|
331
|
+
cache_dir=kwargs.get("cache_dir", None),
|
|
332
|
+
revision=self.metadata.dataset["revision"],
|
|
333
|
+
trust_remote_code=self.metadata.dataset.get("trust_remote_code", False),
|
|
334
|
+
)
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
self.data_loaded = True
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
class MIRACLRetrievalHardNegativesV2(MultilingualTask, AbsTaskRetrieval):
|
|
341
|
+
metadata = TaskMetadata(
|
|
342
|
+
name="MIRACLRetrievalHardNegatives.v2",
|
|
343
|
+
description=(
|
|
344
|
+
"MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval "
|
|
345
|
+
"dataset that focuses on search across 18 different languages. The hard negative version has been "
|
|
346
|
+
"created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
|
|
347
|
+
"V2 uses a more appropriate prompt rather than the default prompt for retrieval."
|
|
348
|
+
),
|
|
349
|
+
dataset={
|
|
350
|
+
"path": "mteb/miracl-hard-negatives",
|
|
351
|
+
"revision": "95c8db7d4a6e9c1d8a60601afd63d553ae20a2eb",
|
|
352
|
+
"trust_remote_code": True,
|
|
353
|
+
},
|
|
354
|
+
prompt={
|
|
355
|
+
"query": "Given a question, retrieve Wikipedia passages that answer the question",
|
|
356
|
+
},
|
|
357
|
+
adapted_from=["MIRACLRetrieval"],
|
|
358
|
+
**_common_metadata,
|
|
342
359
|
)
|
|
343
360
|
|
|
344
361
|
def load_data(self, **kwargs):
|
|
@@ -48,11 +48,11 @@ mteb/abstasks/Image/AbsTaskVisualSTS.py,sha256=Gyke5MxDseNzZqz56it_tMI5jCNVdURrb
|
|
|
48
48
|
mteb/abstasks/Image/AbsTaskZeroShotClassification.py,sha256=BC_Ev8ldT3gd4om4tzkTjUTwr7GFw7vePosNLSCdgZU,5163
|
|
49
49
|
mteb/abstasks/Image/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
50
|
mteb/benchmarks/__init__.py,sha256=xfFZhvNbha5RS5dJL-j9cSYWdWckbZXHuM3ijmO7nCs,335
|
|
51
|
-
mteb/benchmarks/_create_table.py,sha256=
|
|
52
|
-
mteb/benchmarks/benchmark.py,sha256=
|
|
51
|
+
mteb/benchmarks/_create_table.py,sha256=9oc4szgHVNIAYTSAcR21DK3rot3Av-EXReDjEaOfSIQ,20758
|
|
52
|
+
mteb/benchmarks/benchmark.py,sha256=I_qOOSUYz3Mxa3sSv3LfbxUESsw9yevFYtvzEkzskEg,4177
|
|
53
53
|
mteb/benchmarks/get_benchmark.py,sha256=WxjPpjxcLtkAlxKnD9cQ2MtwpkxsIlvAtwwhE73EwsA,2744
|
|
54
54
|
mteb/benchmarks/benchmarks/__init__.py,sha256=AdowVnEur-DVwKHtk_FoV3rIlmX9_lXoSsx95ZqkkqE,2131
|
|
55
|
-
mteb/benchmarks/benchmarks/benchmarks.py,sha256=
|
|
55
|
+
mteb/benchmarks/benchmarks/benchmarks.py,sha256=fmjPlHTNgeJPvclT9K2F9CKfN43L-2Q1eJkTyRjLE2A,83899
|
|
56
56
|
mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=M4oRsbYe8daKlZZF0yUpEgxt7I4STVUD_ouIdp2RsRE,10640
|
|
57
57
|
mteb/evaluation/LangMapping.py,sha256=_ipd6Cg_Za2xFS50t4DEAPxCVpOZPCs8c9lhp7Kv1q0,2831
|
|
58
58
|
mteb/evaluation/MTEB.py,sha256=eN2xqG_rTmG4NRVltn0-8uBU0pK3y1Q0NAVPX9axsL0,32002
|
|
@@ -63,7 +63,7 @@ mteb/evaluation/evaluators/ClusteringEvaluator.py,sha256=8AhwqPiX_XPn8WuFLlAMuy7
|
|
|
63
63
|
mteb/evaluation/evaluators/Evaluator.py,sha256=MXRcAAdg9SSKLIUf-rouDyCWz4zzUaGd_FXdB6aJYY0,896
|
|
64
64
|
mteb/evaluation/evaluators/InstructionRetrievalEvaluator.py,sha256=xfT0C1g1FdOSonl6H7BR_8XE05N9VMxryCwm3zTfNLI,1525
|
|
65
65
|
mteb/evaluation/evaluators/PairClassificationEvaluator.py,sha256=CZfYYVBwaqoc5nQ3A9JAIAgeNEtHgruhDG4fyVJlhBQ,9068
|
|
66
|
-
mteb/evaluation/evaluators/RegressionEvaluator.py,sha256=
|
|
66
|
+
mteb/evaluation/evaluators/RegressionEvaluator.py,sha256=ZZl3jAxOYFibNlD1bPX_uvJNntn4LTsHJiYtMxYMRFQ,2110
|
|
67
67
|
mteb/evaluation/evaluators/RerankingEvaluator.py,sha256=E_XojL1my7iqNjtP8dtytWaGoVQEdCptIuPv3JDx6nc,22364
|
|
68
68
|
mteb/evaluation/evaluators/RetrievalEvaluator.py,sha256=TCYwl1SbNp54QqhG1Zxlztgye8RgOGwpqgfeBfz2fHU,23558
|
|
69
69
|
mteb/evaluation/evaluators/STSEvaluator.py,sha256=z59KeCDj_BzoMIgANQ_brFBumaqXNLfLEFX4V5NkGKs,3481
|
|
@@ -82,7 +82,7 @@ mteb/leaderboard/__init__.py,sha256=rb6vKGZV31m6x33LpKruPAwIBlQ5QzJVT0Y0do9DUSc,
|
|
|
82
82
|
mteb/leaderboard/app.py,sha256=N6Ox5gzZHXdLaIyl8Yf1_Wu4yoOtyl0y6aVe_kzszts,31522
|
|
83
83
|
mteb/leaderboard/benchmark_selector.py,sha256=dhZ9x3sqCkdcw7hJbqVr6PHSucPMbsGAgGPT6F_sow8,7196
|
|
84
84
|
mteb/leaderboard/figures.py,sha256=wcSJWDJO4oATZlao_mY-oLDpj0QKvJp8pUhf4FbrI6s,7465
|
|
85
|
-
mteb/leaderboard/table.py,sha256=
|
|
85
|
+
mteb/leaderboard/table.py,sha256=1Yx2xlKzn--RSLi1GAAwGx9mpGBcZVU5ECr191G385s,7339
|
|
86
86
|
mteb/leaderboard/text_segments.py,sha256=fokW080HKfLbyHH-HAcoNQ1PoW4K1IQSr-MukHjEVhU,6609
|
|
87
87
|
mteb/load_results/__init__.py,sha256=vtxMb4Zz2Jpn4GjY59qhsTyQpfX3z57-b0iLSqm5pBQ,250
|
|
88
88
|
mteb/load_results/benchmark_results.py,sha256=XQSCCnJ967A0tjIZoOVVmjRWxYA663c6Xcob-t3psRY,31671
|
|
@@ -233,7 +233,7 @@ mteb/tasks/BitextMining/srn/SRNCorpusBitextMining.py,sha256=1KY3kQd31Wz2GPKTyshw
|
|
|
233
233
|
mteb/tasks/BitextMining/srn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
234
|
mteb/tasks/BitextMining/vie/VieMedEVBitextMining.py,sha256=KsEY8e6Mx2SjGlV5_JGqPrzcFxJxnSXh7fWt32zefR8,2760
|
|
235
235
|
mteb/tasks/BitextMining/vie/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
236
|
-
mteb/tasks/Classification/__init__.py,sha256=
|
|
236
|
+
mteb/tasks/Classification/__init__.py,sha256=8gKeMJfGQIAEXRAGsWwtbL43cY1DiRJYP66AsfUWXnI,9191
|
|
237
237
|
mteb/tasks/Classification/ara/AJGT.py,sha256=GNhn0ddAmjhEsH2e-7Yva8ap0W53E0TNfGSTMY4vOqs,3142
|
|
238
238
|
mteb/tasks/Classification/ara/HotelReviewSentimentClassification.py,sha256=Tb3uzfYHI5fbrC726IuXU0o7N0fCgiKFqTAycsBKZe8,3101
|
|
239
239
|
mteb/tasks/Classification/ara/OnlineStoreReviewSentimentClassification.py,sha256=oenRFbJbIONJ04k4_tPHlUMpOdniE9P6_1v1gy9wlCI,2502
|
|
@@ -426,6 +426,7 @@ mteb/tasks/Classification/sin/SinhalaNewsSourceClassification.py,sha256=ShaqgmTF
|
|
|
426
426
|
mteb/tasks/Classification/sin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
427
427
|
mteb/tasks/Classification/slk/CSFDSKMovieReviewSentimentClassification.py,sha256=vCHn3qAb2hkZ1uVM7Pz1xNOsmDg6VjIJ91bJ5QB9O9U,3625
|
|
428
428
|
mteb/tasks/Classification/slk/SlovakHateSpeechClassification.py,sha256=CgcXv4yvZ6wv82eE5d7P35PjDvxvWHlvoWg64kcMSGk,2400
|
|
429
|
+
mteb/tasks/Classification/slk/SlovakMovieReviewSentimentClassification.py,sha256=Dj-2AxHElAITkL7nhMuv0OBC7HEImRYBT_8x_2Csshg,3365
|
|
429
430
|
mteb/tasks/Classification/slk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
430
431
|
mteb/tasks/Classification/slv/FrenkSlClassification.py,sha256=VjOwvpTDhEsKTR1zIWR3M998ZuWUz6MNm4_30ui0zYE,3266
|
|
431
432
|
mteb/tasks/Classification/slv/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -434,8 +435,6 @@ mteb/tasks/Classification/spa/SpanishSentimentClassification.py,sha256=gEiTSir_3
|
|
|
434
435
|
mteb/tasks/Classification/spa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
435
436
|
mteb/tasks/Classification/ssw/SiswatiNewsClassification.py,sha256=YipX9ACHi25m-LDzQ5CxIRNgOL62oAT5NBapeQkzhKc,3110
|
|
436
437
|
mteb/tasks/Classification/ssw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
437
|
-
mteb/tasks/Classification/svk/SlovakMovieReviewSentimentClassification.py,sha256=ByiHVk6EpL2QLxHS3LAw4JBivN5hHXtXPdzTBb71ZUw,3365
|
|
438
|
-
mteb/tasks/Classification/svk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
439
438
|
mteb/tasks/Classification/swa/SwahiliNewsClassification.py,sha256=Iml296uPAsYNpMuRZTcTAa-BCvkh6WXAw0iemKS0yIg,3644
|
|
440
439
|
mteb/tasks/Classification/swa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
441
440
|
mteb/tasks/Classification/swe/DalajClassification.py,sha256=kWC4mzpRtP6iC3K9jNK0vKsO1zk7poIw_r7smCQ8OP8,4228
|
|
@@ -980,7 +979,7 @@ mteb/tasks/Retrieval/multilingual/CUREv1Retrieval.py,sha256=1icEnr1bhZTSwu05MTsx
|
|
|
980
979
|
mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT19.py,sha256=d_lcwJFEOzF_XWESftIW7dY6VAOTAHCvsFAIGqcuYSw,4885
|
|
981
980
|
mteb/tasks/Retrieval/multilingual/CrossLingualSemanticDiscriminationWMT21.py,sha256=aPJ7Q3q6bF08fYO8JF7lRHIIzD42Rr0s0-SnL7TzHOE,4885
|
|
982
981
|
mteb/tasks/Retrieval/multilingual/IndicQARetrieval.py,sha256=gcuYoqc8Oxw4eDC9JbsmSBzEK5ygFn7fIADX8A7EhJI,3863
|
|
983
|
-
mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py,sha256=
|
|
982
|
+
mteb/tasks/Retrieval/multilingual/MIRACLRetrieval.py,sha256=B-ingrCxfOyWkpE9aIIpwayAYlQt8QQA4Vbjm4EPSDE,14454
|
|
984
983
|
mteb/tasks/Retrieval/multilingual/MKQARetrieval.py,sha256=d7KZAPROhcih_pXl2-p-d26QRR5i3UbyK2oYThC3vug,4477
|
|
985
984
|
mteb/tasks/Retrieval/multilingual/MLQARetrieval.py,sha256=2hC-uGDGqJoRRIFNSRJXjFAzK4ITmFpsZ-jV0jCcdU8,6619
|
|
986
985
|
mteb/tasks/Retrieval/multilingual/MintakaRetrieval.py,sha256=tnJ0PrUIsaWINxqfpU022ZGkUjAEVQeP0MWmwq70XPU,4400
|
|
@@ -1153,9 +1152,9 @@ mteb/tasks/aggregated_tasks/STS17MultilingualVisualSTS.py,sha256=uf02yWS1BOnffl8
|
|
|
1153
1152
|
mteb/tasks/aggregated_tasks/STSBenchmarkMultilingualVisualSTS.py,sha256=ImA29OtG9sotn5PynAO4QSl5YdXgYVMbHfqfOD2skIk,3156
|
|
1154
1153
|
mteb/tasks/aggregated_tasks/SynPerChatbotConvSAClassification.py,sha256=V2XjmsKqe4CTK2IPKuVM7T8hDPP7Uo1WkzbVZlHinKg,1220
|
|
1155
1154
|
mteb/tasks/aggregated_tasks/__init__.py,sha256=MAU-3SqUN6nypOkdBv4MZVCA2tMnxuw317Jwct9QX-A,881
|
|
1156
|
-
mteb-1.39.
|
|
1157
|
-
mteb-1.39.
|
|
1158
|
-
mteb-1.39.
|
|
1159
|
-
mteb-1.39.
|
|
1160
|
-
mteb-1.39.
|
|
1161
|
-
mteb-1.39.
|
|
1155
|
+
mteb-1.39.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
1156
|
+
mteb-1.39.7.dist-info/METADATA,sha256=9vYlBQSFrtp81iE0lKTdsZ5SVwD2H2gWANnBxqe-2H8,29039
|
|
1157
|
+
mteb-1.39.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1158
|
+
mteb-1.39.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
1159
|
+
mteb-1.39.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
1160
|
+
mteb-1.39.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|