mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +2 -0
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/__init__.py +2 -0
  10. mteb/benchmarks/benchmarks/benchmarks.py +82 -11
  11. mteb/benchmarks/get_benchmark.py +1 -1
  12. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  13. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  14. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  15. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  16. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  17. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  18. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  24. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  25. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  26. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  27. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  28. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  29. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  30. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  31. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  32. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  33. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  34. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  35. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  36. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  38. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  39. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  42. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  43. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  44. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  45. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  46. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  47. mteb/languages/check_language_code.py +11 -3
  48. mteb/languages/language_scripts.py +4 -0
  49. mteb/leaderboard/text_segments.py +1 -1
  50. mteb/models/model_implementations/b1ade_models.py +1 -1
  51. mteb/models/model_implementations/bge_models.py +1 -3
  52. mteb/models/model_implementations/bmretriever_models.py +1 -1
  53. mteb/models/model_implementations/gme_v_models.py +2 -2
  54. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  55. mteb/models/model_implementations/inf_models.py +3 -3
  56. mteb/models/model_implementations/jina_models.py +12 -2
  57. mteb/models/model_implementations/llm2vec_models.py +1 -1
  58. mteb/models/model_implementations/misc_models.py +2 -2
  59. mteb/models/model_implementations/mxbai_models.py +1 -1
  60. mteb/models/model_implementations/salesforce_models.py +1 -1
  61. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  62. mteb/models/model_implementations/voyage_v.py +9 -9
  63. mteb/results/task_result.py +6 -8
  64. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  65. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  66. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  67. mteb/tasks/classification/nld/__init__.py +16 -0
  68. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  69. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  70. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  71. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  72. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  73. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  74. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  75. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  76. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  77. mteb/tasks/clustering/__init__.py +1 -0
  78. mteb/tasks/clustering/nld/__init__.py +17 -0
  79. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  80. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  81. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  82. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  83. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  84. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  85. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  86. mteb/tasks/multilabel_classification/__init__.py +1 -0
  87. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  88. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  89. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  90. mteb/tasks/pair_classification/__init__.py +1 -0
  91. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  92. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  93. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  94. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  95. mteb/tasks/retrieval/code/code_rag.py +8 -8
  96. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  98. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  99. mteb/tasks/retrieval/eng/__init__.py +18 -4
  100. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  101. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  102. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  103. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  104. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  105. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  106. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  108. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  109. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  111. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  112. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  113. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  114. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  117. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  119. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  120. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  121. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  122. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  123. mteb/tasks/retrieval/nld/__init__.py +10 -0
  124. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  125. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  126. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  127. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  128. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  129. mteb/tasks/retrieval/nob/norquad.py +2 -2
  130. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  131. mteb/tasks/retrieval/rus/__init__.py +11 -2
  132. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  133. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  134. mteb/tasks/sts/__init__.py +1 -0
  135. mteb/tasks/sts/nld/__init__.py +5 -0
  136. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  137. mteb-2.1.1.dist-info/METADATA +253 -0
  138. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
  139. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  140. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  141. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  142. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  143. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  144. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  145. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  146. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  147. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  148. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  149. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  150. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  151. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  152. mteb-2.0.5.dist-info/METADATA +0 -455
  153. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
  154. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
@@ -20,7 +20,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
20
20
  "id": f"corpus-{split_name}-{idx}",
21
21
  "text": x["text_corrected"],
22
22
  "modality": "text",
23
- "image": None,
24
23
  }
25
24
 
26
25
  split_datasets = {}
@@ -56,9 +55,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
56
55
  queries[split] = split_dataset.map(
57
56
  lambda x, idx: {
58
57
  "id": f"query-{split}-{idx}",
59
- "text": None,
60
58
  "modality": "image",
61
- # "image": None,
62
59
  },
63
60
  with_indices=True,
64
61
  remove_columns=[
@@ -18,7 +18,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
18
18
  def map_function(split_name):
19
19
  return lambda x, idx: {
20
20
  "id": f"corpus-{split_name}-{idx}",
21
- "text": None,
22
21
  "modality": "image",
23
22
  }
24
23
 
@@ -56,7 +55,6 @@ def _load_data(path: str, splits: str, revision: str | None = None):
56
55
  "id": f"query-{split}-{idx}",
57
56
  "text": x["text_corrected"],
58
57
  "modality": "text",
59
- "image": None,
60
58
  },
61
59
  with_indices=True,
62
60
  remove_columns=[
@@ -12,7 +12,7 @@ class OVENIT2TRetrieval(AbsTaskRetrieval):
12
12
  "revision": "2192074af29422bc1dc41cf07936f198b8c69bd0",
13
13
  },
14
14
  type="Any2AnyRetrieval",
15
- category="it2i",
15
+ category="it2t",
16
16
  eval_splits=["test"],
17
17
  eval_langs=["eng-Latn"],
18
18
  main_score="ndcg_at_10",
@@ -1,6 +1,32 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
+ _quora_metadata = dict(
5
+ reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
6
+ type="Retrieval",
7
+ category="t2t",
8
+ modalities=["text"],
9
+ eval_splits=["test"],
10
+ eval_langs=["eng-Latn"],
11
+ main_score="ndcg_at_10",
12
+ date=None,
13
+ domains=["Written", "Web", "Blog"],
14
+ task_subtypes=["Question answering"],
15
+ license="not specified",
16
+ annotations_creators="human-annotated",
17
+ dialect=[],
18
+ sample_creation="found",
19
+ bibtex_citation=r"""
20
+ @misc{quora-question-pairs,
21
+ author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
22
+ publisher = {Kaggle},
23
+ title = {Quora Question Pairs},
24
+ url = {https://kaggle.com/competitions/quora-question-pairs},
25
+ year = {2017},
26
+ }
27
+ """,
28
+ )
29
+
4
30
 
5
31
  class QuoraRetrieval(AbsTaskRetrieval):
6
32
  ignore_identical_ids = True
@@ -15,32 +41,10 @@ class QuoraRetrieval(AbsTaskRetrieval):
15
41
  "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a"
16
42
  + " question, find other (duplicate) questions."
17
43
  ),
18
- reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
19
- type="Retrieval",
20
- category="t2t",
21
- modalities=["text"],
22
- eval_splits=["test"],
23
- eval_langs=["eng-Latn"],
24
- main_score="ndcg_at_10",
25
- date=None,
26
- domains=["Written", "Web", "Blog"],
27
- task_subtypes=["Question answering"],
28
- license="not specified",
29
- annotations_creators="human-annotated",
30
- dialect=[],
31
- sample_creation="found",
32
- bibtex_citation=r"""
33
- @misc{quora-question-pairs,
34
- author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
35
- publisher = {Kaggle},
36
- title = {Quora Question Pairs},
37
- url = {https://kaggle.com/competitions/quora-question-pairs},
38
- year = {2017},
39
- }
40
- """,
41
44
  prompt={
42
45
  "query": "Given a question, retrieve questions that are semantically equivalent to the given question"
43
46
  },
47
+ **_quora_metadata,
44
48
  )
45
49
 
46
50
 
@@ -57,28 +61,29 @@ class QuoraRetrievalHardNegatives(AbsTaskRetrieval):
57
61
  "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a"
58
62
  + " question, find other (duplicate) questions. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
59
63
  ),
60
- reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
61
- type="Retrieval",
62
- category="t2t",
63
- modalities=["text"],
64
- eval_splits=["test"],
65
- eval_langs=["eng-Latn"],
66
- main_score="ndcg_at_10",
67
- date=None,
68
- domains=None,
69
- task_subtypes=None,
70
- license=None,
71
- annotations_creators=None,
72
- dialect=None,
73
- sample_creation=None,
74
- bibtex_citation=r"""
75
- @misc{quora-question-pairs,
76
- author = {DataCanary, hilfialkaff, Lili Jiang, Meg Risdal, Nikhil Dandekar, tomtung},
77
- publisher = {Kaggle},
78
- title = {Quora Question Pairs},
79
- url = {https://kaggle.com/competitions/quora-question-pairs},
80
- year = {2017},
81
- }
82
- """,
83
64
  adapted_from=["QuoraRetrieval"],
65
+ superseded_by="QuoraRetrievalHardNegatives.v2",
66
+ **_quora_metadata,
67
+ )
68
+
69
+
70
+ class QuoraRetrievalHardNegativesV2(AbsTaskRetrieval):
71
+ ignore_identical_ids = True
72
+
73
+ metadata = TaskMetadata(
74
+ name="QuoraRetrievalHardNegatives.v2",
75
+ dataset={
76
+ "path": "mteb/QuoraRetrieval_test_top_250_only_w_correct-v2",
77
+ "revision": "907a33577e9506221d3ba20f5a851b7c3f8dc6d3",
78
+ },
79
+ description=(
80
+ "QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a "
81
+ "question, find other (duplicate) questions. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
82
+ "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
83
+ ),
84
+ adapted_from=["QuoraRetrieval"],
85
+ prompt={
86
+ "query": "Given a question, retrieve questions that are semantically equivalent to the given question"
87
+ },
88
+ **_quora_metadata,
84
89
  )
@@ -20,9 +20,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
20
20
  corpus[split] = split_dataset.map(
21
21
  lambda x, idx: {
22
22
  "id": f"corpus-{split}-{idx}",
23
- # "text": None,
24
23
  "modality": "text",
25
- "image": None,
26
24
  },
27
25
  with_indices=True,
28
26
  remove_columns=[
@@ -37,9 +35,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
37
35
  queries[split] = split_dataset.map(
38
36
  lambda x, idx: {
39
37
  "id": f"query-{split}-{idx}",
40
- "text": None,
41
38
  "modality": "image",
42
- # "image": None,
43
39
  },
44
40
  with_indices=True,
45
41
  remove_columns=[
@@ -20,9 +20,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
20
20
  corpus[split] = split_dataset.map(
21
21
  lambda x, idx: {
22
22
  "id": f"corpus-{split}-{idx}",
23
- "text": None,
24
23
  "modality": "image",
25
- # "image": None,
26
24
  },
27
25
  with_indices=True,
28
26
  remove_columns=[
@@ -37,9 +35,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
37
35
  queries[split] = split_dataset.map(
38
36
  lambda x, idx: {
39
37
  "id": f"query-{split}-{idx}",
40
- # "text": None,
41
38
  "modality": "text",
42
- "image": None,
43
39
  },
44
40
  with_indices=True,
45
41
  remove_columns=[
@@ -24,7 +24,6 @@ def _load_data(
24
24
  lambda x: {
25
25
  "id": f"query-{split}-{x['query-id']}",
26
26
  "text": x["query"],
27
- "image": None,
28
27
  "modality": "text",
29
28
  },
30
29
  remove_columns=["query-id", "query"],
@@ -40,7 +39,6 @@ def _load_data(
40
39
  corpus_ds = corpus_ds.map(
41
40
  lambda x: {
42
41
  "id": f"corpus-{split}-{x['corpus-id']}",
43
- "text": None,
44
42
  "modality": "image",
45
43
  },
46
44
  remove_columns=["corpus-id"],
@@ -7,7 +7,7 @@ class JaGovFaqsRetrieval(AbsTaskRetrieval):
7
7
 
8
8
  metadata = TaskMetadata(
9
9
  name="JaGovFaqsRetrieval",
10
- description="JaGovFaqs is a dataset consisting of FAQs manully extracted from the website of Japanese bureaus. The dataset consists of 22k FAQs, where the queries (questions) and corpus (answers) have been shuffled, and the goal is to match the answer with the question.",
10
+ description="JaGovFaqs is a dataset consisting of FAQs manually extracted from the website of Japanese bureaus. The dataset consists of 22k FAQs, where the queries (questions) and corpus (answers) have been shuffled, and the goal is to match the answer with the question.",
11
11
  reference="https://github.com/sbintuitions/JMTEB",
12
12
  dataset={
13
13
  "path": "mteb/JaGovFaqsRetrieval",
@@ -132,7 +132,7 @@ _LANGUAGES = [
132
132
 
133
133
 
134
134
  def get_lang_pairs() -> dict[str, list[str]]:
135
- # add pairs with same langauge as the source and target
135
+ # add pairs with same language as the source and target
136
136
  # add pairs with english as source or target
137
137
  lang_pairs = {}
138
138
  for x in _LANGUAGES:
@@ -72,7 +72,6 @@ def _load_single_language(
72
72
  lambda x: {
73
73
  "id": f"query-{split}-{x['query-id']}",
74
74
  "text": x["query"],
75
- "image": None,
76
75
  "modality": "text",
77
76
  },
78
77
  remove_columns=["query-id", "query"],
@@ -87,7 +86,6 @@ def _load_single_language(
87
86
  corpus_ds = corpus_ds.map(
88
87
  lambda x: {
89
88
  "id": f"corpus-{split}-{x['corpus-id']}",
90
- "text": None,
91
89
  "modality": "image",
92
90
  },
93
91
  remove_columns=["corpus-id"],
@@ -92,7 +92,7 @@ class MIRACLRetrievalHardNegativesV2(AbsTaskRetrieval):
92
92
  "MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval "
93
93
  "dataset that focuses on search across 18 different languages. The hard negative version has been "
94
94
  "created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
95
- "V2 uses a more appropriate prompt rather than the default prompt for retrieval."
95
+ "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
96
96
  ),
97
97
  dataset={
98
98
  "path": "mteb/MIRACLRetrievalHardNegatives",
@@ -30,7 +30,7 @@ _LANGUAGES = {
30
30
  def _load_miracl_data(
31
31
  path: str,
32
32
  langs: list,
33
- splits: str,
33
+ splits: list[str],
34
34
  revision: str | None = None,
35
35
  ):
36
36
  corpus = {lang: dict.fromkeys(splits) for lang in langs}
@@ -65,9 +65,7 @@ def _load_miracl_data(
65
65
  images_data = images_data.map(
66
66
  lambda x: {
67
67
  "id": imgid2docid[str(x["file_name"])],
68
- # "modality": "text",
69
68
  "modality": "image",
70
- "text": None,
71
69
  },
72
70
  remove_columns=["file_name"],
73
71
  )
@@ -86,7 +84,6 @@ def _load_miracl_data(
86
84
  "id": str(x["_id"]),
87
85
  "text": x["text"],
88
86
  "modality": "text",
89
- "image": None,
90
87
  },
91
88
  remove_columns=["_id"],
92
89
  )
@@ -108,10 +105,6 @@ def _load_miracl_data(
108
105
  relevant_docs[lang][split][query_id] = {}
109
106
  relevant_docs[lang][split][query_id][doc_id] = score
110
107
 
111
- corpus = datasets.DatasetDict(corpus)
112
- queries = datasets.DatasetDict(queries)
113
- relevant_docs = datasets.DatasetDict(relevant_docs)
114
-
115
108
  return corpus, queries, relevant_docs
116
109
 
117
110
 
@@ -156,7 +149,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
156
149
 
157
150
  self.corpus, self.queries, self.relevant_docs = _load_miracl_data(
158
151
  path=self.metadata.dataset["path"],
159
- splits=self.metadata.eval_splits[0],
152
+ splits=self.metadata.eval_splits,
160
153
  langs=self.hf_subsets,
161
154
  revision=self.metadata.dataset["revision"],
162
155
  )
@@ -37,7 +37,6 @@ def _load_data(
37
37
  lambda x: {
38
38
  "id": f"query-{split}-{x['query-id']}",
39
39
  "text": x["query"],
40
- "image": None,
41
40
  "modality": "text",
42
41
  },
43
42
  remove_columns=["query-id", "query"],
@@ -52,7 +51,6 @@ def _load_data(
52
51
  corpus_ds = corpus_ds.map(
53
52
  lambda x: {
54
53
  "id": f"corpus-{split}-{x['corpus-id']}",
55
- "text": None,
56
54
  "modality": "image",
57
55
  },
58
56
  remove_columns=["corpus-id"],
@@ -34,7 +34,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
34
34
  lang_corpus = lang_data.map(
35
35
  lambda x: {
36
36
  "id": "corpus-" + x["image_id"],
37
- "text": None,
38
37
  "modality": "image",
39
38
  "image": x["image"],
40
39
  },
@@ -60,7 +59,6 @@ def _load_wit_data(path: str, langs: list, splits: str, revision: str | None = N
60
59
  "id": query_id,
61
60
  "text": caption,
62
61
  "modality": "text",
63
- "image": None,
64
62
  }
65
63
  )
66
64
  if query_id not in relevant_docs[lang][split]:
@@ -1,4 +1,4 @@
1
- from datasets import DatasetDict, load_dataset
1
+ from datasets import DatasetDict, Image, load_dataset
2
2
 
3
3
  from mteb.abstasks.retrieval import AbsTaskRetrieval
4
4
  from mteb.abstasks.task_metadata import TaskMetadata
@@ -16,7 +16,7 @@ _LANGUAGES = {
16
16
 
17
17
 
18
18
  def _load_xflickrco_data(
19
- path: str, langs: list, splits: str, revision: str | None = None
19
+ path: str, langs: list, splits: list[str], revision: str | None = None
20
20
  ):
21
21
  corpus = {lang: dict.fromkeys(splits) for lang in langs}
22
22
  queries = {lang: dict.fromkeys(splits) for lang in langs}
@@ -32,22 +32,23 @@ def _load_xflickrco_data(
32
32
  lang_corpus = lang_data.map(
33
33
  lambda x: {
34
34
  "id": "corpus-" + x["id"],
35
- "text": None,
36
35
  "modality": "image",
37
- "image": x["image"]["bytes"],
36
+ "image": x["image"],
38
37
  },
39
38
  remove_columns=["sentences"],
40
39
  )
40
+ lang_corpus = lang_corpus.cast_column("image", Image())
41
41
 
42
42
  lang_queries = lang_data.map(
43
43
  lambda x: {
44
44
  "id": "query-" + x["id"],
45
45
  "text": x["sentences"],
46
46
  "modality": "text",
47
- "image": None,
48
47
  },
49
48
  remove_columns=["sentences"],
50
49
  )
50
+ # None values
51
+ lang_queries = lang_queries.remove_columns(["image"])
51
52
 
52
53
  relevant_docs[lang][split] = {}
53
54
  for row in lang_data:
@@ -1,4 +1,4 @@
1
- from datasets import Dataset, DatasetDict, load_dataset
1
+ from datasets import Dataset, DatasetDict, Image, load_dataset
2
2
 
3
3
  from mteb.abstasks.retrieval import AbsTaskRetrieval
4
4
  from mteb.abstasks.task_metadata import TaskMetadata
@@ -61,9 +61,8 @@ def _load_xm3600_data(
61
61
  lang_corpus = lang_data.map(
62
62
  lambda x: {
63
63
  "id": "corpus-" + x["image_id"],
64
- "text": None,
65
64
  "modality": "image",
66
- "image": x["image"]["bytes"],
65
+ "image": x["image"],
67
66
  },
68
67
  remove_columns=[
69
68
  "captions",
@@ -73,6 +72,7 @@ def _load_xm3600_data(
73
72
  "image_id",
74
73
  ],
75
74
  )
75
+ lang_corpus = lang_corpus.cast_column("image", Image())
76
76
 
77
77
  corpus[lang][split] = lang_corpus
78
78
 
@@ -90,7 +90,6 @@ def _load_xm3600_data(
90
90
  "id": query_id,
91
91
  "text": caption,
92
92
  "modality": "text",
93
- "image": None,
94
93
  }
95
94
  )
96
95
  if query_id not in relevant_docs[lang][split]:
@@ -1,4 +1,5 @@
1
1
  from .argu_ana_nl_retrieval import ArguAnaNL
2
+ from .bbsard_nl_retrieval import BBSARDNLRetrieval
2
3
  from .climate_fevernl_retrieval import ClimateFEVERNL
3
4
  from .cqa_dupstack_android_nl_retrieval import CQADupstackAndroidNLRetrieval
4
5
  from .cqa_dupstack_english_nl_retrieval import CQADupstackEnglishNLRetrieval
@@ -13,17 +14,21 @@ from .cqa_dupstack_unix_nl_retrieval import CQADupstackUnixNLRetrieval
13
14
  from .cqa_dupstack_webmasters_nl_retrieval import CQADupstackWebmastersNLRetrieval
14
15
  from .cqa_dupstack_wordpress_nl_retrieval import CQADupstackWordpressNLRetrieval
15
16
  from .db_pedia_nl_retrieval import DBPediaNL
17
+ from .dutch_news_articles_retrieval import DutchNewsArticlesRetrieval
16
18
  from .fevernl_retrieval import FEVERNL
17
19
  from .fi_qa2018_nl_retrieval import FiQA2018NL
18
20
  from .hotpot_qanl_retrieval import HotpotQANL
21
+ from .legal_qa_nl_retrieval import LegalQANLRetrieval
19
22
  from .mmarconl_retrieval import MMMARCONL
20
23
  from .nf_corpus_nl_retrieval import NFCorpusNL
21
24
  from .nqnl_retrieval import NQNL
25
+ from .open_tender_retrieval import OpenTenderRetrieval
22
26
  from .quora_nl_retrieval import QuoraNLRetrieval
23
27
  from .sci_fact_nl_retrieval import SciFactNL
24
28
  from .scidocsnl_retrieval import SCIDOCSNL
25
29
  from .touche2020_nl_retrieval import Touche2020NL
26
30
  from .treccovidnl_retrieval import TRECCOVIDNL
31
+ from .vabb_retrieval import VABBRetrieval
27
32
 
28
33
  __all__ = [
29
34
  "FEVERNL",
@@ -32,6 +37,7 @@ __all__ = [
32
37
  "SCIDOCSNL",
33
38
  "TRECCOVIDNL",
34
39
  "ArguAnaNL",
40
+ "BBSARDNLRetrieval",
35
41
  "CQADupstackAndroidNLRetrieval",
36
42
  "CQADupstackEnglishNLRetrieval",
37
43
  "CQADupstackGamingNLRetrieval",
@@ -46,10 +52,14 @@ __all__ = [
46
52
  "CQADupstackWordpressNLRetrieval",
47
53
  "ClimateFEVERNL",
48
54
  "DBPediaNL",
55
+ "DutchNewsArticlesRetrieval",
49
56
  "FiQA2018NL",
50
57
  "HotpotQANL",
58
+ "LegalQANLRetrieval",
51
59
  "NFCorpusNL",
60
+ "OpenTenderRetrieval",
52
61
  "QuoraNLRetrieval",
53
62
  "SciFactNL",
54
63
  "Touche2020NL",
64
+ "VABBRetrieval",
55
65
  ]
@@ -0,0 +1,41 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class BBSARDNLRetrieval(AbsTaskRetrieval):
6
+ ignore_identical_ids = True
7
+
8
+ metadata = TaskMetadata(
9
+ name="bBSARDNLRetrieval",
10
+ description="Building on the Belgian Statutory Article Retrieval Dataset (BSARD) in French, we introduce the "
11
+ "bilingual version of this dataset, bBSARD. The dataset contains parallel Belgian statutory "
12
+ "articles in both French and Dutch, along with legal questions from BSARD and their Dutch "
13
+ "translation.",
14
+ reference="https://aclanthology.org/2025.regnlp-1.3.pdf",
15
+ dataset={
16
+ "path": "clips/mteb-nl-bbsard",
17
+ "revision": "52027c212ba9765a3e9737c9cbf9a06ae83cbb93",
18
+ },
19
+ type="Retrieval",
20
+ category="t2t",
21
+ modalities=["text"],
22
+ eval_splits=["test"],
23
+ eval_langs=["nld-Latn"],
24
+ main_score="ndcg_at_10",
25
+ date=("2021-05-01", "2021-08-26"),
26
+ domains=["Legal", "Written"],
27
+ task_subtypes=[],
28
+ license="cc-by-nc-sa-4.0",
29
+ annotations_creators="expert-annotated",
30
+ dialect=[],
31
+ sample_creation="found",
32
+ bibtex_citation=r"""
33
+ @article{lotfi2025bilingual,
34
+ author = {Lotfi, Ehsan and Banar, Nikolay and Yuzbashyan, Nerses and Daelemans, Walter},
35
+ journal = {COLING 2025},
36
+ pages = {10},
37
+ title = {Bilingual BSARD: Extending Statutory Article Retrieval to Dutch},
38
+ year = {2025},
39
+ }
40
+ """,
41
+ )
@@ -0,0 +1,30 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class DutchNewsArticlesRetrieval(AbsTaskRetrieval):
6
+ metadata = TaskMetadata(
7
+ name="DutchNewsArticlesRetrieval",
8
+ description="This dataset contains all the articles published by the NOS as of the 1st of January 2010. The "
9
+ "data is obtained by scraping the NOS website. The NOS is one of the biggest (online) news "
10
+ "organizations in the Netherlands.",
11
+ reference="https://www.kaggle.com/datasets/maxscheijen/dutch-news-articles",
12
+ dataset={
13
+ "path": "clips/mteb-nl-news-articles-ret",
14
+ "revision": "c8042a86f3eb0d1fcec79a4a44ebf1eafe635462",
15
+ },
16
+ type="Retrieval",
17
+ category="t2t",
18
+ modalities=["text"],
19
+ eval_splits=["test"],
20
+ eval_langs=["nld-Latn"],
21
+ main_score="ndcg_at_10",
22
+ date=("2009-11-01", "2010-01-01"),
23
+ domains=["Written", "News"],
24
+ task_subtypes=["Article retrieval"],
25
+ license="cc-by-nc-sa-4.0",
26
+ annotations_creators="derived",
27
+ dialect=[],
28
+ sample_creation="found",
29
+ bibtex_citation="",
30
+ )
@@ -0,0 +1,39 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class LegalQANLRetrieval(AbsTaskRetrieval):
6
+ ignore_identical_ids = True
7
+
8
+ metadata = TaskMetadata(
9
+ name="LegalQANLRetrieval",
10
+ description="To this end, we create and publish a Dutch legal QA dataset, consisting of question-answer pairs "
11
+ "with attributions to Dutch law articles.",
12
+ reference="https://aclanthology.org/2024.nllp-1.12/",
13
+ dataset={
14
+ "path": "clips/mteb-nl-legalqa-pr",
15
+ "revision": "8f593522dfbe7ec07055ca9d38a700e7643d3882",
16
+ },
17
+ type="Retrieval",
18
+ category="t2t",
19
+ modalities=["text"],
20
+ eval_splits=["test"],
21
+ eval_langs=["nld-Latn"],
22
+ main_score="ndcg_at_10",
23
+ date=("2021-05-01", "2021-08-26"),
24
+ domains=["Legal", "Written"],
25
+ task_subtypes=[],
26
+ license="cc-by-nc-sa-4.0",
27
+ annotations_creators="expert-annotated",
28
+ dialect=[],
29
+ sample_creation="found",
30
+ bibtex_citation=r"""
31
+ @inproceedings{redelaar2024attributed,
32
+ author = {Redelaar, Felicia and Van Drie, Romy and Verberne, Suzan and De Boer, Maaike},
33
+ booktitle = {Proceedings of the natural legal language processing workshop 2024},
34
+ pages = {154--165},
35
+ title = {Attributed Question Answering for Preconditions in the Dutch Law},
36
+ year = {2024},
37
+ }
38
+ """,
39
+ )
@@ -0,0 +1,38 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class OpenTenderRetrieval(AbsTaskRetrieval):
6
+ metadata = TaskMetadata(
7
+ name="OpenTenderRetrieval",
8
+ description="This dataset contains Belgian and Dutch tender calls from OpenTender in Dutch",
9
+ reference="https://arxiv.org/abs/2509.12340",
10
+ dataset={
11
+ "path": "clips/mteb-nl-opentender-ret",
12
+ "revision": "83eec1aa9c58f1dc8acfac015f653a9c25bda3f4",
13
+ },
14
+ type="Retrieval",
15
+ category="t2t",
16
+ modalities=["text"],
17
+ eval_splits=["test"],
18
+ eval_langs=["nld-Latn"],
19
+ main_score="ndcg_at_10",
20
+ date=("2009-11-01", "2010-01-01"),
21
+ domains=["Government", "Written"],
22
+ task_subtypes=["Article retrieval"],
23
+ license="cc-by-nc-sa-4.0",
24
+ annotations_creators="derived",
25
+ dialect=[],
26
+ sample_creation="found",
27
+ bibtex_citation=r"""
28
+ @misc{banar2025mtebnle5nlembeddingbenchmark,
29
+ archiveprefix = {arXiv},
30
+ author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
31
+ eprint = {2509.12340},
32
+ primaryclass = {cs.CL},
33
+ title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
34
+ url = {https://arxiv.org/abs/2509.12340},
35
+ year = {2025},
36
+ }
37
+ """,
38
+ )