mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +2 -0
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/__init__.py +2 -0
  10. mteb/benchmarks/benchmarks/benchmarks.py +82 -11
  11. mteb/benchmarks/get_benchmark.py +1 -1
  12. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  13. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  14. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  15. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  16. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  17. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  18. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  24. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  25. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  26. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  27. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  28. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  29. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  30. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  31. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  32. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  33. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  34. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  35. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  36. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  38. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  39. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  42. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  43. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  44. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  45. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  46. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  47. mteb/languages/check_language_code.py +11 -3
  48. mteb/languages/language_scripts.py +4 -0
  49. mteb/leaderboard/text_segments.py +1 -1
  50. mteb/models/model_implementations/b1ade_models.py +1 -1
  51. mteb/models/model_implementations/bge_models.py +1 -3
  52. mteb/models/model_implementations/bmretriever_models.py +1 -1
  53. mteb/models/model_implementations/gme_v_models.py +2 -2
  54. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  55. mteb/models/model_implementations/inf_models.py +3 -3
  56. mteb/models/model_implementations/jina_models.py +12 -2
  57. mteb/models/model_implementations/llm2vec_models.py +1 -1
  58. mteb/models/model_implementations/misc_models.py +2 -2
  59. mteb/models/model_implementations/mxbai_models.py +1 -1
  60. mteb/models/model_implementations/salesforce_models.py +1 -1
  61. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  62. mteb/models/model_implementations/voyage_v.py +9 -9
  63. mteb/results/task_result.py +6 -8
  64. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  65. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  66. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  67. mteb/tasks/classification/nld/__init__.py +16 -0
  68. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  69. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  70. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  71. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  72. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  73. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  74. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  75. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  76. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  77. mteb/tasks/clustering/__init__.py +1 -0
  78. mteb/tasks/clustering/nld/__init__.py +17 -0
  79. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  80. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  81. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  82. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  83. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  84. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  85. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  86. mteb/tasks/multilabel_classification/__init__.py +1 -0
  87. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  88. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  89. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  90. mteb/tasks/pair_classification/__init__.py +1 -0
  91. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  92. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  93. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  94. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  95. mteb/tasks/retrieval/code/code_rag.py +8 -8
  96. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  98. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  99. mteb/tasks/retrieval/eng/__init__.py +18 -4
  100. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  101. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  102. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  103. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  104. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  105. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  106. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  108. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  109. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  111. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  112. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  113. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  114. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  117. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  119. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  120. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  121. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  122. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  123. mteb/tasks/retrieval/nld/__init__.py +10 -0
  124. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  125. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  126. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  127. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  128. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  129. mteb/tasks/retrieval/nob/norquad.py +2 -2
  130. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  131. mteb/tasks/retrieval/rus/__init__.py +11 -2
  132. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  133. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  134. mteb/tasks/sts/__init__.py +1 -0
  135. mteb/tasks/sts/nld/__init__.py +5 -0
  136. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  137. mteb-2.1.1.dist-info/METADATA +253 -0
  138. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
  139. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  140. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  141. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  142. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  143. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  144. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  145. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  146. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  147. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  148. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  149. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  150. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  151. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  152. mteb-2.0.5.dist-info/METADATA +0 -455
  153. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
  154. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,184 @@
1
+ {
2
+ "train": {
3
+ "num_samples": 16500,
4
+ "number_of_characters": 118992,
5
+ "documents_text_statistics": null,
6
+ "documents_image_statistics": {
7
+ "min_image_width": 447,
8
+ "average_image_width": 1401.1196666666667,
9
+ "max_image_width": 2743,
10
+ "min_image_height": 376,
11
+ "average_image_height": 1685.2892,
12
+ "max_image_height": 5257,
13
+ "unique_images": 14981
14
+ },
15
+ "queries_text_statistics": {
16
+ "total_text_length": 118992,
17
+ "min_text_length": 13,
18
+ "average_text_length": 79.328,
19
+ "max_text_length": 204,
20
+ "unique_texts": 1499
21
+ },
22
+ "queries_image_statistics": null,
23
+ "relevant_docs_statistics": {
24
+ "num_relevant_docs": 1499,
25
+ "min_relevant_docs_per_query": 1,
26
+ "average_relevant_docs_per_query": 1.0,
27
+ "max_relevant_docs_per_query": 1,
28
+ "unique_relevant_docs": 1499
29
+ },
30
+ "top_ranked_statistics": null,
31
+ "hf_subset_descriptive_stats": {
32
+ "en": {
33
+ "num_samples": 3300,
34
+ "number_of_characters": 20947,
35
+ "documents_text_statistics": null,
36
+ "documents_image_statistics": {
37
+ "min_image_width": 653,
38
+ "average_image_width": 1388.4603333333334,
39
+ "max_image_width": 2464,
40
+ "min_image_height": 878,
41
+ "average_image_height": 1691.6246666666666,
42
+ "max_image_height": 3533,
43
+ "unique_images": 2996
44
+ },
45
+ "queries_text_statistics": {
46
+ "total_text_length": 20947,
47
+ "min_text_length": 31,
48
+ "average_text_length": 69.82333333333334,
49
+ "max_text_length": 142,
50
+ "unique_texts": 300
51
+ },
52
+ "queries_image_statistics": null,
53
+ "relevant_docs_statistics": {
54
+ "num_relevant_docs": 300,
55
+ "min_relevant_docs_per_query": 1,
56
+ "average_relevant_docs_per_query": 1.0,
57
+ "max_relevant_docs_per_query": 1,
58
+ "unique_relevant_docs": 300
59
+ },
60
+ "top_ranked_statistics": null
61
+ },
62
+ "es": {
63
+ "num_samples": 3300,
64
+ "number_of_characters": 24935,
65
+ "documents_text_statistics": null,
66
+ "documents_image_statistics": {
67
+ "min_image_width": 447,
68
+ "average_image_width": 1370.8263333333334,
69
+ "max_image_width": 2743,
70
+ "min_image_height": 376,
71
+ "average_image_height": 1709.195,
72
+ "max_image_height": 5257,
73
+ "unique_images": 2997
74
+ },
75
+ "queries_text_statistics": {
76
+ "total_text_length": 24935,
77
+ "min_text_length": 35,
78
+ "average_text_length": 83.11666666666666,
79
+ "max_text_length": 153,
80
+ "unique_texts": 300
81
+ },
82
+ "queries_image_statistics": null,
83
+ "relevant_docs_statistics": {
84
+ "num_relevant_docs": 300,
85
+ "min_relevant_docs_per_query": 1,
86
+ "average_relevant_docs_per_query": 1.0,
87
+ "max_relevant_docs_per_query": 1,
88
+ "unique_relevant_docs": 300
89
+ },
90
+ "top_ranked_statistics": null
91
+ },
92
+ "fr": {
93
+ "num_samples": 3300,
94
+ "number_of_characters": 25217,
95
+ "documents_text_statistics": null,
96
+ "documents_image_statistics": {
97
+ "min_image_width": 780,
98
+ "average_image_width": 1402.3566666666666,
99
+ "max_image_width": 2579,
100
+ "min_image_height": 756,
101
+ "average_image_height": 1689.5696666666668,
102
+ "max_image_height": 2912,
103
+ "unique_images": 2998
104
+ },
105
+ "queries_text_statistics": {
106
+ "total_text_length": 25217,
107
+ "min_text_length": 37,
108
+ "average_text_length": 84.05666666666667,
109
+ "max_text_length": 152,
110
+ "unique_texts": 299
111
+ },
112
+ "queries_image_statistics": null,
113
+ "relevant_docs_statistics": {
114
+ "num_relevant_docs": 299,
115
+ "min_relevant_docs_per_query": 1,
116
+ "average_relevant_docs_per_query": 1.0,
117
+ "max_relevant_docs_per_query": 1,
118
+ "unique_relevant_docs": 299
119
+ },
120
+ "top_ranked_statistics": null
121
+ },
122
+ "de": {
123
+ "num_samples": 3300,
124
+ "number_of_characters": 23029,
125
+ "documents_text_statistics": null,
126
+ "documents_image_statistics": {
127
+ "min_image_width": 828,
128
+ "average_image_width": 1394.5596666666668,
129
+ "max_image_width": 2366,
130
+ "min_image_height": 756,
131
+ "average_image_height": 1686.0596666666668,
132
+ "max_image_height": 2827,
133
+ "unique_images": 2994
134
+ },
135
+ "queries_text_statistics": {
136
+ "total_text_length": 23029,
137
+ "min_text_length": 35,
138
+ "average_text_length": 76.76333333333334,
139
+ "max_text_length": 143,
140
+ "unique_texts": 300
141
+ },
142
+ "queries_image_statistics": null,
143
+ "relevant_docs_statistics": {
144
+ "num_relevant_docs": 300,
145
+ "min_relevant_docs_per_query": 1,
146
+ "average_relevant_docs_per_query": 1.0,
147
+ "max_relevant_docs_per_query": 1,
148
+ "unique_relevant_docs": 300
149
+ },
150
+ "top_ranked_statistics": null
151
+ },
152
+ "it": {
153
+ "num_samples": 3300,
154
+ "number_of_characters": 24864,
155
+ "documents_text_statistics": null,
156
+ "documents_image_statistics": {
157
+ "min_image_width": 788,
158
+ "average_image_width": 1449.3953333333334,
159
+ "max_image_width": 2583,
160
+ "min_image_height": 804,
161
+ "average_image_height": 1649.997,
162
+ "max_image_height": 2168,
163
+ "unique_images": 2996
164
+ },
165
+ "queries_text_statistics": {
166
+ "total_text_length": 24864,
167
+ "min_text_length": 13,
168
+ "average_text_length": 82.88,
169
+ "max_text_length": 204,
170
+ "unique_texts": 300
171
+ },
172
+ "queries_image_statistics": null,
173
+ "relevant_docs_statistics": {
174
+ "num_relevant_docs": 300,
175
+ "min_relevant_docs_per_query": 1,
176
+ "average_relevant_docs_per_query": 1.0,
177
+ "max_relevant_docs_per_query": 1,
178
+ "unique_relevant_docs": 300
179
+ },
180
+ "top_ranked_statistics": null
181
+ }
182
+ }
183
+ }
184
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 22637,
4
+ "number_of_characters": 21218611,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 21197901,
7
+ "min_text_length": 7,
8
+ "average_text_length": 945.7015837608744,
9
+ "max_text_length": 37834,
10
+ "unique_texts": 22415
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 20710,
15
+ "min_text_length": 22,
16
+ "average_text_length": 93.28828828828829,
17
+ "max_text_length": 250,
18
+ "unique_texts": 222
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1059,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 4.77027027027027,
25
+ "max_relevant_docs_per_query": 57,
26
+ "unique_relevant_docs": 491
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4902,
4
+ "number_of_characters": 463327,
5
+ "unique_pairs": 4902,
6
+ "text1_statistics": {
7
+ "total_text_length": 233941,
8
+ "min_text_length": 10,
9
+ "average_text_length": 47.72358221134231,
10
+ "max_text_length": 158,
11
+ "unique_texts": 3378
12
+ },
13
+ "text2_statistics": {
14
+ "total_text_length": 229386,
15
+ "min_text_length": 10,
16
+ "average_text_length": 46.79436964504284,
17
+ "max_text_length": 158,
18
+ "unique_texts": 3327
19
+ },
20
+ "image1_statistics": null,
21
+ "image2_statistics": null,
22
+ "label_statistics": {
23
+ "min_score": 1.0,
24
+ "avg_score": 3.528012039368932,
25
+ "max_score": 5.0
26
+ }
27
+ }
28
+ }
@@ -13,7 +13,15 @@ def check_language_code(code: str) -> None:
13
13
  Args:
14
14
  code: The language code to check.
15
15
  """
16
- lang, script = code.split("-")
16
+ lang = None
17
+ script = None
18
+ if "-" in code:
19
+ lang, script = code.split("-")
20
+ elif code[0].isupper():
21
+ script = code
22
+ else:
23
+ lang = code
24
+
17
25
  if script == "Code":
18
26
  if lang in PROGRAMMING_LANGS:
19
27
  return # override for code
@@ -21,11 +29,11 @@ def check_language_code(code: str) -> None:
21
29
  raise ValueError(
22
30
  f"Programming language {lang} is not a valid programming language."
23
31
  )
24
- if lang not in ISO_TO_LANGUAGE:
32
+ if lang is not None and lang not in ISO_TO_LANGUAGE:
25
33
  raise ValueError(
26
34
  f"Invalid language code: {lang}, you can find valid ISO 639-3 codes in {path_to_lang_codes}"
27
35
  )
28
- if script not in ISO_TO_SCRIPT:
36
+ if script is not None and script not in ISO_TO_SCRIPT:
29
37
  raise ValueError(
30
38
  f"Invalid script code: {script}, you can find valid ISO 15924 codes in {path_to_lang_scripts}"
31
39
  )
@@ -3,6 +3,8 @@ from dataclasses import dataclass
3
3
 
4
4
  from typing_extensions import Self
5
5
 
6
+ from mteb.languages import check_language_code
7
+
6
8
 
7
9
  @dataclass
8
10
  class LanguageScripts:
@@ -46,8 +48,10 @@ class LanguageScripts:
46
48
  if len(lang_script) == 2:
47
49
  normalized_langs.add(lang_script[0])
48
50
  lang_script_codes.add(lang)
51
+ check_language_code(lang)
49
52
  script_codes.add(lang_script[1])
50
53
  else:
54
+ check_language_code(lang)
51
55
  normalized_langs.add(lang)
52
56
 
53
57
  return cls(
@@ -53,7 +53,7 @@ ACKNOWLEDGEMENT = """
53
53
  <img src="https://play-lh.googleusercontent.com/HdfHZ5jnfMM1Ep7XpPaVdFIVSRx82wKlRC_qmnHx9H1E4aWNp4WKoOcH0x95NAnuYg" width="60" height="55" style="padding: 10px;">
54
54
  </a>
55
55
  <a href="https://huggingface.co">
56
- <img src="https://raw.githubusercontent.com/embeddings-benchmark/mteb/main/docs/images/hf_logo.png" width="60" height="55" style="padding: 10px;">
56
+ <img src="https://raw.githubusercontent.com/embeddings-benchmark/mteb/main/docs/images/logos/hf_logo.png" width="60" height="55" style="padding: 10px;">
57
57
  </a>
58
58
  </div>
59
59
 
@@ -2,7 +2,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
2
2
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
3
 
4
4
  b1ade_training_data = {
5
- # We are in teh process of submitting a paper outlining our process of creating b1ade using model merging and knowledge distillation.
5
+ # We are in the process of submitting a paper outlining our process of creating b1ade using model merging and knowledge distillation.
6
6
  # Similar to mixedbread models, we do not train on any data (except the MSMarco training split) of MTEB.
7
7
  "MSMARCO",
8
8
  }
@@ -62,7 +62,7 @@ bge_m3_training_data = {
62
62
  # mMARCO-ZH
63
63
  # LawGPT
64
64
  # NLI-zh2, LeCaRDv2,
65
- # NLI, MultiLongDoc (their syntetic)
65
+ # NLI, MultiLongDoc (their synthetic)
66
66
  # + synthetic data
67
67
  }
68
68
 
@@ -141,7 +141,6 @@ bge_chinese_training_data = {
141
141
  # https://huggingface.co/BAAI/bge-m3/discussions/29
142
142
  bgem3_languages = [
143
143
  "afr-Latn", # af
144
- # als
145
144
  "amh-Ethi", # am
146
145
  # an
147
146
  # ar
@@ -151,7 +150,6 @@ bgem3_languages = [
151
150
  # av
152
151
  # az
153
152
  "azj-Latn", # azb
154
- # ba
155
153
  # bar
156
154
  # bcl
157
155
  "ben-Beng", # be
@@ -48,7 +48,7 @@ class BMRetrieverWrapper(InstructSentenceTransformerModel):
48
48
  if padding_side is not None:
49
49
  tokenizer_params["padding_side"] = padding_side
50
50
  kwargs.setdefault("tokenizer_args", {}).update(tokenizer_params)
51
- kwargs.setdefault("config_args", {}).update(revison=revision)
51
+ kwargs.setdefault("config_args", {}).update(revision=revision)
52
52
 
53
53
  transformer = Transformer(
54
54
  model_name,
@@ -39,7 +39,7 @@ class Encoder(torch.nn.Module):
39
39
  self.max_length = max_length
40
40
  self.normalize = normalize
41
41
  self.processor.tokenizer.padding_side = "right"
42
- self.defualt_instruction = "You are a helpful assistant."
42
+ self.default_instruction = "You are a helpful assistant."
43
43
 
44
44
  def forward(
45
45
  self,
@@ -103,7 +103,7 @@ class Encoder(torch.nn.Module):
103
103
  instruction=None,
104
104
  **kwargs,
105
105
  ):
106
- instruction = instruction or self.defualt_instruction
106
+ instruction = instruction or self.default_instruction
107
107
  # Inputs must be batched
108
108
  input_texts, input_images = [], []
109
109
  for t, i in zip(texts, images):
@@ -79,7 +79,7 @@ granite_training_data = {
79
79
  "MIRACLReranking",
80
80
  # Multilingual MrTydi Triples
81
81
  "MrTidyRetrieval",
82
- # Sadeeem Question Asnwering
82
+ # Sadeeem Question Answering
83
83
  # DBPedia Title-Body Pairs
84
84
  "DBPedia",
85
85
  "DBPedia-NL", # translated from hotpotQA (not trained on)
@@ -4,7 +4,7 @@ from mteb.models.model_meta import (
4
4
  )
5
5
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
6
 
7
- inf_retreiver_v1_training_data = {
7
+ inf_retriever_v1_training_data = {
8
8
  # eng_Latn
9
9
  "ArguAna",
10
10
  "CQADupstackRetrieval",
@@ -66,7 +66,7 @@ inf_retriever_v1 = ModelMeta(
66
66
  adapted_from="Alibaba-NLP/gte-Qwen2-7B-instruct",
67
67
  public_training_code=None,
68
68
  public_training_data=None,
69
- training_datasets=inf_retreiver_v1_training_data,
69
+ training_datasets=inf_retriever_v1_training_data,
70
70
  citation=INF_RETRIEVER_CITATION,
71
71
  )
72
72
 
@@ -92,6 +92,6 @@ inf_retriever_v1_1_5b = ModelMeta(
92
92
  adapted_from="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
93
93
  public_training_code=None,
94
94
  public_training_data=None,
95
- training_datasets=inf_retreiver_v1_training_data,
95
+ training_datasets=inf_retriever_v1_training_data,
96
96
  citation=INF_RETRIEVER_CITATION,
97
97
  )
@@ -310,9 +310,19 @@ class JinaV4Wrapper(AbsEncoder):
310
310
  text_embeddings = None
311
311
  image_embeddings = None
312
312
  if "text" in inputs.dataset.features:
313
- text_embeddings = self.get_text_embeddings(inputs, **kwargs)
313
+ text_embeddings = self.get_text_embeddings(
314
+ inputs,
315
+ task_metadata=task_metadata,
316
+ prompt_type=prompt_type,
317
+ **kwargs,
318
+ )
314
319
  if "image" in inputs.dataset.features:
315
- image_embeddings = self.get_image_embeddings(inputs, **kwargs)
320
+ image_embeddings = self.get_image_embeddings(
321
+ inputs,
322
+ task_metadata=task_metadata,
323
+ prompt_type=prompt_type,
324
+ **kwargs,
325
+ )
316
326
 
317
327
  if text_embeddings is not None and image_embeddings is not None:
318
328
  if len(text_embeddings) != len(image_embeddings):
@@ -23,7 +23,7 @@ def llm2vec_instruction(instruction):
23
23
 
24
24
  llm2vec_supervised_training_data = {
25
25
  # source, section g1: https://arxiv.org/pdf/2404.05961
26
- # splits assumed but unkown
26
+ # splits assumed but unknown
27
27
  "HotpotQA",
28
28
  "HotpotQA-PL", # translation not trained on
29
29
  "HotpotQA-NL", # translation not trained on
@@ -382,7 +382,7 @@ Mihaiii__Venusaur = ModelMeta(
382
382
  reference="https://huggingface.co/Mihaiii/Venusaur",
383
383
  similarity_fn_name=ScoringFunction.COSINE,
384
384
  use_instructions=None,
385
- training_datasets=None, # source model is unkown
385
+ training_datasets=None, # source model is unknown
386
386
  # {"Mihaiii/qa-assistant"},
387
387
  adapted_from="Mihaiii/test14",
388
388
  superseded_by=None,
@@ -1516,7 +1516,7 @@ openbmb__minicpm_embedding = ModelMeta(
1516
1516
  superseded_by=None,
1517
1517
  )
1518
1518
 
1519
- silma_ai__silma_embeddding_matryoshka_v0_1 = ModelMeta(
1519
+ silma_ai__silma_embedding_matryoshka_v0_1 = ModelMeta(
1520
1520
  name="silma-ai/silma-embeddding-matryoshka-v0.1",
1521
1521
  revision="a520977a9542ebdb8a7206df6b7ff6977f1886ea",
1522
1522
  release_date="2024-10-12",
@@ -5,7 +5,7 @@ from mteb.models.model_meta import (
5
5
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
6
 
7
7
  mixedbread_training_data = {
8
- # from correspondance:
8
+ # from correspondence:
9
9
  # as mentioned in our blog post
10
10
  # (https://www.mixedbread.com/blog/mxbai-embed-large-v1#built-for-rag-and-real-world-use-cases:~:text=During%20the%20whole,related%20use%20cases.)
11
11
  # We do not train on any data (except the MSMarco training split) of MTEB. We have a strong filtering process to ensure the OOD setting. That's true
@@ -27,7 +27,7 @@ SFR_TRAINING_DATA = { # inherits from e5
27
27
  "HotpotQA-PL", # translation not trained on
28
28
  "HotpotQA-NL", # translation not trained on
29
29
  # source: https://github.com/embeddings-benchmark/leaderboard/issues/41
30
- # qoute: In the realm of Semantic Textual Similarity (STS), it is trained on STS12, STS22, and STSBenchmark
30
+ # quote: In the realm of Semantic Textual Similarity (STS), it is trained on STS12, STS22, and STSBenchmark
31
31
  "STS12",
32
32
  "STS22",
33
33
  "STSBenchmark",
@@ -344,7 +344,7 @@ TASK_NAME_TO_INSTRUCTION = {
344
344
  "SprintDuplicateQuestions": "Retrieve semantically similar text\n{}",
345
345
  "TwitterSemEval2015": "Retrieve semantically similar text\n{}",
346
346
  "TwitterURLCorpus": "Retrieve semantically similar text\n{}",
347
- "CQADupstackGamingRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given questionn\n{}",
347
+ "CQADupstackGamingRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question\n{}",
348
348
  "CQADupstackUnixRetrieval": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question\n{}",
349
349
  "DuRetrieval": "为这个句子生成表示以用于检索相关内容:{}",
350
350
  "T2Retrieval": "为这个句子生成表示以用于检索相关内容:{}",
@@ -51,7 +51,13 @@ def _downsample_image(
51
51
  def voyage_v_loader(model_name, **kwargs):
52
52
  requires_package(
53
53
  voyage_v_loader,
54
- "voyageai and tenacity",
54
+ "voyageai",
55
+ model_name,
56
+ "pip install 'mteb[voyage_v]'",
57
+ )
58
+ requires_package(
59
+ voyage_v_loader,
60
+ "tenacity",
55
61
  model_name,
56
62
  "pip install 'mteb[voyage_v]'",
57
63
  )
@@ -65,11 +71,9 @@ def voyage_v_loader(model_name, **kwargs):
65
71
  **kwargs: Any,
66
72
  ):
67
73
  requires_image_dependencies()
68
- from torchvision import transforms
69
74
 
70
75
  self.model_name = model_name.split("/")[-1]
71
76
  self.vo = voyageai.Client()
72
- self.tensor_to_image = transforms.Compose([transforms.PILToTensor()])
73
77
 
74
78
  @retry(
75
79
  stop=stop_after_attempt(6), # Stop after 6 attempts
@@ -126,10 +130,7 @@ def voyage_v_loader(model_name, **kwargs):
126
130
  for batch in tqdm(
127
131
  images, disable=not show_progress_bar, desc="Image Encoding"
128
132
  ):
129
- batch_images = [
130
- [_downsample_image(self.tensor_to_image(image))]
131
- for image in batch["image"]
132
- ]
133
+ batch_images = [[_downsample_image(image)] for image in batch["image"]]
133
134
  embeddings = self._multimodal_embed(
134
135
  batch_images, model=self.model_name, input_type=input_type
135
136
  ).embeddings
@@ -163,8 +164,7 @@ def voyage_v_loader(model_name, **kwargs):
163
164
  inputs, disable=not show_progress_bar, desc="Interleaved Encoding"
164
165
  ):
165
166
  batch_images = [
166
- _downsample_image(self.tensor_to_image(image))
167
- for image in batch["image"]
167
+ _downsample_image(image) for image in batch["image"]
168
168
  ]
169
169
  batch_texts = batch["text"]
170
170
  interleaved_inputs = [
@@ -32,7 +32,7 @@ from mteb.types import (
32
32
  logger = logging.getLogger(__name__)
33
33
 
34
34
 
35
- class Criterias(HelpfulStrEnum):
35
+ class Criteria(HelpfulStrEnum):
36
36
  """Enum for criteria to check when merging TaskResult objects."""
37
37
 
38
38
  MTEB_VERSION = "mteb_version"
@@ -671,7 +671,7 @@ class TaskResult(BaseModel):
671
671
  def is_mergeable(
672
672
  self,
673
673
  result: TaskResult | AbsTask,
674
- criteria: list[str] | list[Criterias] = [
674
+ criteria: list[str] | list[Criteria] = [
675
675
  "mteb_version",
676
676
  "dataset_revision",
677
677
  ],
@@ -688,9 +688,7 @@ class TaskResult(BaseModel):
688
688
  Returns:
689
689
  True if the TaskResult object can be merged with the other object, False otherwise.
690
690
  """
691
- criteria = [
692
- Criterias.from_str(c) if isinstance(c, str) else c for c in criteria
693
- ]
691
+ criteria = [Criteria.from_str(c) if isinstance(c, str) else c for c in criteria]
694
692
  if isinstance(result, TaskResult):
695
693
  name = result.task_name
696
694
  revision = result.dataset_revision
@@ -709,14 +707,14 @@ class TaskResult(BaseModel):
709
707
  )
710
708
  return False
711
709
 
712
- if Criterias.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
710
+ if Criteria.MTEB_VERSION in criteria and self.mteb_version != mteb_version:
713
711
  if raise_error:
714
712
  raise ValueError(
715
713
  f"Cannot merge TaskResult objects as they are derived from different MTEB versions ({self.mteb_version} and {mteb_version})"
716
714
  )
717
715
  return False
718
716
 
719
- if Criterias.DATASET_REVISION in criteria and self.dataset_revision != revision:
717
+ if Criteria.DATASET_REVISION in criteria and self.dataset_revision != revision:
720
718
  if raise_error:
721
719
  raise ValueError(
722
720
  f"Cannot merge TaskResult objects as they are derived from different dataset revisions ({self.dataset_revision} and {revision})"
@@ -728,7 +726,7 @@ class TaskResult(BaseModel):
728
726
  def merge(
729
727
  self,
730
728
  new_results: TaskResult,
731
- criteria: list[str] | list[Criterias] = [
729
+ criteria: list[str] | list[Criteria] = [
732
730
  "mteb_version",
733
731
  "dataset_revision",
734
732
  ],
@@ -9,7 +9,7 @@ class AngryTweetsClassification(AbsTaskClassification):
9
9
  "path": "DDSC/angry-tweets",
10
10
  "revision": "20b0e6081892e78179356fada741b7afa381443d",
11
11
  },
12
- description="A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets",
12
+ description="A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets",
13
13
  reference="https://aclanthology.org/2021.nodalida-main.53/",
14
14
  type="Classification",
15
15
  category="t2c",
@@ -47,7 +47,7 @@ class AngryTweetsClassificationV2(AbsTaskClassification):
47
47
  "path": "mteb/angry_tweets",
48
48
  "revision": "b9475fb66a13befda4fa9871cd92343bb2c0eb77",
49
49
  },
50
- description="""A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets
50
+ description="""A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets
51
51
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
52
52
  reference="https://aclanthology.org/2021.nodalida-main.53/",
53
53
  type="Classification",
@@ -2641,7 +2641,7 @@ class InternationalCitizenshipQuestionsLegalBenchClassification(AbsTaskClassific
2641
2641
  class JCrewBlockerLegalBenchClassification(AbsTaskClassification):
2642
2642
  metadata = TaskMetadata(
2643
2643
  name="JCrewBlockerLegalBenchClassification",
2644
- description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of detemining whether the J.Crew Blocker is present in the document.",
2644
+ description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of determining whether the J.Crew Blocker is present in the document.",
2645
2645
  reference="https://huggingface.co/datasets/nguha/legalbench",
2646
2646
  dataset={
2647
2647
  "path": "mteb/JCrewBlockerLegalBenchClassification",
@@ -2677,7 +2677,7 @@ class JCrewBlockerLegalBenchClassification(AbsTaskClassification):
2677
2677
  class JCrewBlockerLegalBenchClassificationV2(AbsTaskClassification):
2678
2678
  metadata = TaskMetadata(
2679
2679
  name="JCrewBlockerLegalBenchClassification.v2",
2680
- description="""The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of detemining whether the J.Crew Blocker is present in the document.
2680
+ description="""The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of determining whether the J.Crew Blocker is present in the document.
2681
2681
  This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
2682
2682
  reference="https://huggingface.co/datasets/nguha/legalbench",
2683
2683
  dataset={
@@ -4500,7 +4500,7 @@ class OverrulingLegalBenchClassificationV2(AbsTaskClassification):
4500
4500
  class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification):
4501
4501
  metadata = TaskMetadata(
4502
4502
  name="PersonalJurisdictionLegalBenchClassification",
4503
- description="""Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could excercise personal jurisdiction over the defendant.""",
4503
+ description="""Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could exercise personal jurisdiction over the defendant.""",
4504
4504
  reference="https://huggingface.co/datasets/nguha/legalbench",
4505
4505
  dataset={
4506
4506
  "path": "mteb/PersonalJurisdictionLegalBenchClassification",