mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. mteb/_create_dataloaders.py +16 -16
  2. mteb/_evaluators/any_sts_evaluator.py +1 -1
  3. mteb/_evaluators/classification_metrics.py +10 -1
  4. mteb/_evaluators/clustering_evaluator.py +1 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -2
  7. mteb/_evaluators/retrieval_evaluator.py +1 -1
  8. mteb/_evaluators/retrieval_metrics.py +9 -7
  9. mteb/_evaluators/sklearn_evaluator.py +13 -6
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
  11. mteb/_evaluators/text/summarization_evaluator.py +1 -1
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
  13. mteb/abstasks/_stratification.py +13 -8
  14. mteb/abstasks/abstask.py +4 -4
  15. mteb/abstasks/classification.py +6 -4
  16. mteb/abstasks/clustering.py +1 -1
  17. mteb/abstasks/clustering_legacy.py +1 -1
  18. mteb/abstasks/image/image_text_pair_classification.py +1 -1
  19. mteb/abstasks/multilabel_classification.py +7 -5
  20. mteb/abstasks/pair_classification.py +1 -1
  21. mteb/abstasks/regression.py +3 -2
  22. mteb/abstasks/retrieval.py +8 -5
  23. mteb/abstasks/retrieval_dataset_loaders.py +27 -8
  24. mteb/abstasks/sts.py +1 -1
  25. mteb/abstasks/text/bitext_mining.py +2 -2
  26. mteb/abstasks/text/reranking.py +1 -1
  27. mteb/abstasks/text/summarization.py +1 -1
  28. mteb/abstasks/zeroshot_classification.py +1 -1
  29. mteb/benchmarks/benchmark.py +131 -3
  30. mteb/evaluate.py +2 -2
  31. mteb/leaderboard/figures.py +2 -1
  32. mteb/leaderboard/table.py +10 -2
  33. mteb/models/cache_wrappers/cache_backend_protocol.py +3 -3
  34. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +3 -3
  35. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +8 -3
  36. mteb/models/cache_wrappers/cache_wrapper.py +2 -2
  37. mteb/models/model_implementations/bedrock_models.py +4 -4
  38. mteb/models/model_implementations/bm25.py +2 -2
  39. mteb/models/model_implementations/mcinext_models.py +2 -2
  40. mteb/models/model_implementations/openai_models.py +2 -1
  41. mteb/models/model_implementations/pylate_models.py +4 -4
  42. mteb/models/model_implementations/random_baseline.py +4 -3
  43. mteb/models/model_implementations/seed_models.py +7 -2
  44. mteb/models/model_implementations/voyage_models.py +1 -1
  45. mteb/models/models_protocols.py +2 -2
  46. mteb/models/search_wrappers.py +4 -4
  47. mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
  48. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  49. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  50. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  51. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  52. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
  53. mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
  54. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
  55. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
  56. mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
  57. mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
  58. mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
  59. mteb/tasks/classification/multilingual/language_classification.py +1 -1
  60. mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
  61. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  62. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
  63. mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
  64. mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
  65. mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
  66. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  67. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  68. mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
  69. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  70. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  71. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  72. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  73. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
  74. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  75. mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
  76. mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
  77. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  78. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  79. mteb/tasks/retrieval/code/code_rag.py +8 -8
  80. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  81. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  82. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  83. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  84. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  85. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  86. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  87. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  88. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  89. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  90. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  91. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  92. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  93. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  94. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  95. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  96. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  97. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  98. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  99. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  100. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  101. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  102. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  103. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  104. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  105. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  106. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  108. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  109. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  110. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  111. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  112. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  113. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  114. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  115. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  116. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  117. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  118. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  119. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  120. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  121. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  122. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  123. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  124. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  125. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  126. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  127. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  128. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  129. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  130. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  131. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  132. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  133. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  134. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  135. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  136. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  137. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  138. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  139. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  140. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  141. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  142. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
  143. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +1 -0
  144. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  145. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  146. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  147. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  148. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  149. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  150. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  151. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  152. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  153. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  154. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  155. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  156. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  157. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  158. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  159. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  160. mteb/tasks/retrieval/nob/norquad.py +2 -2
  161. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  162. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  163. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  164. mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
  165. mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
  166. mteb/tasks/sts/por/assin2_sts.py +1 -1
  167. mteb/types/_encoder_io.py +3 -2
  168. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/METADATA +1 -1
  169. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/RECORD +173 -173
  170. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/WHEEL +0 -0
  171. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/entry_points.txt +0 -0
  172. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/licenses/LICENSE +0 -0
  173. {mteb-2.7.16.dist-info → mteb-2.7.18.dist-info}/top_level.txt +0 -0
@@ -111,7 +111,7 @@ class CUREv1Retrieval(AbsTaskRetrieval):
111
111
 
112
112
  return queries
113
113
 
114
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
114
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
115
115
  if self.data_loaded:
116
116
  return
117
117
 
@@ -148,7 +148,7 @@ def _load_data(
148
148
  return corpus, queries, relevant_docs
149
149
 
150
150
 
151
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
151
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
152
152
  if self.data_loaded:
153
153
  return
154
154
 
@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
143
143
  prompt={"query": "Find a screenshot that is relevant to the user's query."},
144
144
  )
145
145
 
146
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
146
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
147
147
  if self.data_loaded:
148
148
  return
149
149
 
@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
108
108
  """,
109
109
  )
110
110
 
111
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
111
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
112
112
  if self.data_loaded:
113
113
  return
114
114
 
@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
97
97
  """,
98
98
  )
99
99
 
100
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
100
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
101
101
  if self.data_loaded:
102
102
  return
103
103
 
@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
103
103
  },
104
104
  )
105
105
 
106
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
106
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
107
107
  if self.data_loaded:
108
108
  return
109
109
 
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
161
161
  },
162
162
  )
163
163
 
164
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
164
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
165
165
  if self.data_loaded:
166
166
  return
167
167
 
@@ -96,7 +96,7 @@ de Vries, Harm},
96
96
  """,
97
97
  )
98
98
 
99
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
99
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
100
100
  if self.data_loaded:
101
101
  return
102
102
 
@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
126
126
  """,
127
127
  )
128
128
 
129
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
129
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
130
130
  if self.data_loaded:
131
131
  return
132
132
 
@@ -16,7 +16,7 @@ def _load_data(
16
16
  splits: list[str],
17
17
  langs: list | None = None,
18
18
  revision: str | None = None,
19
- num_proc: int = 1,
19
+ num_proc: int | None = None,
20
20
  ):
21
21
  if langs is None:
22
22
  corpus = {}
@@ -131,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
131
131
  prompt={"query": "Find a screenshot that relevant to the user's question."},
132
132
  )
133
133
 
134
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
134
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
135
135
  if self.data_loaded:
136
136
  return
137
137
 
@@ -179,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
179
179
  prompt={"query": "Find a screenshot that relevant to the user's question."},
180
180
  )
181
181
 
182
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
182
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
183
183
  if self.data_loaded:
184
184
  return
185
185
 
@@ -227,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
227
227
  prompt={"query": "Find a screenshot that relevant to the user's question."},
228
228
  )
229
229
 
230
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
230
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
231
231
  if self.data_loaded:
232
232
  return
233
233
 
@@ -275,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
275
275
  prompt={"query": "Find a screenshot that relevant to the user's question."},
276
276
  )
277
277
 
278
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
278
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
279
279
  if self.data_loaded:
280
280
  return
281
281
 
@@ -68,6 +68,7 @@ class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
68
68
  license="cc-by-4.0",
69
69
  annotations_creators="derived",
70
70
  dialect=[],
71
+ modalities=["text", "image"],
71
72
  sample_creation="created and machine-translated",
72
73
  bibtex_citation=r"""
73
74
  @article{loison2026vidorev3comprehensiveevaluation,
@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
116
116
  """,
117
117
  )
118
118
 
119
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
119
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
120
120
  if self.data_loaded:
121
121
  return
122
122
 
@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
104
104
  """,
105
105
  )
106
106
 
107
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
107
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
108
108
  if self.data_loaded:
109
109
  return
110
110
 
@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
67
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
146
146
  """,
147
147
  )
148
148
 
149
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
149
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
150
150
  if self.data_loaded:
151
151
  return
152
152
 
@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackAndroid"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackEnglish"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGamingRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGisRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackMathematicaRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackPhysicsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackProgrammersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackStatsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackTexRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackUnixRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWebmastersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWordpressRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -50,7 +50,7 @@ Fishel, Mark},
50
50
  },
51
51
  )
52
52
 
53
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
54
54
  """Load dataset from HuggingFace hub"""
55
55
  if self.data_loaded:
56
56
  return
@@ -58,7 +58,7 @@ Fishel, Mark},
58
58
  self.dataset_transform()
59
59
  self.data_loaded = True
60
60
 
61
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
61
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
62
62
  """And transform to a retrieval dataset, which have the following attributes
63
63
 
64
64
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
37
37
  task_subtypes=["Article retrieval"],
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  """Load dataset from HuggingFace hub"""
42
42
  if self.data_loaded:
43
43
  return
@@ -45,7 +45,7 @@ class SNLRetrieval(AbsTaskRetrieval):
45
45
  self.dataset_transform()
46
46
  self.data_loaded = True
47
47
 
48
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
48
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
49
49
  """And transform to a retrieval dataset, which have the following attributes
50
50
 
51
51
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
36
36
  """,
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -52,7 +52,7 @@ Zong, Chengqing},
52
52
  """,
53
53
  )
54
54
 
55
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
55
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
56
56
  if self.data_loaded:
57
57
  return
58
58
 
@@ -66,6 +66,6 @@ Seid Muhie Yimam and Saif M. Mohammad},
66
66
  min_score = 0
67
67
  max_score = 1
68
68
 
69
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
69
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
70
70
  for lang, subset in self.dataset.items():
71
71
  self.dataset[lang] = subset.rename_column("label", "score")
@@ -56,6 +56,6 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS):
56
56
  min_score = 0
57
57
  max_score = 5
58
58
 
59
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
59
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
60
60
  for lang, subset in self.dataset.items():
61
61
  self.dataset[lang] = subset.rename_column("similarity_score", "score")
@@ -39,7 +39,7 @@ class Assin2STS(AbsTaskSTS):
39
39
  min_score = 1
40
40
  max_score = 5
41
41
 
42
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
42
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
43
43
  self.dataset = self.dataset.rename_columns(
44
44
  {
45
45
  "premise": "sentence1",
mteb/types/_encoder_io.py CHANGED
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, TypedDict
7
7
  import numpy as np
8
8
  import torch
9
9
  from datasets import Dataset
10
+ from numpy.typing import NDArray
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from PIL import Image
@@ -26,8 +27,8 @@ class EncodeKwargs(TypedDict):
26
27
 
27
28
 
28
29
  # --- Output types ---
29
- Array = np.ndarray | torch.Tensor
30
- """General array type, can be a numpy array or a torch tensor."""
30
+ Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
31
+ """General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
31
32
 
32
33
 
33
34
  # --- Input types ---
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.16
3
+ Version: 2.7.18
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>