mteb 2.7.17__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. mteb/_create_dataloaders.py +16 -16
  2. mteb/_evaluators/any_sts_evaluator.py +1 -1
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +1 -1
  6. mteb/_evaluators/retrieval_evaluator.py +1 -1
  7. mteb/_evaluators/sklearn_evaluator.py +4 -2
  8. mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
  9. mteb/_evaluators/text/summarization_evaluator.py +1 -1
  10. mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
  11. mteb/abstasks/abstask.py +4 -4
  12. mteb/abstasks/classification.py +2 -2
  13. mteb/abstasks/clustering.py +1 -1
  14. mteb/abstasks/clustering_legacy.py +1 -1
  15. mteb/abstasks/image/image_text_pair_classification.py +1 -1
  16. mteb/abstasks/multilabel_classification.py +1 -1
  17. mteb/abstasks/pair_classification.py +1 -1
  18. mteb/abstasks/retrieval.py +8 -5
  19. mteb/abstasks/retrieval_dataset_loaders.py +27 -8
  20. mteb/abstasks/sts.py +1 -1
  21. mteb/abstasks/text/bitext_mining.py +2 -2
  22. mteb/abstasks/text/reranking.py +1 -1
  23. mteb/abstasks/text/summarization.py +1 -1
  24. mteb/abstasks/zeroshot_classification.py +1 -1
  25. mteb/evaluate.py +2 -2
  26. mteb/models/model_implementations/bm25.py +2 -2
  27. mteb/models/model_implementations/pylate_models.py +4 -4
  28. mteb/models/models_protocols.py +2 -2
  29. mteb/models/search_wrappers.py +4 -4
  30. mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
  31. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  33. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
  36. mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
  37. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
  38. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
  39. mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
  40. mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
  41. mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
  42. mteb/tasks/classification/multilingual/language_classification.py +1 -1
  43. mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
  44. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  45. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
  46. mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
  47. mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
  48. mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
  49. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  50. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  51. mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
  52. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  53. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  54. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  55. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  56. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
  57. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  58. mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
  59. mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
  60. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  61. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  62. mteb/tasks/retrieval/code/code_rag.py +8 -8
  63. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  64. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  65. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  66. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  67. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  68. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  69. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  70. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  71. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  72. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  73. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  74. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  75. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  76. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  77. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  78. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  79. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  80. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  81. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  82. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  83. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  84. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  85. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  86. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  87. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  88. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  89. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  90. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  91. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  92. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  93. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  94. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  95. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  96. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  98. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  99. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  100. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  101. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  102. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  103. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  104. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  105. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  106. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  108. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  109. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  111. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  112. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  113. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  114. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  116. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  117. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  119. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  120. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  121. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  122. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  123. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  124. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  125. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
  126. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  127. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  128. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  129. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  130. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  131. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  132. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  133. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  134. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  135. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  136. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  137. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  138. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  139. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  140. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  141. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  142. mteb/tasks/retrieval/nob/norquad.py +2 -2
  143. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  144. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  145. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  146. mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
  147. mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
  148. mteb/tasks/sts/por/assin2_sts.py +1 -1
  149. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/METADATA +1 -1
  150. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/RECORD +154 -154
  151. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/WHEEL +0 -0
  152. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/entry_points.txt +0 -0
  153. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/licenses/LICENSE +0 -0
  154. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/top_level.txt +0 -0
@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
108
108
  """,
109
109
  )
110
110
 
111
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
111
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
112
112
  if self.data_loaded:
113
113
  return
114
114
 
@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
97
97
  """,
98
98
  )
99
99
 
100
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
100
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
101
101
  if self.data_loaded:
102
102
  return
103
103
 
@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
103
103
  },
104
104
  )
105
105
 
106
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
106
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
107
107
  if self.data_loaded:
108
108
  return
109
109
 
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
161
161
  },
162
162
  )
163
163
 
164
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
164
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
165
165
  if self.data_loaded:
166
166
  return
167
167
 
@@ -96,7 +96,7 @@ de Vries, Harm},
96
96
  """,
97
97
  )
98
98
 
99
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
99
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
100
100
  if self.data_loaded:
101
101
  return
102
102
 
@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
126
126
  """,
127
127
  )
128
128
 
129
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
129
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
130
130
  if self.data_loaded:
131
131
  return
132
132
 
@@ -16,7 +16,7 @@ def _load_data(
16
16
  splits: list[str],
17
17
  langs: list | None = None,
18
18
  revision: str | None = None,
19
- num_proc: int = 1,
19
+ num_proc: int | None = None,
20
20
  ):
21
21
  if langs is None:
22
22
  corpus = {}
@@ -131,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
131
131
  prompt={"query": "Find a screenshot that relevant to the user's question."},
132
132
  )
133
133
 
134
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
134
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
135
135
  if self.data_loaded:
136
136
  return
137
137
 
@@ -179,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
179
179
  prompt={"query": "Find a screenshot that relevant to the user's question."},
180
180
  )
181
181
 
182
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
182
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
183
183
  if self.data_loaded:
184
184
  return
185
185
 
@@ -227,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
227
227
  prompt={"query": "Find a screenshot that relevant to the user's question."},
228
228
  )
229
229
 
230
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
230
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
231
231
  if self.data_loaded:
232
232
  return
233
233
 
@@ -275,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
275
275
  prompt={"query": "Find a screenshot that relevant to the user's question."},
276
276
  )
277
277
 
278
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
278
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
279
279
  if self.data_loaded:
280
280
  return
281
281
 
@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
116
116
  """,
117
117
  )
118
118
 
119
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
119
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
120
120
  if self.data_loaded:
121
121
  return
122
122
 
@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
104
104
  """,
105
105
  )
106
106
 
107
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
107
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
108
108
  if self.data_loaded:
109
109
  return
110
110
 
@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
67
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
146
146
  """,
147
147
  )
148
148
 
149
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
149
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
150
150
  if self.data_loaded:
151
151
  return
152
152
 
@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackAndroid"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackEnglish"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGamingRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGisRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackMathematicaRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackPhysicsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackProgrammersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackStatsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackTexRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackUnixRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWebmastersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWordpressRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -50,7 +50,7 @@ Fishel, Mark},
50
50
  },
51
51
  )
52
52
 
53
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
54
54
  """Load dataset from HuggingFace hub"""
55
55
  if self.data_loaded:
56
56
  return
@@ -58,7 +58,7 @@ Fishel, Mark},
58
58
  self.dataset_transform()
59
59
  self.data_loaded = True
60
60
 
61
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
61
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
62
62
  """And transform to a retrieval dataset, which have the following attributes
63
63
 
64
64
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
37
37
  task_subtypes=["Article retrieval"],
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  """Load dataset from HuggingFace hub"""
42
42
  if self.data_loaded:
43
43
  return
@@ -45,7 +45,7 @@ class SNLRetrieval(AbsTaskRetrieval):
45
45
  self.dataset_transform()
46
46
  self.data_loaded = True
47
47
 
48
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
48
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
49
49
  """And transform to a retrieval dataset, which have the following attributes
50
50
 
51
51
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
36
36
  """,
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -52,7 +52,7 @@ Zong, Chengqing},
52
52
  """,
53
53
  )
54
54
 
55
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
55
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
56
56
  if self.data_loaded:
57
57
  return
58
58
 
@@ -66,6 +66,6 @@ Seid Muhie Yimam and Saif M. Mohammad},
66
66
  min_score = 0
67
67
  max_score = 1
68
68
 
69
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
69
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
70
70
  for lang, subset in self.dataset.items():
71
71
  self.dataset[lang] = subset.rename_column("label", "score")
@@ -56,6 +56,6 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS):
56
56
  min_score = 0
57
57
  max_score = 5
58
58
 
59
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
59
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
60
60
  for lang, subset in self.dataset.items():
61
61
  self.dataset[lang] = subset.rename_column("similarity_score", "score")
@@ -39,7 +39,7 @@ class Assin2STS(AbsTaskSTS):
39
39
  min_score = 1
40
40
  max_score = 5
41
41
 
42
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
42
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
43
43
  self.dataset = self.dataset.rename_columns(
44
44
  {
45
45
  "premise": "sentence1",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.17
3
+ Version: 2.7.18
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>