mteb 2.7.17__py3-none-any.whl → 2.7.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. mteb/_create_dataloaders.py +16 -16
  2. mteb/_evaluators/any_sts_evaluator.py +1 -1
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +1 -1
  6. mteb/_evaluators/retrieval_evaluator.py +1 -1
  7. mteb/_evaluators/sklearn_evaluator.py +4 -2
  8. mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
  9. mteb/_evaluators/text/summarization_evaluator.py +1 -1
  10. mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
  11. mteb/abstasks/abstask.py +4 -4
  12. mteb/abstasks/classification.py +2 -2
  13. mteb/abstasks/clustering.py +1 -1
  14. mteb/abstasks/clustering_legacy.py +1 -1
  15. mteb/abstasks/image/image_text_pair_classification.py +1 -1
  16. mteb/abstasks/multilabel_classification.py +1 -1
  17. mteb/abstasks/pair_classification.py +1 -1
  18. mteb/abstasks/retrieval.py +8 -5
  19. mteb/abstasks/retrieval_dataset_loaders.py +27 -8
  20. mteb/abstasks/sts.py +1 -1
  21. mteb/abstasks/text/bitext_mining.py +2 -2
  22. mteb/abstasks/text/reranking.py +1 -1
  23. mteb/abstasks/text/summarization.py +1 -1
  24. mteb/abstasks/zeroshot_classification.py +1 -1
  25. mteb/evaluate.py +2 -2
  26. mteb/models/model_implementations/bm25.py +2 -2
  27. mteb/models/model_implementations/ict_time_and_querit_models.py +115 -0
  28. mteb/models/model_implementations/pylate_models.py +4 -4
  29. mteb/models/models_protocols.py +2 -2
  30. mteb/models/search_wrappers.py +4 -4
  31. mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  33. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  36. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
  37. mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
  38. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
  39. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
  40. mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
  41. mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
  42. mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
  43. mteb/tasks/classification/multilingual/language_classification.py +1 -1
  44. mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
  45. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  46. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
  47. mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
  48. mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
  49. mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
  50. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  51. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  52. mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
  53. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  54. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  55. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  56. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  57. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
  58. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  59. mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
  60. mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
  61. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  62. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  63. mteb/tasks/retrieval/code/code_rag.py +8 -8
  64. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  65. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  66. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  67. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  68. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  69. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  70. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  71. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  72. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  73. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  74. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  75. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  76. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  77. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  78. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  79. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  80. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  81. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  82. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  83. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  84. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  85. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  86. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  87. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  88. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  89. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  90. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  91. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  92. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  93. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  94. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  95. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  96. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  97. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  98. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  99. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  100. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  101. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  102. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  103. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  104. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  105. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  106. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  108. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  109. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  111. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  112. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  113. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  114. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  115. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  117. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  118. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  119. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  120. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  121. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  122. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  123. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  124. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  125. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  126. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
  127. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  128. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  129. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  130. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  131. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  132. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  133. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  134. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  135. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  136. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  137. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  138. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  139. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  140. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  141. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  142. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  143. mteb/tasks/retrieval/nob/norquad.py +2 -2
  144. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  145. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  146. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  147. mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
  148. mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
  149. mteb/tasks/sts/por/assin2_sts.py +1 -1
  150. mteb/types/_encoder_io.py +1 -1
  151. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/METADATA +1 -1
  152. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/RECORD +156 -155
  153. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/WHEEL +0 -0
  154. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/top_level.txt +0 -0
@@ -95,7 +95,7 @@ class VidoreArxivQARetrieval(AbsTaskRetrieval):
95
95
  prompt={"query": "Find a screenshot that relevant to the user's question."},
96
96
  )
97
97
 
98
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
98
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
99
99
  self.corpus, self.queries, self.relevant_docs = _load_data(
100
100
  path=self.metadata.dataset["path"],
101
101
  splits=self.metadata.eval_splits,
@@ -138,7 +138,7 @@ class VidoreDocVQARetrieval(AbsTaskRetrieval):
138
138
  prompt={"query": "Find a screenshot that relevant to the user's question."},
139
139
  )
140
140
 
141
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
141
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
142
142
  self.corpus, self.queries, self.relevant_docs = _load_data(
143
143
  path=self.metadata.dataset["path"],
144
144
  splits=self.metadata.eval_splits,
@@ -181,7 +181,7 @@ class VidoreInfoVQARetrieval(AbsTaskRetrieval):
181
181
  prompt={"query": "Find a screenshot that relevant to the user's question."},
182
182
  )
183
183
 
184
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
184
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
185
185
  self.corpus, self.queries, self.relevant_docs = _load_data(
186
186
  path=self.metadata.dataset["path"],
187
187
  splits=self.metadata.eval_splits,
@@ -224,7 +224,7 @@ class VidoreTabfquadRetrieval(AbsTaskRetrieval):
224
224
  prompt={"query": "Find a screenshot that relevant to the user's question."},
225
225
  )
226
226
 
227
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
227
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
228
228
  self.corpus, self.queries, self.relevant_docs = _load_data(
229
229
  path=self.metadata.dataset["path"],
230
230
  splits=self.metadata.eval_splits,
@@ -267,7 +267,7 @@ class VidoreTatdqaRetrieval(AbsTaskRetrieval):
267
267
  prompt={"query": "Find a screenshot that relevant to the user's question."},
268
268
  )
269
269
 
270
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
270
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
271
271
  self.corpus, self.queries, self.relevant_docs = _load_data(
272
272
  path=self.metadata.dataset["path"],
273
273
  splits=self.metadata.eval_splits,
@@ -310,7 +310,7 @@ class VidoreShiftProjectRetrieval(AbsTaskRetrieval):
310
310
  prompt={"query": "Find a screenshot that relevant to the user's question."},
311
311
  )
312
312
 
313
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
313
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
314
314
  self.corpus, self.queries, self.relevant_docs = _load_data(
315
315
  path=self.metadata.dataset["path"],
316
316
  splits=self.metadata.eval_splits,
@@ -354,7 +354,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
354
354
  adapted_from=["VidoreDocVQARetrieval"],
355
355
  )
356
356
 
357
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
357
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
358
358
  self.corpus, self.queries, self.relevant_docs = _load_data(
359
359
  path=self.metadata.dataset["path"],
360
360
  splits=self.metadata.eval_splits,
@@ -398,7 +398,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
398
398
  adapted_from=["VidoreDocVQARetrieval"],
399
399
  )
400
400
 
401
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
401
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
402
402
  self.corpus, self.queries, self.relevant_docs = _load_data(
403
403
  path=self.metadata.dataset["path"],
404
404
  splits=self.metadata.eval_splits,
@@ -442,7 +442,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
442
442
  adapted_from=["VidoreDocVQARetrieval"],
443
443
  )
444
444
 
445
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
445
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
446
446
  self.corpus, self.queries, self.relevant_docs = _load_data(
447
447
  path=self.metadata.dataset["path"],
448
448
  splits=self.metadata.eval_splits,
@@ -486,7 +486,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
486
486
  adapted_from=["VidoreDocVQARetrieval"],
487
487
  )
488
488
 
489
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
489
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
490
490
  self.corpus, self.queries, self.relevant_docs = _load_data(
491
491
  path=self.metadata.dataset["path"],
492
492
  splits=self.metadata.eval_splits,
@@ -49,7 +49,7 @@ Liu, Yang},
49
49
  """,
50
50
  )
51
51
 
52
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
52
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
53
53
  if self.data_loaded:
54
54
  return
55
55
  dataset_raw = datasets.load_dataset(
@@ -38,7 +38,7 @@ class SyntecRetrieval(AbsTaskRetrieval):
38
38
  """,
39
39
  )
40
40
 
41
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
41
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
42
42
  if self.data_loaded:
43
43
  return
44
44
  # fetch both subsets of the dataset
@@ -43,7 +43,7 @@ class HunSum2AbstractiveRetrieval(AbsTaskRetrieval):
43
43
  """,
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -33,7 +33,7 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
33
33
  bibtex_citation="",
34
34
  )
35
35
 
36
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
36
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
37
37
  if self.data_loaded:
38
38
  return
39
39
 
@@ -53,7 +53,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval):
53
53
  )
54
54
  num_of_distractors = 4
55
55
 
56
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
56
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
57
57
  """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
58
58
  Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
59
59
 
@@ -54,7 +54,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval):
54
54
 
55
55
  num_of_distractors = 4
56
56
 
57
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
57
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
58
58
  """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
59
59
  Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
60
60
 
@@ -111,7 +111,7 @@ class CUREv1Retrieval(AbsTaskRetrieval):
111
111
 
112
112
  return queries
113
113
 
114
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
114
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
115
115
  if self.data_loaded:
116
116
  return
117
117
 
@@ -148,7 +148,7 @@ def _load_data(
148
148
  return corpus, queries, relevant_docs
149
149
 
150
150
 
151
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
151
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
152
152
  if self.data_loaded:
153
153
  return
154
154
 
@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
143
143
  prompt={"query": "Find a screenshot that is relevant to the user's query."},
144
144
  )
145
145
 
146
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
146
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
147
147
  if self.data_loaded:
148
148
  return
149
149
 
@@ -108,7 +108,7 @@ class MrTidyRetrieval(AbsTaskRetrieval):
108
108
  """,
109
109
  )
110
110
 
111
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
111
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
112
112
  if self.data_loaded:
113
113
  return
114
114
 
@@ -97,7 +97,7 @@ class PublicHealthQARetrieval(AbsTaskRetrieval):
97
97
  """,
98
98
  )
99
99
 
100
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
100
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
101
101
  if self.data_loaded:
102
102
  return
103
103
 
@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
103
103
  },
104
104
  )
105
105
 
106
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
106
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
107
107
  if self.data_loaded:
108
108
  return
109
109
 
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
161
161
  },
162
162
  )
163
163
 
164
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
164
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
165
165
  if self.data_loaded:
166
166
  return
167
167
 
@@ -96,7 +96,7 @@ de Vries, Harm},
96
96
  """,
97
97
  )
98
98
 
99
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
99
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
100
100
  if self.data_loaded:
101
101
  return
102
102
 
@@ -126,7 +126,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
126
126
  """,
127
127
  )
128
128
 
129
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
129
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
130
130
  if self.data_loaded:
131
131
  return
132
132
 
@@ -16,7 +16,7 @@ def _load_data(
16
16
  splits: list[str],
17
17
  langs: list | None = None,
18
18
  revision: str | None = None,
19
- num_proc: int = 1,
19
+ num_proc: int | None = None,
20
20
  ):
21
21
  if langs is None:
22
22
  corpus = {}
@@ -131,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
131
131
  prompt={"query": "Find a screenshot that relevant to the user's question."},
132
132
  )
133
133
 
134
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
134
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
135
135
  if self.data_loaded:
136
136
  return
137
137
 
@@ -179,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
179
179
  prompt={"query": "Find a screenshot that relevant to the user's question."},
180
180
  )
181
181
 
182
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
182
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
183
183
  if self.data_loaded:
184
184
  return
185
185
 
@@ -227,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
227
227
  prompt={"query": "Find a screenshot that relevant to the user's question."},
228
228
  )
229
229
 
230
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
230
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
231
231
  if self.data_loaded:
232
232
  return
233
233
 
@@ -275,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
275
275
  prompt={"query": "Find a screenshot that relevant to the user's question."},
276
276
  )
277
277
 
278
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
278
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
279
279
  if self.data_loaded:
280
280
  return
281
281
 
@@ -116,7 +116,7 @@ class WITT2IRetrieval(AbsTaskRetrieval):
116
116
  """,
117
117
  )
118
118
 
119
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
119
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
120
120
  if self.data_loaded:
121
121
  return
122
122
 
@@ -104,7 +104,7 @@ class XFlickr30kCoT2IRetrieval(AbsTaskRetrieval):
104
104
  """,
105
105
  )
106
106
 
107
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
107
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
108
108
  if self.data_loaded:
109
109
  return
110
110
 
@@ -64,7 +64,7 @@ class XQuADRetrieval(AbsTaskRetrieval):
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
67
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -146,7 +146,7 @@ class XM3600T2IRetrieval(AbsTaskRetrieval):
146
146
  """,
147
147
  )
148
148
 
149
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
149
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
150
150
  if self.data_loaded:
151
151
  return
152
152
 
@@ -42,7 +42,7 @@ class CQADupstackAndroidNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackAndroid"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackEnglishNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackEnglish"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGamingNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGamingRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackGisNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackGisRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackMathematicaNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackMathematicaRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackPhysicsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackPhysicsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackProgrammersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackProgrammersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackStatsNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackStatsRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackTexNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackTexRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackUnixNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackUnixRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWebmastersNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWebmastersRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -42,7 +42,7 @@ class CQADupstackWordpressNLRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["CQADupstackWordpressRetrieval"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -50,7 +50,7 @@ Fishel, Mark},
50
50
  },
51
51
  )
52
52
 
53
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
54
54
  """Load dataset from HuggingFace hub"""
55
55
  if self.data_loaded:
56
56
  return
@@ -58,7 +58,7 @@ Fishel, Mark},
58
58
  self.dataset_transform()
59
59
  self.data_loaded = True
60
60
 
61
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
61
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
62
62
  """And transform to a retrieval dataset, which have the following attributes
63
63
 
64
64
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -37,7 +37,7 @@ class SNLRetrieval(AbsTaskRetrieval):
37
37
  task_subtypes=["Article retrieval"],
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  """Load dataset from HuggingFace hub"""
42
42
  if self.data_loaded:
43
43
  return
@@ -45,7 +45,7 @@ class SNLRetrieval(AbsTaskRetrieval):
45
45
  self.dataset_transform()
46
46
  self.data_loaded = True
47
47
 
48
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
48
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
49
49
  """And transform to a retrieval dataset, which have the following attributes
50
50
 
51
51
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -36,7 +36,7 @@ class SlovakSumRetrieval(AbsTaskRetrieval):
36
36
  """,
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -52,7 +52,7 @@ Zong, Chengqing},
52
52
  """,
53
53
  )
54
54
 
55
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
55
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
56
56
  if self.data_loaded:
57
57
  return
58
58
 
@@ -66,6 +66,6 @@ Seid Muhie Yimam and Saif M. Mohammad},
66
66
  min_score = 0
67
67
  max_score = 1
68
68
 
69
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
69
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
70
70
  for lang, subset in self.dataset.items():
71
71
  self.dataset[lang] = subset.rename_column("label", "score")
@@ -56,6 +56,6 @@ class STSBenchmarkMultilingualSTS(AbsTaskSTS):
56
56
  min_score = 0
57
57
  max_score = 5
58
58
 
59
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
59
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
60
60
  for lang, subset in self.dataset.items():
61
61
  self.dataset[lang] = subset.rename_column("similarity_score", "score")
@@ -39,7 +39,7 @@ class Assin2STS(AbsTaskSTS):
39
39
  min_score = 1
40
40
  max_score = 5
41
41
 
42
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
42
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
43
43
  self.dataset = self.dataset.rename_columns(
44
44
  {
45
45
  "premise": "sentence1",
mteb/types/_encoder_io.py CHANGED
@@ -27,7 +27,7 @@ class EncodeKwargs(TypedDict):
27
27
 
28
28
 
29
29
  # --- Output types ---
30
- Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
30
+ Array = NDArray[np.floating | np.integer | np.bool_] | torch.Tensor
31
31
  """General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
32
32
 
33
33
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.17
3
+ Version: 2.7.19
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>