mteb 2.7.17__py3-none-any.whl → 2.7.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. mteb/_create_dataloaders.py +16 -16
  2. mteb/_evaluators/any_sts_evaluator.py +1 -1
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +1 -1
  6. mteb/_evaluators/retrieval_evaluator.py +1 -1
  7. mteb/_evaluators/sklearn_evaluator.py +4 -2
  8. mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
  9. mteb/_evaluators/text/summarization_evaluator.py +1 -1
  10. mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
  11. mteb/abstasks/abstask.py +4 -4
  12. mteb/abstasks/classification.py +2 -2
  13. mteb/abstasks/clustering.py +1 -1
  14. mteb/abstasks/clustering_legacy.py +1 -1
  15. mteb/abstasks/image/image_text_pair_classification.py +1 -1
  16. mteb/abstasks/multilabel_classification.py +1 -1
  17. mteb/abstasks/pair_classification.py +1 -1
  18. mteb/abstasks/retrieval.py +8 -5
  19. mteb/abstasks/retrieval_dataset_loaders.py +27 -8
  20. mteb/abstasks/sts.py +1 -1
  21. mteb/abstasks/text/bitext_mining.py +2 -2
  22. mteb/abstasks/text/reranking.py +1 -1
  23. mteb/abstasks/text/summarization.py +1 -1
  24. mteb/abstasks/zeroshot_classification.py +1 -1
  25. mteb/evaluate.py +2 -2
  26. mteb/models/model_implementations/bm25.py +2 -2
  27. mteb/models/model_implementations/ict_time_and_querit_models.py +115 -0
  28. mteb/models/model_implementations/pylate_models.py +4 -4
  29. mteb/models/models_protocols.py +2 -2
  30. mteb/models/search_wrappers.py +4 -4
  31. mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  33. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  36. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
  37. mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
  38. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
  39. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
  40. mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
  41. mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
  42. mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
  43. mteb/tasks/classification/multilingual/language_classification.py +1 -1
  44. mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
  45. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  46. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
  47. mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
  48. mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
  49. mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
  50. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  51. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  52. mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
  53. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  54. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  55. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  56. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  57. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
  58. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  59. mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
  60. mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
  61. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  62. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  63. mteb/tasks/retrieval/code/code_rag.py +8 -8
  64. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  65. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  66. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  67. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  68. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  69. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  70. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  71. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  72. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  73. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  74. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  75. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  76. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  77. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  78. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  79. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  80. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  81. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  82. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  83. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  84. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  85. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  86. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  87. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  88. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  89. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  90. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  91. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  92. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  93. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  94. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  95. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  96. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  97. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  98. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  99. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  100. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  101. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  102. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  103. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  104. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  105. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  106. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  108. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  109. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  111. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  112. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  113. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  114. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  115. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  117. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  118. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  119. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  120. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  121. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  122. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  123. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  124. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  125. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  126. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
  127. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  128. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  129. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  130. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  131. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  132. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  133. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  134. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  135. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  136. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  137. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  138. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  139. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  140. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  141. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  142. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  143. mteb/tasks/retrieval/nob/norquad.py +2 -2
  144. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  145. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  146. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  147. mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
  148. mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
  149. mteb/tasks/sts/por/assin2_sts.py +1 -1
  150. mteb/types/_encoder_io.py +1 -1
  151. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/METADATA +1 -1
  152. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/RECORD +156 -155
  153. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/WHEEL +0 -0
  154. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/top_level.txt +0 -0
@@ -51,7 +51,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
51
51
  **common_args,
52
52
  )
53
53
 
54
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
54
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
55
55
  """Load dataset from HuggingFace hub"""
56
56
  if self.data_loaded:
57
57
  return
@@ -59,7 +59,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
59
59
  self.dataset_transform()
60
60
  self.data_loaded = True
61
61
 
62
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
62
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
63
63
  """And transform to a retrieval dataset, which have the following attributes
64
64
 
65
65
  self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -108,7 +108,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
108
108
  **common_args,
109
109
  )
110
110
 
111
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
111
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
112
112
  """Load dataset from HuggingFace hub"""
113
113
  if self.data_loaded:
114
114
  return
@@ -116,7 +116,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
116
116
  self.dataset_transform()
117
117
  self.data_loaded = True
118
118
 
119
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
119
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
120
120
  """And transform to a retrieval dataset, which have the following attributes
121
121
 
122
122
  self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -168,7 +168,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
168
168
  **common_args,
169
169
  )
170
170
 
171
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
171
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
172
172
  """Load dataset from HuggingFace hub"""
173
173
  if self.data_loaded:
174
174
  return
@@ -176,7 +176,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
176
176
  self.dataset_transform()
177
177
  self.data_loaded = True
178
178
 
179
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
179
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
180
180
  """And transform to a retrieval dataset, which have the following attributes
181
181
 
182
182
  self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -225,7 +225,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
225
225
  **common_args,
226
226
  )
227
227
 
228
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
228
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
229
229
  """Load dataset from HuggingFace hub"""
230
230
  if self.data_loaded:
231
231
  return
@@ -233,7 +233,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
233
233
  self.dataset_transform()
234
234
  self.data_loaded = True
235
235
 
236
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
236
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
237
237
  """And transform to a retrieval dataset, which have the following attributes
238
238
 
239
239
  self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -99,7 +99,7 @@ class CodeSearchNetCCRetrieval(AbsTaskRetrieval):
99
99
  """,
100
100
  )
101
101
 
102
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
102
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
103
103
  if self.data_loaded:
104
104
  return
105
105
 
@@ -97,7 +97,7 @@ class COIRCodeSearchNetRetrieval(AbsTaskRetrieval):
97
97
  """,
98
98
  )
99
99
 
100
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
100
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
101
101
  if self.data_loaded:
102
102
  return
103
103
 
@@ -34,7 +34,7 @@ class DS1000Retrieval(AbsTaskRetrieval):
34
34
  """,
35
35
  )
36
36
 
37
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
37
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
38
38
  if self.data_loaded:
39
39
  return
40
40
 
@@ -37,7 +37,7 @@ class FreshStackRetrieval(AbsTaskRetrieval):
37
37
  """,
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -34,7 +34,7 @@ class HumanEvalRetrieval(AbsTaskRetrieval):
34
34
  }""",
35
35
  )
36
36
 
37
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
37
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
38
38
  if self.data_loaded:
39
39
  return
40
40
 
@@ -34,7 +34,7 @@ class MBPPRetrieval(AbsTaskRetrieval):
34
34
  """,
35
35
  )
36
36
 
37
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
37
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
38
38
  if self.data_loaded:
39
39
  return
40
40
 
@@ -36,7 +36,7 @@ class WikiSQLRetrieval(AbsTaskRetrieval):
36
36
  """,
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
 
@@ -47,7 +47,7 @@ Derczynski, Leon},
47
47
  task_subtypes=["Claim verification"],
48
48
  )
49
49
 
50
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
50
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
51
51
  """Load dataset from HuggingFace hub"""
52
52
  if self.data_loaded:
53
53
  return
@@ -55,7 +55,7 @@ Derczynski, Leon},
55
55
  self.dataset_transform()
56
56
  self.data_loaded = True
57
57
 
58
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
58
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  """And transform to a retrieval dataset, which have the following attributes
60
60
 
61
61
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -60,7 +60,7 @@ Piperidis, Stelios},
60
60
  task_subtypes=["Article retrieval"],
61
61
  )
62
62
 
63
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
63
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
64
64
  """Load dataset from HuggingFace hub"""
65
65
  if self.data_loaded:
66
66
  return
@@ -68,7 +68,7 @@ Piperidis, Stelios},
68
68
  self.dataset_transform()
69
69
  self.data_loaded = True
70
70
 
71
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
71
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
72
72
  """And transform to a retrieval dataset, which have the following attributes
73
73
 
74
74
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -36,7 +36,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
36
36
  task_subtypes=["Question answering"],
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  """Load dataset from HuggingFace hub"""
41
41
  if self.data_loaded:
42
42
  return
@@ -44,7 +44,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
44
44
  self.dataset_transform()
45
45
  self.data_loaded = True
46
46
 
47
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
47
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  """And transform to a retrieval dataset, which have the following attributes
49
49
 
50
50
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -50,7 +50,7 @@ Lukas, Leon},
50
50
  def get_hash(input_str) -> str:
51
51
  return hashlib.md5(input_str.encode("utf-8")).hexdigest()
52
52
 
53
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
54
54
  if self.data_loaded:
55
55
  return
56
56
 
@@ -58,7 +58,7 @@ class GermanQuADRetrieval(AbsTaskRetrieval):
58
58
  """,
59
59
  )
60
60
 
61
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
61
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
62
62
  if self.data_loaded:
63
63
  return
64
64
 
@@ -31,7 +31,7 @@ class GreekCivicsQA(AbsTaskRetrieval):
31
31
  bibtex_citation="",
32
32
  )
33
33
 
34
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
34
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
35
35
  if self.data_loaded:
36
36
  return
37
37
  # fetch both subsets of the dataset
@@ -83,7 +83,7 @@ def load_bright_data(
83
83
  return corpus, queries, relevant_docs
84
84
 
85
85
 
86
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
86
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
87
87
  if self.data_loaded:
88
88
  return
89
89
 
@@ -36,7 +36,7 @@ class ChatDoctorRetrieval(AbsTaskRetrieval):
36
36
  },
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
 
@@ -37,7 +37,7 @@ class FinQARetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -37,7 +37,7 @@ class FinanceBenchRetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -90,7 +90,7 @@ class HatefulMemesI2TRetrieval(AbsTaskRetrieval):
90
90
  """,
91
91
  )
92
92
 
93
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
93
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
94
94
  if self.data_loaded:
95
95
  return
96
96
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -90,7 +90,7 @@ class HatefulMemesT2IRetrieval(AbsTaskRetrieval):
90
90
  """,
91
91
  )
92
92
 
93
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
93
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
94
94
  if self.data_loaded:
95
95
  return
96
96
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -37,7 +37,7 @@ class HC3FinanceRetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -55,7 +55,7 @@ Roark, Brian},
55
55
  """,
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
 
@@ -46,7 +46,7 @@ class LEMBNeedleRetrieval(AbsTaskRetrieval):
46
46
  """,
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -46,7 +46,7 @@ class LEMBPasskeyRetrieval(AbsTaskRetrieval):
46
46
  """,
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -51,7 +51,7 @@ Villavicencio, Aline},
51
51
  """,
52
52
  )
53
53
 
54
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
54
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
55
55
  if self.data_loaded:
56
56
  return
57
57
 
@@ -40,7 +40,7 @@ class LEMBWikimQARetrieval(AbsTaskRetrieval):
40
40
  """,
41
41
  )
42
42
 
43
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
43
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
44
44
  if self.data_loaded:
45
45
  return
46
46
 
@@ -64,7 +64,7 @@ Zhou, Yichao},
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
67
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -35,7 +35,7 @@ class LitSearchRetrieval(AbsTaskRetrieval):
35
35
  """,
36
36
  )
37
37
 
38
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
38
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
39
39
  if self.data_loaded:
40
40
  return
41
41
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -113,7 +113,7 @@ class MemotionI2TRetrieval(AbsTaskRetrieval):
113
113
  """,
114
114
  )
115
115
 
116
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
116
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
117
117
  if self.data_loaded:
118
118
  return
119
119
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -112,7 +112,7 @@ class MemotionT2IRetrieval(AbsTaskRetrieval):
112
112
  """,
113
113
  )
114
114
 
115
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
115
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
116
116
  if self.data_loaded:
117
117
  return
118
118
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -55,7 +55,7 @@ Reddy, Siva},
55
55
  """,
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -40,7 +40,7 @@ class NanoArguAnaRetrieval(AbsTaskRetrieval):
40
40
  adapted_from=["ArguAna"],
41
41
  )
42
42
 
43
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
43
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
44
44
  if self.data_loaded:
45
45
  return
46
46
 
@@ -44,7 +44,7 @@ class NanoClimateFeverRetrieval(AbsTaskRetrieval):
44
44
  adapted_from=["ClimateFEVER"],
45
45
  )
46
46
 
47
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
47
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  if self.data_loaded:
49
49
  return
50
50
 
@@ -42,7 +42,7 @@ class NanoDBPediaRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["DBPedia"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -57,7 +57,7 @@ Stent, Amanda},
57
57
  adapted_from=["FEVER"],
58
58
  )
59
59
 
60
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
60
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
61
61
  if self.data_loaded:
62
62
  return
63
63
 
@@ -43,7 +43,7 @@ class NanoFiQA2018Retrieval(AbsTaskRetrieval):
43
43
  adapted_from=["FiQA2018"],
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
 
@@ -60,7 +60,7 @@ Tsujii, Jun{'}ichi},
60
60
  adapted_from=["HotpotQA"],
61
61
  )
62
62
 
63
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
63
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
64
64
  if self.data_loaded:
65
65
  return
66
66
 
@@ -55,7 +55,7 @@ Li Deng},
55
55
  adapted_from=["MSMARCO"],
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
 
@@ -46,7 +46,7 @@ class NanoNFCorpusRetrieval(AbsTaskRetrieval):
46
46
  adapted_from=["NFCorpus"],
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -46,7 +46,7 @@ Linguistics},
46
46
  adapted_from=["NQ"],
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -45,7 +45,7 @@ class NanoQuoraRetrieval(AbsTaskRetrieval):
45
45
  adapted_from=["QuoraRetrieval"],
46
46
  )
47
47
 
48
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
48
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
49
49
  if self.data_loaded:
50
50
  return
51
51
 
@@ -42,7 +42,7 @@ class NanoSciFactRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["SciFact"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -44,7 +44,7 @@ class NanoSCIDOCSRetrieval(AbsTaskRetrieval):
44
44
  adapted_from=["SCIDOCS"],
45
45
  )
46
46
 
47
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
47
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  if self.data_loaded:
49
49
  return
50
50
 
@@ -53,7 +53,7 @@ Questions}},
53
53
  adapted_from=["Touche2020"],
54
54
  )
55
55
 
56
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
56
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
57
57
  if self.data_loaded:
58
58
  return
59
59
 
@@ -43,7 +43,7 @@ class NarrativeQARetrieval(AbsTaskRetrieval):
43
43
  """,
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
 
@@ -70,7 +70,7 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval):
70
70
  """,
71
71
  )
72
72
 
73
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
73
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
74
74
  if self.data_loaded:
75
75
  return
76
76
 
@@ -114,7 +114,7 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval):
114
114
  """,
115
115
  )
116
116
 
117
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
117
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
118
118
  if self.data_loaded:
119
119
  return
120
120
 
@@ -158,7 +158,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval):
158
158
  """,
159
159
  )
160
160
 
161
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
161
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
162
162
  if self.data_loaded:
163
163
  return
164
164
 
@@ -202,7 +202,7 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval):
202
202
  """,
203
203
  )
204
204
 
205
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
205
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
206
206
  if self.data_loaded:
207
207
  return
208
208
 
@@ -246,7 +246,7 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval):
246
246
  """,
247
247
  )
248
248
 
249
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
249
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
250
250
  if self.data_loaded:
251
251
  return
252
252
 
@@ -290,7 +290,7 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval):
290
290
  """,
291
291
  )
292
292
 
293
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
293
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
294
294
  if self.data_loaded:
295
295
  return
296
296
 
@@ -334,7 +334,7 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval):
334
334
  """,
335
335
  )
336
336
 
337
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
337
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
338
338
  if self.data_loaded:
339
339
  return
340
340
 
@@ -378,7 +378,7 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval):
378
378
  """,
379
379
  )
380
380
 
381
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
381
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
382
382
  if self.data_loaded:
383
383
  return
384
384
 
@@ -88,7 +88,7 @@ class SciMMIRI2TRetrieval(AbsTaskRetrieval):
88
88
  """,
89
89
  )
90
90
 
91
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
91
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
92
92
  if self.data_loaded:
93
93
  return
94
94
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -88,7 +88,7 @@ class SciMMIRT2IRetrieval(AbsTaskRetrieval):
88
88
  """,
89
89
  )
90
90
 
91
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
91
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
92
92
  if self.data_loaded:
93
93
  return
94
94
  self.corpus, self.queries, self.relevant_docs = _load_data(