mteb 2.7.17__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. mteb/_create_dataloaders.py +16 -16
  2. mteb/_evaluators/any_sts_evaluator.py +1 -1
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +1 -1
  6. mteb/_evaluators/retrieval_evaluator.py +1 -1
  7. mteb/_evaluators/sklearn_evaluator.py +4 -2
  8. mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
  9. mteb/_evaluators/text/summarization_evaluator.py +1 -1
  10. mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
  11. mteb/abstasks/abstask.py +4 -4
  12. mteb/abstasks/classification.py +2 -2
  13. mteb/abstasks/clustering.py +1 -1
  14. mteb/abstasks/clustering_legacy.py +1 -1
  15. mteb/abstasks/image/image_text_pair_classification.py +1 -1
  16. mteb/abstasks/multilabel_classification.py +1 -1
  17. mteb/abstasks/pair_classification.py +1 -1
  18. mteb/abstasks/retrieval.py +8 -5
  19. mteb/abstasks/retrieval_dataset_loaders.py +27 -8
  20. mteb/abstasks/sts.py +1 -1
  21. mteb/abstasks/text/bitext_mining.py +2 -2
  22. mteb/abstasks/text/reranking.py +1 -1
  23. mteb/abstasks/text/summarization.py +1 -1
  24. mteb/abstasks/zeroshot_classification.py +1 -1
  25. mteb/evaluate.py +2 -2
  26. mteb/models/model_implementations/bm25.py +2 -2
  27. mteb/models/model_implementations/pylate_models.py +4 -4
  28. mteb/models/models_protocols.py +2 -2
  29. mteb/models/search_wrappers.py +4 -4
  30. mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
  31. mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
  32. mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
  33. mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
  34. mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
  35. mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
  36. mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
  37. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
  38. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
  39. mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
  40. mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
  41. mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
  42. mteb/tasks/classification/multilingual/language_classification.py +1 -1
  43. mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
  44. mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
  45. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
  46. mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
  47. mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
  48. mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
  49. mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
  50. mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
  51. mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
  52. mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
  53. mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
  54. mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
  55. mteb/tasks/multichoice/eng/cv_bench.py +4 -4
  56. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
  57. mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
  58. mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
  59. mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
  60. mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
  61. mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
  62. mteb/tasks/retrieval/code/code_rag.py +8 -8
  63. mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
  64. mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
  65. mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
  66. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
  67. mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
  68. mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
  69. mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
  70. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
  71. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  72. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  73. mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
  74. mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
  75. mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
  76. mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
  77. mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
  78. mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
  79. mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
  80. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
  81. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
  82. mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
  83. mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
  84. mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
  85. mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
  86. mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
  87. mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
  88. mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
  89. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
  90. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
  91. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
  92. mteb/tasks/retrieval/eng/ml_questions.py +1 -1
  93. mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
  94. mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
  95. mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
  96. mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
  98. mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
  99. mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
  100. mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
  101. mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
  102. mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
  103. mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
  104. mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
  105. mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
  106. mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
  108. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
  109. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
  111. mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
  112. mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
  113. mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
  114. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
  116. mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
  117. mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
  119. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
  120. mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
  121. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
  122. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
  123. mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
  124. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  125. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
  126. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
  127. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
  128. mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
  129. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
  130. mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
  131. mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
  132. mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
  133. mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
  134. mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
  135. mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
  136. mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
  137. mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
  138. mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
  139. mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
  140. mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
  141. mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
  142. mteb/tasks/retrieval/nob/norquad.py +2 -2
  143. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  144. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
  145. mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
  146. mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
  147. mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
  148. mteb/tasks/sts/por/assin2_sts.py +1 -1
  149. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/METADATA +1 -1
  150. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/RECORD +154 -154
  151. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/WHEEL +0 -0
  152. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/entry_points.txt +0 -0
  153. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/licenses/LICENSE +0 -0
  154. {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/top_level.txt +0 -0
@@ -36,7 +36,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
36
36
  task_subtypes=["Question answering"],
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  """Load dataset from HuggingFace hub"""
41
41
  if self.data_loaded:
42
42
  return
@@ -44,7 +44,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
44
44
  self.dataset_transform()
45
45
  self.data_loaded = True
46
46
 
47
- def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
47
+ def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  """And transform to a retrieval dataset, which have the following attributes
49
49
 
50
50
  self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
@@ -50,7 +50,7 @@ Lukas, Leon},
50
50
  def get_hash(input_str) -> str:
51
51
  return hashlib.md5(input_str.encode("utf-8")).hexdigest()
52
52
 
53
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
53
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
54
54
  if self.data_loaded:
55
55
  return
56
56
 
@@ -58,7 +58,7 @@ class GermanQuADRetrieval(AbsTaskRetrieval):
58
58
  """,
59
59
  )
60
60
 
61
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
61
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
62
62
  if self.data_loaded:
63
63
  return
64
64
 
@@ -31,7 +31,7 @@ class GreekCivicsQA(AbsTaskRetrieval):
31
31
  bibtex_citation="",
32
32
  )
33
33
 
34
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
34
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
35
35
  if self.data_loaded:
36
36
  return
37
37
  # fetch both subsets of the dataset
@@ -83,7 +83,7 @@ def load_bright_data(
83
83
  return corpus, queries, relevant_docs
84
84
 
85
85
 
86
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
86
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
87
87
  if self.data_loaded:
88
88
  return
89
89
 
@@ -36,7 +36,7 @@ class ChatDoctorRetrieval(AbsTaskRetrieval):
36
36
  },
37
37
  )
38
38
 
39
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
39
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
40
40
  if self.data_loaded:
41
41
  return
42
42
 
@@ -37,7 +37,7 @@ class FinQARetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -37,7 +37,7 @@ class FinanceBenchRetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -90,7 +90,7 @@ class HatefulMemesI2TRetrieval(AbsTaskRetrieval):
90
90
  """,
91
91
  )
92
92
 
93
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
93
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
94
94
  if self.data_loaded:
95
95
  return
96
96
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -90,7 +90,7 @@ class HatefulMemesT2IRetrieval(AbsTaskRetrieval):
90
90
  """,
91
91
  )
92
92
 
93
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
93
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
94
94
  if self.data_loaded:
95
95
  return
96
96
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -37,7 +37,7 @@ class HC3FinanceRetrieval(AbsTaskRetrieval):
37
37
  },
38
38
  )
39
39
 
40
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
40
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
41
41
  if self.data_loaded:
42
42
  return
43
43
 
@@ -55,7 +55,7 @@ Roark, Brian},
55
55
  """,
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
 
@@ -46,7 +46,7 @@ class LEMBNeedleRetrieval(AbsTaskRetrieval):
46
46
  """,
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -46,7 +46,7 @@ class LEMBPasskeyRetrieval(AbsTaskRetrieval):
46
46
  """,
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -51,7 +51,7 @@ Villavicencio, Aline},
51
51
  """,
52
52
  )
53
53
 
54
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
54
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
55
55
  if self.data_loaded:
56
56
  return
57
57
 
@@ -40,7 +40,7 @@ class LEMBWikimQARetrieval(AbsTaskRetrieval):
40
40
  """,
41
41
  )
42
42
 
43
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
43
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
44
44
  if self.data_loaded:
45
45
  return
46
46
 
@@ -64,7 +64,7 @@ Zhou, Yichao},
64
64
  """,
65
65
  )
66
66
 
67
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
67
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
68
68
  if self.data_loaded:
69
69
  return
70
70
 
@@ -35,7 +35,7 @@ class LitSearchRetrieval(AbsTaskRetrieval):
35
35
  """,
36
36
  )
37
37
 
38
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
38
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
39
39
  if self.data_loaded:
40
40
  return
41
41
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -113,7 +113,7 @@ class MemotionI2TRetrieval(AbsTaskRetrieval):
113
113
  """,
114
114
  )
115
115
 
116
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
116
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
117
117
  if self.data_loaded:
118
118
  return
119
119
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -112,7 +112,7 @@ class MemotionT2IRetrieval(AbsTaskRetrieval):
112
112
  """,
113
113
  )
114
114
 
115
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
115
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
116
116
  if self.data_loaded:
117
117
  return
118
118
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -55,7 +55,7 @@ Reddy, Siva},
55
55
  """,
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -40,7 +40,7 @@ class NanoArguAnaRetrieval(AbsTaskRetrieval):
40
40
  adapted_from=["ArguAna"],
41
41
  )
42
42
 
43
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
43
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
44
44
  if self.data_loaded:
45
45
  return
46
46
 
@@ -44,7 +44,7 @@ class NanoClimateFeverRetrieval(AbsTaskRetrieval):
44
44
  adapted_from=["ClimateFEVER"],
45
45
  )
46
46
 
47
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
47
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  if self.data_loaded:
49
49
  return
50
50
 
@@ -42,7 +42,7 @@ class NanoDBPediaRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["DBPedia"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -57,7 +57,7 @@ Stent, Amanda},
57
57
  adapted_from=["FEVER"],
58
58
  )
59
59
 
60
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
60
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
61
61
  if self.data_loaded:
62
62
  return
63
63
 
@@ -43,7 +43,7 @@ class NanoFiQA2018Retrieval(AbsTaskRetrieval):
43
43
  adapted_from=["FiQA2018"],
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
 
@@ -60,7 +60,7 @@ Tsujii, Jun{'}ichi},
60
60
  adapted_from=["HotpotQA"],
61
61
  )
62
62
 
63
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
63
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
64
64
  if self.data_loaded:
65
65
  return
66
66
 
@@ -55,7 +55,7 @@ Li Deng},
55
55
  adapted_from=["MSMARCO"],
56
56
  )
57
57
 
58
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
58
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
59
59
  if self.data_loaded:
60
60
  return
61
61
 
@@ -46,7 +46,7 @@ class NanoNFCorpusRetrieval(AbsTaskRetrieval):
46
46
  adapted_from=["NFCorpus"],
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -46,7 +46,7 @@ Linguistics},
46
46
  adapted_from=["NQ"],
47
47
  )
48
48
 
49
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
49
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
50
50
  if self.data_loaded:
51
51
  return
52
52
 
@@ -45,7 +45,7 @@ class NanoQuoraRetrieval(AbsTaskRetrieval):
45
45
  adapted_from=["QuoraRetrieval"],
46
46
  )
47
47
 
48
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
48
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
49
49
  if self.data_loaded:
50
50
  return
51
51
 
@@ -42,7 +42,7 @@ class NanoSciFactRetrieval(AbsTaskRetrieval):
42
42
  adapted_from=["SciFact"],
43
43
  )
44
44
 
45
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
45
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
46
46
  if self.data_loaded:
47
47
  return
48
48
 
@@ -44,7 +44,7 @@ class NanoSCIDOCSRetrieval(AbsTaskRetrieval):
44
44
  adapted_from=["SCIDOCS"],
45
45
  )
46
46
 
47
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
47
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
48
48
  if self.data_loaded:
49
49
  return
50
50
 
@@ -53,7 +53,7 @@ Questions}},
53
53
  adapted_from=["Touche2020"],
54
54
  )
55
55
 
56
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
56
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
57
57
  if self.data_loaded:
58
58
  return
59
59
 
@@ -43,7 +43,7 @@ class NarrativeQARetrieval(AbsTaskRetrieval):
43
43
  """,
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
 
@@ -70,7 +70,7 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval):
70
70
  """,
71
71
  )
72
72
 
73
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
73
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
74
74
  if self.data_loaded:
75
75
  return
76
76
 
@@ -114,7 +114,7 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval):
114
114
  """,
115
115
  )
116
116
 
117
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
117
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
118
118
  if self.data_loaded:
119
119
  return
120
120
 
@@ -158,7 +158,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval):
158
158
  """,
159
159
  )
160
160
 
161
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
161
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
162
162
  if self.data_loaded:
163
163
  return
164
164
 
@@ -202,7 +202,7 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval):
202
202
  """,
203
203
  )
204
204
 
205
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
205
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
206
206
  if self.data_loaded:
207
207
  return
208
208
 
@@ -246,7 +246,7 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval):
246
246
  """,
247
247
  )
248
248
 
249
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
249
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
250
250
  if self.data_loaded:
251
251
  return
252
252
 
@@ -290,7 +290,7 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval):
290
290
  """,
291
291
  )
292
292
 
293
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
293
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
294
294
  if self.data_loaded:
295
295
  return
296
296
 
@@ -334,7 +334,7 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval):
334
334
  """,
335
335
  )
336
336
 
337
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
337
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
338
338
  if self.data_loaded:
339
339
  return
340
340
 
@@ -378,7 +378,7 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval):
378
378
  """,
379
379
  )
380
380
 
381
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
381
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
382
382
  if self.data_loaded:
383
383
  return
384
384
 
@@ -88,7 +88,7 @@ class SciMMIRI2TRetrieval(AbsTaskRetrieval):
88
88
  """,
89
89
  )
90
90
 
91
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
91
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
92
92
  if self.data_loaded:
93
93
  return
94
94
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -88,7 +88,7 @@ class SciMMIRT2IRetrieval(AbsTaskRetrieval):
88
88
  """,
89
89
  )
90
90
 
91
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
91
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
92
92
  if self.data_loaded:
93
93
  return
94
94
  self.corpus, self.queries, self.relevant_docs = _load_data(
@@ -95,7 +95,7 @@ class VidoreArxivQARetrieval(AbsTaskRetrieval):
95
95
  prompt={"query": "Find a screenshot that relevant to the user's question."},
96
96
  )
97
97
 
98
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
98
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
99
99
  self.corpus, self.queries, self.relevant_docs = _load_data(
100
100
  path=self.metadata.dataset["path"],
101
101
  splits=self.metadata.eval_splits,
@@ -138,7 +138,7 @@ class VidoreDocVQARetrieval(AbsTaskRetrieval):
138
138
  prompt={"query": "Find a screenshot that relevant to the user's question."},
139
139
  )
140
140
 
141
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
141
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
142
142
  self.corpus, self.queries, self.relevant_docs = _load_data(
143
143
  path=self.metadata.dataset["path"],
144
144
  splits=self.metadata.eval_splits,
@@ -181,7 +181,7 @@ class VidoreInfoVQARetrieval(AbsTaskRetrieval):
181
181
  prompt={"query": "Find a screenshot that relevant to the user's question."},
182
182
  )
183
183
 
184
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
184
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
185
185
  self.corpus, self.queries, self.relevant_docs = _load_data(
186
186
  path=self.metadata.dataset["path"],
187
187
  splits=self.metadata.eval_splits,
@@ -224,7 +224,7 @@ class VidoreTabfquadRetrieval(AbsTaskRetrieval):
224
224
  prompt={"query": "Find a screenshot that relevant to the user's question."},
225
225
  )
226
226
 
227
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
227
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
228
228
  self.corpus, self.queries, self.relevant_docs = _load_data(
229
229
  path=self.metadata.dataset["path"],
230
230
  splits=self.metadata.eval_splits,
@@ -267,7 +267,7 @@ class VidoreTatdqaRetrieval(AbsTaskRetrieval):
267
267
  prompt={"query": "Find a screenshot that relevant to the user's question."},
268
268
  )
269
269
 
270
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
270
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
271
271
  self.corpus, self.queries, self.relevant_docs = _load_data(
272
272
  path=self.metadata.dataset["path"],
273
273
  splits=self.metadata.eval_splits,
@@ -310,7 +310,7 @@ class VidoreShiftProjectRetrieval(AbsTaskRetrieval):
310
310
  prompt={"query": "Find a screenshot that relevant to the user's question."},
311
311
  )
312
312
 
313
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
313
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
314
314
  self.corpus, self.queries, self.relevant_docs = _load_data(
315
315
  path=self.metadata.dataset["path"],
316
316
  splits=self.metadata.eval_splits,
@@ -354,7 +354,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
354
354
  adapted_from=["VidoreDocVQARetrieval"],
355
355
  )
356
356
 
357
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
357
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
358
358
  self.corpus, self.queries, self.relevant_docs = _load_data(
359
359
  path=self.metadata.dataset["path"],
360
360
  splits=self.metadata.eval_splits,
@@ -398,7 +398,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
398
398
  adapted_from=["VidoreDocVQARetrieval"],
399
399
  )
400
400
 
401
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
401
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
402
402
  self.corpus, self.queries, self.relevant_docs = _load_data(
403
403
  path=self.metadata.dataset["path"],
404
404
  splits=self.metadata.eval_splits,
@@ -442,7 +442,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
442
442
  adapted_from=["VidoreDocVQARetrieval"],
443
443
  )
444
444
 
445
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
445
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
446
446
  self.corpus, self.queries, self.relevant_docs = _load_data(
447
447
  path=self.metadata.dataset["path"],
448
448
  splits=self.metadata.eval_splits,
@@ -486,7 +486,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
486
486
  adapted_from=["VidoreDocVQARetrieval"],
487
487
  )
488
488
 
489
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
489
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
490
490
  self.corpus, self.queries, self.relevant_docs = _load_data(
491
491
  path=self.metadata.dataset["path"],
492
492
  splits=self.metadata.eval_splits,
@@ -49,7 +49,7 @@ Liu, Yang},
49
49
  """,
50
50
  )
51
51
 
52
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
52
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
53
53
  if self.data_loaded:
54
54
  return
55
55
  dataset_raw = datasets.load_dataset(
@@ -38,7 +38,7 @@ class SyntecRetrieval(AbsTaskRetrieval):
38
38
  """,
39
39
  )
40
40
 
41
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
41
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
42
42
  if self.data_loaded:
43
43
  return
44
44
  # fetch both subsets of the dataset
@@ -43,7 +43,7 @@ class HunSum2AbstractiveRetrieval(AbsTaskRetrieval):
43
43
  """,
44
44
  )
45
45
 
46
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
46
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
47
47
  if self.data_loaded:
48
48
  return
49
49
  self.corpus, self.queries, self.relevant_docs = {}, {}, {}
@@ -33,7 +33,7 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
33
33
  bibtex_citation="",
34
34
  )
35
35
 
36
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
36
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
37
37
  if self.data_loaded:
38
38
  return
39
39
 
@@ -53,7 +53,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval):
53
53
  )
54
54
  num_of_distractors = 4
55
55
 
56
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
56
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
57
57
  """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
58
58
  Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
59
59
 
@@ -54,7 +54,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval):
54
54
 
55
55
  num_of_distractors = 4
56
56
 
57
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
57
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
58
58
  """Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
59
59
  Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
60
60
 
@@ -111,7 +111,7 @@ class CUREv1Retrieval(AbsTaskRetrieval):
111
111
 
112
112
  return queries
113
113
 
114
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
114
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
115
115
  if self.data_loaded:
116
116
  return
117
117
 
@@ -148,7 +148,7 @@ def _load_data(
148
148
  return corpus, queries, relevant_docs
149
149
 
150
150
 
151
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
151
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
152
152
  if self.data_loaded:
153
153
  return
154
154
 
@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
143
143
  prompt={"query": "Find a screenshot that is relevant to the user's query."},
144
144
  )
145
145
 
146
- def load_data(self, num_proc: int = 1, **kwargs) -> None:
146
+ def load_data(self, num_proc: int | None = None, **kwargs) -> None:
147
147
  if self.data_loaded:
148
148
  return
149
149