mteb 2.7.17__py3-none-any.whl → 2.7.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -16
- mteb/_evaluators/any_sts_evaluator.py +1 -1
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +1 -1
- mteb/_evaluators/retrieval_evaluator.py +1 -1
- mteb/_evaluators/sklearn_evaluator.py +4 -2
- mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
- mteb/_evaluators/text/summarization_evaluator.py +1 -1
- mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
- mteb/abstasks/abstask.py +4 -4
- mteb/abstasks/classification.py +2 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/clustering_legacy.py +1 -1
- mteb/abstasks/image/image_text_pair_classification.py +1 -1
- mteb/abstasks/multilabel_classification.py +1 -1
- mteb/abstasks/pair_classification.py +1 -1
- mteb/abstasks/retrieval.py +8 -5
- mteb/abstasks/retrieval_dataset_loaders.py +27 -8
- mteb/abstasks/sts.py +1 -1
- mteb/abstasks/text/bitext_mining.py +2 -2
- mteb/abstasks/text/reranking.py +1 -1
- mteb/abstasks/text/summarization.py +1 -1
- mteb/abstasks/zeroshot_classification.py +1 -1
- mteb/evaluate.py +2 -2
- mteb/models/model_implementations/bm25.py +2 -2
- mteb/models/model_implementations/pylate_models.py +4 -4
- mteb/models/models_protocols.py +2 -2
- mteb/models/search_wrappers.py +4 -4
- mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
- mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/language_classification.py +1 -1
- mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
- mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
- mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
- mteb/tasks/sts/por/assin2_sts.py +1 -1
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/METADATA +1 -1
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/RECORD +154 -154
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/WHEEL +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.18.dist-info}/top_level.txt +0 -0
|
@@ -36,7 +36,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
|
|
|
36
36
|
task_subtypes=["Question answering"],
|
|
37
37
|
)
|
|
38
38
|
|
|
39
|
-
def load_data(self, num_proc: int =
|
|
39
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
40
40
|
"""Load dataset from HuggingFace hub"""
|
|
41
41
|
if self.data_loaded:
|
|
42
42
|
return
|
|
@@ -44,7 +44,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
|
|
|
44
44
|
self.dataset_transform()
|
|
45
45
|
self.data_loaded = True
|
|
46
46
|
|
|
47
|
-
def dataset_transform(self, num_proc: int =
|
|
47
|
+
def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
|
|
48
48
|
"""And transform to a retrieval dataset, which have the following attributes
|
|
49
49
|
|
|
50
50
|
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
@@ -50,7 +50,7 @@ Lukas, Leon},
|
|
|
50
50
|
def get_hash(input_str) -> str:
|
|
51
51
|
return hashlib.md5(input_str.encode("utf-8")).hexdigest()
|
|
52
52
|
|
|
53
|
-
def load_data(self, num_proc: int =
|
|
53
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
54
54
|
if self.data_loaded:
|
|
55
55
|
return
|
|
56
56
|
|
|
@@ -31,7 +31,7 @@ class GreekCivicsQA(AbsTaskRetrieval):
|
|
|
31
31
|
bibtex_citation="",
|
|
32
32
|
)
|
|
33
33
|
|
|
34
|
-
def load_data(self, num_proc: int =
|
|
34
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
35
35
|
if self.data_loaded:
|
|
36
36
|
return
|
|
37
37
|
# fetch both subsets of the dataset
|
|
@@ -90,7 +90,7 @@ class HatefulMemesI2TRetrieval(AbsTaskRetrieval):
|
|
|
90
90
|
""",
|
|
91
91
|
)
|
|
92
92
|
|
|
93
|
-
def load_data(self, num_proc: int =
|
|
93
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
94
94
|
if self.data_loaded:
|
|
95
95
|
return
|
|
96
96
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -90,7 +90,7 @@ class HatefulMemesT2IRetrieval(AbsTaskRetrieval):
|
|
|
90
90
|
""",
|
|
91
91
|
)
|
|
92
92
|
|
|
93
|
-
def load_data(self, num_proc: int =
|
|
93
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
94
94
|
if self.data_loaded:
|
|
95
95
|
return
|
|
96
96
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -35,7 +35,7 @@ class LitSearchRetrieval(AbsTaskRetrieval):
|
|
|
35
35
|
""",
|
|
36
36
|
)
|
|
37
37
|
|
|
38
|
-
def load_data(self, num_proc: int =
|
|
38
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
39
39
|
if self.data_loaded:
|
|
40
40
|
return
|
|
41
41
|
self.corpus, self.queries, self.relevant_docs = {}, {}, {}
|
|
@@ -113,7 +113,7 @@ class MemotionI2TRetrieval(AbsTaskRetrieval):
|
|
|
113
113
|
""",
|
|
114
114
|
)
|
|
115
115
|
|
|
116
|
-
def load_data(self, num_proc: int =
|
|
116
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
117
117
|
if self.data_loaded:
|
|
118
118
|
return
|
|
119
119
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -112,7 +112,7 @@ class MemotionT2IRetrieval(AbsTaskRetrieval):
|
|
|
112
112
|
""",
|
|
113
113
|
)
|
|
114
114
|
|
|
115
|
-
def load_data(self, num_proc: int =
|
|
115
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
116
116
|
if self.data_loaded:
|
|
117
117
|
return
|
|
118
118
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -44,7 +44,7 @@ class NanoClimateFeverRetrieval(AbsTaskRetrieval):
|
|
|
44
44
|
adapted_from=["ClimateFEVER"],
|
|
45
45
|
)
|
|
46
46
|
|
|
47
|
-
def load_data(self, num_proc: int =
|
|
47
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
48
48
|
if self.data_loaded:
|
|
49
49
|
return
|
|
50
50
|
|
|
@@ -70,7 +70,7 @@ class R2MEDBiologyRetrieval(AbsTaskRetrieval):
|
|
|
70
70
|
""",
|
|
71
71
|
)
|
|
72
72
|
|
|
73
|
-
def load_data(self, num_proc: int =
|
|
73
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
74
74
|
if self.data_loaded:
|
|
75
75
|
return
|
|
76
76
|
|
|
@@ -114,7 +114,7 @@ class R2MEDBioinformaticsRetrieval(AbsTaskRetrieval):
|
|
|
114
114
|
""",
|
|
115
115
|
)
|
|
116
116
|
|
|
117
|
-
def load_data(self, num_proc: int =
|
|
117
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
118
118
|
if self.data_loaded:
|
|
119
119
|
return
|
|
120
120
|
|
|
@@ -158,7 +158,7 @@ class R2MEDMedicalSciencesRetrieval(AbsTaskRetrieval):
|
|
|
158
158
|
""",
|
|
159
159
|
)
|
|
160
160
|
|
|
161
|
-
def load_data(self, num_proc: int =
|
|
161
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
162
162
|
if self.data_loaded:
|
|
163
163
|
return
|
|
164
164
|
|
|
@@ -202,7 +202,7 @@ class R2MEDMedXpertQAExamRetrieval(AbsTaskRetrieval):
|
|
|
202
202
|
""",
|
|
203
203
|
)
|
|
204
204
|
|
|
205
|
-
def load_data(self, num_proc: int =
|
|
205
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
206
206
|
if self.data_loaded:
|
|
207
207
|
return
|
|
208
208
|
|
|
@@ -246,7 +246,7 @@ class R2MEDMedQADiagRetrieval(AbsTaskRetrieval):
|
|
|
246
246
|
""",
|
|
247
247
|
)
|
|
248
248
|
|
|
249
|
-
def load_data(self, num_proc: int =
|
|
249
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
250
250
|
if self.data_loaded:
|
|
251
251
|
return
|
|
252
252
|
|
|
@@ -290,7 +290,7 @@ class R2MEDPMCTreatmentRetrieval(AbsTaskRetrieval):
|
|
|
290
290
|
""",
|
|
291
291
|
)
|
|
292
292
|
|
|
293
|
-
def load_data(self, num_proc: int =
|
|
293
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
294
294
|
if self.data_loaded:
|
|
295
295
|
return
|
|
296
296
|
|
|
@@ -334,7 +334,7 @@ class R2MEDPMCClinicalRetrieval(AbsTaskRetrieval):
|
|
|
334
334
|
""",
|
|
335
335
|
)
|
|
336
336
|
|
|
337
|
-
def load_data(self, num_proc: int =
|
|
337
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
338
338
|
if self.data_loaded:
|
|
339
339
|
return
|
|
340
340
|
|
|
@@ -378,7 +378,7 @@ class R2MEDIIYiClinicalRetrieval(AbsTaskRetrieval):
|
|
|
378
378
|
""",
|
|
379
379
|
)
|
|
380
380
|
|
|
381
|
-
def load_data(self, num_proc: int =
|
|
381
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
382
382
|
if self.data_loaded:
|
|
383
383
|
return
|
|
384
384
|
|
|
@@ -88,7 +88,7 @@ class SciMMIRI2TRetrieval(AbsTaskRetrieval):
|
|
|
88
88
|
""",
|
|
89
89
|
)
|
|
90
90
|
|
|
91
|
-
def load_data(self, num_proc: int =
|
|
91
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
92
92
|
if self.data_loaded:
|
|
93
93
|
return
|
|
94
94
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -88,7 +88,7 @@ class SciMMIRT2IRetrieval(AbsTaskRetrieval):
|
|
|
88
88
|
""",
|
|
89
89
|
)
|
|
90
90
|
|
|
91
|
-
def load_data(self, num_proc: int =
|
|
91
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
92
92
|
if self.data_loaded:
|
|
93
93
|
return
|
|
94
94
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
@@ -95,7 +95,7 @@ class VidoreArxivQARetrieval(AbsTaskRetrieval):
|
|
|
95
95
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
96
96
|
)
|
|
97
97
|
|
|
98
|
-
def load_data(self, num_proc: int =
|
|
98
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
99
99
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
100
100
|
path=self.metadata.dataset["path"],
|
|
101
101
|
splits=self.metadata.eval_splits,
|
|
@@ -138,7 +138,7 @@ class VidoreDocVQARetrieval(AbsTaskRetrieval):
|
|
|
138
138
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
139
139
|
)
|
|
140
140
|
|
|
141
|
-
def load_data(self, num_proc: int =
|
|
141
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
142
142
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
143
143
|
path=self.metadata.dataset["path"],
|
|
144
144
|
splits=self.metadata.eval_splits,
|
|
@@ -181,7 +181,7 @@ class VidoreInfoVQARetrieval(AbsTaskRetrieval):
|
|
|
181
181
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
182
182
|
)
|
|
183
183
|
|
|
184
|
-
def load_data(self, num_proc: int =
|
|
184
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
185
185
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
186
186
|
path=self.metadata.dataset["path"],
|
|
187
187
|
splits=self.metadata.eval_splits,
|
|
@@ -224,7 +224,7 @@ class VidoreTabfquadRetrieval(AbsTaskRetrieval):
|
|
|
224
224
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
225
225
|
)
|
|
226
226
|
|
|
227
|
-
def load_data(self, num_proc: int =
|
|
227
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
228
228
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
229
229
|
path=self.metadata.dataset["path"],
|
|
230
230
|
splits=self.metadata.eval_splits,
|
|
@@ -267,7 +267,7 @@ class VidoreTatdqaRetrieval(AbsTaskRetrieval):
|
|
|
267
267
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
268
268
|
)
|
|
269
269
|
|
|
270
|
-
def load_data(self, num_proc: int =
|
|
270
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
271
271
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
272
272
|
path=self.metadata.dataset["path"],
|
|
273
273
|
splits=self.metadata.eval_splits,
|
|
@@ -310,7 +310,7 @@ class VidoreShiftProjectRetrieval(AbsTaskRetrieval):
|
|
|
310
310
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
311
311
|
)
|
|
312
312
|
|
|
313
|
-
def load_data(self, num_proc: int =
|
|
313
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
314
314
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
315
315
|
path=self.metadata.dataset["path"],
|
|
316
316
|
splits=self.metadata.eval_splits,
|
|
@@ -354,7 +354,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
|
|
|
354
354
|
adapted_from=["VidoreDocVQARetrieval"],
|
|
355
355
|
)
|
|
356
356
|
|
|
357
|
-
def load_data(self, num_proc: int =
|
|
357
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
358
358
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
359
359
|
path=self.metadata.dataset["path"],
|
|
360
360
|
splits=self.metadata.eval_splits,
|
|
@@ -398,7 +398,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
|
|
|
398
398
|
adapted_from=["VidoreDocVQARetrieval"],
|
|
399
399
|
)
|
|
400
400
|
|
|
401
|
-
def load_data(self, num_proc: int =
|
|
401
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
402
402
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
403
403
|
path=self.metadata.dataset["path"],
|
|
404
404
|
splits=self.metadata.eval_splits,
|
|
@@ -442,7 +442,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
|
|
|
442
442
|
adapted_from=["VidoreDocVQARetrieval"],
|
|
443
443
|
)
|
|
444
444
|
|
|
445
|
-
def load_data(self, num_proc: int =
|
|
445
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
446
446
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
447
447
|
path=self.metadata.dataset["path"],
|
|
448
448
|
splits=self.metadata.eval_splits,
|
|
@@ -486,7 +486,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
|
|
|
486
486
|
adapted_from=["VidoreDocVQARetrieval"],
|
|
487
487
|
)
|
|
488
488
|
|
|
489
|
-
def load_data(self, num_proc: int =
|
|
489
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
490
490
|
self.corpus, self.queries, self.relevant_docs = _load_data(
|
|
491
491
|
path=self.metadata.dataset["path"],
|
|
492
492
|
splits=self.metadata.eval_splits,
|
|
@@ -38,7 +38,7 @@ class SyntecRetrieval(AbsTaskRetrieval):
|
|
|
38
38
|
""",
|
|
39
39
|
)
|
|
40
40
|
|
|
41
|
-
def load_data(self, num_proc: int =
|
|
41
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
42
42
|
if self.data_loaded:
|
|
43
43
|
return
|
|
44
44
|
# fetch both subsets of the dataset
|
|
@@ -43,7 +43,7 @@ class HunSum2AbstractiveRetrieval(AbsTaskRetrieval):
|
|
|
43
43
|
""",
|
|
44
44
|
)
|
|
45
45
|
|
|
46
|
-
def load_data(self, num_proc: int =
|
|
46
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
47
47
|
if self.data_loaded:
|
|
48
48
|
return
|
|
49
49
|
self.corpus, self.queries, self.relevant_docs = {}, {}, {}
|
|
@@ -53,7 +53,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval):
|
|
|
53
53
|
)
|
|
54
54
|
num_of_distractors = 4
|
|
55
55
|
|
|
56
|
-
def load_data(self, num_proc: int =
|
|
56
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
57
57
|
"""Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
|
|
58
58
|
Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
|
|
59
59
|
|
|
@@ -54,7 +54,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval):
|
|
|
54
54
|
|
|
55
55
|
num_of_distractors = 4
|
|
56
56
|
|
|
57
|
-
def load_data(self, num_proc: int =
|
|
57
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
58
58
|
"""Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
|
|
59
59
|
Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
|
|
60
60
|
|
|
@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
|
|
|
143
143
|
prompt={"query": "Find a screenshot that is relevant to the user's query."},
|
|
144
144
|
)
|
|
145
145
|
|
|
146
|
-
def load_data(self, num_proc: int =
|
|
146
|
+
def load_data(self, num_proc: int | None = None, **kwargs) -> None:
|
|
147
147
|
if self.data_loaded:
|
|
148
148
|
return
|
|
149
149
|
|