mteb-2.7.2-py3-none-any.whl → mteb-2.7.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +28 -17
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/__init__.py +2 -0
  37. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  38. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  39. mteb/cache.py +10 -5
  40. mteb/cli/_display_tasks.py +9 -3
  41. mteb/cli/build_cli.py +5 -2
  42. mteb/cli/generate_model_card.py +9 -2
  43. mteb/deprecated_evaluator.py +16 -12
  44. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  64. mteb/evaluate.py +20 -18
  65. mteb/filter_tasks.py +12 -7
  66. mteb/get_tasks.py +9 -4
  67. mteb/languages/language_scripts.py +8 -3
  68. mteb/leaderboard/app.py +7 -3
  69. mteb/leaderboard/table.py +7 -2
  70. mteb/load_results.py +9 -3
  71. mteb/models/abs_encoder.py +22 -12
  72. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  73. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  74. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  75. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  76. mteb/models/get_model_meta.py +11 -4
  77. mteb/models/instruct_wrapper.py +13 -5
  78. mteb/models/model_implementations/align_models.py +10 -4
  79. mteb/models/model_implementations/amazon_models.py +1 -0
  80. mteb/models/model_implementations/andersborges.py +2 -0
  81. mteb/models/model_implementations/ara_models.py +1 -0
  82. mteb/models/model_implementations/arctic_models.py +8 -0
  83. mteb/models/model_implementations/b1ade_models.py +1 -0
  84. mteb/models/model_implementations/bedrock_models.py +20 -6
  85. mteb/models/model_implementations/bge_models.py +40 -1
  86. mteb/models/model_implementations/bica_model.py +1 -0
  87. mteb/models/model_implementations/blip2_models.py +11 -4
  88. mteb/models/model_implementations/blip_models.py +17 -4
  89. mteb/models/model_implementations/bm25.py +22 -14
  90. mteb/models/model_implementations/bmretriever_models.py +10 -2
  91. mteb/models/model_implementations/cadet_models.py +1 -0
  92. mteb/models/model_implementations/cde_models.py +11 -5
  93. mteb/models/model_implementations/clip_models.py +12 -4
  94. mteb/models/model_implementations/clips_models.py +3 -0
  95. mteb/models/model_implementations/codefuse_models.py +5 -0
  96. mteb/models/model_implementations/codesage_models.py +3 -0
  97. mteb/models/model_implementations/cohere_models.py +14 -4
  98. mteb/models/model_implementations/cohere_v.py +14 -4
  99. mteb/models/model_implementations/colpali_models.py +7 -3
  100. mteb/models/model_implementations/colqwen_models.py +17 -31
  101. mteb/models/model_implementations/colsmol_models.py +3 -1
  102. mteb/models/model_implementations/conan_models.py +11 -4
  103. mteb/models/model_implementations/dino_models.py +28 -4
  104. mteb/models/model_implementations/e5_instruct.py +4 -0
  105. mteb/models/model_implementations/e5_models.py +9 -0
  106. mteb/models/model_implementations/e5_v.py +10 -4
  107. mteb/models/model_implementations/eagerworks_models.py +11 -4
  108. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  109. mteb/models/model_implementations/en_code_retriever.py +1 -0
  110. mteb/models/model_implementations/euler_models.py +1 -0
  111. mteb/models/model_implementations/evaclip_models.py +13 -4
  112. mteb/models/model_implementations/fa_models.py +9 -0
  113. mteb/models/model_implementations/facebookai.py +2 -0
  114. mteb/models/model_implementations/geogpt_models.py +1 -0
  115. mteb/models/model_implementations/gme_v_models.py +7 -3
  116. mteb/models/model_implementations/google_models.py +15 -4
  117. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  118. mteb/models/model_implementations/gritlm_models.py +2 -0
  119. mteb/models/model_implementations/gte_models.py +9 -0
  120. mteb/models/model_implementations/hinvec_models.py +6 -1
  121. mteb/models/model_implementations/human.py +1 -0
  122. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  123. mteb/models/model_implementations/inf_models.py +2 -0
  124. mteb/models/model_implementations/jasper_models.py +14 -5
  125. mteb/models/model_implementations/jina_clip.py +10 -4
  126. mteb/models/model_implementations/jina_models.py +17 -5
  127. mteb/models/model_implementations/kalm_models.py +24 -12
  128. mteb/models/model_implementations/kblab.py +1 -0
  129. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  130. mteb/models/model_implementations/kfst.py +1 -0
  131. mteb/models/model_implementations/kowshik24_models.py +1 -0
  132. mteb/models/model_implementations/lens_models.py +2 -0
  133. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  134. mteb/models/model_implementations/linq_models.py +7 -1
  135. mteb/models/model_implementations/listconranker.py +10 -4
  136. mteb/models/model_implementations/llm2clip_models.py +12 -4
  137. mteb/models/model_implementations/llm2vec_models.py +20 -6
  138. mteb/models/model_implementations/mcinext_models.py +8 -2
  139. mteb/models/model_implementations/mdbr_models.py +2 -0
  140. mteb/models/model_implementations/misc_models.py +63 -0
  141. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  142. mteb/models/model_implementations/mme5_models.py +2 -1
  143. mteb/models/model_implementations/moco_models.py +11 -4
  144. mteb/models/model_implementations/mod_models.py +2 -1
  145. mteb/models/model_implementations/model2vec_models.py +23 -4
  146. mteb/models/model_implementations/moka_models.py +3 -0
  147. mteb/models/model_implementations/nbailab.py +3 -0
  148. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  149. mteb/models/model_implementations/nomic_models.py +16 -4
  150. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  151. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  152. mteb/models/model_implementations/nvidia_models.py +15 -4
  153. mteb/models/model_implementations/octen_models.py +3 -1
  154. mteb/models/model_implementations/openai_models.py +14 -4
  155. mteb/models/model_implementations/openclip_models.py +17 -4
  156. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  157. mteb/models/model_implementations/ops_moa_models.py +9 -2
  158. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  159. mteb/models/model_implementations/pawan_models.py +1 -0
  160. mteb/models/model_implementations/piccolo_models.py +2 -0
  161. mteb/models/model_implementations/promptriever_models.py +16 -6
  162. mteb/models/model_implementations/pylate_models.py +22 -13
  163. mteb/models/model_implementations/qodo_models.py +2 -0
  164. mteb/models/model_implementations/qtack_models.py +1 -0
  165. mteb/models/model_implementations/qwen3_models.py +11 -1
  166. mteb/models/model_implementations/qzhou_models.py +2 -0
  167. mteb/models/model_implementations/random_baseline.py +4 -3
  168. mteb/models/model_implementations/rasgaard_models.py +1 -0
  169. mteb/models/model_implementations/reasonir_model.py +65 -0
  170. mteb/models/model_implementations/repllama_models.py +15 -6
  171. mteb/models/model_implementations/rerankers_custom.py +13 -4
  172. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  173. mteb/models/model_implementations/richinfoai_models.py +1 -0
  174. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  175. mteb/models/model_implementations/ruri_models.py +10 -0
  176. mteb/models/model_implementations/salesforce_models.py +10 -1
  177. mteb/models/model_implementations/samilpwc_models.py +1 -0
  178. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  179. mteb/models/model_implementations/searchmap_models.py +1 -0
  180. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  181. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  182. mteb/models/model_implementations/seed_models.py +2 -1
  183. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  184. mteb/models/model_implementations/shuu_model.py +1 -0
  185. mteb/models/model_implementations/siglip_models.py +19 -4
  186. mteb/models/model_implementations/slm_models.py +7 -4
  187. mteb/models/model_implementations/sonar_models.py +2 -1
  188. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  189. mteb/models/model_implementations/stella_models.py +6 -0
  190. mteb/models/model_implementations/tarka_models.py +2 -0
  191. mteb/models/model_implementations/text2vec_models.py +3 -0
  192. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  193. mteb/models/model_implementations/uae_models.py +10 -4
  194. mteb/models/model_implementations/vdr_models.py +8 -1
  195. mteb/models/model_implementations/vi_vn_models.py +6 -0
  196. mteb/models/model_implementations/vista_models.py +11 -4
  197. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  198. mteb/models/model_implementations/voyage_models.py +25 -4
  199. mteb/models/model_implementations/voyage_v.py +11 -6
  200. mteb/models/model_implementations/xyz_models.py +1 -0
  201. mteb/models/model_implementations/youtu_models.py +1 -0
  202. mteb/models/model_implementations/yuan_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models_en.py +2 -1
  204. mteb/models/model_meta.py +47 -9
  205. mteb/models/models_protocols.py +19 -18
  206. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  207. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  208. mteb/models/search_wrappers.py +19 -12
  209. mteb/models/sentence_transformer_wrapper.py +4 -3
  210. mteb/models/vllm_wrapper.py +8 -6
  211. mteb/results/benchmark_results.py +22 -17
  212. mteb/results/model_result.py +21 -15
  213. mteb/results/task_result.py +15 -9
  214. mteb/similarity_functions.py +8 -2
  215. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  216. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  220. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  223. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  224. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  225. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  226. mteb/tasks/retrieval/eng/__init__.py +42 -0
  227. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  228. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  229. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  230. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  231. mteb/types/_encoder_io.py +1 -1
  232. mteb/types/statistics.py +9 -2
  233. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  234. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
  235. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  236. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  237. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  238. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py
@@ -0,0 +1,968 @@
+from __future__ import annotations
+
+from collections import defaultdict
+
+import datasets
+
+from mteb.abstasks import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+def load_bright_data(
+    path: str,
+    domain: str,
+    eval_splits: list,
+    cache_dir: str | None = None,
+    revision: str | None = None,
+):
+    eval_split = eval_splits[0]
+    corpus_name = "documents" if eval_split == "standard" else "long_documents"
+    gold_ids_field = "gold_ids" if eval_split == "standard" else "gold_ids_long"
+
+    corpus = dict.fromkeys(eval_splits)
+    queries = dict.fromkeys(eval_splits)
+    relevant_docs = dict.fromkeys(eval_splits)
+    top_ranked = dict.fromkeys(eval_splits)
+
+    domain_corpus = datasets.load_dataset(
+        path,
+        corpus_name,
+        split=domain,
+        cache_dir=cache_dir,
+        revision=revision,
+    )
+    examples = datasets.load_dataset(
+        path,
+        "examples",
+        split=domain,
+        cache_dir=cache_dir,
+        revision=revision,
+    )
+    corpus[eval_split] = {e["id"]: {"text": e["content"]} for e in domain_corpus}
+    queries[eval_split] = {e["id"]: e["query"] for e in examples}
+    relevant_docs[eval_split] = defaultdict(dict)
+    top_ranked[eval_split] = defaultdict(list)
+
+    # Get all document IDs
+    all_doc_ids = [e["id"] for e in domain_corpus]
+    have_excluded_ids = False
+
+    for e in examples:
+        qid = e["id"]
+        gold_ids = e[gold_ids_field]
+        for gid in gold_ids:
+            relevant_docs[eval_split][qid].update({gid: 1})
+
+        # Create top_ranked: all documents except excluded_ids
+        excluded_ids = e.get("excluded_ids", [])
+        if excluded_ids and excluded_ids != ["N/A"]:
+            excluded_set = set(excluded_ids)
+            top_ranked[eval_split][qid] = [
+                doc_id for doc_id in all_doc_ids if doc_id not in excluded_set
+            ]
+            have_excluded_ids = True
+        else:
+            # No exclusions, use all documents
+            top_ranked[eval_split][qid] = all_doc_ids
+
+    corpus = datasets.DatasetDict(corpus)
+    queries = datasets.DatasetDict(queries)
+    relevant_docs = datasets.DatasetDict(relevant_docs)
+    if have_excluded_ids:
+        top_ranked = datasets.DatasetDict(top_ranked)
+    else:
+        top_ranked = None
+    return corpus, queries, relevant_docs, top_ranked
+
+
+_BIBTEX_CITATION = r"""
+@misc{su2024brightrealisticchallengingbenchmark,
+  archiveprefix = {arXiv},
+  author = {Hongjin Su and Howard Yen and Mengzhou Xia and Weijia Shi and Niklas Muennighoff and Han-yu Wang and Haisu Liu and Quan Shi and Zachary S. Siegel and Michael Tang and Ruoxi Sun and Jinsung Yoon and Sercan O. Arik and Danqi Chen and Tao Yu},
+  eprint = {2407.12883},
+  primaryclass = {cs.CL},
+  title = {BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval},
+  url = {https://arxiv.org/abs/2407.12883},
+  year = {2024},
+}
+"""
+
+
+class BrightBiologyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightBiologyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this biology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="biology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEarthScienceRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEarthScienceRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this earth_science post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="earth_science",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEconomicsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEconomicsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Economics StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this economics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="economics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPsychologyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPsychologyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this psychology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="psychology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightRoboticsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightRoboticsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Robotics StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this robotics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="robotics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightStackoverflowRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightStackoverflowRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this stackoverflow post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="stackoverflow",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightSustainableLivingRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightSustainableLivingRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Sustainable Living StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this sustainable_living post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="sustainable_living",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPonyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPonyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Pony question for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="pony",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightLeetcodeRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightLeetcodeRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of similar algorithmic problems based on shared solution techniques.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Coding problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="leetcode",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightAopsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightAopsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of similar Math Olympiad problems from Art of Problem Solving.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="aops",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQATheoremsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQATheoremsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions and proofs from ProofWiki.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant theorems: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_theorems",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQAQuestionsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQAQuestionsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions from ProofWiki given questions rephrased as real-world scenarios.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_questions",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightBiologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightBiologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this biology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="biology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEarthScienceLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEarthScienceLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this earth_science post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="earth_science",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEconomicsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEconomicsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Economics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this economics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="economics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPsychologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPsychologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this psychology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="psychology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightRoboticsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightRoboticsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Robotics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this robotics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="robotics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightStackoverflowLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightStackoverflowLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this stackoverflow post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="stackoverflow",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightSustainableLivingLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightSustainableLivingLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Sustainable Living StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this sustainable_living post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="sustainable_living",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPonyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPonyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Pony question for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="pony",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
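
For reference, each of the 20 new task classes above is registered under its metadata name, so it can be run by name once 2.7.4 is installed. A minimal sketch, assuming the 2.x `mteb.get_tasks` / `mteb.get_model` / `mteb.evaluate` entry points (the corresponding `get_tasks.py` and `evaluate.py` modules appear in the file list above; exact signatures may differ), with an illustrative model choice:

import mteb

# Pick one of the tasks added in bright_v1_1_retrieval.py by its metadata name.
tasks = mteb.get_tasks(tasks=["BrightBiologyRetrieval"])

# The model here is only an example; any mteb-compatible encoder should work.
model = mteb.get_model("sentence-transformers/all-MiniLM-L6-v2")

# Assumed 2.x evaluation entry point; runs the "standard" split and reports
# ndcg_at_10 as the main score, per the task metadata above.
results = mteb.evaluate(model, tasks=tasks)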