mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +28 -17
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +22 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +16 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +22 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +25 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +15 -9
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,968 @@
+from __future__ import annotations
+
+from collections import defaultdict
+
+import datasets
+
+from mteb.abstasks import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+def load_bright_data(
+    path: str,
+    domain: str,
+    eval_splits: list,
+    cache_dir: str | None = None,
+    revision: str | None = None,
+):
+    eval_split = eval_splits[0]
+    corpus_name = "documents" if eval_split == "standard" else "long_documents"
+    gold_ids_field = "gold_ids" if eval_split == "standard" else "gold_ids_long"
+
+    corpus = dict.fromkeys(eval_splits)
+    queries = dict.fromkeys(eval_splits)
+    relevant_docs = dict.fromkeys(eval_splits)
+    top_ranked = dict.fromkeys(eval_splits)
+
+    domain_corpus = datasets.load_dataset(
+        path,
+        corpus_name,
+        split=domain,
+        cache_dir=cache_dir,
+        revision=revision,
+    )
+    examples = datasets.load_dataset(
+        path,
+        "examples",
+        split=domain,
+        cache_dir=cache_dir,
+        revision=revision,
+    )
+    corpus[eval_split] = {e["id"]: {"text": e["content"]} for e in domain_corpus}
+    queries[eval_split] = {e["id"]: e["query"] for e in examples}
+    relevant_docs[eval_split] = defaultdict(dict)
+    top_ranked[eval_split] = defaultdict(list)
+
+    # Get all document IDs
+    all_doc_ids = [e["id"] for e in domain_corpus]
+    have_excluded_ids = False
+
+    for e in examples:
+        qid = e["id"]
+        gold_ids = e[gold_ids_field]
+        for gid in gold_ids:
+            relevant_docs[eval_split][qid].update({gid: 1})
+
+        # Create top_ranked: all documents except excluded_ids
+        excluded_ids = e.get("excluded_ids", [])
+        if excluded_ids and excluded_ids != ["N/A"]:
+            excluded_set = set(excluded_ids)
+            top_ranked[eval_split][qid] = [
+                doc_id for doc_id in all_doc_ids if doc_id not in excluded_set
+            ]
+            have_excluded_ids = True
+        else:
+            # No exclusions, use all documents
+            top_ranked[eval_split][qid] = all_doc_ids
+
+    corpus = datasets.DatasetDict(corpus)
+    queries = datasets.DatasetDict(queries)
+    relevant_docs = datasets.DatasetDict(relevant_docs)
+    if have_excluded_ids:
+        top_ranked = datasets.DatasetDict(top_ranked)
+    else:
+        top_ranked = None
+    return corpus, queries, relevant_docs, top_ranked
+
+
+_BIBTEX_CITATION = r"""
+@misc{su2024brightrealisticchallengingbenchmark,
+  archiveprefix = {arXiv},
+  author = {Hongjin Su and Howard Yen and Mengzhou Xia and Weijia Shi and Niklas Muennighoff and Han-yu Wang and Haisu Liu and Quan Shi and Zachary S. Siegel and Michael Tang and Ruoxi Sun and Jinsung Yoon and Sercan O. Arik and Danqi Chen and Tao Yu},
+  eprint = {2407.12883},
+  primaryclass = {cs.CL},
+  title = {BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval},
+  url = {https://arxiv.org/abs/2407.12883},
+  year = {2024},
+}
+"""
+
+
+class BrightBiologyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightBiologyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this biology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="biology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEarthScienceRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEarthScienceRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this earth_science post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="earth_science",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEconomicsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEconomicsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Economics StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this economics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="economics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPsychologyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPsychologyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this psychology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="psychology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightRoboticsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightRoboticsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Robotics StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this robotics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="robotics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightStackoverflowRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightStackoverflowRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this stackoverflow post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="stackoverflow",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightSustainableLivingRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightSustainableLivingRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Sustainable Living StackExchange answers.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this sustainable_living post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="sustainable_living",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPonyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPonyRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Pony question for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="pony",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightLeetcodeRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightLeetcodeRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of similar algorithmic problems based on shared solution techniques.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Coding problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="leetcode",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightAopsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightAopsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of similar Math Olympiad problems from Art of Problem Solving.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="aops",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQATheoremsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQATheoremsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions and proofs from ProofWiki.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant theorems: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_theorems",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightTheoremQAQuestionsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightTheoremQAQuestionsRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of theorem definitions from ProofWiki given questions rephrased as real-world scenarios.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Math problem for searching relevant examples: "
+        },
+        category="t2t",
+        eval_splits=["standard"],
+        eval_langs=["eng-Latn"],
+        main_score="ndcg_at_10",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="theoremqa_questions",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightBiologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightBiologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Biology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this biology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="biology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEarthScienceLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEarthScienceLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Earth Science StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this earth_science post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="earth_science",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightEconomicsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightEconomicsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Economics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this economics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="economics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPsychologyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPsychologyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Psychology StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this psychology post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="psychology",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightRoboticsLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightRoboticsLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Robotics StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this robotics post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="robotics",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightStackoverflowLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightStackoverflowLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Stack Overflow answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this stackoverflow post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="stackoverflow",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightSustainableLivingLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightSustainableLivingLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of web documents cited in Sustainable Living StackExchange answers with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this sustainable_living post for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="sustainable_living",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True
+
+
+class BrightPonyLongRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="BrightPonyLongRetrieval",
+        dataset={
+            "path": "xlangai/BRIGHT",
+            "revision": "3066d29c9651a576c8aba4832d249807b181ecae",
+        },
+        reference="https://huggingface.co/datasets/xlangai/BRIGHT",
+        description="Part of the BRIGHT benchmark for reasoning-intensive retrieval. Retrieval of Pony programming language syntax documentation with long documents.",
+        type="Retrieval",
+        prompt={
+            "query": "Represent this Pony question for searching relevant passages: "
+        },
+        category="t2t",
+        eval_splits=["long"],
+        eval_langs=["eng-Latn"],
+        main_score="recall_at_1",
+        date=("2024-03-01", "2024-06-01"),
+        domains=["Non-fiction", "Written"],
+        task_subtypes=["Article retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="found",
+        modalities=["text"],
+        bibtex_citation=_BIBTEX_CITATION,
+    )
+
+    def load_data(self, **kwargs):
+        if self.data_loaded:
+            return
+
+        self.corpus, self.queries, self.relevant_docs, self.top_ranked = (
+            load_bright_data(
+                path=self.metadata.dataset["path"],
+                eval_splits=self.metadata.eval_splits,
+                domain="pony",
+                cache_dir=kwargs.get("cache_dir", None),
+                revision=self.metadata.dataset["revision"],
+            )
+        )
+        self.data_loaded = True