mteb 2.2.2__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mteb/__init__.py +4 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/evaluate.py +38 -7
- mteb/models/__init__.py +4 -1
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/model_implementations/colpali_models.py +4 -4
- mteb/models/model_implementations/colqwen_models.py +206 -2
- mteb/models/model_implementations/eagerworks_models.py +163 -0
- mteb/models/model_implementations/euler_models.py +25 -0
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/jina_models.py +203 -5
- mteb/models/model_implementations/nb_sbert.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +10 -11
- mteb/models/model_implementations/nvidia_models.py +1 -1
- mteb/models/model_implementations/ops_moa_models.py +2 -2
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +1 -1
- mteb/models/model_implementations/random_baseline.py +8 -18
- mteb/models/model_implementations/vdr_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +57 -0
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
- mteb/models/search_wrappers.py +157 -41
- mteb/results/model_result.py +2 -1
- mteb/results/task_result.py +12 -0
- mteb/similarity_functions.py +49 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +3 -3
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/METADATA +6 -1
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/RECORD +40 -31
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/WHEEL +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.1.dist-info}/top_level.txt +0 -0
mteb/models/search_wrappers.py
CHANGED

```diff
@@ -21,6 +21,7 @@ from mteb.types import (
 )
 
 from .models_protocols import CrossEncoderProtocol, EncoderProtocol
+from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
 
 logger = logging.getLogger(__name__)
 
@@ -28,13 +29,19 @@ logger = logging.getLogger(__name__)
 class SearchEncoderWrapper:
     """Wrapper for Encoder models to be used in search tasks."""
 
-    corpus_chunk_size = 50_000
     task_corpus: CorpusDatasetType | None
 
-    def __init__(
+    def __init__(
+        self,
+        model: EncoderProtocol,
+        corpus_chunk_size: int = 50_000,
+        index_backend: IndexEncoderSearchProtocol | None = None,
+    ) -> None:
         self.model = model
         self.task_corpus = None
         self.mteb_model_meta = model.mteb_model_meta
+        self.corpus_chunk_size = corpus_chunk_size
+        self.index_backend = index_backend
 
     def index(
         self,
@@ -56,6 +63,22 @@ class SearchEncoderWrapper:
         """
         # Always retain corpus for potential reranking or fallback flows
         self.task_corpus = corpus
+        if self.index_backend is not None:
+            all_doc_embeddings = self.model.encode(
+                create_dataloader(
+                    corpus,
+                    task_metadata,
+                    prompt_type=PromptType.document,
+                    **encode_kwargs,
+                ),
+                task_metadata=task_metadata,
+                hf_split=hf_split,
+                hf_subset=hf_subset,
+                prompt_type=PromptType.document,
+                **encode_kwargs,
+            )
+
+            self.index_backend.add_documents(all_doc_embeddings, corpus["id"])
 
     def search(
         self,
@@ -105,27 +128,74 @@ class SearchEncoderWrapper:
 
         if top_ranked is not None:
             logger.info("Reranking pre-ranked documents...")
- … (10 removed lines; content not rendered in this diff view)
+            if self.index_backend is None:
+                result_heaps = self._rerank_documents(
+                    query_idx_to_id=query_idx_to_id,
+                    query_embeddings=query_embeddings,
+                    top_ranked=top_ranked,
+                    top_k=top_k,
+                    task_metadata=task_metadata,
+                    hf_subset=hf_subset,
+                    hf_split=hf_split,
+                    encode_kwargs=encode_kwargs,
+                )
+            else:
+                cos_scores_top_k_values, cos_scores_top_k_idx = (
+                    self.index_backend.search(
+                        query_embeddings,
+                        top_k,
+                        similarity_fn=self.model.similarity,
+                        top_ranked=top_ranked,
+                        query_idx_to_id=query_idx_to_id,
+                    )
+                )
+                result_heaps = {qid: [] for qid in query_idx_to_id.values()}
+                for query_itr in range(len(query_embeddings)):
+                    result_heaps = self._rerank_sort_results(
+                        result_heaps=result_heaps,
+                        query_id=query_idx_to_id[query_itr],
+                        ranked_ids=top_ranked[query_idx_to_id[query_itr]],
+                        scores_top_k_idx=torch.tensor(
+                            [cos_scores_top_k_idx[query_itr]]
+                        ),
+                        scores_top_k_values=torch.tensor(
+                            [cos_scores_top_k_values[query_itr]]
+                        ),
+                    )
+                self.index_backend.clear()
         else:
             logger.info("Performing full corpus search...")
- … (9 removed lines; content not rendered in this diff view)
+            if self.index_backend is None:
+                result_heaps = self._full_corpus_search(
+                    query_idx_to_id=query_idx_to_id,
+                    query_embeddings=query_embeddings,
+                    task_metadata=task_metadata,
+                    hf_subset=hf_subset,
+                    hf_split=hf_split,
+                    top_k=top_k,
+                    encode_kwargs=encode_kwargs,
+                )
+            else:
+                cos_scores_top_k_values, cos_scores_top_k_idx = (
+                    self.index_backend.search(
+                        query_embeddings,
+                        top_k,
+                        similarity_fn=self.model.similarity,
+                        top_ranked=None,
+                        query_idx_to_id=None,
+                    )
+                )
+                result_heaps = {qid: [] for qid in query_idx_to_id.values()}
+                result_heaps = self._sort_full_corpus_results(
+                    result_heaps=result_heaps,
+                    query_idx_to_id=query_idx_to_id,
+                    query_embeddings=query_embeddings,
+                    cos_scores_top_k_idx=cos_scores_top_k_idx,
+                    cos_scores_top_k_values=cos_scores_top_k_values,
+                    sub_corpus_ids=self.task_corpus["id"],
+                    top_k=top_k,
+                )
+                self.index_backend.clear()
 
         # Reset the task corpus dataloader to None to free up memory
         self.task_corpus = None
@@ -192,19 +262,45 @@ class SearchEncoderWrapper:
         cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
 
         sub_corpus_ids = list(sub_corpus_ids)
- … (13 removed lines; content not rendered in this diff view)
+        result_heaps = self._sort_full_corpus_results(
+            result_heaps=result_heaps,
+            query_idx_to_id=query_idx_to_id,
+            query_embeddings=query_embeddings,
+            cos_scores_top_k_idx=cos_scores_top_k_idx,
+            cos_scores_top_k_values=cos_scores_top_k_values,
+            sub_corpus_ids=sub_corpus_ids,
+            top_k=top_k,
+        )
+        return result_heaps
+
+    def _sort_full_corpus_results(
+        self,
+        result_heaps: dict[str, list[tuple[float, str]]],
+        query_idx_to_id: dict[int, str],
+        query_embeddings: Array,
+        cos_scores_top_k_idx: list[list[int]],
+        cos_scores_top_k_values: list[list[float]],
+        sub_corpus_ids: list[str],
+        top_k: int,
+    ) -> dict[str, list[tuple[float, str]]]:
+        """Sort the heaps into descending order lists.
+
+        Returns:
+            A dictionary mapping query IDs to a sorted list of tuples, each containing a relevance score and a document ID.
+        """
+        for query_itr in range(len(query_embeddings)):
+            query_id = query_idx_to_id[query_itr]
+            for sub_corpus_id, score in zip(
+                cos_scores_top_k_idx[query_itr],
+                cos_scores_top_k_values[query_itr],
+            ):
+                corpus_id = sub_corpus_ids[sub_corpus_id]
+                if len(result_heaps[query_id]) < top_k:
+                    # push item on the heap
+                    heapq.heappush(result_heaps[query_id], (score, corpus_id))
+                else:
+                    # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
+                    heapq.heappushpop(result_heaps[query_id], (score, corpus_id))
         return result_heaps
 
     def _rerank_documents(
@@ -279,14 +375,34 @@ class SearchEncoderWrapper:
         scores_top_k_values = scores_top_k_values.cpu()
         scores_top_k_idx = scores_top_k_idx.cpu()
 
- … (7 removed lines; content not rendered in this diff view)
+        result_heaps = self._rerank_sort_results(
+            result_heaps=result_heaps,
+            query_id=query_id,
+            ranked_ids=ranked_ids,
+            scores_top_k_idx=scores_top_k_idx,
+            scores_top_k_values=scores_top_k_values,
+        )
+        return result_heaps
+
+    def _rerank_sort_results(
+        self,
+        result_heaps: list[tuple[float, str]],
+        query_id: str,
+        ranked_ids: list[str],
+        scores_top_k_idx: torch.Tensor,
+        scores_top_k_values: torch.Tensor,
+    ) -> list[tuple[float, str]]:
+        """Sort the heap into descending order list.
 
+        Returns:
+            A sorted list of tuples, each containing a relevance score and a document ID.
+        """
+        for doc_idx, score in zip(
+            scores_top_k_idx[0].tolist(),
+            scores_top_k_values[0].tolist(),
+        ):
+            corpus_id = ranked_ids[doc_idx]
+            heapq.heappush(result_heaps[query_id], (score, corpus_id))
         return result_heaps
 
     def encode(
```
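Note: this change makes the vector store pluggable. When an `index_backend` implementing the new `IndexEncoderSearchProtocol` is supplied, `index()` encodes the corpus once and hands the embeddings to the backend, and `search()` delegates top-k scoring to it for both full-corpus search and reranking; without a backend, the old chunked in-memory path is used. A minimal usage sketch follows. The `FaissSearchIndex` class name is taken from the new `faiss_search_index.py` module in this release, but its constructor arguments are an assumption, not shown in this diff:

```python
import mteb
from mteb.models.search_encoder_index.search_indexes.faiss_search_index import (
    FaissSearchIndex,  # class name from the new module; constructor args assumed
)
from mteb.models.search_wrappers import SearchEncoderWrapper

model = mteb.get_model("sentence-transformers/all-MiniLM-L6-v2")

# Without index_backend the wrapper behaves as before: chunked in-memory
# scoring, corpus_chunk_size documents at a time (now configurable per instance).
wrapper = SearchEncoderWrapper(
    model,
    corpus_chunk_size=50_000,
    index_backend=FaissSearchIndex(),  # assumed default constructor
)
```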
mteb/results/model_result.py
CHANGED

```diff
@@ -22,7 +22,7 @@ from mteb.types import (
     SplitName,
 )
 
-from .task_result import TaskResult
+from .task_result import TaskError, TaskResult
 
 logger = logging.getLogger(__name__)
 
@@ -82,6 +82,7 @@ class ModelResult(BaseModel):
             protected_namespaces=(),
         )
     )
+    exceptions: list[TaskError] | None = None
 
     def __repr__(self) -> str:
         n_entries = len(self.task_results)
```
mteb/results/task_result.py
CHANGED

```diff
@@ -840,3 +840,15 @@ class TaskResult(BaseModel):
             )
         )
         return results
+
+
+class TaskError(BaseModel):
+    """A class to represent an error that occurred during the evaluation of a task.
+
+    Attributes:
+        task_name: The name of the MTEB task.
+        exception: The error message that occurred during the evaluation.
+    """
+
+    task_name: str
+    exception: str
```
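Note: `TaskError` pairs with the new `exceptions: list[TaskError] | None` field on `ModelResult` (see the `model_result.py` change above), so a run can record which tasks failed rather than silently dropping them. A small illustrative sketch; the try/except loop is hypothetical, not the actual logic in `mteb/evaluate.py`:

```python
from mteb.results.task_result import TaskError

errors: list[TaskError] = []
try:
    raise RuntimeError("CUDA out of memory")  # stand-in for a task failure
except Exception as exc:
    # TaskError is a plain pydantic model: the task name plus the stringified exception.
    errors.append(TaskError(task_name="MultiLongDocReranking", exception=str(exc)))

print(errors[0].task_name, "->", errors[0].exception)
```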
mteb/similarity_functions.py
CHANGED

```diff
@@ -1,6 +1,7 @@
 import torch
 
 from mteb.models import EncoderProtocol
+from mteb.models.model_meta import ScoringFunction
 from mteb.types import Array
 
 
@@ -38,6 +39,54 @@ def compute_pairwise_similarity(
     return pairwise_cos_sim(embedding1, embedding2)
 
 
+def select_similarity(
+    embedding1: Array,
+    embedding2: Array,
+    similarity_fn: ScoringFunction,
+) -> Array:
+    """Compute similarity between two sets of embeddings using the specified similarity function.
+
+    Args:
+        embedding1: The first set of embeddings.
+        embedding2: The second set of embeddings.
+        similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
+
+    Returns:
+        Array: The computed similarity scores.
+    """
+    if similarity_fn is ScoringFunction.COSINE:
+        return cos_sim(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.DOT_PRODUCT:
+        return dot_score(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.EUCLIDEAN:
+        return euclidean_sim(embedding1, embedding2)
+    raise ValueError(f"Unsupported similarity function: {similarity_fn}")
+
+
+def select_pairwise_similarity(
+    embedding1: Array,
+    embedding2: Array,
+    similarity_fn: ScoringFunction,
+) -> Array:
+    """Compute pairwise similarity between two sets of embeddings using the specified similarity function.
+
+    Args:
+        embedding1: The first set of embeddings.
+        embedding2: The second set of embeddings.
+        similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
+
+    Returns:
+        Array: The computed pairwise similarity scores.
+    """
+    if similarity_fn is ScoringFunction.COSINE:
+        return pairwise_cos_sim(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.DOT_PRODUCT:
+        return pairwise_dot_score(embedding1, embedding2)
+    elif similarity_fn is ScoringFunction.EUCLIDEAN:
+        return pairwise_euclidean_sim(embedding1, embedding2)
+    raise ValueError(f"Unsupported similarity function: {similarity_fn}")
+
+
 def _normalize_embeddings(embeddings: Array) -> torch.Tensor:
     """Normalizes the embeddings matrix, so that each sentence embedding has unit length.
 
```
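Note: these helpers dispatch on a model's declared `ScoringFunction` so that callers such as the new index backends can score with the model's native similarity. A short usage sketch; the shape comments assume the matrix/pairwise conventions of the existing `cos_sim`-style helpers in this module:

```python
import torch

from mteb.models.model_meta import ScoringFunction
from mteb.similarity_functions import select_pairwise_similarity, select_similarity

a = torch.randn(4, 8)  # 4 embeddings of dimension 8
b = torch.randn(4, 8)

# Full similarity matrix, one score per (a_i, b_j) pair -> expected shape (4, 4)
sim_matrix = select_similarity(a, b, ScoringFunction.COSINE)

# Pairwise scores, a_i vs b_i only -> expected shape (4,)
pair_scores = select_pairwise_similarity(a, b, ScoringFunction.DOT_PRODUCT)

print(sim_matrix.shape, pair_scores.shape)
```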
mteb/tasks/reranking/multilingual/__init__.py
CHANGED

```diff
@@ -1,6 +1,7 @@
 from .esci_reranking import ESCIReranking
 from .hume_wikipedia_reranking_multilingual import HUMEWikipediaRerankingMultilingual
 from .miracl_reranking import MIRACLReranking
+from .multi_long_doc_reranking import MultiLongDocReranking
 from .wikipedia_reranking_multilingual import WikipediaRerankingMultilingual
 from .x_glue_wpr_reranking import XGlueWPRReranking
 
@@ -8,6 +9,7 @@ __all__ = [
     "ESCIReranking",
     "HUMEWikipediaRerankingMultilingual",
     "MIRACLReranking",
+    "MultiLongDocReranking",
     "WikipediaRerankingMultilingual",
     "XGlueWPRReranking",
 ]
```
mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py
ADDED

```diff
@@ -0,0 +1,70 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+class MultiLongDocReranking(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="MultiLongDocReranking",
+        description=(
+            "Reranking version of MultiLongDocRetrieval (MLDR). MLDR is a Multilingual Long-Document "
+            "Retrieval dataset built on Wikipedia, Wudao and mC4, covering 13 typologically diverse languages. "
+            "Specifically, we sample lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose "
+            "paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. "
+            "The generated question and the sampled article constitute a new text pair to the dataset."
+        ),
+        reference="https://huggingface.co/datasets/Shitao/MLDR",
+        dataset={
+            "path": "mteb/MultiLongDocReranking",
+            "revision": "ad09ce14c17bce6edae151b7f6ef12e15d91dbf3",
+        },
+        type="Reranking",
+        category="t2t",
+        modalities=["text"],
+        eval_splits=["test"],
+        eval_langs={
+            "ar": ["ara-Arab"],
+            "de": ["deu-Latn"],
+            "en": ["eng-Latn"],
+            "es": ["spa-Latn"],
+            "fr": ["fra-Latn"],
+            "hi": ["hin-Deva"],
+            "it": ["ita-Latn"],
+            "ja": ["jpn-Jpan"],
+            "ko": ["kor-Kore"],
+            "pt": ["por-Latn"],
+            "ru": ["rus-Cyrl"],
+            "th": ["tha-Thai"],
+            "zh": ["zho-Hans"],
+        },
+        main_score="ndcg_at_10",
+        date=(
+            "2000-01-01",
+            "2024-12-31",
+        ),  # Not found in the paper, guessed using the paper's publication date and constituent datasets
+        domains=[
+            "Encyclopaedic",
+            "Written",
+            "Web",
+            "Non-fiction",
+            "Fiction",
+        ],  # narrativeqa, wikipedia, wudao, mC4
+        task_subtypes=[],
+        license="mit",
+        annotations_creators="LM-generated",  # gpt-3.5
+        dialect=[],
+        sample_creation="found",
+        bibtex_citation=r"""
+@misc{bge-m3,
+  archiveprefix = {arXiv},
+  author = {Jianlv Chen and Shitao Xiao and Peitian Zhang and Kun Luo and Defu Lian and Zheng Liu},
+  eprint = {2402.03216},
+  primaryclass = {cs.CL},
+  title = {BGE M3-Embedding: Multi-Lingual, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation},
+  year = {2024},
+}
+""",
+        prompt={
+            "query": "Given a question, rerank long documents based on their relevance to answer the question"
+        },
+        adapted_from=["MultiLongDocRetrieval"],
+    )
```
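Note: the new task is registered under the name `MultiLongDocReranking` and is selectable through the usual top-level API. A sketch, assuming the v2-style `mteb.evaluate` entry point (its module is touched in this release) and an arbitrary model; the language filter is optional:

```python
import mteb

# Select the new reranking task by name, optionally restricting subsets by language.
tasks = mteb.get_tasks(tasks=["MultiLongDocReranking"], languages=["eng", "deu"])

model = mteb.get_model("sentence-transformers/all-MiniLM-L6-v2")  # any encoder works
results = mteb.evaluate(model, tasks=tasks)  # assumed v2 entry point (see mteb/evaluate.py)
print(results)
```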
mteb/tasks/retrieval/eng/vidore_bench_retrieval.py
CHANGED

```diff
@@ -351,6 +351,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
+        adapted_from=["VidoreDocVQARetrieval"],
     )
 
     def load_data(self) -> None:
@@ -394,6 +395,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
+        adapted_from=["VidoreDocVQARetrieval"],
     )
 
     def load_data(self) -> None:
@@ -437,6 +439,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
+        adapted_from=["VidoreDocVQARetrieval"],
     )
 
     def load_data(self) -> None:
@@ -480,6 +483,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
 }
 """,
         prompt={"query": "Find a screenshot that relevant to the user's question."},
+        adapted_from=["VidoreDocVQARetrieval"],
     )
 
     def load_data(self) -> None:
```