mteb 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +4 -0
- mteb/models/__init__.py +4 -1
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/model_implementations/eagerworks_models.py +163 -0
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/nb_sbert.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -2
- mteb/models/model_implementations/nvidia_models.py +1 -1
- mteb/models/model_implementations/ops_moa_models.py +2 -2
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +1 -1
- mteb/models/model_implementations/random_baseline.py +8 -18
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
- mteb/models/search_wrappers.py +157 -41
- mteb/similarity_functions.py +49 -0
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +3 -3
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/METADATA +3 -1
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/RECORD +26 -21
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/WHEEL +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/entry_points.txt +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.2.2.dist-info → mteb-2.3.0.dist-info}/top_level.txt +0 -0
mteb/__init__.py
CHANGED
|
@@ -9,8 +9,10 @@ from mteb.filter_tasks import filter_tasks
|
|
|
9
9
|
from mteb.get_tasks import get_task, get_tasks
|
|
10
10
|
from mteb.load_results import load_results
|
|
11
11
|
from mteb.models import (
|
|
12
|
+
CacheBackendProtocol,
|
|
12
13
|
CrossEncoderProtocol,
|
|
13
14
|
EncoderProtocol,
|
|
15
|
+
IndexEncoderSearchProtocol,
|
|
14
16
|
SearchProtocol,
|
|
15
17
|
SentenceTransformerEncoderWrapper,
|
|
16
18
|
)
|
|
@@ -27,8 +29,10 @@ __all__ = [
|
|
|
27
29
|
"AbsTask",
|
|
28
30
|
"Benchmark",
|
|
29
31
|
"BenchmarkResults",
|
|
32
|
+
"CacheBackendProtocol",
|
|
30
33
|
"CrossEncoderProtocol",
|
|
31
34
|
"EncoderProtocol",
|
|
35
|
+
"IndexEncoderSearchProtocol",
|
|
32
36
|
"SearchProtocol",
|
|
33
37
|
"SentenceTransformerEncoderWrapper",
|
|
34
38
|
"TaskMetadata",
|
mteb/models/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .cache_wrappers import CachedEmbeddingWrapper
|
|
1
|
+
from .cache_wrappers import CacheBackendProtocol, CachedEmbeddingWrapper
|
|
2
2
|
from .model_meta import ModelMeta
|
|
3
3
|
from .models_protocols import (
|
|
4
4
|
CrossEncoderProtocol,
|
|
@@ -6,6 +6,7 @@ from .models_protocols import (
|
|
|
6
6
|
MTEBModels,
|
|
7
7
|
SearchProtocol,
|
|
8
8
|
)
|
|
9
|
+
from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
|
|
9
10
|
from .search_wrappers import SearchCrossEncoderWrapper, SearchEncoderWrapper
|
|
10
11
|
from .sentence_transformer_wrapper import (
|
|
11
12
|
CrossEncoderWrapper,
|
|
@@ -14,10 +15,12 @@ from .sentence_transformer_wrapper import (
|
|
|
14
15
|
)
|
|
15
16
|
|
|
16
17
|
__all__ = [
|
|
18
|
+
"CacheBackendProtocol",
|
|
17
19
|
"CachedEmbeddingWrapper",
|
|
18
20
|
"CrossEncoderProtocol",
|
|
19
21
|
"CrossEncoderWrapper",
|
|
20
22
|
"EncoderProtocol",
|
|
23
|
+
"IndexEncoderSearchProtocol",
|
|
21
24
|
"MTEBModels",
|
|
22
25
|
"ModelMeta",
|
|
23
26
|
"SearchCrossEncoderWrapper",
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
from torch.utils.data import DataLoader
|
|
5
|
+
from tqdm.auto import tqdm
|
|
6
|
+
|
|
7
|
+
from mteb._requires_package import (
|
|
8
|
+
requires_image_dependencies,
|
|
9
|
+
requires_package,
|
|
10
|
+
)
|
|
11
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
|
+
from mteb.models.abs_encoder import AbsEncoder
|
|
13
|
+
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EagerEmbedV1Wrapper(AbsEncoder):
    """Wrapper for EagerEmbed single-vector embedding models.

    Loads a ``Qwen3VLForConditionalGeneration`` checkpoint together with its
    processor and embeds text and/or image inputs by taking the hidden state
    at the final sequence position and L2-normalizing it.
    """

    def __init__(
        self,
        model_name: str,
        revision: str | None = None,
        device: str | None = None,
        image_size: int = 784,
        **kwargs,
    ):
        """Load the model and processor.

        Args:
            model_name: Hugging Face model identifier or local path.
            revision: Optional model revision (commit hash, tag or branch).
                When given it is applied to both the weights and the processor.
            device: Target device; defaults to ``"cuda"`` when available,
                otherwise ``"cpu"``.
            image_size: Height and width (in pixels) every image is resized to.
            **kwargs: Extra keyword arguments forwarded to the model's
                ``from_pretrained`` call (e.g. ``dtype``).
        """
        requires_image_dependencies()
        requires_package(
            self, "qwen_vl_utils", model_name, "pip install mteb[eager_embed]"
        )
        from transformers import AutoProcessor, Qwen3VLForConditionalGeneration

        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.image_size = image_size

        # Honour the requested revision; previously it was accepted but never
        # forwarded, so the latest checkpoint was always loaded.
        revision_kwargs = {} if revision is None else {"revision": revision}

        # Load model
        self.mdl = Qwen3VLForConditionalGeneration.from_pretrained(
            model_name, **revision_kwargs, **kwargs
        )
        self.mdl = self.mdl.to(self.device)
        self.mdl.eval()

        # Load processor
        self.processor = AutoProcessor.from_pretrained(model_name, **revision_kwargs)

    def get_embedding(self, last_hidden_state: torch.Tensor) -> torch.Tensor:
        """Extract embeddings from last token of last hidden state.

        NOTE(review): taking position ``-1`` only selects each sequence's real
        final token if the batch is left-padded — confirm the processor's
        tokenizer padding side.
        """
        return last_hidden_state[:, -1]

    def encode(
        self,
        inputs: DataLoader[BatchedInput],
        *,
        task_metadata: TaskMetadata,
        hf_split: str,
        hf_subset: str,
        prompt_type: PromptType | None = None,
        **kwargs: Any,
    ) -> Array:
        """Encode inputs (text and/or images) into embeddings.

        Args:
            inputs: Batches that may contain a ``"text"`` and/or ``"image"`` entry.
            task_metadata: Metadata of the task being evaluated (unused here).
            hf_split: Dataset split being encoded (unused here).
            hf_subset: Dataset subset being encoded (unused here).
            prompt_type: When equal to ``PromptType.query`` every text is
                prefixed with ``"Query: "``.
            **kwargs: Ignored.

        Returns:
            A float32 CPU tensor with one L2-normalized embedding per input.
        """
        from qwen_vl_utils import process_vision_info

        # The prefix depends only on prompt_type, so compute it once.
        query_prefix = "Query: " if prompt_type == PromptType.query else ""
        all_embeddings: list[torch.Tensor] = []

        with torch.no_grad():
            for batch in tqdm(inputs, desc="Encoding"):
                batch_texts = batch.get("text", [])
                batch_images = batch.get("image", [])

                messages = []
                for i in range(max(len(batch_texts), len(batch_images))):
                    text_content = batch_texts[i] if batch_texts else ""
                    image_content = batch_images[i] if batch_images else None

                    content = [
                        {"type": "text", "text": f"{query_prefix}{text_content}"}
                    ]

                    if image_content is not None:
                        content.append(
                            {
                                "type": "image",
                                "image": image_content,
                                "resized_height": self.image_size,
                                "resized_width": self.image_size,
                            }
                        )

                    messages.append([{"role": "user", "content": content}])

                # Prepare inputs: one chat-formatted prompt per item, terminated
                # with the end-of-text marker whose hidden state is pooled.
                texts = [
                    self.processor.apply_chat_template(
                        msg, tokenize=False, add_generation_prompt=False
                    )
                    + "<|endoftext|>"
                    for msg in messages
                ]

                image_inputs = None
                video_inputs = None
                if batch_images:
                    image_inputs, video_inputs = process_vision_info(messages)

                model_inputs = self.processor(
                    text=texts,
                    images=image_inputs,
                    videos=video_inputs,
                    padding="longest",
                    return_tensors="pt",
                ).to(self.device)

                # Get embeddings
                output = self.mdl(
                    **model_inputs, return_dict=True, output_hidden_states=True
                )
                embeddings = self.get_embedding(output.hidden_states[-1])
                # Normalize in float32 on CPU regardless of the model dtype.
                embeddings = embeddings.cpu().to(torch.float32)
                embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=-1)

                all_embeddings.append(embeddings)

        return torch.cat(all_embeddings, dim=0)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
EAGER_EMBED_V1_CITATION = """@article{EagerEmbed,
|
|
128
|
+
title={Eager Embed V1: Multimodal Dense Embeddings for Retrieval},
|
|
129
|
+
author={Juan Pablo Balarini},
|
|
130
|
+
year={2025},
|
|
131
|
+
publisher={Eagerworks},
|
|
132
|
+
url={https://github.com/eagerworks/eager-embed},
|
|
133
|
+
}"""
|
|
134
|
+
|
|
135
|
+
EAGER_EMBED_V1_TRAINING_DATASETS = {"colpali", "bge-ir", "pixmo-docs", "wiki-ss"}
|
|
136
|
+
|
|
137
|
+
Eager_Embed_V1 = ModelMeta(
|
|
138
|
+
loader=EagerEmbedV1Wrapper,
|
|
139
|
+
loader_kwargs=dict(
|
|
140
|
+
dtype=torch.float16,
|
|
141
|
+
image_size=784,
|
|
142
|
+
),
|
|
143
|
+
name="eagerworks/eager-embed-v1",
|
|
144
|
+
languages=["fra-Latn", "spa-Latn", "eng-Latn", "deu-Latn"],
|
|
145
|
+
revision="a6bec272729c5056e2c26618ce085205c82a3b3c",
|
|
146
|
+
release_date="2025-11-20",
|
|
147
|
+
modalities=["image", "text"],
|
|
148
|
+
n_parameters=4_000_000_000,
|
|
149
|
+
memory_usage_mb=16929,
|
|
150
|
+
max_tokens=262144,
|
|
151
|
+
embed_dim=2560,
|
|
152
|
+
license="apache-2.0",
|
|
153
|
+
open_weights=True,
|
|
154
|
+
framework=["Tevatron"],
|
|
155
|
+
reference="https://huggingface.co/eagerworks/eager-embed-v1",
|
|
156
|
+
similarity_fn_name=ScoringFunction.COSINE,
|
|
157
|
+
use_instructions=True,
|
|
158
|
+
training_datasets=EAGER_EMBED_V1_TRAINING_DATASETS,
|
|
159
|
+
citation=EAGER_EMBED_V1_CITATION,
|
|
160
|
+
adapted_from="https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct",
|
|
161
|
+
public_training_code="https://github.com/eagerworks/eager-embed",
|
|
162
|
+
public_training_data="https://github.com/eagerworks/eager-embed/blob/main/dataset_config.yaml",
|
|
163
|
+
)
|
|
@@ -146,7 +146,7 @@ llama_nemoretriever_colembed_1b_v1 = ModelMeta(
|
|
|
146
146
|
release_date="2025-06-27",
|
|
147
147
|
modalities=["image", "text"],
|
|
148
148
|
n_parameters=2_418_000_000,
|
|
149
|
-
memory_usage_mb=
|
|
149
|
+
memory_usage_mb=4610,
|
|
150
150
|
max_tokens=8192,
|
|
151
151
|
embed_dim=2048,
|
|
152
152
|
license="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1/blob/main/LICENSE",
|
|
@@ -172,7 +172,7 @@ llama_nemoretriever_colembed_3b_v1 = ModelMeta(
|
|
|
172
172
|
release_date="2025-06-27",
|
|
173
173
|
modalities=["image", "text"],
|
|
174
174
|
n_parameters=4_407_000_000,
|
|
175
|
-
memory_usage_mb=
|
|
175
|
+
memory_usage_mb=8403,
|
|
176
176
|
max_tokens=8192,
|
|
177
177
|
embed_dim=3072,
|
|
178
178
|
license="https://huggingface.co/nvidia/llama-nemoretriever-colembed-1b-v1/blob/main/LICENSE",
|
|
@@ -146,7 +146,7 @@ NV_embed_v1 = ModelMeta(
|
|
|
146
146
|
revision="570834afd5fef5bf3a3c2311a2b6e0a66f6f4f2c",
|
|
147
147
|
release_date="2024-09-13", # initial commit of hf model.
|
|
148
148
|
n_parameters=7_850_000_000,
|
|
149
|
-
memory_usage_mb=
|
|
149
|
+
memory_usage_mb=14975,
|
|
150
150
|
embed_dim=4096,
|
|
151
151
|
license="cc-by-nc-4.0",
|
|
152
152
|
max_tokens=32768,
|
|
@@ -27,7 +27,7 @@ ops_moa_conan_embedding = ModelMeta(
|
|
|
27
27
|
languages=["zho-Hans"],
|
|
28
28
|
loader=OPSWrapper,
|
|
29
29
|
n_parameters=int(343 * 1e6),
|
|
30
|
-
memory_usage_mb=
|
|
30
|
+
memory_usage_mb=1308,
|
|
31
31
|
max_tokens=512,
|
|
32
32
|
embed_dim=1536,
|
|
33
33
|
license="cc-by-nc-4.0",
|
|
@@ -58,7 +58,7 @@ ops_moa_yuan_embedding = ModelMeta(
|
|
|
58
58
|
languages=["zho-Hans"],
|
|
59
59
|
loader=OPSWrapper,
|
|
60
60
|
n_parameters=int(343 * 1e6),
|
|
61
|
-
memory_usage_mb=
|
|
61
|
+
memory_usage_mb=1242,
|
|
62
62
|
max_tokens=512,
|
|
63
63
|
embed_dim=1536,
|
|
64
64
|
license="cc-by-nc-4.0",
|
|
@@ -80,7 +80,7 @@ promptriever_llama2 = ModelMeta(
|
|
|
80
80
|
revision="01c7f73d771dfac7d292323805ebc428287df4f9-30b14e3813c0fa45facfd01a594580c3fe5ecf23", # base-peft revision
|
|
81
81
|
release_date="2024-09-15",
|
|
82
82
|
n_parameters=7_000_000_000,
|
|
83
|
-
memory_usage_mb=
|
|
83
|
+
memory_usage_mb=26703,
|
|
84
84
|
max_tokens=4096,
|
|
85
85
|
embed_dim=4096,
|
|
86
86
|
license="apache-2.0",
|
|
@@ -115,7 +115,7 @@ promptriever_llama3 = ModelMeta(
|
|
|
115
115
|
},
|
|
116
116
|
release_date="2024-09-15",
|
|
117
117
|
n_parameters=8_000_000_000,
|
|
118
|
-
memory_usage_mb=
|
|
118
|
+
memory_usage_mb=30518,
|
|
119
119
|
max_tokens=8192,
|
|
120
120
|
embed_dim=4096,
|
|
121
121
|
license="apache-2.0",
|
|
@@ -143,7 +143,7 @@ promptriever_llama3_instruct = ModelMeta(
|
|
|
143
143
|
revision="5206a32e0bd3067aef1ce90f5528ade7d866253f-8b677258615625122c2eb7329292b8c402612c21", # base-peft revision
|
|
144
144
|
release_date="2024-09-15",
|
|
145
145
|
n_parameters=8_000_000_000,
|
|
146
|
-
memory_usage_mb=
|
|
146
|
+
memory_usage_mb=30518,
|
|
147
147
|
max_tokens=8192,
|
|
148
148
|
embed_dim=4096,
|
|
149
149
|
training_datasets={
|
|
@@ -175,7 +175,7 @@ promptriever_mistral_v1 = ModelMeta(
|
|
|
175
175
|
revision="7231864981174d9bee8c7687c24c8344414eae6b-876d63e49b6115ecb6839893a56298fadee7e8f5", # base-peft revision
|
|
176
176
|
release_date="2024-09-15",
|
|
177
177
|
n_parameters=7_000_000_000,
|
|
178
|
-
memory_usage_mb=
|
|
178
|
+
memory_usage_mb=26703,
|
|
179
179
|
training_datasets={
|
|
180
180
|
# "samaya-ai/msmarco-w-instructions",
|
|
181
181
|
"mMARCO-NL", # translation not trained on
|
|
@@ -139,7 +139,7 @@ Qwen3_Embedding_0B6 = ModelMeta(
|
|
|
139
139
|
revision="b22da495047858cce924d27d76261e96be6febc0", # Commit of @tomaarsen
|
|
140
140
|
release_date="2025-06-05",
|
|
141
141
|
n_parameters=595776512,
|
|
142
|
-
memory_usage_mb=
|
|
142
|
+
memory_usage_mb=1136,
|
|
143
143
|
embed_dim=1024,
|
|
144
144
|
max_tokens=32768,
|
|
145
145
|
license="apache-2.0",
|
|
@@ -161,7 +161,7 @@ Qwen3_Embedding_4B = ModelMeta(
|
|
|
161
161
|
revision="636cd9bf47d976946cdbb2b0c3ca0cb2f8eea5ff", # Commit of @tomaarsen
|
|
162
162
|
release_date="2025-06-05",
|
|
163
163
|
n_parameters=4021774336,
|
|
164
|
-
memory_usage_mb=
|
|
164
|
+
memory_usage_mb=7671,
|
|
165
165
|
embed_dim=2560,
|
|
166
166
|
max_tokens=32768,
|
|
167
167
|
license="apache-2.0",
|
|
@@ -183,7 +183,7 @@ Qwen3_Embedding_8B = ModelMeta(
|
|
|
183
183
|
revision="4e423935c619ae4df87b646a3ce949610c66241c", # Commit of @tomaarsen
|
|
184
184
|
release_date="2025-06-05",
|
|
185
185
|
n_parameters=7567295488,
|
|
186
|
-
memory_usage_mb=
|
|
186
|
+
memory_usage_mb=14433,
|
|
187
187
|
embed_dim=4096,
|
|
188
188
|
max_tokens=32768,
|
|
189
189
|
license="apache-2.0",
|
|
@@ -63,7 +63,7 @@ QZhou_Embedding = ModelMeta(
|
|
|
63
63
|
revision="f1e6c03ee3882e7b9fa5cec91217715272e433b8",
|
|
64
64
|
release_date="2025-08-24",
|
|
65
65
|
n_parameters=7_070_619_136,
|
|
66
|
-
memory_usage_mb=
|
|
66
|
+
memory_usage_mb=14436,
|
|
67
67
|
embed_dim=3584,
|
|
68
68
|
license="apache-2.0",
|
|
69
69
|
max_tokens=8192,
|
|
@@ -8,6 +8,10 @@ from torch.utils.data import DataLoader
|
|
|
8
8
|
|
|
9
9
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
10
|
from mteb.models.model_meta import ModelMeta
|
|
11
|
+
from mteb.similarity_functions import (
|
|
12
|
+
select_pairwise_similarity,
|
|
13
|
+
select_similarity,
|
|
14
|
+
)
|
|
11
15
|
from mteb.types._encoder_io import Array, BatchedInput, PromptType
|
|
12
16
|
|
|
13
17
|
|
|
@@ -155,15 +159,9 @@ class RandomEncoderBaseline:
|
|
|
155
159
|
Returns:
|
|
156
160
|
Cosine similarity matrix between the two sets of embeddings
|
|
157
161
|
"""
|
|
158
|
-
|
|
159
|
-
embeddings1
|
|
160
|
-
)
|
|
161
|
-
norm2 = np.linalg.norm(
|
|
162
|
-
embeddings2.reshape(-1, self.embedding_dim), axis=1, keepdims=True
|
|
162
|
+
return select_similarity(
|
|
163
|
+
embeddings1, embeddings2, self.mteb_model_meta.similarity_fn_name
|
|
163
164
|
)
|
|
164
|
-
normalized1 = embeddings1 / (norm1 + 1e-10)
|
|
165
|
-
normalized2 = embeddings2 / (norm2 + 1e-10)
|
|
166
|
-
return np.dot(normalized1, normalized2.T)
|
|
167
165
|
|
|
168
166
|
def similarity_pairwise(
|
|
169
167
|
self,
|
|
@@ -179,17 +177,9 @@ class RandomEncoderBaseline:
|
|
|
179
177
|
Returns:
|
|
180
178
|
Cosine similarity for each pair of embeddings
|
|
181
179
|
"""
|
|
182
|
-
|
|
183
|
-
embeddings1
|
|
184
|
-
)
|
|
185
|
-
norm2 = np.linalg.norm(
|
|
186
|
-
embeddings2.reshape(-1, self.embedding_dim), axis=1, keepdims=True
|
|
180
|
+
return select_pairwise_similarity(
|
|
181
|
+
embeddings1, embeddings2, self.mteb_model_meta.similarity_fn_name
|
|
187
182
|
)
|
|
188
|
-
normalized1 = embeddings1 / (norm1 + 1e-10)
|
|
189
|
-
normalized2 = embeddings2 / (norm2 + 1e-10)
|
|
190
|
-
normalized1 = np.asarray(normalized1)
|
|
191
|
-
normalized2 = np.asarray(normalized2)
|
|
192
|
-
return np.sum(normalized1 * normalized2, axis=1)
|
|
193
183
|
|
|
194
184
|
|
|
195
185
|
random_encoder_baseline = ModelMeta(
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Protocol
|
|
3
|
+
|
|
4
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class IndexEncoderSearchProtocol(Protocol):
    """Protocol for search backends used in encoder-based retrieval.

    A backend stores document embeddings handed to it via `add_documents`,
    answers top-k queries via `search`, and can be emptied with `clear`.
    """

    def add_documents(
        self,
        embeddings: Array,
        idxs: list[str],
    ) -> None:
        """Add documents to the search backend.

        Args:
            embeddings: Embeddings of the documents to add.
            idxs: IDs of the documents to add, in the same order as `embeddings`.
        """

    def search(
        self,
        embeddings: Array,
        top_k: int,
        similarity_fn: Callable[[Array, Array], Array],
        top_ranked: TopRankedDocumentsType | None = None,
        query_idx_to_id: dict[int, str] | None = None,
    ) -> tuple[list[list[float]], list[list[int]]]:
        """Search through added corpus embeddings or rerank top-ranked documents.

        Supports both full-corpus and reranking search modes:
            - Full-corpus mode: `top_ranked=None`, uses added corpus embeddings.
            - Reranking mode: `top_ranked` contains mapping {query_id: [doc_ids]}.

        Args:
            embeddings: Query embeddings, shape (num_queries, dim).
            top_k: Number of top results to return.
            similarity_fn: Function to compute similarity between query and corpus.
                A backend with a native metric may ignore it.
            top_ranked: Mapping of query_id -> list of candidate doc_ids. Used for reranking.
            query_idx_to_id: Mapping of query index -> query_id. Used for reranking.

        Returns:
            A tuple (top_k_values, top_k_indices), for each query:
                - top_k_values: List of top-k similarity scores.
                - top_k_indices: List of indices of the top-k documents. In
                  full-corpus mode these index the added corpus (insertion
                  order); in reranking mode they index that query's candidate
                  list in `top_ranked`.
        """

    def clear(self) -> None:
        """Clear all stored documents and embeddings from the backend."""
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import torch
|
|
6
|
+
|
|
7
|
+
from mteb._requires_package import requires_package
|
|
8
|
+
from mteb.models.model_meta import ScoringFunction
|
|
9
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
10
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FaissSearchIndex:
    """FAISS-based backend for encoder-based search.

    Supports both full-corpus retrieval and reranking (via `top_ranked`).

    Notes:
        - Stores *all* embeddings in memory (IndexFlatIP or IndexFlatL2).
        - For cosine similarity, document and query embeddings are
          L2-normalized by this class before being added/searched.
        - For Euclidean distance, scores are returned as negated distances so
          that a larger score always means a better match.
    """

    # Set to True per instance when the model uses cosine similarity.
    _normalize: bool = False

    def __init__(self, model: EncoderProtocol) -> None:
        """Pick the FAISS index type matching the model's similarity function.

        Args:
            model: Encoder whose `mteb_model_meta.similarity_fn_name` selects
                the index type.

        Raises:
            ValueError: If the similarity function is not dot product, cosine
                or Euclidean.
        """
        requires_package(
            self,
            "faiss",
            "FAISS-based search",
            install_instruction="pip install mteb[faiss-cpu]",
        )

        import faiss
        from faiss import IndexFlatIP, IndexFlatL2

        similarity_fn_name = model.mteb_model_meta.similarity_fn_name

        # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
        if similarity_fn_name is ScoringFunction.DOT_PRODUCT:
            self.index_type = IndexFlatIP
        elif similarity_fn_name is ScoringFunction.COSINE:
            # Cosine similarity == inner product of L2-normalized vectors.
            self.index_type = IndexFlatIP
            self._normalize = True
        elif similarity_fn_name is ScoringFunction.EUCLIDEAN:
            self.index_type = IndexFlatL2
        else:
            raise ValueError(
                f"FAISS backend does not support similarity function {similarity_fn_name}. "
                f"Available: {ScoringFunction.DOT_PRODUCT}, {ScoringFunction.COSINE}, "
                f"{ScoringFunction.EUCLIDEAN}."
            )

        self.idxs: list[str] = []
        self.index: faiss.Index | None = None

    @staticmethod
    def _to_float32(embeddings: Array) -> np.ndarray:
        """Return a fresh C-contiguous float32 copy of `embeddings`.

        FAISS only accepts contiguous float32 input, and working on a copy
        keeps in-place normalization from modifying the caller's data.
        """
        if isinstance(embeddings, torch.Tensor):
            embeddings = embeddings.detach().cpu().to(torch.float32).numpy()
        return np.array(embeddings, dtype=np.float32, order="C")

    def add_documents(self, embeddings: Array, idxs: list[str]) -> None:
        """Add all document embeddings and their IDs to FAISS index.

        Args:
            embeddings: Document embeddings, shape (num_docs, dim).
            idxs: Document IDs in the same order as `embeddings`.
        """
        import faiss

        vectors = self._to_float32(embeddings)
        self.idxs.extend(idxs)

        if self._normalize:
            faiss.normalize_L2(vectors)

        dim = vectors.shape[1]
        if self.index is None:
            self.index = self.index_type(dim)

        self.index.add(vectors)
        logger.info(f"FAISS index built with {len(idxs)} vectors of dim {dim}.")

    def search(
        self,
        embeddings: Array,
        top_k: int,
        similarity_fn: Callable[[Array, Array], Array],
        top_ranked: TopRankedDocumentsType | None = None,
        query_idx_to_id: dict[int, str] | None = None,
    ) -> tuple[list[list[float]], list[list[int]]]:
        """Search using FAISS.

        Args:
            embeddings: Query embeddings, shape (num_queries, dim).
            top_k: Maximum number of results per query.
            similarity_fn: Unused; the metric is fixed by the index type chosen
                at construction. Kept for protocol compatibility.
            top_ranked: Optional mapping query_id -> candidate doc_ids. When
                given, only those candidates are scored (reranking mode).
            query_idx_to_id: Mapping query index -> query_id; required when
                `top_ranked` is given.

        Returns:
            A tuple ``(scores, indices)`` of per-query lists. In full-corpus
            mode the indices refer to insertion order in the index; in
            reranking mode they refer to positions in that query's candidate
            list.

        Raises:
            ValueError: If no documents were added, or if reranking is
                requested without `query_idx_to_id`.
        """
        import faiss

        if self.index is None:
            raise ValueError("No index built. Call add_documents() first.")

        # Convert before normalizing: normalize_L2 needs float32 and works in
        # place, so it must not run on the caller's original array.
        queries = self._to_float32(embeddings)

        if self._normalize:
            faiss.normalize_L2(queries)

        if top_ranked is not None:
            if query_idx_to_id is None:
                raise ValueError("query_idx_to_id must be provided when reranking.")

            similarities, ids = self._reranking(
                queries,
                top_k,
                top_ranked=top_ranked,
                query_idx_to_id=query_idx_to_id,
            )
        else:
            # Never ask for more neighbours than are stored: FAISS would pad
            # the result with -1 ids, which callers would read as valid indices.
            k = min(top_k, self.index.ntotal)
            if k <= 0:
                similarities = [[] for _ in range(len(queries))]
                ids = [[] for _ in range(len(queries))]
            else:
                raw_scores, raw_ids = self.index.search(queries, k)
                similarities = raw_scores.tolist()
                ids = raw_ids.tolist()

        if issubclass(self.index_type, faiss.IndexFlatL2):
            # IndexFlatL2 reports squared distances; turn them into negated
            # distances row by row (rows may differ in length when reranking).
            similarities = [
                (-np.sqrt(np.maximum(row, 0.0))).tolist() for row in similarities
            ]

        return similarities, ids

    def _reranking(
        self,
        embeddings: Array,
        top_k: int,
        top_ranked: TopRankedDocumentsType | None = None,
        query_idx_to_id: dict[int, str] | None = None,
    ) -> tuple[list[list[float]], list[list[int]]]:
        """Score each query against only its own candidate documents.

        For every query a temporary index is built from the stored vectors of
        its candidates, so the returned indices are positions within that
        query's candidate list, not within the full corpus.

        Returns:
            Per-query lists of scores and candidate-list positions; both are
            empty for a query without candidates.
        """
        doc_id_to_idx = {doc_id: i for i, doc_id in enumerate(self.idxs)}
        dim = self.index.d
        scores_all: list[list[float]] = []
        idxs_all: list[list[int]] = []

        for query_idx, query_emb in enumerate(embeddings):
            query_id = query_idx_to_id[query_idx]
            ranked_ids = top_ranked.get(query_id)
            if not ranked_ids:
                logger.warning(f"No top-ranked documents for query {query_id}")
                scores_all.append([])
                idxs_all.append([])
                continue

            candidate_indices = [doc_id_to_idx[doc_id] for doc_id in ranked_ids]
            candidate_embs = np.vstack(
                [self.index.reconstruct(idx) for idx in candidate_indices]
            )
            sub_reranking_index = self.index_type(dim)
            sub_reranking_index.add(candidate_embs)

            # Search returns scores and indices in one call
            scores, local_indices = sub_reranking_index.search(
                query_emb.reshape(1, -1).astype(np.float32),
                min(top_k, len(candidate_indices)),
            )
            # faiss will output 2d arrays even for single query
            scores_all.append(scores[0].tolist())
            idxs_all.append(local_indices[0].tolist())

        return scores_all, idxs_all

    def clear(self) -> None:
        """Clear all stored documents and embeddings from the backend."""
        self.index = None
        self.idxs = []
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -21,6 +21,7 @@ from mteb.types import (
|
|
|
21
21
|
)
|
|
22
22
|
|
|
23
23
|
from .models_protocols import CrossEncoderProtocol, EncoderProtocol
|
|
24
|
+
from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
|
|
24
25
|
|
|
25
26
|
logger = logging.getLogger(__name__)
|
|
26
27
|
|
|
@@ -28,13 +29,19 @@ logger = logging.getLogger(__name__)
|
|
|
28
29
|
class SearchEncoderWrapper:
|
|
29
30
|
"""Wrapper for Encoder models to be used in search tasks."""
|
|
30
31
|
|
|
31
|
-
corpus_chunk_size = 50_000
|
|
32
32
|
task_corpus: CorpusDatasetType | None
|
|
33
33
|
|
|
34
|
-
def __init__(
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
model: EncoderProtocol,
|
|
37
|
+
corpus_chunk_size: int = 50_000,
|
|
38
|
+
index_backend: IndexEncoderSearchProtocol | None = None,
|
|
39
|
+
) -> None:
|
|
35
40
|
self.model = model
|
|
36
41
|
self.task_corpus = None
|
|
37
42
|
self.mteb_model_meta = model.mteb_model_meta
|
|
43
|
+
self.corpus_chunk_size = corpus_chunk_size
|
|
44
|
+
self.index_backend = index_backend
|
|
38
45
|
|
|
39
46
|
def index(
|
|
40
47
|
self,
|
|
@@ -56,6 +63,22 @@ class SearchEncoderWrapper:
|
|
|
56
63
|
"""
|
|
57
64
|
# Always retain corpus for potential reranking or fallback flows
|
|
58
65
|
self.task_corpus = corpus
|
|
66
|
+
if self.index_backend is not None:
|
|
67
|
+
all_doc_embeddings = self.model.encode(
|
|
68
|
+
create_dataloader(
|
|
69
|
+
corpus,
|
|
70
|
+
task_metadata,
|
|
71
|
+
prompt_type=PromptType.document,
|
|
72
|
+
**encode_kwargs,
|
|
73
|
+
),
|
|
74
|
+
task_metadata=task_metadata,
|
|
75
|
+
hf_split=hf_split,
|
|
76
|
+
hf_subset=hf_subset,
|
|
77
|
+
prompt_type=PromptType.document,
|
|
78
|
+
**encode_kwargs,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
self.index_backend.add_documents(all_doc_embeddings, corpus["id"])
|
|
59
82
|
|
|
60
83
|
def search(
|
|
61
84
|
self,
|
|
@@ -105,27 +128,74 @@ class SearchEncoderWrapper:
|
|
|
105
128
|
|
|
106
129
|
if top_ranked is not None:
|
|
107
130
|
logger.info("Reranking pre-ranked documents...")
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
131
|
+
if self.index_backend is None:
|
|
132
|
+
result_heaps = self._rerank_documents(
|
|
133
|
+
query_idx_to_id=query_idx_to_id,
|
|
134
|
+
query_embeddings=query_embeddings,
|
|
135
|
+
top_ranked=top_ranked,
|
|
136
|
+
top_k=top_k,
|
|
137
|
+
task_metadata=task_metadata,
|
|
138
|
+
hf_subset=hf_subset,
|
|
139
|
+
hf_split=hf_split,
|
|
140
|
+
encode_kwargs=encode_kwargs,
|
|
141
|
+
)
|
|
142
|
+
else:
|
|
143
|
+
cos_scores_top_k_values, cos_scores_top_k_idx = (
|
|
144
|
+
self.index_backend.search(
|
|
145
|
+
query_embeddings,
|
|
146
|
+
top_k,
|
|
147
|
+
similarity_fn=self.model.similarity,
|
|
148
|
+
top_ranked=top_ranked,
|
|
149
|
+
query_idx_to_id=query_idx_to_id,
|
|
150
|
+
)
|
|
151
|
+
)
|
|
152
|
+
result_heaps = {qid: [] for qid in query_idx_to_id.values()}
|
|
153
|
+
for query_itr in range(len(query_embeddings)):
|
|
154
|
+
result_heaps = self._rerank_sort_results(
|
|
155
|
+
result_heaps=result_heaps,
|
|
156
|
+
query_id=query_idx_to_id[query_itr],
|
|
157
|
+
ranked_ids=top_ranked[query_idx_to_id[query_itr]],
|
|
158
|
+
scores_top_k_idx=torch.tensor(
|
|
159
|
+
[cos_scores_top_k_idx[query_itr]]
|
|
160
|
+
),
|
|
161
|
+
scores_top_k_values=torch.tensor(
|
|
162
|
+
[cos_scores_top_k_values[query_itr]]
|
|
163
|
+
),
|
|
164
|
+
)
|
|
165
|
+
self.index_backend.clear()
|
|
118
166
|
else:
|
|
119
167
|
logger.info("Performing full corpus search...")
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
168
|
+
if self.index_backend is None:
|
|
169
|
+
result_heaps = self._full_corpus_search(
|
|
170
|
+
query_idx_to_id=query_idx_to_id,
|
|
171
|
+
query_embeddings=query_embeddings,
|
|
172
|
+
task_metadata=task_metadata,
|
|
173
|
+
hf_subset=hf_subset,
|
|
174
|
+
hf_split=hf_split,
|
|
175
|
+
top_k=top_k,
|
|
176
|
+
encode_kwargs=encode_kwargs,
|
|
177
|
+
)
|
|
178
|
+
else:
|
|
179
|
+
cos_scores_top_k_values, cos_scores_top_k_idx = (
|
|
180
|
+
self.index_backend.search(
|
|
181
|
+
query_embeddings,
|
|
182
|
+
top_k,
|
|
183
|
+
similarity_fn=self.model.similarity,
|
|
184
|
+
top_ranked=None,
|
|
185
|
+
query_idx_to_id=None,
|
|
186
|
+
)
|
|
187
|
+
)
|
|
188
|
+
result_heaps = {qid: [] for qid in query_idx_to_id.values()}
|
|
189
|
+
result_heaps = self._sort_full_corpus_results(
|
|
190
|
+
result_heaps=result_heaps,
|
|
191
|
+
query_idx_to_id=query_idx_to_id,
|
|
192
|
+
query_embeddings=query_embeddings,
|
|
193
|
+
cos_scores_top_k_idx=cos_scores_top_k_idx,
|
|
194
|
+
cos_scores_top_k_values=cos_scores_top_k_values,
|
|
195
|
+
sub_corpus_ids=self.task_corpus["id"],
|
|
196
|
+
top_k=top_k,
|
|
197
|
+
)
|
|
198
|
+
self.index_backend.clear()
|
|
129
199
|
|
|
130
200
|
# Reset the task corpus dataloader to None to free up memory
|
|
131
201
|
self.task_corpus = None
|
|
@@ -192,19 +262,45 @@ class SearchEncoderWrapper:
|
|
|
192
262
|
cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
|
|
193
263
|
|
|
194
264
|
sub_corpus_ids = list(sub_corpus_ids)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
265
|
+
result_heaps = self._sort_full_corpus_results(
|
|
266
|
+
result_heaps=result_heaps,
|
|
267
|
+
query_idx_to_id=query_idx_to_id,
|
|
268
|
+
query_embeddings=query_embeddings,
|
|
269
|
+
cos_scores_top_k_idx=cos_scores_top_k_idx,
|
|
270
|
+
cos_scores_top_k_values=cos_scores_top_k_values,
|
|
271
|
+
sub_corpus_ids=sub_corpus_ids,
|
|
272
|
+
top_k=top_k,
|
|
273
|
+
)
|
|
274
|
+
return result_heaps
|
|
275
|
+
|
|
276
|
+
def _sort_full_corpus_results(
    self,
    result_heaps: dict[str, list[tuple[float, str]]],
    query_idx_to_id: dict[int, str],
    query_embeddings: Array,
    cos_scores_top_k_idx: list[list[int]],
    cos_scores_top_k_values: list[list[float]],
    sub_corpus_ids: list[str],
    top_k: int,
) -> dict[str, list[tuple[float, str]]]:
    """Merge per-query top-k hits into bounded min-heaps.

    Each (score, corpus_id) pair is pushed onto the heap of its query; once a
    heap holds `top_k` items, a new item replaces the current smallest only if
    it scores higher. The heaps are updated in place and are not sorted.

    Args:
        result_heaps: Mapping query_id -> heap of (score, corpus_id); must
            already contain an entry for every query.
        query_idx_to_id: Mapping query index -> query_id.
        query_embeddings: Query embeddings; only their count is used.
        cos_scores_top_k_idx: Per query, positions into `sub_corpus_ids`.
        cos_scores_top_k_values: Per query, the scores matching those positions.
        sub_corpus_ids: Corpus IDs the positions refer to.
        top_k: Maximum number of results kept per query.

    Returns:
        The same `result_heaps` mapping, each heap holding at most `top_k`
        (score, corpus_id) tuples.
    """
    for query_itr in range(len(query_embeddings)):
        query_heap = result_heaps[query_idx_to_id[query_itr]]
        for sub_corpus_id, score in zip(
            cos_scores_top_k_idx[query_itr],
            cos_scores_top_k_values[query_itr],
        ):
            if sub_corpus_id < 0:
                # Index backends such as FAISS use -1 for "no result"; without
                # this guard it would silently pick the last corpus id.
                continue
            corpus_id = sub_corpus_ids[sub_corpus_id]
            if len(query_heap) < top_k:
                # push item on the heap
                heapq.heappush(query_heap, (score, corpus_id))
            else:
                # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
                heapq.heappushpop(query_heap, (score, corpus_id))
    return result_heaps
|
|
209
305
|
|
|
210
306
|
def _rerank_documents(
|
|
@@ -279,14 +375,34 @@ class SearchEncoderWrapper:
|
|
|
279
375
|
scores_top_k_values = scores_top_k_values.cpu()
|
|
280
376
|
scores_top_k_idx = scores_top_k_idx.cpu()
|
|
281
377
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
378
|
+
result_heaps = self._rerank_sort_results(
|
|
379
|
+
result_heaps=result_heaps,
|
|
380
|
+
query_id=query_id,
|
|
381
|
+
ranked_ids=ranked_ids,
|
|
382
|
+
scores_top_k_idx=scores_top_k_idx,
|
|
383
|
+
scores_top_k_values=scores_top_k_values,
|
|
384
|
+
)
|
|
385
|
+
return result_heaps
|
|
386
|
+
|
|
387
|
+
def _rerank_sort_results(
|
|
388
|
+
self,
|
|
389
|
+
result_heaps: list[tuple[float, str]],
|
|
390
|
+
query_id: str,
|
|
391
|
+
ranked_ids: list[str],
|
|
392
|
+
scores_top_k_idx: torch.Tensor,
|
|
393
|
+
scores_top_k_values: torch.Tensor,
|
|
394
|
+
) -> list[tuple[float, str]]:
|
|
395
|
+
"""Sort the heap into descending order list.
|
|
289
396
|
|
|
397
|
+
Returns:
|
|
398
|
+
A sorted list of tuples, each containing a relevance score and a document ID.
|
|
399
|
+
"""
|
|
400
|
+
for doc_idx, score in zip(
|
|
401
|
+
scores_top_k_idx[0].tolist(),
|
|
402
|
+
scores_top_k_values[0].tolist(),
|
|
403
|
+
):
|
|
404
|
+
corpus_id = ranked_ids[doc_idx]
|
|
405
|
+
heapq.heappush(result_heaps[query_id], (score, corpus_id))
|
|
290
406
|
return result_heaps
|
|
291
407
|
|
|
292
408
|
def encode(
|
mteb/similarity_functions.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import torch
|
|
2
2
|
|
|
3
3
|
from mteb.models import EncoderProtocol
|
|
4
|
+
from mteb.models.model_meta import ScoringFunction
|
|
4
5
|
from mteb.types import Array
|
|
5
6
|
|
|
6
7
|
|
|
@@ -38,6 +39,54 @@ def compute_pairwise_similarity(
|
|
|
38
39
|
return pairwise_cos_sim(embedding1, embedding2)
|
|
39
40
|
|
|
40
41
|
|
|
42
|
+
def select_similarity(
|
|
43
|
+
embedding1: Array,
|
|
44
|
+
embedding2: Array,
|
|
45
|
+
similarity_fn: ScoringFunction,
|
|
46
|
+
) -> Array:
|
|
47
|
+
"""Compute similarity between two sets of embeddings using the specified similarity function.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
embedding1: The first set of embeddings.
|
|
51
|
+
embedding2: The second set of embeddings.
|
|
52
|
+
similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Array: The computed similarity scores.
|
|
56
|
+
"""
|
|
57
|
+
if similarity_fn is ScoringFunction.COSINE:
|
|
58
|
+
return cos_sim(embedding1, embedding2)
|
|
59
|
+
elif similarity_fn is ScoringFunction.DOT_PRODUCT:
|
|
60
|
+
return dot_score(embedding1, embedding2)
|
|
61
|
+
elif similarity_fn is ScoringFunction.EUCLIDEAN:
|
|
62
|
+
return euclidean_sim(embedding1, embedding2)
|
|
63
|
+
raise ValueError(f"Unsupported similarity function: {similarity_fn}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def select_pairwise_similarity(
|
|
67
|
+
embedding1: Array,
|
|
68
|
+
embedding2: Array,
|
|
69
|
+
similarity_fn: ScoringFunction,
|
|
70
|
+
) -> Array:
|
|
71
|
+
"""Compute pairwise similarity between two sets of embeddings using the specified similarity function.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
embedding1: The first set of embeddings.
|
|
75
|
+
embedding2: The second set of embeddings.
|
|
76
|
+
similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Array: The computed pairwise similarity scores.
|
|
80
|
+
"""
|
|
81
|
+
if similarity_fn is ScoringFunction.COSINE:
|
|
82
|
+
return pairwise_cos_sim(embedding1, embedding2)
|
|
83
|
+
elif similarity_fn is ScoringFunction.DOT_PRODUCT:
|
|
84
|
+
return pairwise_dot_score(embedding1, embedding2)
|
|
85
|
+
elif similarity_fn is ScoringFunction.EUCLIDEAN:
|
|
86
|
+
return pairwise_euclidean_sim(embedding1, embedding2)
|
|
87
|
+
raise ValueError(f"Unsupported similarity function: {similarity_fn}")
|
|
88
|
+
|
|
89
|
+
|
|
41
90
|
def _normalize_embeddings(embeddings: Array) -> torch.Tensor:
|
|
42
91
|
"""Normalizes the embeddings matrix, so that each sentence embedding has unit length.
|
|
43
92
|
|
|
@@ -41,7 +41,6 @@ def _load_data(
|
|
|
41
41
|
},
|
|
42
42
|
remove_columns=["query-id", "query"],
|
|
43
43
|
)
|
|
44
|
-
query_ds = query_ds.select_columns(["id", "text"])
|
|
45
44
|
|
|
46
45
|
corpus_ds = load_dataset(
|
|
47
46
|
path,
|
|
@@ -66,7 +65,7 @@ def _load_data(
|
|
|
66
65
|
)
|
|
67
66
|
|
|
68
67
|
if langs is None:
|
|
69
|
-
queries[split] = query_ds
|
|
68
|
+
queries[split] = query_ds.select_columns(["id", "text"])
|
|
70
69
|
corpus[split] = corpus_ds
|
|
71
70
|
relevant_docs[split] = {}
|
|
72
71
|
for row in qrels_ds:
|
|
@@ -77,7 +76,8 @@ def _load_data(
|
|
|
77
76
|
relevant_docs[split][qid][did] = int(row["score"])
|
|
78
77
|
else:
|
|
79
78
|
for lang in langs:
|
|
80
|
-
|
|
79
|
+
filtered_query_ds = query_ds.filter(lambda x: x["language"] == lang)
|
|
80
|
+
queries[lang][split] = filtered_query_ds.select_columns(["id", "text"])
|
|
81
81
|
|
|
82
82
|
corpus[lang][split] = corpus_ds
|
|
83
83
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -100,6 +100,8 @@ Provides-Extra: llama-embed-nemotron
|
|
|
100
100
|
Requires-Dist: transformers==4.51.0; extra == "llama-embed-nemotron"
|
|
101
101
|
Provides-Extra: faiss-cpu
|
|
102
102
|
Requires-Dist: faiss-cpu>=1.12.0; extra == "faiss-cpu"
|
|
103
|
+
Provides-Extra: eager-embed
|
|
104
|
+
Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
|
|
103
105
|
Dynamic: license-file
|
|
104
106
|
|
|
105
107
|
<h1 align="center">
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
mteb/__init__.py,sha256=
|
|
1
|
+
mteb/__init__.py,sha256=h2kru--zMEC0mmLQ688kggdDpBH7dxYz1HhLVHbRjcI,1376
|
|
2
2
|
mteb/__main__.py,sha256=KKWed4HW-OpfpJhCuKDNDPuAAIoppQY1g2gRuCdAmlw,34
|
|
3
3
|
mteb/_create_dataloaders.py,sha256=9aUHM1q2q748XHax_YYcPBmckIOOVCrJ_N2bJYVbn3s,14338
|
|
4
4
|
mteb/_helpful_enum.py,sha256=jh73N1jlcpg7RGz4bj8UpctiMNvqvHpp9wrB7SYEzIU,510
|
|
@@ -11,7 +11,7 @@ mteb/evaluate.py,sha256=nSRKXlQikwtd4qb2Ruckn756IAgTigDjXdl-W2nhI6M,17993
|
|
|
11
11
|
mteb/filter_tasks.py,sha256=5XE1OYmgDDoJYnXwFf4ma_PIT_Lekzs420sQF_kpCiY,7240
|
|
12
12
|
mteb/get_tasks.py,sha256=6Gc18a2bZoLQV1Ms_qdr2KieAqIXg8TDg4l7ZN8rW2I,14218
|
|
13
13
|
mteb/load_results.py,sha256=Xw2ZX7BToU92WwUTQUQKPAgPhX7ucyRRdoCrxAoPHdI,6414
|
|
14
|
-
mteb/similarity_functions.py,sha256=
|
|
14
|
+
mteb/similarity_functions.py,sha256=ySSnrKl4cSKOWfyIKQPVTJtxuy2ZNfcv0COXDp22QlQ,10630
|
|
15
15
|
mteb/_evaluators/__init__.py,sha256=Ag1_RWpxBGMpujzd3FZjI40gY_KQKIpY31tJPuk-hFg,1013
|
|
16
16
|
mteb/_evaluators/_download.py,sha256=jntlcURbJxcxUjTmn2D9Tu6ZnWgDc9t5bY8p9CZCqv4,586
|
|
17
17
|
mteb/_evaluators/any_sts_evaluator.py,sha256=f0V3NDP5Bfp8qEeBwP8E-Enj5F5NbFze-kGmzlkObQA,3762
|
|
@@ -1428,15 +1428,15 @@ mteb/leaderboard/benchmark_selector.py,sha256=hnXdo_Kj4UUAruFl6nZkCxAQ88IEfbaH8E
|
|
|
1428
1428
|
mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
|
|
1429
1429
|
mteb/leaderboard/table.py,sha256=qs0H_Gt9FzRvzb-AL0YlqEe0YAsdYsVX3QlncfCBEqg,7828
|
|
1430
1430
|
mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
|
|
1431
|
-
mteb/models/__init__.py,sha256=
|
|
1431
|
+
mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
|
|
1432
1432
|
mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
|
|
1433
1433
|
mteb/models/get_model_meta.py,sha256=VpZZNINk-QrNeVpPZnlqzlLhtBs8G84eRwTzAb_gRD4,9108
|
|
1434
1434
|
mteb/models/instruct_wrapper.py,sha256=Ty4nfEvioycL_uATkhd0PGuyeB5Xc9xrRd6HOGgb-tc,9005
|
|
1435
1435
|
mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
|
|
1436
1436
|
mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
|
|
1437
|
-
mteb/models/search_wrappers.py,sha256=
|
|
1437
|
+
mteb/models/search_wrappers.py,sha256=AcMhjQyKdeitUjnaqgnP3_zTeVSum8rz1sjBRddHUVQ,20328
|
|
1438
1438
|
mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
|
|
1439
|
-
mteb/models/cache_wrappers/__init__.py,sha256=
|
|
1439
|
+
mteb/models/cache_wrappers/__init__.py,sha256=1w1TnMwulWJSzNkLXjbh5MY3sqgHWc6vUntYn49i9X8,169
|
|
1440
1440
|
mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
|
|
1441
1441
|
mteb/models/cache_wrappers/cache_wrapper.py,sha256=KLDeOCe_ndQshbZa5ep2u3jovsl--tfpQzvt9EXyxCA,6589
|
|
1442
1442
|
mteb/models/cache_wrappers/cache_backends/__init__.py,sha256=hN2Tq7cpTxoOYSCJ1Wnpvb8dEm-kQLfCCahT1N9Bacw,123
|
|
@@ -1471,13 +1471,14 @@ mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2
|
|
|
1471
1471
|
mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
|
|
1472
1472
|
mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
|
|
1473
1473
|
mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
|
|
1474
|
+
mteb/models/model_implementations/eagerworks_models.py,sha256=NOQkCUqn9jLSpf9p6KyaIHnJxYV1MNlr2z7hO2AcRSc,5744
|
|
1474
1475
|
mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
|
|
1475
1476
|
mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQBYZU1uevDRopeyR5ujIhwqBbd8,1043
|
|
1476
1477
|
mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
|
|
1477
1478
|
mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
|
|
1478
1479
|
mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
|
|
1479
1480
|
mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
|
|
1480
|
-
mteb/models/model_implementations/google_models.py,sha256=
|
|
1481
|
+
mteb/models/model_implementations/google_models.py,sha256=ROo83udaUmPx0U_qfFuS55DSrCILVsRZu3oLp_P-srg,9296
|
|
1481
1482
|
mteb/models/model_implementations/granite_vision_embedding_models.py,sha256=uqQ5-e_a-ADv3gf3sR9Drk0S4x8Gy8mZkpL-E4X16TM,7241
|
|
1482
1483
|
mteb/models/model_implementations/gritlm_models.py,sha256=aS_CuioL95JAQMYiaKlGuAWU9wZjabn268Xut3bD8-w,3005
|
|
1483
1484
|
mteb/models/model_implementations/gte_models.py,sha256=o26Xyu_tucUlP435Q_jB4-bl0xckgj4wtbutTwhYgIo,10073
|
|
@@ -1503,25 +1504,25 @@ mteb/models/model_implementations/moco_models.py,sha256=Kl0nBsqkG3crYoo5YulFq1fv
|
|
|
1503
1504
|
mteb/models/model_implementations/model2vec_models.py,sha256=D-EY-6P-cKKunbgzk4DHzJL1ogpWYFhpHbTLb8qQjJw,13765
|
|
1504
1505
|
mteb/models/model_implementations/moka_models.py,sha256=Y5do7Z4JyGxabYrjHhkBLqCKTQKotniS-f4kOgXJjag,4995
|
|
1505
1506
|
mteb/models/model_implementations/mxbai_models.py,sha256=33ta2BnhvKYBUgE89wFgPNf-CnOb7ooumZvqHOvbZsA,3593
|
|
1506
|
-
mteb/models/model_implementations/nb_sbert.py,sha256=
|
|
1507
|
+
mteb/models/model_implementations/nb_sbert.py,sha256=dF3WBn6ERIK7Oqp-tXdLn11Gf0Z7RKLhAoCq0YHxEug,861
|
|
1507
1508
|
mteb/models/model_implementations/no_instruct_sentence_models.py,sha256=6i-xbLRRNKuDpU-hwklwdQjgu1wnz5CecLSoc6kyd7Q,3976
|
|
1508
1509
|
mteb/models/model_implementations/nomic_models.py,sha256=mT-v5Gs5-sRH8-ziCw_CtxB9ox3C6FtwWJjNghNrunw,11334
|
|
1509
1510
|
mteb/models/model_implementations/nomic_models_vision.py,sha256=gEEieMThvw4p-QhRH0G_9-WWTvj-jqOlgFsh6O07dbc,6731
|
|
1510
|
-
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=
|
|
1511
|
-
mteb/models/model_implementations/nvidia_models.py,sha256=
|
|
1511
|
+
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=j71ijIGeYammmtPO6O_IQvPHtSRgwvonDjh8QhfwU64,6170
|
|
1512
|
+
mteb/models/model_implementations/nvidia_models.py,sha256=acVverAt77lURkILCVkCdXsWgY1BJoG1-ugB7yIhlIM,21555
|
|
1512
1513
|
mteb/models/model_implementations/openai_models.py,sha256=2tJyEapIW-GtB3ZOXIHwGjSZGgJl2daE_UsbzH4NhBM,9620
|
|
1513
1514
|
mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXkWWizVd3sQezcP02YtF2fXpo,11436
|
|
1514
1515
|
mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
|
|
1515
|
-
mteb/models/model_implementations/ops_moa_models.py,sha256=
|
|
1516
|
+
mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
|
|
1516
1517
|
mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=qGXv71qRjNCIFluZOwvfBlFlKKyN2bXBokwUPk4KHmM,1066
|
|
1517
1518
|
mteb/models/model_implementations/piccolo_models.py,sha256=d8Dtkv_ZTUOCmJLLOuwquq-gX-2UfKvAtl_LvAS0Xi0,2113
|
|
1518
|
-
mteb/models/model_implementations/promptriever_models.py,sha256=
|
|
1519
|
+
mteb/models/model_implementations/promptriever_models.py,sha256=S7uWes_P74p3OZR_KBJHJN_ezlvvRx2__46DMCWqV5M,6328
|
|
1519
1520
|
mteb/models/model_implementations/pylate_models.py,sha256=yINGQL97S4xjj74-FTWpO4KHX-E9NDOEeyQWyRmmnaE,14772
|
|
1520
1521
|
mteb/models/model_implementations/qodo_models.py,sha256=JDqffDlQiOEariyheybOIf3iNkqot2gTkEIHWDnRbUE,2037
|
|
1521
1522
|
mteb/models/model_implementations/qtack_models.py,sha256=biZLH5E3UWIcMZXIZNGgBZFEUvovPpAo6vUyL776W1w,1224
|
|
1522
|
-
mteb/models/model_implementations/qwen3_models.py,sha256=
|
|
1523
|
-
mteb/models/model_implementations/qzhou_models.py,sha256=
|
|
1524
|
-
mteb/models/model_implementations/random_baseline.py,sha256=
|
|
1523
|
+
mteb/models/model_implementations/qwen3_models.py,sha256=F_o6ciD-6gLFfIlQYD9MsNvcbkmGzJ39eKpFlEog1rM,5132
|
|
1524
|
+
mteb/models/model_implementations/qzhou_models.py,sha256=7KaZpHdap-YyK0QxOMHxU0W2aGismx7GZv_bNXkEOcI,3536
|
|
1525
|
+
mteb/models/model_implementations/random_baseline.py,sha256=1VNnWBSi0Ph_RLON6clOuQI-Kli5BRtiiDFZMrTj7PM,7489
|
|
1525
1526
|
mteb/models/model_implementations/rasgaard_models.py,sha256=a8F3kDSBWHH0UR7wRioOrWGQUxtloD5mU7EG27iM-68,1260
|
|
1526
1527
|
mteb/models/model_implementations/reasonir_model.py,sha256=wSCcJpUgZ0pG2g3vTEzYNmPlPG_CVn_rR0ENVCines0,2218
|
|
1527
1528
|
mteb/models/model_implementations/repllama_models.py,sha256=89HoqEpzkNysHeuf_-YhU8WETamHTogSRztGIRo6G1s,7321
|
|
@@ -1553,6 +1554,10 @@ mteb/models/model_implementations/voyage_v.py,sha256=WnvwYNVv3c5K0ChzGA3v2iTQX2e
|
|
|
1553
1554
|
mteb/models/model_implementations/xyz_models.py,sha256=TePlrH6EHwRPO87U_J3Yce9-XHCn_X7I2cJ_6BZ2fUY,1296
|
|
1554
1555
|
mteb/models/model_implementations/youtu_models.py,sha256=NB74E6z-_36HyXb8GXKn8CrmRLN68uX9eH4xcS57zl0,5938
|
|
1555
1556
|
mteb/models/model_implementations/yuan_models.py,sha256=yZ6ki6YFaoVrJ_2pPSRQaMKOsIOUo3GtmhPx1qeUl2w,939
|
|
1557
|
+
mteb/models/search_encoder_index/__init__.py,sha256=3QFacIuFyEiI7ocsSkb3Lp2S2L7MLkpHCMIJ201fowA,182
|
|
1558
|
+
mteb/models/search_encoder_index/search_backend_protocol.py,sha256=TSjlx88stJcMldbAeVqNCf8JsQvE-B5rf5SBRw90isY,1890
|
|
1559
|
+
mteb/models/search_encoder_index/search_indexes/__init__.py,sha256=Wm60_oUemUpFsvrCMW111dcPH2L2rt1iZrXMskXmG7o,88
|
|
1560
|
+
mteb/models/search_encoder_index/search_indexes/faiss_search_index.py,sha256=WMs3QbbYV13fRuT3dakmdVMZLFdc_9ZzSupS3QxlbVQ,5555
|
|
1556
1561
|
mteb/results/__init__.py,sha256=EXQqK4Am5eIYzD52dpcGAFSdqnC38oE6JHN302oidHc,158
|
|
1557
1562
|
mteb/results/benchmark_results.py,sha256=OWqeBxbNsPmOKRhxY980N5CikpdJXToDGJGTXUe64Lw,18209
|
|
1558
1563
|
mteb/results/model_result.py,sha256=Wdbkpxq7_geliYDr4558i6txDVdsHL-Y9WAv_u7thlI,13689
|
|
@@ -2354,7 +2359,7 @@ mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py,sha256=LJGpx4RkS
|
|
|
2354
2359
|
mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py,sha256=Mmcvrt_1cIxPfHZfUzSURPZyaaweGiB02im1ZszlS6M,6837
|
|
2355
2360
|
mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py,sha256=iFUQUlO_ogBdQBVYBQW3o-AJDQ792yg1pJtRxA5I3Qo,3796
|
|
2356
2361
|
mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py,sha256=UduWKefwP7bPYxiDlztPEvSWXmTdw0xElglMbPY6XhA,4449
|
|
2357
|
-
mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py,sha256=
|
|
2362
|
+
mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py,sha256=vOfiruHywYkP8pccdAuGLyYyFTw1zK0qcXDnUFA8Z5A,9091
|
|
2358
2363
|
mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py,sha256=wOoC--IVTz0dR6RMVICbz6OWxfCyVahGDSfX_TScCgA,16934
|
|
2359
2364
|
mteb/tasks/retrieval/multilingual/web_faq_retrieval.py,sha256=TM-Q98yXZny_PKHAFNEvw9o9ET_L6VM3aNis1NJ9DgM,2686
|
|
2360
2365
|
mteb/tasks/retrieval/multilingual/wikipedia_retrieval_multilingual.py,sha256=zyqAt63bHXNU_I37jb891pwWUyGzZUGkXCyhWlRbed8,1569
|
|
@@ -2558,9 +2563,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2558
2563
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2559
2564
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2560
2565
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2561
|
-
mteb-2.
|
|
2562
|
-
mteb-2.
|
|
2563
|
-
mteb-2.
|
|
2564
|
-
mteb-2.
|
|
2565
|
-
mteb-2.
|
|
2566
|
-
mteb-2.
|
|
2566
|
+
mteb-2.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2567
|
+
mteb-2.3.0.dist-info/METADATA,sha256=ZXHapOBoYWzV3b_qYW_igqfvEsqDVXky9deYaQDAOFI,13662
|
|
2568
|
+
mteb-2.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2569
|
+
mteb-2.3.0.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2570
|
+
mteb-2.3.0.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2571
|
+
mteb-2.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|