mteb 2.7.15-py3-none-any.whl → 2.7.16-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- mteb/models/model_implementations/misc_models.py +0 -48
- mteb/models/model_implementations/rerankers_custom.py +0 -87
- mteb/models/model_implementations/rerankers_monot5_based.py +0 -26
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/METADATA +1 -1
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/RECORD +9 -9
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/WHEEL +0 -0
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/top_level.txt +0 -0
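A comparison like the one below can be reproduced locally by diffing the files the two wheels share (a wheel is a zip archive). A minimal sketch, assuming both wheel files are already in the working directory, e.g. after `pip download mteb==2.7.15 --no-deps` and the same for 2.7.16:

# Sketch: print a unified diff of every file present in both wheels.
import difflib
import zipfile

OLD = "mteb-2.7.15-py3-none-any.whl"
NEW = "mteb-2.7.16-py3-none-any.whl"

with zipfile.ZipFile(OLD) as old, zipfile.ZipFile(NEW) as new:
    for name in sorted(set(old.namelist()) & set(new.namelist())):
        a = old.read(name).decode("utf-8", errors="replace").splitlines()
        b = new.read(name).decode("utf-8", errors="replace").splitlines()
        for line in difflib.unified_diff(a, b, f"a/{name}", f"b/{name}", lineterm=""):
            print(line)

The dist-info directories carry the version in their path, so METADATA and RECORD are not shared names between the two archives; the path mapping shown above as {mteb-2.7.15.dist-info → mteb-2.7.16.dist-info} has to be applied before comparing them.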
mteb/models/model_implementations/misc_models.py

@@ -1007,54 +1007,6 @@ thenlper__gte_small = ModelMeta(
       year={2023}
     }""",
 )
-OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
-    name="OrlikB/KartonBERT-USE-base-v1",
-    model_type=["dense"],
-    revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
-    release_date="2024-09-30",
-    languages=["pol-Latn"],
-    loader=sentence_transformers_loader,
-    n_parameters=103705344,
-    n_embedding_parameters=None,
-    memory_usage_mb=396,
-    max_tokens=512.0,
-    embed_dim=768,
-    license="gpl-3.0",
-    open_weights=True,
-    public_training_code=None,
-    public_training_data=None,
-    framework=["PyTorch"],
-    reference="https://huggingface.co/OrlikB/KartonBERT-USE-base-v1",
-    similarity_fn_name=ScoringFunction.COSINE,
-    use_instructions=None,
-    training_datasets=None,
-    adapted_from="KartonBERT-USE-base-v1",
-    superseded_by=None,
-)
-OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
-    name="OrlikB/st-polish-kartonberta-base-alpha-v1",
-    model_type=["dense"],
-    revision="5590a0e2d7bb43674e44d7076b3ff157f7d4a1cb",
-    release_date="2023-11-12",
-    languages=["pol-Latn"],
-    loader=sentence_transformers_loader,
-    n_parameters=None,
-    n_embedding_parameters=None,
-    memory_usage_mb=None,
-    max_tokens=514.0,
-    embed_dim=768,
-    license="lgpl",
-    open_weights=True,
-    public_training_code=None,
-    public_training_data=None,
-    framework=["PyTorch"],
-    reference="https://huggingface.co/OrlikB/st-polish-kartonberta-base-alpha-v1",
-    similarity_fn_name=ScoringFunction.COSINE,
-    use_instructions=None,
-    training_datasets=None,
-    adapted_from="st-polish-kartonberta-base-alpha-v1",
-    superseded_by=None,
-)
 sdadas__mmlw_e5_base = ModelMeta(
     name="sdadas/mmlw-e5-base",
     model_type=["dense"],
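This hunk only delists the two OrlikB Polish models from MTEB's registry; nothing but the ModelMeta entries is removed, and the checkpoints presumably remain available on the Hugging Face Hub. A minimal sketch of loading one directly, pinning the revision recorded in the removed metadata (assumes sentence-transformers is installed):

# Sketch: load a delisted model directly; name and revision come from the removed entry.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "OrlikB/KartonBERT-USE-base-v1",
    revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
)
# languages=["pol-Latn"] in the removed metadata, so Polish input is the intended use.
embeddings = model.encode(["Przykładowe zdanie."])
print(embeddings.shape)  # expected (1, 768) per the removed embed_dim=768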
mteb/models/model_implementations/rerankers_custom.py

@@ -103,68 +103,6 @@ class BGEReranker(RerankerWrapper):
         return scores
 
 
-class MonoBERTReranker(RerankerWrapper):
-    name: str = "MonoBERT"
-
-    def __init__(
-        self,
-        model_name_or_path="castorini/monobert-large-msmarco",
-        torch_compile=False,
-        **kwargs,
-    ):
-        from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-        super().__init__(model_name_or_path, **kwargs)
-        if not self.device:
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model_args = {}
-        if self.fp_options:
-            model_args["torch_dtype"] = self.fp_options
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            model_name_or_path,
-            **model_args,
-        )
-        self.model.to(self.device)
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
-        self.max_length = self.tokenizer.model_max_length
-        logger.info(f"Using max_length of {self.max_length}")
-
-        self.model.eval()
-
-    @torch.inference_mode()
-    def predict(
-        self,
-        inputs1: DataLoader[BatchedInput],
-        inputs2: DataLoader[BatchedInput],
-        *,
-        task_metadata: TaskMetadata,
-        hf_split: str,
-        hf_subset: str,
-        prompt_type: PromptType | None = None,
-        **kwargs: Any,
-    ) -> Array:
-        queries = [text for batch in inputs1 for text in batch["query"]]
-        instructions = None
-        if "instruction" in inputs2.dataset.features:
-            instructions = [text for batch in inputs1 for text in batch["instruction"]]
-        passages = [text for batch in inputs2 for text in batch["text"]]
-
-        if instructions is not None and instructions[0] is not None:
-            queries = [f"{q} {i}".strip() for i, q in zip(instructions, queries)]
-
-        tokens = self.tokenizer(
-            queries,
-            passages,
-            padding=True,
-            truncation="only_second",
-            return_tensors="pt",
-            max_length=self.max_length,
-        ).to(self.device)
-        output = self.model(**tokens)[0]
-        batch_scores = torch.nn.functional.log_softmax(output, dim=1)
-        return batch_scores[:, 1].exp()
-
-
 class JinaReranker(RerankerWrapper):
     name = "Jina"
 
@@ -219,31 +157,6 @@ class JinaReranker(RerankerWrapper):
         return scores
 
 
-monobert_large = ModelMeta(
-    loader=MonoBERTReranker,
-    loader_kwargs=dict(
-        fp_options="float16",
-    ),
-    name="castorini/monobert-large-msmarco",
-    model_type=["cross-encoder"],
-    languages=["eng-Latn"],
-    open_weights=True,
-    revision="0a97706f3827389da43b83348d5d18c9d53876fa",
-    release_date="2020-05-28",
-    n_parameters=None,
-    n_embedding_parameters=31_254_528,
-    memory_usage_mb=None,
-    max_tokens=None,
-    embed_dim=None,
-    license=None,
-    public_training_code=None,
-    public_training_data=None,
-    similarity_fn_name=None,
-    use_instructions=None,
-    training_datasets=None,
-    framework=["Sentence Transformers", "PyTorch", "Transformers"],
-)
-
 # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
 jina_reranker_multilingual = ModelMeta(
     loader=JinaReranker,
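Together these two hunks delete MonoBERT support: the wrapper class and its castorini/monobert-large-msmarco registration. For reference, the scoring scheme the removed code implemented is standard cross-encoder relevance classification; a self-contained sketch distilled from the removed predict(), with the RerankerWrapper plumbing omitted:

# Sketch of the removed MonoBERT scoring path.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

name = "castorini/monobert-large-msmarco"  # default in the removed __init__
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name).eval()

@torch.inference_mode()
def score(queries: list[str], passages: list[str]) -> torch.Tensor:
    tokens = tokenizer(
        queries,
        passages,
        padding=True,
        truncation="only_second",  # truncate the passage, never the query
        max_length=tokenizer.model_max_length,
        return_tensors="pt",
    )
    logits = model(**tokens).logits
    # Probability of the "relevant" class (index 1), as in the removed predict().
    return torch.nn.functional.log_softmax(logits, dim=1)[:, 1].exp()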
mteb/models/model_implementations/rerankers_monot5_based.py

@@ -34,7 +34,6 @@ prediction_tokens = {
     "unicamp-dl/mt5-base-en-msmarco": ["▁no", "▁yes"],
     "unicamp-dl/mt5-base-mmarco-v2": ["▁no", "▁yes"],
     "unicamp-dl/mt5-base-mmarco-v1": ["▁no", "▁yes"],
-    "unicamp-dl/mt5-13b-mmarco-100k": ["▁", "▁true"],
 }
 
 
@@ -919,28 +918,3 @@ mt5_base_mmarco_v2 = ModelMeta(
     use_instructions=None,
     framework=["PyTorch", "Transformers"],
 )
-
-mt5_13b_mmarco_100k = ModelMeta(
-    loader=MonoT5Reranker,
-    loader_kwargs=dict(
-        fp_options="float16",
-    ),
-    name="unicamp-dl/mt5-13b-mmarco-100k",
-    model_type=["cross-encoder"],
-    languages=mt5_languages,
-    open_weights=True,
-    revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
-    release_date="2022-11-04",
-    n_parameters=None,
-    n_embedding_parameters=1_024_458_752,
-    memory_usage_mb=None,
-    max_tokens=None,
-    embed_dim=None,
-    license=None,
-    public_training_code=None,
-    public_training_data=None,
-    similarity_fn_name=None,
-    use_instructions=None,
-    training_datasets=None,
-    framework=["PyTorch", "Transformers"],
-)
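Together these two hunks delete the unicamp-dl/mt5-13b-mmarco-100k reranker: its ("▁", "▁true") token pair and its registration. The prediction_tokens table drives MonoT5-style scoring: the model reads a query-document prompt and the score is the probability of the "yes"-like token against the "no"-like token at the first decoded position. A minimal sketch, assuming the conventional "Query: ... Document: ... Relevant:" prompt (the actual template lives outside this diff):

# Sketch of MonoT5-style scoring driven by a (no, yes) token pair.
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

name = "unicamp-dl/mt5-base-mmarco-v2"  # a remaining entry; its pair is ["▁no", "▁yes"]
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSeq2SeqLM.from_pretrained(name).eval()
no_id, yes_id = tokenizer.convert_tokens_to_ids(["▁no", "▁yes"])

@torch.inference_mode()
def score(query: str, passage: str) -> float:
    enc = tokenizer(f"Query: {query} Document: {passage} Relevant:", return_tensors="pt")
    # Single decoder step: feed the start token and read the first-position logits.
    start = torch.full((1, 1), model.config.decoder_start_token_id)
    logits = model(**enc, decoder_input_ids=start).logits[0, -1]
    # Softmax over just the two candidate tokens; return P("yes"-like token).
    return torch.softmax(logits[[no_id, yes_id]], dim=0)[1].item()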
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.15
+Version: 2.7.16
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/RECORD

@@ -1556,7 +1556,7 @@ mteb/models/model_implementations/llm2clip_models.py,sha256=X3W16uipaZ0t4Mco4lhh
 mteb/models/model_implementations/llm2vec_models.py,sha256=n86YQ8fAHU1gVtlY7tZcXq-1ab_ISxBmuk-X4MDnY4o,13348
 mteb/models/model_implementations/mcinext_models.py,sha256=T3vO9JQSmh3BICp6Y_q7j4anuA8P8LGZ4ZWnwGnF7cs,19299
 mteb/models/model_implementations/mdbr_models.py,sha256=AqsRZ-IDekIjq-FDWu0zx7Nk9ySJxaWTdRb8YhUZeu4,2828
-mteb/models/model_implementations/misc_models.py,sha256=
+mteb/models/model_implementations/misc_models.py,sha256=JkJsyha-B5M8myLvHIwFUV14yo2lnSuBzHeO5fE9i74,73191
 mteb/models/model_implementations/mixedbread_ai_models.py,sha256=1-RD4M-16M-Rcf5CTD_R7LVoLv3cNFbmEjataQ__q94,10666
 mteb/models/model_implementations/mme5_models.py,sha256=V7BCGFkfZxkZ3ANJImvSFfP7in8OSfmkbqX-zXc_iF8,1574
 mteb/models/model_implementations/moco_models.py,sha256=6eEGpGTlI4StFRYsaNtXejhYE9GCqasUYCqB_SQy9cE,5714
@@ -1590,8 +1590,8 @@ mteb/models/model_implementations/random_baseline.py,sha256=YsITQoLbea_Iz2X84WNG
 mteb/models/model_implementations/rasgaard_models.py,sha256=_uNYP_nqJcOyoKnHNcvfJnP9gRvsv7HCWhZX2LJzQ9s,1322
 mteb/models/model_implementations/reasonir_model.py,sha256=WNWGqa9wANBL9vTdcFx51TEFXz6yHq_ygK0rij3LCL8,5217
 mteb/models/model_implementations/repllama_models.py,sha256=k6BgN2Cn41p0gQ0F1FdOTQ9OXlmFgG-2RtdvzOcCSZg,7543
-mteb/models/model_implementations/rerankers_custom.py,sha256=
-mteb/models/model_implementations/rerankers_monot5_based.py,sha256=
+mteb/models/model_implementations/rerankers_custom.py,sha256=WBSA7kBRqxgb1549UwRYdtYzUovdwmW8C0PWzvGR54g,8087
+mteb/models/model_implementations/rerankers_monot5_based.py,sha256=U9ChokUEDXtkoFno-o4GeT4fXEEoFtnZn2denIafxi8,34583
 mteb/models/model_implementations/richinfoai_models.py,sha256=FsXamY-bvR5LLagtKK8fP-I5oc6B_bKp_i6_xzUYL8Y,1069
 mteb/models/model_implementations/ru_sentence_models.py,sha256=W4R985LnThJ-9XFbPnTGKb3L1QnoS3i3VXBFq94DK_w,43034
 mteb/models/model_implementations/ruri_models.py,sha256=3zYOqacB3JEnGJkMGYHqFgVkbmLo4uceJs9kzV54ivU,10819
@@ -2646,9 +2646,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
-mteb-2.7.
-mteb-2.7.
-mteb-2.7.
-mteb-2.7.
-mteb-2.7.
-mteb-2.7.
+mteb-2.7.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.7.16.dist-info/METADATA,sha256=a-Rt1xa9ZgNdKf-JlM6EUZE_pKzEHoT6KGpFZUvnPo0,14348
+mteb-2.7.16.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mteb-2.7.16.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.7.16.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.7.16.dist-info/RECORD,,
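The RECORD hunks follow the standard wheel convention (PEP 376/PEP 427): each line is a file path, "sha256=" plus the unpadded urlsafe-base64 digest of the file, and its size in bytes. A sketch of recomputing one entry from an unpacked 2.7.16 wheel:

# Sketch: recompute a RECORD entry for a file from an unpacked wheel.
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# For 2.7.16 this should reproduce the line above:
# mteb/models/model_implementations/misc_models.py,sha256=JkJsyha-B5M8myLvHIwFUV14yo2lnSuBzHeO5fE9i74,73191
print(record_entry("mteb/models/model_implementations/misc_models.py"))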
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/WHEEL: file without changes
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/entry_points.txt: file without changes
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/licenses/LICENSE: file without changes
{mteb-2.7.15.dist-info → mteb-2.7.16.dist-info}/top_level.txt: file without changes