mteb 2.3.6__py3-none-any.whl → 2.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/models/model_implementations/openai_models.py +15 -16
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/METADATA +1 -1
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/RECORD +7 -7
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/WHEEL +0 -0
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/entry_points.txt +0 -0
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.3.6.dist-info → mteb-2.3.7.dist-info}/top_level.txt +0 -0
|
@@ -91,10 +91,6 @@ class OpenAIModel(AbsEncoder):
|
|
|
91
91
|
|
|
92
92
|
from openai import NotGiven
|
|
93
93
|
|
|
94
|
-
if self.model_name == "text-embedding-ada-002" and self._embed_dim is not None:
|
|
95
|
-
logger.warning(
|
|
96
|
-
"Reducing embedding size available only for text-embedding-3-* models"
|
|
97
|
-
)
|
|
98
94
|
sentences = [text for batch in inputs for text in batch["text"]]
|
|
99
95
|
|
|
100
96
|
mask_sents = [(i, t) for i, t in enumerate(sentences) if t.strip()]
|
|
@@ -122,13 +118,22 @@ class OpenAIModel(AbsEncoder):
|
|
|
122
118
|
|
|
123
119
|
no_empty_embeddings = []
|
|
124
120
|
|
|
121
|
+
# Set dimensions only for models that support it
|
|
122
|
+
dimensions = (
|
|
123
|
+
self._embed_dim or NotGiven()
|
|
124
|
+
if not self.model_name == "text-embedding-ada-002"
|
|
125
|
+
else NotGiven()
|
|
126
|
+
)
|
|
127
|
+
default_kwargs = dict(
|
|
128
|
+
model=self.model_name,
|
|
129
|
+
encoding_format="float",
|
|
130
|
+
dimensions=dimensions,
|
|
131
|
+
)
|
|
132
|
+
|
|
125
133
|
for sublist in tqdm(sublists, leave=False, disable=not show_progress_bar):
|
|
126
134
|
try:
|
|
127
135
|
response = self._client.embeddings.create(
|
|
128
|
-
input=sublist,
|
|
129
|
-
model=self.model_name,
|
|
130
|
-
encoding_format="float",
|
|
131
|
-
dimensions=self._embed_dim or NotGiven(),
|
|
136
|
+
input=sublist, **default_kwargs
|
|
132
137
|
)
|
|
133
138
|
except Exception as e:
|
|
134
139
|
# Sleep due to too many requests
|
|
@@ -138,19 +143,13 @@ class OpenAIModel(AbsEncoder):
|
|
|
138
143
|
time.sleep(10)
|
|
139
144
|
try:
|
|
140
145
|
response = self._client.embeddings.create(
|
|
141
|
-
input=sublist,
|
|
142
|
-
model=self.model_name,
|
|
143
|
-
encoding_format="float",
|
|
144
|
-
dimensions=self._embed_dim or NotGiven(),
|
|
146
|
+
input=sublist, **default_kwargs
|
|
145
147
|
)
|
|
146
148
|
except Exception as e:
|
|
147
149
|
logger.info("Sleeping for 60 seconds due to error", e)
|
|
148
150
|
time.sleep(60)
|
|
149
151
|
response = self._client.embeddings.create(
|
|
150
|
-
input=sublist,
|
|
151
|
-
model=self.model_name,
|
|
152
|
-
encoding_format="float",
|
|
153
|
-
dimensions=self._embed_dim or NotGiven(),
|
|
152
|
+
input=sublist, **default_kwargs
|
|
154
153
|
)
|
|
155
154
|
no_empty_embeddings.extend(self._to_numpy(response))
|
|
156
155
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.3.6
|
|
3
|
+
Version: 2.3.7
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -1516,7 +1516,7 @@ mteb/models/model_implementations/nomic_models.py,sha256=mT-v5Gs5-sRH8-ziCw_CtxB
|
|
|
1516
1516
|
mteb/models/model_implementations/nomic_models_vision.py,sha256=gEEieMThvw4p-QhRH0G_9-WWTvj-jqOlgFsh6O07dbc,6731
|
|
1517
1517
|
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=14XSv7wGsitu0cF8P3A951gel_Py7PrKlRixkLS4qG4,6203
|
|
1518
1518
|
mteb/models/model_implementations/nvidia_models.py,sha256=acVverAt77lURkILCVkCdXsWgY1BJoG1-ugB7yIhlIM,21555
|
|
1519
|
-
mteb/models/model_implementations/openai_models.py,sha256=
|
|
1519
|
+
mteb/models/model_implementations/openai_models.py,sha256=loU6JByNUwRidq7lmcu8iGOtUQvzejw6HVLaF_IKCR0,9352
|
|
1520
1520
|
mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXkWWizVd3sQezcP02YtF2fXpo,11436
|
|
1521
1521
|
mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
|
|
1522
1522
|
mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
|
|
@@ -2573,9 +2573,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2573
2573
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2574
2574
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2575
2575
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2576
|
-
mteb-2.3.
|
|
2577
|
-
mteb-2.3.
|
|
2578
|
-
mteb-2.3.
|
|
2579
|
-
mteb-2.3.
|
|
2580
|
-
mteb-2.3.
|
|
2581
|
-
mteb-2.3.
|
|
2576
|
+
mteb-2.3.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2577
|
+
mteb-2.3.7.dist-info/METADATA,sha256=8WsmZm9cDT-XjdHIF04XscnqFbTmCiudzikksCUJSiw,13923
|
|
2578
|
+
mteb-2.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2579
|
+
mteb-2.3.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2580
|
+
mteb-2.3.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2581
|
+
mteb-2.3.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|