mteb 2.3.6__py3-none-any.whl → 2.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,10 +91,6 @@ class OpenAIModel(AbsEncoder):
91
91
 
92
92
  from openai import NotGiven
93
93
 
94
- if self.model_name == "text-embedding-ada-002" and self._embed_dim is not None:
95
- logger.warning(
96
- "Reducing embedding size available only for text-embedding-3-* models"
97
- )
98
94
  sentences = [text for batch in inputs for text in batch["text"]]
99
95
 
100
96
  mask_sents = [(i, t) for i, t in enumerate(sentences) if t.strip()]
@@ -122,13 +118,22 @@ class OpenAIModel(AbsEncoder):
122
118
 
123
119
  no_empty_embeddings = []
124
120
 
121
+ # Set dimensions only for models that support it
122
+ dimensions = (
123
+ self._embed_dim or NotGiven()
124
+ if not self.model_name == "text-embedding-ada-002"
125
+ else NotGiven()
126
+ )
127
+ default_kwargs = dict(
128
+ model=self.model_name,
129
+ encoding_format="float",
130
+ dimensions=dimensions,
131
+ )
132
+
125
133
  for sublist in tqdm(sublists, leave=False, disable=not show_progress_bar):
126
134
  try:
127
135
  response = self._client.embeddings.create(
128
- input=sublist,
129
- model=self.model_name,
130
- encoding_format="float",
131
- dimensions=self._embed_dim or NotGiven(),
136
+ input=sublist, **default_kwargs
132
137
  )
133
138
  except Exception as e:
134
139
  # Sleep due to too many requests
@@ -138,19 +143,13 @@ class OpenAIModel(AbsEncoder):
138
143
  time.sleep(10)
139
144
  try:
140
145
  response = self._client.embeddings.create(
141
- input=sublist,
142
- model=self.model_name,
143
- encoding_format="float",
144
- dimensions=self._embed_dim or NotGiven(),
146
+ input=sublist, **default_kwargs
145
147
  )
146
148
  except Exception as e:
147
149
  logger.info("Sleeping for 60 seconds due to error", e)
148
150
  time.sleep(60)
149
151
  response = self._client.embeddings.create(
150
- input=sublist,
151
- model=self.model_name,
152
- encoding_format="float",
153
- dimensions=self._embed_dim or NotGiven(),
152
+ input=sublist, **default_kwargs
154
153
  )
155
154
  no_empty_embeddings.extend(self._to_numpy(response))
156
155
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.3.6
3
+ Version: 2.3.7
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -1516,7 +1516,7 @@ mteb/models/model_implementations/nomic_models.py,sha256=mT-v5Gs5-sRH8-ziCw_CtxB
1516
1516
  mteb/models/model_implementations/nomic_models_vision.py,sha256=gEEieMThvw4p-QhRH0G_9-WWTvj-jqOlgFsh6O07dbc,6731
1517
1517
  mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py,sha256=14XSv7wGsitu0cF8P3A951gel_Py7PrKlRixkLS4qG4,6203
1518
1518
  mteb/models/model_implementations/nvidia_models.py,sha256=acVverAt77lURkILCVkCdXsWgY1BJoG1-ugB7yIhlIM,21555
1519
- mteb/models/model_implementations/openai_models.py,sha256=2tJyEapIW-GtB3ZOXIHwGjSZGgJl2daE_UsbzH4NhBM,9620
1519
+ mteb/models/model_implementations/openai_models.py,sha256=loU6JByNUwRidq7lmcu8iGOtUQvzejw6HVLaF_IKCR0,9352
1520
1520
  mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXkWWizVd3sQezcP02YtF2fXpo,11436
1521
1521
  mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
1522
1522
  mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
@@ -2573,9 +2573,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2573
2573
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2574
2574
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2575
2575
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2576
- mteb-2.3.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2577
- mteb-2.3.6.dist-info/METADATA,sha256=urz0_67bNhVt17rvN3pZdvMFt_mvxI7MFvamWkNoNjM,13923
2578
- mteb-2.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2579
- mteb-2.3.6.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2580
- mteb-2.3.6.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2581
- mteb-2.3.6.dist-info/RECORD,,
2576
+ mteb-2.3.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2577
+ mteb-2.3.7.dist-info/METADATA,sha256=8WsmZm9cDT-XjdHIF04XscnqFbTmCiudzikksCUJSiw,13923
2578
+ mteb-2.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2579
+ mteb-2.3.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2580
+ mteb-2.3.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2581
+ mteb-2.3.7.dist-info/RECORD,,
File without changes