mteb 2.1.3__py3-none-any.whl → 2.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,29 @@
1
1
  {
2
2
  "test": {
3
- "num_samples": 4872,
4
- "number_of_characters": 9352943,
3
+ "num_samples": 6362,
4
+ "number_of_characters": 180770,
5
5
  "documents_text_statistics": {
6
- "total_text_length": 8957572,
7
- "min_text_length": 8,
8
- "average_text_length": 3504.527386541471,
9
- "max_text_length": 47929,
10
- "unique_texts": 2556
6
+ "total_text_length": 39142,
7
+ "min_text_length": 2,
8
+ "average_text_length": 7.68243375858685,
9
+ "max_text_length": 31,
10
+ "unique_texts": 5095
11
11
  },
12
12
  "documents_image_statistics": null,
13
13
  "queries_text_statistics": {
14
- "total_text_length": 395371,
15
- "min_text_length": 8,
16
- "average_text_length": 170.71286701208982,
17
- "max_text_length": 2863,
18
- "unique_texts": 2316
14
+ "total_text_length": 141628,
15
+ "min_text_length": 79,
16
+ "average_text_length": 111.78216258879242,
17
+ "max_text_length": 185,
18
+ "unique_texts": 1267
19
19
  },
20
20
  "queries_image_statistics": null,
21
21
  "relevant_docs_statistics": {
22
- "num_relevant_docs": 2316,
22
+ "num_relevant_docs": 1267,
23
23
  "min_relevant_docs_per_query": 1,
24
24
  "average_relevant_docs_per_query": 1.0,
25
25
  "max_relevant_docs_per_query": 1,
26
- "unique_relevant_docs": 988
26
+ "unique_relevant_docs": 478
27
27
  },
28
28
  "top_ranked_statistics": null
29
29
  }
@@ -156,16 +156,15 @@ class VoyageModel(AbsEncoder):
156
156
  and len(batch) < batch_size
157
157
  and batch_tokens < self._max_tokens_per_batch
158
158
  ):
159
- n_tokens = len(
160
- self._client.tokenize([sentences[index]], model=self._model_name)[0]
161
- )
159
+ txt = sentences[index] if len(sentences[index]) > 0 else " "
160
+ n_tokens = len(self._client.tokenize([txt], model=self._model_name)[0])
162
161
  if (
163
162
  batch_tokens + n_tokens > self._max_tokens_per_batch
164
163
  and len(batch) > 0
165
164
  ):
166
165
  break
167
166
  batch_tokens += n_tokens
168
- batch.append(sentences[index])
167
+ batch.append(txt)
169
168
  index += 1
170
169
 
171
170
  embeddings.extend(
@@ -249,7 +248,7 @@ voyage_3_5 = ModelMeta(
249
248
  n_parameters=None,
250
249
  memory_usage_mb=None,
251
250
  license=None,
252
- reference="https://docs.voyageai.com/docs/embeddings",
251
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
253
252
  similarity_fn_name="cosine",
254
253
  framework=["API"],
255
254
  use_instructions=True,
@@ -274,7 +273,7 @@ voyage_3_5_int8 = ModelMeta(
274
273
  n_parameters=None,
275
274
  memory_usage_mb=None,
276
275
  license=None,
277
- reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
276
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
278
277
  similarity_fn_name="cosine",
279
278
  framework=["API"],
280
279
  use_instructions=True,
@@ -300,7 +299,7 @@ voyage_3_5_binary = ModelMeta(
300
299
  n_parameters=None,
301
300
  memory_usage_mb=None,
302
301
  license=None,
303
- reference="https://docs.voyageai.com/docs/flexible-dimensions-and-quantization",
302
+ reference="https://blog.voyageai.com/2025/05/20/voyage-3-5/",
304
303
  similarity_fn_name="cosine",
305
304
  framework=["API"],
306
305
  use_instructions=True,
@@ -9,7 +9,7 @@ class WinoGrande(AbsTaskRetrieval):
9
9
  reference="https://winogrande.allenai.org/",
10
10
  dataset={
11
11
  "path": "mteb/WinoGrande",
12
- "revision": "770abbd7f77affc005f9734996e795925cbc0f65",
12
+ "revision": "4dec9c5666e9f84702ac614363db6d96a68bc6de",
13
13
  },
14
14
  type="Retrieval",
15
15
  category="t2t",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.1.3
3
+ Version: 2.1.4
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -1340,7 +1340,7 @@ mteb/descriptive_stats/Retrieval/VieQuADRetrieval.json,sha256=NHt0U-wJXBOPYOki5Y
1340
1340
  mteb/descriptive_stats/Retrieval/WebFAQRetrieval.json,sha256=uNkLCfiuVbdNKZ54vYGo8dPaoBlTShexDjl_IwCcT_M,60318
1341
1341
  mteb/descriptive_stats/Retrieval/WikiSQLRetrieval.json,sha256=JinCBRnmfMDeIwQtQfD6bD8MYNEpUgedw05A6G-W7u4,985
1342
1342
  mteb/descriptive_stats/Retrieval/WikipediaRetrievalMultilingual.json,sha256=v9npJOOJrcBUdRQ8EzAbOrpynAoSAJBaJoSJhS-qkww,20357
1343
- mteb/descriptive_stats/Retrieval/WinoGrande.json,sha256=--CmwZnUf7EBw01t8yz2UGc8ifsMKtSBT-QIiMRkKm0,989
1343
+ mteb/descriptive_stats/Retrieval/WinoGrande.json,sha256=czzrnqg24MPfCrlVPLamL4LvMdWWOZC4R4tJYjc1QAA,982
1344
1344
  mteb/descriptive_stats/Retrieval/XMarket.json,sha256=s0odo5MWwzgQi0HwqK4APYhWIBR8WEtiC8KKt7wgzOc,4770
1345
1345
  mteb/descriptive_stats/Retrieval/XPQARetrieval.json,sha256=qHxeCggIwW1iw5ujbHsCc_7rf_-JIhfYRFGEKvzSTO0,44958
1346
1346
  mteb/descriptive_stats/Retrieval/XQuADRetrieval.json,sha256=NjNvrloDl561L_WTWBg6fQ31yBTZInYWL-SW0q35SfA,15462
@@ -1528,7 +1528,7 @@ mteb/models/model_implementations/vdr_models.py,sha256=lMm43BBPjZU5lxZcpmPZ8hn0P
1528
1528
  mteb/models/model_implementations/vi_vn_models.py,sha256=quWmd3JT2J6SlAsFrV2gcnc67M9zr58mEF2zLUF8-uw,4795
1529
1529
  mteb/models/model_implementations/vista_models.py,sha256=Q3I01kRtIPaoke0iMIcH4CLcCDTnMSIBFNCof7LPTX4,10832
1530
1530
  mteb/models/model_implementations/vlm2vec_models.py,sha256=HGGy_-z9Wc99xOKum71rBNipCPqWcM1efmmXgy5Rvxc,11724
1531
- mteb/models/model_implementations/voyage_models.py,sha256=Qn9foyBmAGuuhispwZTFQb5ZtDM-OWMif5Ca4PBEhcw,19842
1531
+ mteb/models/model_implementations/voyage_models.py,sha256=dOCccOQlloGrg0q44PxMQzx8dHuQ8VgkDUD01EydpJ0,19824
1532
1532
  mteb/models/model_implementations/voyage_v.py,sha256=6i-oFnaY2D2qR1Dgb0B98ougnD1ujW9aNG9QoWyvwwY,8041
1533
1533
  mteb/models/model_implementations/xyz_models.py,sha256=TePlrH6EHwRPO87U_J3Yce9-XHCn_X7I2cJ_6BZ2fUY,1296
1534
1534
  mteb/models/model_implementations/youtu_models.py,sha256=NB74E6z-_36HyXb8GXKn8CrmRLN68uX9eH4xcS57zl0,5938
@@ -2283,7 +2283,7 @@ mteb/tasks/retrieval/eng/viz_wiz_it2t_retrieval.py,sha256=jE70T5If62lkKnbF-CMAgR
2283
2283
  mteb/tasks/retrieval/eng/vqa2_it2t_retrieval.py,sha256=M_g6Y6OrNRByD52-JxuO8iIO8aFUg8HHg5BxQ31-m1I,1403
2284
2284
  mteb/tasks/retrieval/eng/web_qa_t2it_retrieval.py,sha256=c7pJja_ii4ku9pfd-Gd3FqO6cF-0IIEb_H0FRY2A69w,1477
2285
2285
  mteb/tasks/retrieval/eng/web_qa_t2t_retrieval.py,sha256=rx6uoqc8yduGhuvdv2K5v2oFiQI8jP-BEt5nmaKrsac,1517
2286
- mteb/tasks/retrieval/eng/wino_grande_retrieval.py,sha256=bRjNxz_pgH7waI7m_NMR_VLvcRxnBtMXY6CTU4RLEnc,1714
2286
+ mteb/tasks/retrieval/eng/wino_grande_retrieval.py,sha256=ou8TlZ-JPS1nh7NS7OeerUsB2WRZWWwKTuygpJNLb2A,1714
2287
2287
  mteb/tasks/retrieval/est/__init__.py,sha256=uNkOSKfZsO1F-xC4twL8ukxtfrI4A4eIU-oAs3Hi5Dg,46
2288
2288
  mteb/tasks/retrieval/est/estqa.py,sha256=sORL3KI47yXOy8GXptBtCuryOdDShdRDFpCdnnIaaCI,1418
2289
2289
  mteb/tasks/retrieval/fas/__init__.py,sha256=DUq1CTC_nj-201dbUNqlmqN-oR-YKjeW3O8DhtMX9rk,2213
@@ -2536,9 +2536,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2536
2536
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2537
2537
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2538
2538
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2539
- mteb-2.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2540
- mteb-2.1.3.dist-info/METADATA,sha256=fSSp_uFAo3SSrjhhMQGHJMDIyEjeinv-7QjmK11VrPQ,13573
2541
- mteb-2.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2542
- mteb-2.1.3.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2543
- mteb-2.1.3.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2544
- mteb-2.1.3.dist-info/RECORD,,
2539
+ mteb-2.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2540
+ mteb-2.1.4.dist-info/METADATA,sha256=3HJ0LwqN5B9zLnqA4j5PsmVknk75iAHG-qwtZqJdKW0,13573
2541
+ mteb-2.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2542
+ mteb-2.1.4.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2543
+ mteb-2.1.4.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2544
+ mteb-2.1.4.dist-info/RECORD,,
File without changes