mteb 2.5.4__py3-none-any.whl → 2.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ class Benchmark:
19
19
 
20
20
  Args:
21
21
  name: The name of the benchmark
22
+ aliases: Alternative names for the benchmark
22
23
  tasks: The tasks within the benchmark.
23
24
  description: A description of the benchmark, should include its intended goal and potentially a description of its construction
24
25
  reference: A link reference, to a source containing additional information typically to a paper, leaderboard or github.
@@ -38,6 +39,7 @@ class Benchmark:
38
39
 
39
40
  name: str
40
41
  tasks: Sequence[AbsTask]
42
+ aliases: Sequence[str] = field(default_factory=tuple)
41
43
  description: str | None = None
42
44
  reference: StrURL | None = None
43
45
  citation: str | None = None
@@ -18,6 +18,7 @@ MMTEB_CITATION = r"""@article{enevoldsen2025mmtebmassivemultilingualtext,
18
18
 
19
19
  MTEB_EN = Benchmark(
20
20
  name="MTEB(eng, v2)",
21
+ aliases=["MTEB(eng)"],
21
22
  display_name="English",
22
23
  icon="https://github.com/lipis/flag-icons/raw/refs/heads/main/flags/4x3/us.svg",
23
24
  tasks=MTEBTasks(
@@ -89,6 +90,7 @@ The original MTEB leaderboard is available under the [MTEB(eng, v1)](http://mteb
89
90
 
90
91
  MTEB_ENG_CLASSIC = Benchmark(
91
92
  name="MTEB(eng, v1)",
93
+ aliases=["MTEB(eng, classic)", "MTEB"],
92
94
  display_name="English Legacy",
93
95
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/gb.svg",
94
96
  tasks=MTEBTasks(
@@ -185,6 +187,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
185
187
 
186
188
  MTEB_MAIN_RU = Benchmark(
187
189
  name="MTEB(rus, v1)",
190
+ aliases=["MTEB(rus)"],
188
191
  display_name="Russian legacy",
189
192
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
190
193
  tasks=MTEBTasks(
@@ -344,6 +347,7 @@ RU_SCI_BENCH = Benchmark(
344
347
 
345
348
  MTEB_RETRIEVAL_WITH_INSTRUCTIONS = Benchmark(
346
349
  name="FollowIR",
350
+ aliases=["MTEB(Retrieval w/Instructions)"],
347
351
  display_name="Instruction Following",
348
352
  tasks=get_tasks(
349
353
  tasks=[
@@ -394,7 +398,9 @@ MTEB_RETRIEVAL_WITH_DOMAIN_INSTRUCTIONS = Benchmark(
394
398
  )
395
399
 
396
400
  MTEB_RETRIEVAL_LAW = Benchmark(
397
- name="MTEB(Law, v1)", # This benchmark is likely in the need of an update
401
+ # This benchmark is likely in the need of an update
402
+ name="MTEB(Law, v1)",
403
+ aliases=["MTEB(law)"],
398
404
  display_name="Legal",
399
405
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-library.svg",
400
406
  tasks=get_tasks(
@@ -416,6 +422,7 @@ MTEB_RETRIEVAL_LAW = Benchmark(
416
422
 
417
423
  MTEB_RETRIEVAL_MEDICAL = Benchmark(
418
424
  name="MTEB(Medical, v1)",
425
+ aliases=["MTEB(Medical)"],
419
426
  display_name="Medical",
420
427
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-map-hospital.svg",
421
428
  tasks=get_tasks(
@@ -469,6 +476,7 @@ MTEB_MINERS_BITEXT_MINING = Benchmark(
469
476
 
470
477
  SEB = Benchmark(
471
478
  name="MTEB(Scandinavian, v1)",
479
+ aliases=["MTEB(Scandinavian)", "SEB"],
472
480
  display_name="Scandinavian",
473
481
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
474
482
  language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
@@ -595,6 +603,7 @@ RAR_b = Benchmark(
595
603
 
596
604
  MTEB_FRA = Benchmark(
597
605
  name="MTEB(fra, v1)",
606
+ aliases=["MTEB(fra)"],
598
607
  display_name="French",
599
608
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/fr.svg",
600
609
  tasks=MTEBTasks(
@@ -653,6 +662,7 @@ MTEB_FRA = Benchmark(
653
662
 
654
663
  MTEB_DEU = Benchmark(
655
664
  name="MTEB(deu, v1)",
665
+ aliases=["MTEB(deu)"],
656
666
  display_name="German",
657
667
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/de.svg",
658
668
  tasks=get_tasks(
@@ -704,6 +714,7 @@ MTEB_DEU = Benchmark(
704
714
 
705
715
  MTEB_KOR = Benchmark(
706
716
  name="MTEB(kor, v1)",
717
+ aliases=["MTEB(kor)"],
707
718
  display_name="Korean",
708
719
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/kr.svg",
709
720
  tasks=get_tasks(
@@ -728,6 +739,7 @@ MTEB_KOR = Benchmark(
728
739
 
729
740
  MTEB_POL = Benchmark(
730
741
  name="MTEB(pol, v1)",
742
+ aliases=["MTEB(pol)"],
731
743
  display_name="Polish",
732
744
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/pl.svg",
733
745
  tasks=MTEBTasks(
@@ -777,6 +789,7 @@ two novel clustering tasks.""", # Rephrased from the abstract
777
789
 
778
790
  MTEB_code = Benchmark(
779
791
  name="MTEB(Code, v1)",
792
+ aliases=["MTEB(code)"],
780
793
  display_name="Code",
781
794
  icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-tech-electronics.svg",
782
795
  tasks=get_tasks(
@@ -953,6 +966,7 @@ MTEB_multilingual_v1 = Benchmark(
953
966
 
954
967
  MTEB_multilingual_v2 = Benchmark(
955
968
  name="MTEB(Multilingual, v2)",
969
+ aliases=["MTEB(Multilingual)", "MMTEB"],
956
970
  display_name="Multilingual",
957
971
  language_view=[
958
972
  "eng-Latn", # English
@@ -986,6 +1000,7 @@ MTEB_multilingual_v2 = Benchmark(
986
1000
 
987
1001
  MTEB_JPN = Benchmark(
988
1002
  name="MTEB(jpn, v1)",
1003
+ aliases=["MTEB(jpn)"],
989
1004
  display_name="Japanese Legacy",
990
1005
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
991
1006
  tasks=get_tasks(
@@ -1056,6 +1071,7 @@ indic_languages = [
1056
1071
 
1057
1072
  MTEB_INDIC = Benchmark(
1058
1073
  name="MTEB(Indic, v1)",
1074
+ aliases=["MTEB(Indic)"],
1059
1075
  display_name="Indic",
1060
1076
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/in.svg",
1061
1077
  tasks=MTEBTasks(
@@ -1146,6 +1162,7 @@ eu_languages = [
1146
1162
 
1147
1163
  MTEB_EU = Benchmark(
1148
1164
  name="MTEB(Europe, v1)",
1165
+ aliases=["MTEB(Europe)"],
1149
1166
  display_name="European",
1150
1167
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/eu.svg",
1151
1168
  tasks=get_tasks(
@@ -1285,6 +1302,7 @@ BRIGHT = Benchmark(
1285
1302
 
1286
1303
  BRIGHT_LONG = Benchmark(
1287
1304
  name="BRIGHT (long)",
1305
+ aliases=["BRIGHT(long)"],
1288
1306
  tasks=MTEBTasks(
1289
1307
  (
1290
1308
  get_task(
@@ -1400,6 +1418,7 @@ NANOBEIR = Benchmark(
1400
1418
 
1401
1419
  C_MTEB = Benchmark(
1402
1420
  name="MTEB(cmn, v1)",
1421
+ aliases=["MTEB(Chinese)", "CMTEB"],
1403
1422
  display_name="Chinese",
1404
1423
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/cn.svg",
1405
1424
  tasks=MTEBTasks(
@@ -1466,6 +1485,7 @@ C_MTEB = Benchmark(
1466
1485
 
1467
1486
  FA_MTEB = Benchmark(
1468
1487
  name="MTEB(fas, v1)",
1488
+ aliases=["FaMTEB(fas, beta)"],
1469
1489
  display_name="Farsi Legacy",
1470
1490
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ir.svg",
1471
1491
  tasks=get_tasks(
@@ -2347,6 +2367,7 @@ VIDORE_V3 = VidoreBenchmark(
2347
2367
 
2348
2368
  VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
2349
2369
  name="ViDoRe(v1&v2)",
2370
+ aliases=["VisualDocumentRetrieval"],
2350
2371
  display_name="ViDoRe (V1&V2)",
2351
2372
  tasks=get_tasks(
2352
2373
  tasks=[
@@ -1,6 +1,5 @@
1
1
  import difflib
2
2
  import logging
3
- import warnings
4
3
  from functools import lru_cache
5
4
 
6
5
  from .benchmark import Benchmark
@@ -20,53 +19,16 @@ def _build_registry() -> dict[str, Benchmark]:
20
19
  return benchmark_registry
21
20
 
22
21
 
23
- def _get_previous_benchmark_names() -> dict[str, str]:
24
- from .benchmarks import (
25
- BRIGHT_LONG,
26
- C_MTEB,
27
- FA_MTEB,
28
- MTEB_DEU,
29
- MTEB_EN,
30
- MTEB_ENG_CLASSIC,
31
- MTEB_EU,
32
- MTEB_FRA,
33
- MTEB_INDIC,
34
- MTEB_JPN,
35
- MTEB_KOR,
36
- MTEB_MAIN_RU,
37
- MTEB_POL,
38
- MTEB_RETRIEVAL_LAW,
39
- MTEB_RETRIEVAL_MEDICAL,
40
- MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
41
- SEB,
42
- VISUAL_DOCUMENT_RETRIEVAL,
43
- MTEB_code,
44
- MTEB_multilingual_v2,
45
- )
46
-
47
- previous_benchmark_names = {
48
- "MTEB(eng)": MTEB_EN.name,
49
- "MTEB(eng, classic)": MTEB_ENG_CLASSIC.name,
50
- "MTEB(rus)": MTEB_MAIN_RU.name,
51
- "MTEB(Retrieval w/Instructions)": MTEB_RETRIEVAL_WITH_INSTRUCTIONS.name,
52
- "MTEB(law)": MTEB_RETRIEVAL_LAW.name,
53
- "MTEB(Medical)": MTEB_RETRIEVAL_MEDICAL.name,
54
- "MTEB(Scandinavian)": SEB.name,
55
- "MTEB(fra)": MTEB_FRA.name,
56
- "MTEB(deu)": MTEB_DEU.name,
57
- "MTEB(kor)": MTEB_KOR.name,
58
- "MTEB(pol)": MTEB_POL.name,
59
- "MTEB(code)": MTEB_code.name,
60
- "MTEB(Multilingual)": MTEB_multilingual_v2.name,
61
- "MTEB(jpn)": MTEB_JPN.name,
62
- "MTEB(Indic)": MTEB_INDIC.name,
63
- "MTEB(Europe)": MTEB_EU.name,
64
- "MTEB(Chinese)": C_MTEB.name,
65
- "FaMTEB(fas, beta)": FA_MTEB.name,
66
- "BRIGHT(long)": BRIGHT_LONG.name,
67
- "VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
68
- }
69
- return previous_benchmark_names
22
+ @lru_cache
23
+ def _build_aliases_registry() -> dict[str, Benchmark]:
24
+ import mteb.benchmarks.benchmarks as benchmark_module
25
+
26
+ aliases: dict[str, Benchmark] = {}
27
+ for _, inst in benchmark_module.__dict__.items():
28
+ if isinstance(inst, Benchmark) and inst.aliases is not None:
29
+ for alias in inst.aliases:
30
+ aliases[alias] = inst
31
+ return aliases
70
32
 
71
33
 
72
34
  def get_benchmark(
@@ -80,14 +42,11 @@ def get_benchmark(
80
42
  Returns:
81
43
  The Benchmark instance corresponding to the given name.
82
44
  """
83
- previous_benchmark_names = _get_previous_benchmark_names()
84
45
  benchmark_registry = _build_registry()
85
- if benchmark_name in previous_benchmark_names:
86
- warnings.warn(
87
- f"Using the previous benchmark name '{benchmark_name}' is deprecated. Please use '{previous_benchmark_names[benchmark_name]}' instead.",
88
- DeprecationWarning,
89
- )
90
- benchmark_name = previous_benchmark_names[benchmark_name]
46
+ aliases_registry = _build_aliases_registry()
47
+
48
+ if benchmark_name in aliases_registry:
49
+ return aliases_registry[benchmark_name]
91
50
  if benchmark_name not in benchmark_registry:
92
51
  close_matches = difflib.get_close_matches(
93
52
  benchmark_name, benchmark_registry.keys()
mteb/cache.py CHANGED
@@ -472,7 +472,7 @@ class ResultCache:
472
472
  def load_results(
473
473
  self,
474
474
  models: Sequence[str] | Iterable[ModelMeta] | None = None,
475
- tasks: Sequence[str] | Iterable[AbsTask] | str | None = None,
475
+ tasks: Sequence[str] | Iterable[AbsTask] | Benchmark | str | None = None,
476
476
  require_model_meta: bool = True,
477
477
  include_remote: bool = True,
478
478
  validate_and_filter: bool = False,
@@ -483,6 +483,7 @@ class ResultCache:
483
483
  Args:
484
484
  models: A list of model names to load the results for. If None it will load the results for all models.
485
485
  tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
486
+ If Benchmark is passed, then all tasks in the benchmark will be loaded.
486
487
  If None it will load the results for all tasks.
487
488
  require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
488
489
  extract the model name and revision from the path.
mteb/cli/build_cli.py CHANGED
@@ -290,17 +290,17 @@ def _create_meta(args: argparse.Namespace) -> None:
290
290
  "Output path already exists, use --overwrite to overwrite."
291
291
  )
292
292
 
293
+ benchmarks = None
293
294
  tasks: list[AbsTask] = []
294
295
  if tasks_names is not None:
295
296
  tasks = list(mteb.get_tasks(tasks_names))
296
297
  if benchmarks is not None:
297
298
  benchmarks = mteb.get_benchmarks(benchmarks)
298
- for benchmark in benchmarks:
299
- tasks.extend(benchmark.tasks)
300
299
 
301
300
  generate_model_card(
302
301
  model_name,
303
- tasks if len(tasks) > 0 else None,
302
+ tasks,
303
+ benchmarks,
304
304
  existing_model_card_id_or_path=from_existing,
305
305
  results_cache=ResultCache(results_folder),
306
306
  output_path=output_path,
@@ -5,8 +5,8 @@ from pathlib import Path
5
5
 
6
6
  from huggingface_hub import ModelCard, ModelCardData, repo_exists
7
7
 
8
- from mteb import BenchmarkResults
9
8
  from mteb.abstasks.abstask import AbsTask
9
+ from mteb.benchmarks.benchmark import Benchmark
10
10
  from mteb.cache import ResultCache
11
11
 
12
12
  logger = logging.getLogger(__name__)
@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
15
15
  def generate_model_card(
16
16
  model_name: str,
17
17
  tasks: Sequence[AbsTask] | None = None,
18
+ benchmarks: Sequence[Benchmark] | None = None,
18
19
  existing_model_card_id_or_path: str | Path | None = None,
19
20
  results_cache: ResultCache = ResultCache(),
20
21
  output_path: Path = Path("model_card.md"),
21
22
  add_table_to_model_card: bool = False,
22
- models_to_compare: list[str] | None = None,
23
+ models_to_compare: Sequence[str] | None = None,
23
24
  token: str | None = None,
24
25
  push_to_hub: bool = False,
25
26
  ) -> None:
@@ -28,6 +29,7 @@ def generate_model_card(
28
29
  Args:
29
30
  model_name: Name of the model.
30
31
  tasks: List of tasks to generate results for.
32
+ benchmarks: A Benchmark or list of benchmarks to generate results for.
31
33
  existing_model_card_id_or_path: Path or ID of an existing model card to update.
32
34
  results_cache: Instance of ResultCache to load results from.
33
35
  output_path: Path to save the generated model card.
@@ -41,8 +43,16 @@ def generate_model_card(
41
43
  if existing_model_card_id_or_path:
42
44
  existing_model_card = ModelCard.load(existing_model_card_id_or_path)
43
45
 
46
+ all_tasks: list[AbsTask] = []
47
+ if tasks is not None:
48
+ all_tasks.extend(tasks)
49
+
50
+ if benchmarks is not None:
51
+ for b in benchmarks:
52
+ all_tasks.extend(b.tasks)
53
+
44
54
  benchmark_results = results_cache.load_results(
45
- [model_name], tasks, only_main_score=True
55
+ [model_name], all_tasks if all_tasks else None, only_main_score=True
46
56
  )
47
57
  eval_results = []
48
58
  for models_results in benchmark_results.model_results:
@@ -80,14 +90,12 @@ def generate_model_card(
80
90
  card_data=existing_model_card_data
81
91
  )
82
92
 
83
- if models_to_compare:
84
- benchmark_results = results_cache.load_results(
85
- [model_name, *models_to_compare], tasks, only_main_score=True
86
- )
87
-
88
93
  if add_table_to_model_card:
89
94
  existing_model_card = _add_table_to_model_card(
90
- benchmark_results, existing_model_card
95
+ results_cache,
96
+ existing_model_card,
97
+ (model_name, *models_to_compare) if models_to_compare else (model_name,),
98
+ benchmarks or [],
91
99
  )
92
100
 
93
101
  if push_to_hub and existing_model_card_id_or_path:
@@ -102,14 +110,23 @@ def generate_model_card(
102
110
 
103
111
 
104
112
  def _add_table_to_model_card(
105
- results: BenchmarkResults, model_card: ModelCard
113
+ results_cache: ResultCache,
114
+ model_card: ModelCard,
115
+ models: Sequence[str],
116
+ benchmarks: Sequence[Benchmark],
106
117
  ) -> ModelCard:
107
118
  original_content = model_card.content
108
- results_df = results.to_dataframe()
109
- results_df = results_df.set_index("task_name")
110
- mteb_content = f"""
111
- # MTEB results
112
- {results_df.to_markdown()}
113
- """
119
+ mteb_content = "# MTEB Results\n\n"
120
+
121
+ for benchmark in benchmarks:
122
+ mteb_content += f"## Benchmark: {benchmark.name}\n\n"
123
+ benchmark_results = results_cache.load_results(
124
+ tasks=benchmark,
125
+ models=models,
126
+ only_main_score=True,
127
+ )
128
+ df_results = benchmark_results.get_benchmark_result()
129
+ mteb_content += df_results.to_markdown(index=True) + "\n\n"
130
+
114
131
  model_card.content = original_content + "\n\n" + mteb_content
115
132
  return model_card
@@ -4,13 +4,15 @@ import base64
4
4
  import logging
5
5
  import os
6
6
  import time
7
- from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from functools import partial
8
9
  from io import BytesIO
9
10
  from typing import TYPE_CHECKING, Any
10
11
 
11
12
  import requests
12
13
  import torch
13
14
  from torch.utils.data import DataLoader
15
+ from tqdm import tqdm
14
16
 
15
17
  from mteb._requires_package import requires_package
16
18
  from mteb.abstasks.task_metadata import TaskMetadata
@@ -26,114 +28,6 @@ if TYPE_CHECKING:
26
28
 
27
29
  logger = logging.getLogger(__name__)
28
30
 
29
-
30
- def pil_to_base64(image, format="jpeg"):
31
- if image is None:
32
- return None
33
- buffer = BytesIO()
34
- image.save(buffer, format=format)
35
- img_bytes = buffer.getvalue()
36
- encoded_bytes = base64.b64encode(img_bytes)
37
- return encoded_bytes.decode("utf-8")
38
-
39
-
40
- def multimodal_embedding(image_base64=None, text_content=None):
41
- auth_token = os.getenv("VOLCES_AUTH_TOKEN")
42
- model_name = "doubao-embedding-vision-251215"
43
- api_url = "https://ark.cn-beijing.volces.com/api/v3/embeddings/multimodal"
44
-
45
- headers = {
46
- "Authorization": f"Bearer {auth_token}",
47
- "x-ark-vlm1": "true",
48
- "Content-Type": "application/json",
49
- }
50
-
51
- if image_base64 is not None and text_content is None:
52
- inputs = []
53
- for image in image_base64:
54
- image_format = "jpeg"
55
- image_data = f"data:image/{image_format};base64,{image}"
56
- inputs.append({"type": "image_url", "image_url": {"url": image_data}})
57
-
58
- payload = {"model": model_name, "input": inputs}
59
- elif image_base64 is None and text_content is not None:
60
- payload = {
61
- "model": model_name,
62
- "input": [
63
- {"type": "text", "text": text_content},
64
- ],
65
- }
66
- else:
67
- inputs = []
68
- for image in image_base64:
69
- image_format = "jpeg"
70
- image_data = f"data:image/{image_format};base64,{image}"
71
- inputs.append({"type": "image_url", "image_url": {"url": image_data}})
72
- inputs.append({"type": "text", "text": text_content})
73
- payload = {"model": model_name, "input": inputs}
74
-
75
- try:
76
- response = requests.post(url=api_url, headers=headers, json=payload, timeout=10)
77
-
78
- response.raise_for_status()
79
- return response.json()
80
-
81
- except requests.exceptions.HTTPError as http_err:
82
- logger.error(f"HTTP error ({http_err.response.status_code}): {http_err}")
83
- except requests.exceptions.JSONDecodeError:
84
- logger.error("Error:The response is not in valid JSON format")
85
- except requests.exceptions.Timeout:
86
- logger.error("Error:Request timeout")
87
- except Exception as e:
88
- logger.error(f"Unknown error: {str(e)}")
89
-
90
- return None
91
-
92
-
93
- def multi_thread_encode(sentences, batch_size=1, max_workers=8):
94
- batches = []
95
- for idx in range(0, len(sentences), batch_size):
96
- batches.append((idx // batch_size, sentences[idx : idx + batch_size]))
97
-
98
- n_batches = len(batches)
99
- results = [None] * n_batches # Pre-allocated result list
100
- all_embeddings = [] # Final ordered embeddings
101
-
102
- def _process_batch(batch_idx, batch_sentences):
103
- sentence = batch_sentences[0]
104
-
105
- retries = 5
106
- while retries > 0:
107
- try:
108
- resp = multimodal_embedding(text_content=sentence)
109
- embedding = torch.tensor(resp["data"]["embedding"])
110
- break
111
- except Exception as e:
112
- time.sleep(1)
113
- logger.warning(f"Retrying... {retries} retries left. Error: {str(e)}")
114
- retries -= 1
115
- if retries == 0:
116
- raise e
117
- return batch_idx, embedding
118
-
119
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
120
- futures = {
121
- executor.submit(_process_batch, idx, batch): idx for idx, batch in batches
122
- }
123
-
124
- for future in as_completed(futures):
125
- batch_idx, embeddings = future.result()
126
- results[batch_idx] = embeddings
127
-
128
- for batch_embeddings in results:
129
- all_embeddings.append(batch_embeddings)
130
-
131
- all_embeddings = torch.stack(all_embeddings, dim=0)
132
- all_embeddings = torch.nn.functional.normalize(all_embeddings, dim=-1)
133
-
134
- return all_embeddings.float().cpu()
135
-
136
-
137
31
  doubao_embedding_training_data = (
138
32
  {
139
33
  "PawsXPairClassification",
@@ -166,25 +60,80 @@ class Seed16EmbeddingWrapper(AbsEncoder):
166
60
  "pip install mteb[ark]",
167
61
  "tiktoken",
168
62
  )
169
- import tiktoken
170
63
 
171
64
  self._model_name = model_name
172
65
  self._max_tokens = 32768
173
66
  self._embed_dim = embed_dim
174
67
  self._available_embed_dims = [2048, 1024]
175
- self._encoding = tiktoken.get_encoding(tokenizer_name)
176
68
 
177
- def truncate_text_tokens(self, text: str) -> str:
178
- """Truncate a string to have `max_tokens` according to the given encoding.
69
+ def pil_to_base64(self, image, format="jpeg"):
70
+ if image is None:
71
+ return None
72
+ buffer = BytesIO()
73
+ image.save(buffer, format=format)
74
+ img_bytes = buffer.getvalue()
75
+ encoded_bytes = base64.b64encode(img_bytes)
76
+ return encoded_bytes.decode("utf-8")
77
+
78
+ def multimodal_embedding(self, instruction, image_base64, text_content):
79
+ auth_token = os.getenv("VOLCES_AUTH_TOKEN")
80
+ model_name = "doubao-embedding-vision-251215"
81
+ api_url = "https://ark.cn-beijing.volces.com/api/v3/embeddings/multimodal"
82
+
83
+ headers = {
84
+ "Authorization": f"Bearer {auth_token}",
85
+ "x-ark-vlm1": "true",
86
+ "Content-Type": "application/json",
87
+ }
179
88
 
180
- Args:
181
- text: The input string to be truncated.
89
+ if text_content is not None and len(text_content) > self._max_tokens:
90
+ text_content = text_content[: self._max_tokens]
91
+
92
+ if image_base64 is not None and text_content is None:
93
+ inputs = []
94
+ for image in image_base64:
95
+ image_format = "jpeg"
96
+ image_data = f"data:image/{image_format};base64,{image}"
97
+ inputs.append({"type": "image_url", "image_url": {"url": image_data}})
98
+
99
+ payload = {"model": model_name, "input": inputs}
100
+ elif image_base64 is None and text_content is not None:
101
+ payload = {
102
+ "model": model_name,
103
+ "instruction": instruction,
104
+ "input": [
105
+ {"type": "text", "text": text_content},
106
+ ],
107
+ }
108
+ else:
109
+ inputs = []
110
+ for image in image_base64:
111
+ image_format = "jpeg"
112
+ image_data = f"data:image/{image_format};base64,{image}"
113
+ inputs.append({"type": "image_url", "image_url": {"url": image_data}})
114
+ inputs.append({"type": "text", "text": text_content})
115
+ payload = {"model": model_name, "input": inputs}
116
+
117
+ max_retries = 3
118
+ retry_count = 0
119
+
120
+ while retry_count < max_retries:
121
+ response = requests.post(
122
+ url=api_url, headers=headers, json=payload, timeout=30
123
+ )
182
124
 
183
- Returns:
184
- The truncated string.
185
- """
186
- truncated_sentence = self._encoding.encode(text)[: self._max_tokens]
187
- return self._encoding.decode(truncated_sentence)
125
+ if response.status_code != 200:
126
+ retry_count += 1
127
+ time.sleep(3)
128
+ continue
129
+
130
+ response_json = response.json()
131
+ return response_json
132
+
133
+ raise Exception(
134
+ f"Request failed with status code {response.status_code}. "
135
+ f"Response: {response.text}"
136
+ )
188
137
 
189
138
  def get_fused_embeddings(
190
139
  self,
@@ -204,59 +153,69 @@ class Seed16EmbeddingWrapper(AbsEncoder):
204
153
  if images is not None and texts is not None:
205
154
  assert len(texts) == len(images)
206
155
  batch_len = len(texts)
207
- images_base64 = [pil_to_base64(image) for image in images]
156
+ images_base64 = [self.pil_to_base64(image) for image in images]
208
157
  elif images is None:
209
158
  batch_len = len(texts)
210
159
  images_base64 = [None for _ in range(batch_len)]
211
160
  elif texts is None:
212
161
  batch_len = len(images)
213
- images_base64 = [pil_to_base64(image) for image in images]
162
+ images_base64 = [self.pil_to_base64(image) for image in images]
214
163
  else:
215
164
  raise ValueError("images and texts cannot be None at the same time")
216
165
 
217
- outputs = []
218
- for i in range(batch_len):
166
+ def process_item(
167
+ i, prompt_type, task_name, texts, images_base64, multimodal_embedding
168
+ ):
219
169
  if (
220
170
  prompt_type == PromptType("query") or prompt_type is None
221
171
  ) and task_name in TASK_NAME_TO_INSTRUCTION:
222
172
  instruction = TASK_NAME_TO_INSTRUCTION[task_name]
223
173
  instruction = instruction.rstrip("{}").rstrip("\n")
224
- if texts[i] != "":
225
- input_text = (
226
- "Target_modality:Text.\n Instruction:"
227
- + instruction
228
- + "\n Query:{}"
229
- ).format(texts[i])
230
- else:
231
- input_text = (
232
- "Target_modality:Text.\n Instruction:"
233
- + instruction
234
- + "\n Query:"
235
- )
174
+ instruction = (
175
+ "Target_modality:Text.\n Instruction:" + instruction + "\n Query:"
176
+ )
177
+ input_text = texts[i]
236
178
  else:
237
179
  if texts[i] != "" and images_base64[i] is not None:
238
- instruction = "Instruction: Compress the the text and image into one word.\n Query: {}"
239
- input_text = instruction.format(texts[i])
180
+ instruction = "Instruction: Compress the text and image into one word.\n Query:"
181
+ input_text = texts[i]
240
182
  elif texts[i] != "":
241
183
  instruction = (
242
- "Instruction: Compress the the text into one word.\n Query: {}"
184
+ "Instruction: Compress the text into one word.\n Query:"
243
185
  )
244
- input_text = instruction.format(texts[i])
186
+ input_text = texts[i]
245
187
  elif images_base64[i] is not None:
246
188
  instruction = (
247
- "Instruction: Compress the the image into one word.\n Query:"
189
+ "Instruction: Compress the image into one word.\n Query:"
248
190
  )
249
- input_text = instruction
191
+ input_text = None
250
192
  else:
251
193
  raise ValueError("image and text are both None")
252
194
 
253
195
  resp = multimodal_embedding(
254
- image_base64=[images_base64[i]], text_content=input_text
196
+ instruction=instruction,
197
+ image_base64=images_base64[i],
198
+ text_content=input_text,
255
199
  )
256
200
  embedding = torch.tensor(resp["data"]["embedding"])
257
201
  embedding = torch.reshape(embedding, (1, -1))
202
+ return embedding
203
+
204
+ outputs = []
205
+ process_partial = partial(
206
+ process_item,
207
+ prompt_type=prompt_type,
208
+ task_name=task_name,
209
+ texts=texts,
210
+ images_base64=images_base64,
211
+ multimodal_embedding=self.multimodal_embedding,
212
+ )
213
+ with ThreadPoolExecutor(max_workers=15) as executor:
214
+ futures = [executor.submit(process_partial, i) for i in range(batch_len)]
215
+ for future in tqdm(futures, total=batch_len, desc="Encoding"):
216
+ outputs.append(future.result())
258
217
 
259
- outputs = torch.stack(outputs, dim=0)
218
+ outputs = torch.stack(outputs, dim=0).squeeze(1)
260
219
 
261
220
  if self._embed_dim is not None:
262
221
  outputs = outputs[:, : self._embed_dim]
@@ -273,13 +232,21 @@ class Seed16EmbeddingWrapper(AbsEncoder):
273
232
  prompt_type: PromptType | None = None,
274
233
  **kwargs: Any,
275
234
  ) -> Array:
276
- sentences = [text for batch in inputs for text in batch["text"]]
277
- images = [image for batch in inputs for image in batch["image"]]
235
+ if "text" in inputs.dataset.features:
236
+ sentences = [text for batch in inputs for text in batch["text"]]
237
+ else:
238
+ sentences = None
239
+
240
+ if "image" in inputs.dataset.features:
241
+ images = [image for batch in inputs for image in batch["image"]]
242
+ else:
243
+ images = None
278
244
 
279
245
  return self.get_fused_embeddings(
280
246
  texts=sentences,
281
247
  images=images,
282
248
  task_name=task_metadata.name,
249
+ prompt_type=prompt_type,
283
250
  **kwargs,
284
251
  )
285
252
 
mteb/models/model_meta.py CHANGED
@@ -22,6 +22,7 @@ from huggingface_hub import (
22
22
  from huggingface_hub.errors import (
23
23
  EntryNotFoundError,
24
24
  GatedRepoError,
25
+ HFValidationError,
25
26
  NotASafetensorsRepoError,
26
27
  RepositoryNotFoundError,
27
28
  SafetensorsParsingError,
@@ -305,7 +306,7 @@ class ModelMeta(BaseModel):
305
306
  embedding_dim = None
306
307
  max_tokens = None
307
308
 
308
- if model_name and compute_metadata and repo_exists(model_name):
309
+ if model_name and compute_metadata and _repo_exists(model_name):
309
310
  reference = "https://huggingface.co/" + model_name
310
311
  card = ModelCard.load(model_name)
311
312
  card_data: ModelCardData = card.data
@@ -414,7 +415,7 @@ class ModelMeta(BaseModel):
414
415
  meta.framework.append("Sentence Transformers")
415
416
  meta.modalities = ["text"]
416
417
 
417
- if model and compute_metadata and repo_exists(model):
418
+ if model and compute_metadata and _repo_exists(model):
418
419
  # have max_seq_length field
419
420
  sbert_config = _get_json_from_hub(
420
421
  model, "sentence_bert_config.json", "model", revision=revision
@@ -785,3 +786,19 @@ def _get_file_on_hub(
785
786
  except (GatedRepoError, RepositoryNotFoundError, EntryNotFoundError) as e:
786
787
  logger.warning(f"Can't get file {file_name} of {repo_id}: {e}")
787
788
  return None
789
+
790
+
791
+ def _repo_exists(repo_id: str, repo_type: str | None = None) -> bool:
792
+ """Checks if a repository exists on HuggingFace Hub.
793
+
794
+ Repo exists will raise HFValidationError for invalid local paths
795
+
796
+ Args:
797
+ repo_id: The repository ID.
798
+ repo_type: The type of repository (e.g., "model", "dataset", "space").
799
+ """
800
+ try:
801
+ return repo_exists(repo_id=repo_id, repo_type=repo_type)
802
+ except HFValidationError as e:
803
+ logger.warning(f"Can't check existence of {repo_id}: {e}")
804
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.5.4
3
+ Version: 2.5.5
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -5,7 +5,7 @@ mteb/_helpful_enum.py,sha256=jh73N1jlcpg7RGz4bj8UpctiMNvqvHpp9wrB7SYEzIU,510
5
5
  mteb/_log_once.py,sha256=-tUKzxGQzf2LZSuQXi97oYFXMta1B6GEYXd7BPqssvY,1095
6
6
  mteb/_requires_package.py,sha256=eHg_TD9BVZRzNCcQQrUP17d8M1DF_vOd_tVx54AmAnM,3017
7
7
  mteb/_set_seed.py,sha256=HPlPRl__Pe6IG-4UgJqTfplcivJ_wA2kaClbXoHQedM,1178
8
- mteb/cache.py,sha256=8EB1irD7AHKxYuELFYzEC4GA04TMNThXJJSHixQZw6k,21494
8
+ mteb/cache.py,sha256=M9UkWEqSA_Ro3_jc09k-XjVQy7amIDgeHgyO8VmHhmI,21594
9
9
  mteb/deprecated_evaluator.py,sha256=LCnM-kG2SBkh-xqVd4MurExsVMlFOIycSb7sHz2S_Cw,27634
10
10
  mteb/evaluate.py,sha256=6h06XsolgVCJEq9j6NA5ebwH2rSLsyIdtrxHanlqQfk,19185
11
11
  mteb/filter_tasks.py,sha256=D9g2o79aQiA5va7u_QKtMlZNDUmYwZGqCDpaKhBimWQ,7335
@@ -58,15 +58,15 @@ mteb/abstasks/text/reranking.py,sha256=QMgAAndGYRzvQdlhjLRMxrh_yrJZ0VQH40I-7mXo1
58
58
  mteb/abstasks/text/summarization.py,sha256=Sr-QX7T8SDS2dudSEspZHUtH_sxF_8A_tgfbkZNT3cA,7137
59
59
  mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
60
60
  mteb/benchmarks/_create_table.py,sha256=b2RqGqi0ZonKbHecEcZiF4pkfE96smFRIzxOI82ETA8,22304
61
- mteb/benchmarks/benchmark.py,sha256=MdRdGIGyYOH1_wK9O6NSAGjsA_QGjU5VKBoLPJcd4PE,5583
62
- mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
61
+ mteb/benchmarks/benchmark.py,sha256=RheQOo0iQbu_ylN7oFLr2r-z_ahrMCTvKscVuwUx6yo,5694
62
+ mteb/benchmarks/get_benchmark.py,sha256=nzR6cu5yXu1kIJKhd4A2R62xp43Z62bluPbOpNXHMWQ,2545
63
63
  mteb/benchmarks/benchmarks/__init__.py,sha256=73NYNv98q-tRCqf2YHabvElz_a8g_mF75HTup0J-E5E,2220
64
- mteb/benchmarks/benchmarks/benchmarks.py,sha256=_8zds06sQj41JzR6BHGWk33DZE2VGvabhBoyty5oAHk,97949
64
+ mteb/benchmarks/benchmarks/benchmarks.py,sha256=48yX0qsPL07rr14ygT28qQrCF7MBhFdrb_d2bzRkfWA,98612
65
65
  mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
66
66
  mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
67
67
  mteb/cli/_display_tasks.py,sha256=pWKupzak8uxEIwJZbYpZpteeVprOgVT9Wr0HYeypitQ,2206
68
- mteb/cli/build_cli.py,sha256=ccxmjPDLT4GqBq_son4VyfxpGTnY0w_L-4H1p9izLTk,12608
69
- mteb/cli/generate_model_card.py,sha256=thc0I1suK87g-ND9fsKHHxrYlnQ_7AmjWFaydLtXc6Q,4288
68
+ mteb/cli/build_cli.py,sha256=y3Pr6wBy1Y0F1NDkHWdCCtNvEXGmtSVBJfh-nGv9TLg,12543
69
+ mteb/cli/generate_model_card.py,sha256=wX1ApQHCbox9z8QIiHSomcbTiCIHmsoUTnsxmAv7e-g,4945
70
70
  mteb/descriptive_stats/BitextMining/BUCC.json,sha256=7zXoJaZacNdqMSG60jPZGIDJ1is_bxbVlcrVyImPRxw,3745
71
71
  mteb/descriptive_stats/BitextMining/BUCC.v2.json,sha256=IRPOKaIaUD31okNe12nQV2E1JeYK_Fo25Tz7d-utATM,3716
72
72
  mteb/descriptive_stats/BitextMining/BibleNLPBitextMining.json,sha256=BxzjiVoeXrSKaeBIVytLKMf2yx_6ksZ4GddPtTU8MWY,1248649
@@ -1448,7 +1448,7 @@ mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
1448
1448
  mteb/models/abs_encoder.py,sha256=HSJTjvcPYJRsKhhZeK2r6YP241EqpovwBcAuX1NevKE,16553
1449
1449
  mteb/models/get_model_meta.py,sha256=76BlPX5NuoNpo223OrcjD6a15Ee23G2RRlQfQt8mrAA,5620
1450
1450
  mteb/models/instruct_wrapper.py,sha256=e6id0oNMQd7ulDCkB-2IGaF2JK5S3Tiwcn-QFG-ufDk,9292
1451
- mteb/models/model_meta.py,sha256=p4Xl4Yae3kIp2k_ebV4VNGAzthQVDgDWDW8toiuV8S8,29842
1451
+ mteb/models/model_meta.py,sha256=5hWcv0RfVt3RcBJ2Vsyc2_8pGP42MGNRqXOzsIe4DJ0,30397
1452
1452
  mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
1453
1453
  mteb/models/search_wrappers.py,sha256=yu3BnXLqE5JbOD14cF2mhyjvlF5LRKPfgk8uUuDhbjI,20939
1454
1454
  mteb/models/sentence_transformer_wrapper.py,sha256=3zmWttzmIvU8Uuz48sHJ4VQf1Mu_kvSLCdxezoQ6FN4,12545
@@ -1563,7 +1563,7 @@ mteb/models/model_implementations/samilpwc_models.py,sha256=ZcMUO_pWXARqzBa_2G6q
1563
1563
  mteb/models/model_implementations/sarashina_embedding_models.py,sha256=LmJAsZ_zXywQwpQspQRB83jThRq2Lc8wxZt8K8UYzRw,8467
1564
1564
  mteb/models/model_implementations/searchmap_models.py,sha256=WpwYwv0xWmig-rTGK0Li7m8ppADV-Qhy9BiHPlhGZug,1930
1565
1565
  mteb/models/model_implementations/seed_1_6_embedding_models.py,sha256=gcGKEY-n7DWGPlXYhO_kcNJ3lkBEnbw8NUxADNs3siM,18635
1566
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py,sha256=O0BlsOHaxF0EEGaoas4AdzB8f-_9W9lwfoxLypexKEo,37516
1566
+ mteb/models/model_implementations/seed_1_6_embedding_models_1215.py,sha256=OoTHcDRQGOuSzf08V62EXrSEdRsXhnMv2ZN9feJWs9s,36443
1567
1567
  mteb/models/model_implementations/seed_models.py,sha256=9UF2AQ0Uue8DD73SjYhHn2hLxey_7Iq9ii9TkRaA3CM,14168
1568
1568
  mteb/models/model_implementations/sentence_transformers_models.py,sha256=_4MbkdjZ58bell8Ss0JkyCAkLzUxTLBMofnHckRtWs0,23252
1569
1569
  mteb/models/model_implementations/shuu_model.py,sha256=8-hoGqELHQRQ1QFhjwyuOY_8rqj_6f9vhE1Xi8OJ8aw,1162
@@ -2603,9 +2603,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2603
2603
  mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
2604
2604
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2605
2605
  mteb/types/statistics.py,sha256=GwkBPmAr18Onu-vHtzHs0PFrhCozdOMiT13HwnWL4ZM,3961
2606
- mteb-2.5.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2607
- mteb-2.5.4.dist-info/METADATA,sha256=72f7JHdvmwTqqUzMk8lT8m27KdorIiypPdxO6tRQROg,13990
2608
- mteb-2.5.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2609
- mteb-2.5.4.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2610
- mteb-2.5.4.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2611
- mteb-2.5.4.dist-info/RECORD,,
2606
+ mteb-2.5.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2607
+ mteb-2.5.5.dist-info/METADATA,sha256=9Ta6P3mxIJeO4otx8KyizC9A4mgWvQk5RW3sx_PvgL8,13990
2608
+ mteb-2.5.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2609
+ mteb-2.5.5.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2610
+ mteb-2.5.5.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2611
+ mteb-2.5.5.dist-info/RECORD,,
File without changes