mteb 2.5.1__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. mteb/abstasks/abstask.py +6 -6
  2. mteb/abstasks/aggregated_task.py +4 -10
  3. mteb/abstasks/clustering_legacy.py +3 -2
  4. mteb/abstasks/task_metadata.py +2 -3
  5. mteb/cache.py +7 -4
  6. mteb/cli/build_cli.py +10 -5
  7. mteb/cli/generate_model_card.py +4 -3
  8. mteb/deprecated_evaluator.py +4 -3
  9. mteb/evaluate.py +4 -1
  10. mteb/get_tasks.py +4 -3
  11. mteb/leaderboard/app.py +70 -3
  12. mteb/models/abs_encoder.py +5 -3
  13. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +4 -1
  14. mteb/models/cache_wrappers/cache_backends/numpy_cache.py +13 -12
  15. mteb/models/model_implementations/align_models.py +1 -0
  16. mteb/models/model_implementations/amazon_models.py +1 -0
  17. mteb/models/model_implementations/andersborges.py +2 -0
  18. mteb/models/model_implementations/ara_models.py +1 -0
  19. mteb/models/model_implementations/arctic_models.py +8 -0
  20. mteb/models/model_implementations/b1ade_models.py +1 -0
  21. mteb/models/model_implementations/bedrock_models.py +4 -0
  22. mteb/models/model_implementations/bge_models.py +17 -0
  23. mteb/models/model_implementations/bica_model.py +1 -0
  24. mteb/models/model_implementations/blip2_models.py +2 -0
  25. mteb/models/model_implementations/blip_models.py +8 -0
  26. mteb/models/model_implementations/bm25.py +1 -0
  27. mteb/models/model_implementations/bmretriever_models.py +4 -0
  28. mteb/models/model_implementations/cadet_models.py +1 -0
  29. mteb/models/model_implementations/cde_models.py +2 -0
  30. mteb/models/model_implementations/clip_models.py +3 -0
  31. mteb/models/model_implementations/clips_models.py +3 -0
  32. mteb/models/model_implementations/codefuse_models.py +3 -0
  33. mteb/models/model_implementations/codesage_models.py +3 -0
  34. mteb/models/model_implementations/cohere_models.py +4 -0
  35. mteb/models/model_implementations/cohere_v.py +5 -0
  36. mteb/models/model_implementations/colpali_models.py +3 -0
  37. mteb/models/model_implementations/colqwen_models.py +9 -0
  38. mteb/models/model_implementations/colsmol_models.py +2 -0
  39. mteb/models/model_implementations/conan_models.py +1 -0
  40. mteb/models/model_implementations/dino_models.py +19 -0
  41. mteb/models/model_implementations/e5_instruct.py +4 -0
  42. mteb/models/model_implementations/e5_models.py +9 -0
  43. mteb/models/model_implementations/e5_v.py +1 -0
  44. mteb/models/model_implementations/eagerworks_models.py +1 -0
  45. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  46. mteb/models/model_implementations/en_code_retriever.py +1 -0
  47. mteb/models/model_implementations/euler_models.py +1 -0
  48. mteb/models/model_implementations/evaclip_models.py +4 -0
  49. mteb/models/model_implementations/fa_models.py +8 -0
  50. mteb/models/model_implementations/facebookai.py +2 -0
  51. mteb/models/model_implementations/geogpt_models.py +1 -0
  52. mteb/models/model_implementations/gme_v_models.py +6 -3
  53. mteb/models/model_implementations/google_models.py +5 -0
  54. mteb/models/model_implementations/granite_vision_embedding_models.py +1 -0
  55. mteb/models/model_implementations/gritlm_models.py +2 -0
  56. mteb/models/model_implementations/gte_models.py +9 -0
  57. mteb/models/model_implementations/hinvec_models.py +1 -0
  58. mteb/models/model_implementations/human.py +1 -0
  59. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  60. mteb/models/model_implementations/inf_models.py +2 -0
  61. mteb/models/model_implementations/jasper_models.py +2 -0
  62. mteb/models/model_implementations/jina_clip.py +1 -0
  63. mteb/models/model_implementations/jina_models.py +7 -1
  64. mteb/models/model_implementations/kalm_models.py +6 -0
  65. mteb/models/model_implementations/kblab.py +1 -0
  66. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  67. mteb/models/model_implementations/kfst.py +1 -0
  68. mteb/models/model_implementations/kowshik24_models.py +1 -0
  69. mteb/models/model_implementations/lens_models.py +2 -0
  70. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  71. mteb/models/model_implementations/linq_models.py +1 -0
  72. mteb/models/model_implementations/listconranker.py +1 -1
  73. mteb/models/model_implementations/llm2clip_models.py +3 -0
  74. mteb/models/model_implementations/llm2vec_models.py +8 -0
  75. mteb/models/model_implementations/mcinext_models.py +7 -1
  76. mteb/models/model_implementations/mdbr_models.py +2 -0
  77. mteb/models/model_implementations/misc_models.py +63 -0
  78. mteb/models/model_implementations/mme5_models.py +1 -0
  79. mteb/models/model_implementations/moco_models.py +2 -0
  80. mteb/models/model_implementations/model2vec_models.py +13 -0
  81. mteb/models/model_implementations/moka_models.py +3 -0
  82. mteb/models/model_implementations/mxbai_models.py +3 -0
  83. mteb/models/model_implementations/nbailab.py +3 -0
  84. mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
  85. mteb/models/model_implementations/nomic_models.py +6 -0
  86. mteb/models/model_implementations/nomic_models_vision.py +1 -0
  87. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -0
  88. mteb/models/model_implementations/nvidia_models.py +3 -0
  89. mteb/models/model_implementations/octen_models.py +195 -0
  90. mteb/models/model_implementations/openai_models.py +5 -0
  91. mteb/models/model_implementations/openclip_models.py +8 -0
  92. mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
  93. mteb/models/model_implementations/ops_moa_models.py +2 -0
  94. mteb/models/model_implementations/pawan_models.py +1 -0
  95. mteb/models/model_implementations/piccolo_models.py +2 -0
  96. mteb/models/model_implementations/promptriever_models.py +4 -0
  97. mteb/models/model_implementations/pylate_models.py +3 -0
  98. mteb/models/model_implementations/qodo_models.py +2 -0
  99. mteb/models/model_implementations/qtack_models.py +1 -0
  100. mteb/models/model_implementations/qwen3_models.py +3 -0
  101. mteb/models/model_implementations/qzhou_models.py +2 -0
  102. mteb/models/model_implementations/random_baseline.py +2 -1
  103. mteb/models/model_implementations/rasgaard_models.py +1 -0
  104. mteb/models/model_implementations/reasonir_model.py +1 -0
  105. mteb/models/model_implementations/repllama_models.py +2 -0
  106. mteb/models/model_implementations/rerankers_custom.py +3 -3
  107. mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
  108. mteb/models/model_implementations/richinfoai_models.py +1 -0
  109. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  110. mteb/models/model_implementations/ruri_models.py +10 -0
  111. mteb/models/model_implementations/salesforce_models.py +3 -0
  112. mteb/models/model_implementations/samilpwc_models.py +1 -0
  113. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  114. mteb/models/model_implementations/searchmap_models.py +1 -0
  115. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -0
  116. mteb/models/model_implementations/seed_models.py +1 -0
  117. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  118. mteb/models/model_implementations/shuu_model.py +32 -31
  119. mteb/models/model_implementations/siglip_models.py +10 -0
  120. mteb/models/model_implementations/sonar_models.py +1 -0
  121. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  122. mteb/models/model_implementations/stella_models.py +6 -0
  123. mteb/models/model_implementations/tarka_models.py +2 -0
  124. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  125. mteb/models/model_implementations/uae_models.py +1 -0
  126. mteb/models/model_implementations/vdr_models.py +1 -0
  127. mteb/models/model_implementations/vi_vn_models.py +6 -0
  128. mteb/models/model_implementations/vista_models.py +2 -0
  129. mteb/models/model_implementations/vlm2vec_models.py +2 -0
  130. mteb/models/model_implementations/voyage_models.py +15 -0
  131. mteb/models/model_implementations/voyage_v.py +1 -0
  132. mteb/models/model_implementations/xyz_models.py +1 -0
  133. mteb/models/model_implementations/youtu_models.py +1 -0
  134. mteb/models/model_implementations/yuan_models.py +1 -0
  135. mteb/models/model_implementations/yuan_models_en.py +1 -0
  136. mteb/models/model_meta.py +49 -4
  137. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +4 -1
  138. mteb/models/search_wrappers.py +4 -2
  139. mteb/models/sentence_transformer_wrapper.py +10 -10
  140. mteb/results/benchmark_results.py +67 -43
  141. mteb/results/model_result.py +3 -1
  142. mteb/results/task_result.py +22 -17
  143. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/METADATA +1 -1
  144. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/RECORD +148 -147
  145. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/WHEEL +0 -0
  146. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/entry_points.txt +0 -0
  147. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/licenses/LICENSE +0 -0
  148. {mteb-2.5.1.dist-info → mteb-2.5.3.dist-info}/top_level.txt +0 -0
mteb/abstasks/abstask.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import warnings
3
4
  from abc import ABC, abstractmethod
4
5
  from collections.abc import Sequence
5
6
  from copy import copy
@@ -102,9 +103,9 @@ class AbsTask(ABC):
102
103
  def check_if_dataset_is_superseded(self) -> None:
103
104
  """Check if the dataset is superseded by a newer version."""
104
105
  if self.superseded_by:
105
- logger.warning(
106
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
107
- )
106
+ msg = f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}'. We recommend using the newer version of the dataset unless you are running a specific benchmark. See `get_task('{self.superseded_by}').metadata.description` to get a description of the task and changes."
107
+ logger.warning(msg)
108
+ warnings.warn(msg)
108
109
 
109
110
  def dataset_transform(self):
110
111
  """A transform operations applied to the dataset after loading.
@@ -607,9 +608,8 @@ class AbsTask(ABC):
607
608
  self.data_loaded = False
608
609
  logger.info(f"Unloaded dataset {self.metadata.name} from memory.")
609
610
  else:
610
- logger.warning(
611
- f"Dataset {self.metadata.name} is not loaded, cannot unload it."
612
- )
611
+ msg = f"Dataset `{self.metadata.name}` is not loaded, cannot unload it."
612
+ logger.warning(msg)
613
613
 
614
614
  @property
615
615
  def superseded_by(self) -> str | None:
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
  from pathlib import Path
3
4
  from typing import Any
4
5
 
@@ -113,20 +114,13 @@ class AbsTaskAggregate(AbsTask):
113
114
  )
114
115
  mteb_versions = {tr.mteb_version for tr in task_results}
115
116
  if len(mteb_versions) != 1:
116
- logger.warning(
117
- f"All tasks of {self.metadata.name} is not run using the same version."
118
- )
117
+ msg = f"All tasks of {self.metadata.name} is not run using the same version. different versions found are: {mteb_versions}"
118
+ logger.warning(msg)
119
+ warnings.warn(msg)
119
120
  task_res.mteb_version = None
120
121
  task_res.mteb_version = task_results[0].mteb_version
121
122
  return task_res
122
123
 
123
- def check_if_dataset_is_superseded(self) -> None:
124
- """Check if the dataset is superseded by a newer version"""
125
- if self.superseded_by:
126
- logger.warning(
127
- f"Dataset '{self.metadata.name}' is superseded by '{self.superseded_by}', you might consider using the newer version of the dataset."
128
- )
129
-
130
124
  def filter_eval_splits(self, eval_splits: list[str] | None) -> Self:
131
125
  """Filter the evaluation splits of the task.
132
126
 
@@ -89,6 +89,9 @@ class AbsTaskClusteringLegacy(AbsTask):
89
89
  prediction_folder: Path | None = None,
90
90
  **kwargs: Any,
91
91
  ) -> ScoresDict:
92
+ data_split = data_split.select_columns(
93
+ [self.input_column_name, self.label_column_name]
94
+ )
92
95
  # MTEB text clustering requires renaming and eval per subset.
93
96
  if self.metadata.modalities == ["text"]:
94
97
  all_metrics = []
@@ -97,8 +100,6 @@ class AbsTaskClusteringLegacy(AbsTask):
97
100
  logger.info(
98
101
  f"Running clustering on cluster ({i + 1}/{len(data_split)})"
99
102
  )
100
- if "__index_level_0__" in cluster_set:
101
- cluster_set.pop("__index_level_0__")
102
103
  clustering_dataset = Dataset.from_dict(cluster_set).select_columns(
103
104
  [self.input_column_name, self.label_column_name]
104
105
  )
@@ -376,9 +376,8 @@ class TaskMetadata(BaseModel):
376
376
  if include_cite and cite:
377
377
  # check for whitespace in the citation
378
378
  if " " in cite:
379
- logger.warning(
380
- "Citation contains whitespace. Please ensure that the citation is correctly formatted."
381
- )
379
+ msg = "Citation contains whitespace. Please ensure that the citation is correctly formatted."
380
+ logger.warning(msg)
382
381
  return f"\\cite{{{cite}}}"
383
382
  return cite
384
383
 
mteb/cache.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
3
  import os
4
4
  import shutil
5
5
  import subprocess
6
+ import warnings
6
7
  from collections import defaultdict
7
8
  from collections.abc import Sequence
8
9
  from pathlib import Path
@@ -83,9 +84,9 @@ class ResultCache:
83
84
  model_path = results_folder / model_name
84
85
 
85
86
  if model_revision is None:
86
- logger.warning(
87
- "model_revision is not specified, attempting to load the latest revision. To disable this behavior, specify model_revision explicitly."
88
- )
87
+ msg = "`model_revision` is not specified, attempting to load the latest revision. To disable this behavior, specify the 'model_revision` explicitly."
88
+ logger.warning(msg)
89
+ warnings.warn(msg)
89
90
  # get revs from paths
90
91
  revisions = [p for p in model_path.glob("*") if p.is_dir()]
91
92
  if not revisions:
@@ -281,7 +282,9 @@ class ResultCache:
281
282
  shutil.rmtree(self.cache_path)
282
283
  logger.info(f"Cache directory {self.cache_path} cleared.")
283
284
  else:
284
- logger.warning(f"Cache directory {self.cache_path} does not exist.")
285
+ msg = f"Cache directory `{self.cache_path}` does not exist."
286
+ logger.warning(msg)
287
+ warnings.warn(msg)
285
288
 
286
289
  def __repr__(self) -> str:
287
290
  return f"ResultCache(cache_path={self.cache_path})"
mteb/cli/build_cli.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import argparse
2
2
  import logging
3
3
  import os
4
+ import warnings
4
5
  from pathlib import Path
5
6
 
6
7
  import torch
@@ -69,15 +70,17 @@ def run(args: argparse.Namespace) -> None:
69
70
 
70
71
  overwrite_strategy = args.overwrite_strategy
71
72
  if args.overwrite:
72
- logger.warning(
73
- "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead."
73
+ warnings.warn(
74
+ "`--overwrite` is deprecated, please use `--overwrite-strategy 'always'` instead.",
75
+ DeprecationWarning,
74
76
  )
75
77
  overwrite_strategy = OverwriteStrategy.ALWAYS.value
76
78
 
77
79
  prediction_folder = args.prediction_folder
78
80
  if args.save_predictions:
79
- logger.warning(
80
- "`--save_predictions` is deprecated, please use `--prediction-folder` instead."
81
+ warnings.warn(
82
+ "`--save_predictions` is deprecated, please use `--prediction-folder` instead.",
83
+ DeprecationWarning,
81
84
  )
82
85
  prediction_folder = args.output_folder
83
86
 
@@ -279,7 +282,9 @@ def _create_meta(args: argparse.Namespace) -> None:
279
282
  from_existing = Path(from_existing)
280
283
 
281
284
  if output_path.exists() and overwrite:
282
- logger.warning("Output path already exists, overwriting.")
285
+ msg = "Output path already exists, overwriting."
286
+ logger.warning(msg)
287
+ warnings.warn(msg)
283
288
  elif output_path.exists():
284
289
  raise FileExistsError(
285
290
  "Output path already exists, use --overwrite to overwrite."
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
  from pathlib import Path
3
4
 
4
5
  from huggingface_hub import ModelCard, ModelCardData, repo_exists
@@ -92,9 +93,9 @@ def generate_model_card(
92
93
  if repo_exists(existing_model_card_id_or_path):
93
94
  existing_model_card.push_to_hub(existing_model_card_id_or_path, token=token)
94
95
  else:
95
- logger.warning(
96
- f"Repository {existing_model_card_id_or_path} does not exist on the Hub. Skipping push to hub."
97
- )
96
+ msg = f"Repository {existing_model_card_id_or_path} does not exist on the Hub. Skipping push to hub."
97
+ logger.warning(msg)
98
+ warnings.warn(msg)
98
99
  existing_model_card.save(output_path)
99
100
 
100
101
 
@@ -5,6 +5,7 @@ import logging
5
5
  import os
6
6
  import sys
7
7
  import traceback
8
+ import warnings
8
9
  from collections.abc import Iterable
9
10
  from copy import deepcopy
10
11
  from datetime import datetime
@@ -470,9 +471,9 @@ class MTEB:
470
471
  raise ImportError(
471
472
  "codecarbon is not installed. Please install it using `pip install 'mteb[codecarbon]'` to track CO₂ emissions."
472
473
  )
473
- logger.warning(
474
- "Evaluating multiple MTEB runs simultaneously will produce incorrect CO₂ results"
475
- )
474
+ msg = "Evaluating multiple MTEB runs simultaneously will produce incorrect CO₂ results"
475
+ logger.warning(msg)
476
+ warnings.warn(msg)
476
477
  with EmissionsTracker(
477
478
  save_to_file=False,
478
479
  save_to_api=False,
mteb/evaluate.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import warnings
4
5
  from collections.abc import Iterable
5
6
  from pathlib import Path
6
7
  from time import time
@@ -136,10 +137,12 @@ def _evaluate_task(
136
137
  task.load_data()
137
138
  except DatasetNotFoundError as e:
138
139
  if not task.metadata.is_public and public_only is None:
139
- logger.warning(
140
+ msg = (
140
141
  f"Dataset for private task '{task.metadata.name}' not found. "
141
142
  "Make sure you have access to the dataset and that you have set up the authentication correctly. To disable this warning set `public_only=False`"
142
143
  )
144
+ logger.warning(msg)
145
+ warnings.warn(msg)
143
146
  return TaskError(
144
147
  task_name=task.metadata.name,
145
148
  exception=str(e),
mteb/get_tasks.py CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  import difflib
4
4
  import logging
5
+ import warnings
5
6
  from collections import Counter, defaultdict
6
7
  from collections.abc import Sequence
7
8
  from typing import Any
@@ -340,9 +341,9 @@ def get_task(
340
341
  """
341
342
  if task_name in _TASK_RENAMES:
342
343
  _task_name = _TASK_RENAMES[task_name]
343
- logger.warning(
344
- f"The task with the given name '{task_name}' has been renamed to '{_task_name}'. To prevent this warning use the new name."
345
- )
344
+ msg = f"The task with the given name '{task_name}' has been renamed to '{_task_name}'. To prevent this warning use the new name."
345
+ logger.warning(msg)
346
+ warnings.warn(msg)
346
347
 
347
348
  if task_name not in _TASKS_REGISTRY:
348
349
  close_matches = difflib.get_close_matches(task_name, _TASKS_REGISTRY.keys())
mteb/leaderboard/app.py CHANGED
@@ -36,9 +36,15 @@ LANGUAGE: list[str] = list({l for t in mteb.get_tasks() for l in t.metadata.lang
36
36
 
37
37
 
38
38
  def _load_results(cache: ResultCache) -> BenchmarkResults:
39
+ start_time = time.time()
39
40
  results_cache_path = Path(__file__).parent.joinpath("__cached_results.json")
40
41
  if not results_cache_path.exists():
42
+ logger.info("Cached results not found, downloading from remote...")
41
43
  cache.download_from_remote()
44
+ download_time = time.time() - start_time
45
+ logger.info(f"Downloaded remote results in {download_time:.2f}s")
46
+
47
+ load_start = time.time()
42
48
  all_model_names = [model_meta.name for model_meta in mteb.get_model_metas()]
43
49
 
44
50
  all_results = cache.load_results(
@@ -47,10 +53,16 @@ def _load_results(cache: ResultCache) -> BenchmarkResults:
47
53
  require_model_meta=False,
48
54
  include_remote=True,
49
55
  )
56
+ load_time = time.time() - load_start
57
+ logger.info(f"Loaded results from cache in {load_time:.2f}s")
50
58
  return all_results
51
59
  else:
60
+ logger.info("Loading cached results from disk...")
52
61
  with results_cache_path.open() as cache_file:
53
- return mteb.BenchmarkResults.from_validated(**json.load(cache_file))
62
+ results = mteb.BenchmarkResults.from_validated(**json.load(cache_file))
63
+ total_time = time.time() - start_time
64
+ logger.info(f"Loaded cached results in {total_time:.2f}s")
65
+ return results
54
66
 
55
67
 
56
68
  def _produce_benchmark_link(benchmark_name: str, request: gr.Request) -> str:
@@ -322,20 +334,48 @@ def _cache_update_task_list(
322
334
 
323
335
  def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
324
336
  """Returns a Gradio Blocks app for the MTEB leaderboard."""
325
- logger.info("Loading all benchmark results")
337
+ app_start = time.time()
338
+ logger.info("=== Starting leaderboard app initialization ===")
339
+
340
+ logger.info("Step 1/7: Loading all benchmark results...")
341
+ load_start = time.time()
326
342
  all_results = _load_results(cache)
343
+ load_time = time.time() - load_start
344
+ logger.info(f"Step 1/7 complete: Loaded results in {load_time:.2f}s")
327
345
 
346
+ logger.info("Step 2/7: Fetching benchmarks...")
347
+ bench_start = time.time()
328
348
  benchmarks = sorted(
329
349
  mteb.get_benchmarks(display_on_leaderboard=True), key=lambda x: x.name
330
350
  )
351
+ bench_time = time.time() - bench_start
352
+ logger.info(
353
+ f"Step 2/7 complete: Fetched {len(benchmarks)} benchmarks in {bench_time:.2f}s"
354
+ )
355
+
356
+ logger.info(
357
+ "Step 3/7: Processing all benchmarks (select_tasks + join_revisions)..."
358
+ )
359
+ process_start = time.time()
331
360
  all_benchmark_results = {
332
361
  benchmark.name: all_results.select_tasks(benchmark.tasks).join_revisions()
333
362
  for benchmark in benchmarks
334
363
  }
364
+ process_time = time.time() - process_start
365
+ if len(benchmarks) > 0:
366
+ logger.info(
367
+ f"Step 3/7 complete: Processed {len(benchmarks)} benchmarks in {process_time:.2f}s (avg {process_time / len(benchmarks):.2f}s/benchmark)"
368
+ )
369
+ else:
370
+ logger.info(
371
+ f"Step 3/7 complete: Processed 0 benchmarks in {process_time:.2f}s (avg N/A)"
372
+ )
335
373
 
336
374
  default_benchmark = mteb.get_benchmark(DEFAULT_BENCHMARK_NAME)
337
375
  default_results = all_benchmark_results[default_benchmark.name]
338
- logger.info("Benchmark results loaded")
376
+
377
+ logger.info("Step 4/7: Filtering models...")
378
+ filter_start = time.time()
339
379
 
340
380
  default_scores = default_results._get_scores(format="long")
341
381
  all_models = list({entry["model_name"] for entry in default_scores})
@@ -355,7 +395,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
355
395
  # Filter BenchmarkResults based on default filtered models (as required by Kenneth)
356
396
  filtered_model_names = [entry["model_name"] for entry in default_filtered_scores]
357
397
  filtered_benchmark_results = default_results.select_models(filtered_model_names)
398
+ filter_time = time.time() - filter_start
399
+ logger.info(
400
+ f"Step 4/7 complete: Filtered {len(filtered_model_names)} models in {filter_time:.2f}s"
401
+ )
358
402
 
403
+ logger.info("Step 5/7: Generating tables...")
404
+ table_start = time.time()
359
405
  summary_table = apply_summary_styling_from_benchmark(
360
406
  default_benchmark, filtered_benchmark_results
361
407
  )
@@ -366,10 +412,14 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
366
412
  default_benchmark,
367
413
  filtered_benchmark_results,
368
414
  )
415
+ table_time = time.time() - table_start
416
+ logger.info(f"Step 5/7 complete: Generated tables in {table_time:.2f}s")
369
417
 
370
418
  # Check if this benchmark displays per-language results
371
419
  display_language_table = len(default_benchmark.language_view) > 0
372
420
 
421
+ logger.info("Step 6/7: Creating Gradio components...")
422
+ component_start = time.time()
373
423
  lang_select = gr.CheckboxGroup(
374
424
  sorted(default_results.languages),
375
425
  value=sorted(default_results.languages),
@@ -410,7 +460,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
410
460
  label="Modality",
411
461
  info="Select modalities to include.",
412
462
  )
463
+ component_time = time.time() - component_start
464
+ logger.info(
465
+ f"Step 6/7 complete: Created Gradio components in {component_time:.2f}s"
466
+ )
413
467
 
468
+ logger.info("Step 7/7: Building Gradio interface and callbacks...")
469
+ interface_start = time.time()
414
470
  with gr.Blocks(fill_width=True) as demo:
415
471
  with gr.Sidebar(
416
472
  position="left",
@@ -926,7 +982,11 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
926
982
  )
927
983
 
928
984
  gr.Markdown(ACKNOWLEDGEMENT, elem_id="ack_markdown")
985
+ interface_time = time.time() - interface_start
986
+ logger.info(f"Step 7/7 complete: Built Gradio interface in {interface_time:.2f}s")
929
987
 
988
+ logger.info("Starting prerun on all benchmarks to populate caches...")
989
+ prerun_start = time.time()
930
990
  # Prerun on all benchmarks, so that results of callbacks get cached
931
991
  for benchmark in benchmarks:
932
992
  (
@@ -952,6 +1012,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
952
1012
  update_tables(
953
1013
  bench_scores, filtered_tasks, bench_initial_models, benchmark.name
954
1014
  )
1015
+ prerun_time = time.time() - prerun_start
1016
+ logger.info(
1017
+ f"Prerun complete: Processed {len(benchmarks)} benchmarks in {prerun_time:.2f}s"
1018
+ )
1019
+
1020
+ total_time = time.time() - app_start
1021
+ logger.info(f"=== Leaderboard app initialization complete in {total_time:.2f}s ===")
955
1022
  return demo
956
1023
 
957
1024
 
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import warnings
2
3
  from abc import ABC, abstractmethod
3
4
  from collections.abc import Callable, Sequence
4
5
  from typing import Any, Literal, cast, get_args, overload
@@ -187,6 +188,7 @@ class AbsEncoder(ABC):
187
188
  except KeyError:
188
189
  msg = f"Task name {task_name} is not valid. {valid_keys_msg}"
189
190
  logger.warning(msg)
191
+ warnings.warn(msg)
190
192
  invalid_task_messages.add(msg)
191
193
  invalid_keys.add(task_key)
192
194
 
@@ -232,9 +234,9 @@ class AbsEncoder(ABC):
232
234
  if isinstance(prompt, dict) and prompt_type:
233
235
  if prompt.get(prompt_type.value):
234
236
  return prompt[prompt_type.value]
235
- logger.warning(
236
- f"Prompt type '{prompt_type}' not found in task metadata for task '{task_metadata.name}'."
237
- )
237
+ msg = f"Prompt type '{prompt_type}' not found in task metadata for task '{task_metadata.name}'."
238
+ logger.warning(msg)
239
+ warnings.warn(msg)
238
240
  return ""
239
241
 
240
242
  if prompt:
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import warnings
3
4
  from pathlib import Path
4
5
 
5
6
  import numpy as np
@@ -71,7 +72,9 @@ class FaissCache:
71
72
  try:
72
73
  return self.index.reconstruct(idx)
73
74
  except Exception:
74
- logger.warning(f"Vector id {idx} missing for hash {item_hash}")
75
+ msg = f"Vector id {idx} missing for hash {item_hash}"
76
+ logger.warning(msg)
77
+ warnings.warn(msg)
75
78
  return None
76
79
 
77
80
  def save(self) -> None:
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import warnings
3
4
  from pathlib import Path
4
5
 
5
6
  import numpy as np
@@ -41,9 +42,9 @@ class NumpyCache:
41
42
  for item, vec in zip(item, vectors):
42
43
  item_hash = _hash_item(item)
43
44
  if item_hash in self.hash_to_index:
44
- logger.warning(
45
- "Hash collision or duplicate item. Overwriting existing vector."
46
- )
45
+ msg = f"Hash collision or duplicate item for hash {item_hash}. Overwriting existing vector."
46
+ logger.warning(msg)
47
+ warnings.warn(msg)
47
48
  index = self.hash_to_index[item_hash]
48
49
  else:
49
50
  index = len(self.hash_to_index)
@@ -107,9 +108,9 @@ class NumpyCache:
107
108
  f"Loaded vector dimension {self.vector_dim} from {self.dimension_file}"
108
109
  )
109
110
  else:
110
- logger.warning(
111
- "Dimension file not found. Vector dimension remains uninitialized."
112
- )
111
+ msg = "Dimension file not found. Vector dimension remains uninitialized."
112
+ logger.warning(msg)
113
+ warnings.warn(msg)
113
114
 
114
115
  def save(self) -> None:
115
116
  """Persist VectorCacheMap to disk."""
@@ -151,14 +152,14 @@ class NumpyCache:
151
152
  self.vectors = self.vectors.reshape(-1, self.vector_dim)
152
153
  logger.info(f"Loaded vectors file with shape: {self.vectors.shape}")
153
154
  else:
154
- logger.warning(
155
- "Vector dimension not set. Unable to load vectors file."
156
- )
155
+ msg = "Vector dimension not set. Unable to load vectors file."
156
+ logger.warning(msg)
157
+ warnings.warn(msg)
157
158
  logger.info(f"Loaded VectorCacheMap from {self.directory}")
158
159
  else:
159
- logger.warning(
160
- "No existing files found. Initialized empty VectorCacheMap."
161
- )
160
+ msg = "No existing files found. Initialized empty VectorCacheMap."
161
+ logger.warning(msg)
162
+ warnings.warn(msg)
162
163
  except Exception as e:
163
164
  logger.error(f"Error loading VectorCacheMap: {str(e)}")
164
165
  raise
@@ -105,6 +105,7 @@ class ALIGNModel(AbsEncoder):
105
105
  align_base = ModelMeta(
106
106
  loader=ALIGNModel,
107
107
  name="kakaobrain/align-base",
108
+ model_type=["dense"],
108
109
  languages=["eng-Latn"],
109
110
  revision="e96a37facc7b1f59090ece82293226b817afd6ba",
110
111
  release_date="2023-02-24",
@@ -3,6 +3,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
3
3
  amazon_titan_text_embeddings_v2 = ModelMeta(
4
4
  loader=None,
5
5
  name="amazon/Titan-text-embeddings-v2",
6
+ model_type=["dense"],
6
7
  revision="1",
7
8
  release_date="2024-04-30",
8
9
  languages=["eng-Latn"],
@@ -6,6 +6,7 @@ from mteb.models.model_meta import ModelMeta, ScoringFunction
6
6
  model2vecdk = ModelMeta(
7
7
  loader=Model2VecModel, # type: ignore
8
8
  name="andersborges/model2vecdk",
9
+ model_type=["dense"],
9
10
  languages=["dan-Latn"],
10
11
  open_weights=True,
11
12
  revision="cb576c78dcc1b729e4612645f61db59929d69e61",
@@ -36,6 +37,7 @@ model2vecdk = ModelMeta(
36
37
  model2vecdk_stem = ModelMeta(
37
38
  loader=Model2VecModel, # type: ignore
38
39
  name="andersborges/model2vecdk-stem",
40
+ model_type=["dense"],
39
41
  languages=["dan-Latn"],
40
42
  open_weights=True,
41
43
  revision="cb576c78dcc1b729e4612645f61db59929d69e61",
@@ -4,6 +4,7 @@ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loade
4
4
  arabic_triplet_matryoshka = ModelMeta(
5
5
  loader=sentence_transformers_loader,
6
6
  name="Omartificial-Intelligence-Space/Arabic-Triplet-Matryoshka-V2",
7
+ model_type=["dense"],
7
8
  languages=["ara-Arab"],
8
9
  open_weights=True,
9
10
  revision="ed357f222f0b6ea6670d2c9b5a1cb93950d34200",
@@ -140,6 +140,7 @@ arctic_v2_training_datasets = {
140
140
  arctic_embed_xs = ModelMeta(
141
141
  loader=sentence_transformers_loader,
142
142
  name="Snowflake/snowflake-arctic-embed-xs",
143
+ model_type=["dense"],
143
144
  revision="742da4f66e1823b5b4dbe6c320a1375a1fd85f9e",
144
145
  release_date="2024-07-08", # initial commit of hf model.
145
146
  languages=["eng-Latn"],
@@ -165,6 +166,7 @@ arctic_embed_xs = ModelMeta(
165
166
  arctic_embed_s = ModelMeta(
166
167
  loader=sentence_transformers_loader,
167
168
  name="Snowflake/snowflake-arctic-embed-s",
169
+ model_type=["dense"],
168
170
  revision="d3c1d2d433dd0fdc8e9ca01331a5f225639e798f",
169
171
  release_date="2024-04-12", # initial commit of hf model.
170
172
  languages=["eng-Latn"],
@@ -190,6 +192,7 @@ arctic_embed_s = ModelMeta(
190
192
  arctic_embed_m = ModelMeta(
191
193
  loader=sentence_transformers_loader,
192
194
  name="Snowflake/snowflake-arctic-embed-m",
195
+ model_type=["dense"],
193
196
  revision="cc17beacbac32366782584c8752220405a0f3f40",
194
197
  release_date="2024-04-12", # initial commit of hf model.
195
198
  languages=["eng-Latn"],
@@ -215,6 +218,7 @@ arctic_embed_m_long = ModelMeta(
215
218
  loader=sentence_transformers_loader,
216
219
  loader_kwargs={"trust_remote_code": True},
217
220
  name="Snowflake/snowflake-arctic-embed-m-long",
221
+ model_type=["dense"],
218
222
  revision="89d0f6ab196eead40b90cb6f9fefec01a908d2d1",
219
223
  release_date="2024-04-12", # initial commit of hf model.
220
224
  languages=["eng-Latn"],
@@ -239,6 +243,7 @@ arctic_embed_m_long = ModelMeta(
239
243
  arctic_embed_l = ModelMeta(
240
244
  loader=sentence_transformers_loader,
241
245
  name="Snowflake/snowflake-arctic-embed-l",
246
+ model_type=["dense"],
242
247
  revision="9a9e5834d2e89cdd8bb72b64111dde496e4fe78c",
243
248
  release_date="2024-04-12", # initial commit of hf model.
244
249
  languages=["eng-Latn"],
@@ -268,6 +273,7 @@ arctic_embed_m_v1_5 = ModelMeta(
268
273
  },
269
274
  ),
270
275
  name="Snowflake/snowflake-arctic-embed-m-v1.5",
276
+ model_type=["dense"],
271
277
  revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47",
272
278
  release_date="2024-07-08", # initial commit of hf model.
273
279
  languages=["eng-Latn"],
@@ -293,6 +299,7 @@ arctic_embed_m_v2_0 = ModelMeta(
293
299
  loader=sentence_transformers_loader,
294
300
  loader_kwargs={"trust_remote_code": True},
295
301
  name="Snowflake/snowflake-arctic-embed-m-v2.0",
302
+ model_type=["dense"],
296
303
  revision="f2a7d59d80dfda5b1d14f096f3ce88bb6bf9ebdc",
297
304
  release_date="2024-12-04", # initial commit of hf model.
298
305
  languages=LANGUAGES_V2_0,
@@ -317,6 +324,7 @@ arctic_embed_m_v2_0 = ModelMeta(
317
324
  arctic_embed_l_v2_0 = ModelMeta(
318
325
  loader=sentence_transformers_loader,
319
326
  name="Snowflake/snowflake-arctic-embed-l-v2.0",
327
+ model_type=["dense"],
320
328
  revision="edc2df7b6c25794b340229ca082e7c78782e6374",
321
329
  release_date="2024-12-04", # initial commit of hf model.
322
330
  languages=LANGUAGES_V2_0,
@@ -10,6 +10,7 @@ b1ade_training_data = {
10
10
  b1ade_embed = ModelMeta(
11
11
  loader=sentence_transformers_loader,
12
12
  name="w601sxs/b1ade-embed",
13
+ model_type=["dense"],
13
14
  languages=["eng-Latn"],
14
15
  revision="3bdac13927fdc888b903db93b2ffdbd90b295a69",
15
16
  open_weights=True,
@@ -155,6 +155,7 @@ class BedrockModel(AbsEncoder):
155
155
 
156
156
  amazon_titan_embed_text_v1 = ModelMeta(
157
157
  name="bedrock/amazon-titan-embed-text-v1",
158
+ model_type=["dense"],
158
159
  revision="1",
159
160
  release_date="2023-09-27",
160
161
  languages=None, # not specified
@@ -181,6 +182,7 @@ amazon_titan_embed_text_v1 = ModelMeta(
181
182
 
182
183
  amazon_titan_embed_text_v2 = ModelMeta(
183
184
  name="bedrock/amazon-titan-embed-text-v2",
185
+ model_type=["dense"],
184
186
  revision="1",
185
187
  release_date="2024-04-30",
186
188
  languages=None, # not specified
@@ -216,6 +218,7 @@ cohere_embed_english_v3 = ModelMeta(
216
218
  model_prompts=cohere_model_prompts,
217
219
  ),
218
220
  name="bedrock/cohere-embed-english-v3",
221
+ model_type=["dense"],
219
222
  languages=["eng-Latn"],
220
223
  open_weights=False,
221
224
  reference="https://cohere.com/blog/introducing-embed-v3",
@@ -243,6 +246,7 @@ cohere_embed_multilingual_v3 = ModelMeta(
243
246
  model_prompts=cohere_model_prompts,
244
247
  ),
245
248
  name="bedrock/cohere-embed-multilingual-v3",
249
+ model_type=["dense"],
246
250
  languages=cohere_supported_languages,
247
251
  open_weights=False,
248
252
  reference="https://cohere.com/blog/introducing-embed-v3",