mteb 2.3.1__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mteb/benchmarks/_create_table.py CHANGED
@@ -1,4 +1,3 @@
-import math
 import re
 from collections import defaultdict
 
@@ -32,26 +31,18 @@ def _split_on_capital(s: str) -> str:
     return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s))
 
 
-def _format_n_parameters(n_parameters) -> str:
-    if (n_parameters is None) or (not int(n_parameters)):
-        return "Unknown"
-    n_thousand = int(n_parameters // 1e3)
-    if n_thousand < 1:
-        return str(int(n_parameters))
-    n_zeros = math.log10(n_thousand)
-    if n_zeros >= 6:
-        return str(n_thousand // (10**6)) + "B"
-    if n_zeros >= 3:
-        return str(n_thousand // (10**3)) + "M"
-    return str(n_thousand) + "K"
+def _format_n_parameters(n_parameters) -> float | None:
+    """Format n_parameters to be in billions with decimals down to 1 million. I.e. 7M -> 0.007B, 1.5B -> 1.5B, None -> None"""
+    if n_parameters:
+        n_parameters = float(n_parameters)
+        return round(n_parameters / 1e9, 3)
+    return None
 
 
-def _format_max_tokens(max_tokens: float | None) -> str:
-    if max_tokens is None:
-        return "Unknown"
-    if max_tokens == np.inf:
-        return "Infinite"
-    return str(int(max_tokens))
+def _format_max_tokens(max_tokens: float | None) -> float | None:
+    if max_tokens is None or max_tokens == np.inf:
+        return None
+    return float(max_tokens)
 
 
 def _get_means_per_types(per_task: pd.DataFrame):
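Note: the tables now carry parameter counts and token limits as plain numbers (with `None` for unknown) instead of pre-rendered strings like `"7B"` or `"Infinite"`, which keeps the columns sortable; string rendering moves into `table.py` below. A minimal sketch of the new behavior (the two helpers reimplemented standalone, not imported from mteb):

```python
import numpy as np

def _format_n_parameters(n_parameters) -> float | None:
    """7_000_000 -> 0.007 (billions); falsy/None -> None."""
    if n_parameters:
        return round(float(n_parameters) / 1e9, 3)
    return None

def _format_max_tokens(max_tokens: float | None) -> float | None:
    """None and np.inf both collapse to None instead of 'Unknown'/'Infinite'."""
    if max_tokens is None or max_tokens == np.inf:
        return None
    return float(max_tokens)

assert _format_n_parameters(7_000_000) == 0.007    # 7M -> 0.007B
assert _format_n_parameters(1_500_000_000) == 1.5  # 1.5B -> 1.5
assert _format_n_parameters(None) is None
assert _format_max_tokens(np.inf) is None
```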
@@ -144,18 +135,18 @@ def _create_summary_table_from_benchmark_results(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -323,18 +314,18 @@ def _create_summary_table_mean_public_private(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -445,18 +436,18 @@ def _create_summary_table_mean_subset(
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
@@ -558,25 +549,23 @@ def _create_summary_table_mean_task_type(
 
     # Insert model metadata columns
     joint_table.insert(
-        1,
-        "Max Tokens",
-        model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
+        1, "Max Tokens", model_metas.map(lambda m: _format_max_tokens(m.max_tokens))
     )
     joint_table.insert(
         1,
         "Embedding Dimensions",
-        model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
+        model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
     )
     joint_table.insert(
         1,
-        "Number of Parameters",
+        "Number of Parameters (B)",
         model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
     )
     joint_table.insert(
         1,
         "Memory Usage (MB)",
         model_metas.map(
-            lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
+            lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
        ),
    )
 
mteb/benchmarks/benchmarks/__init__.py CHANGED
@@ -43,6 +43,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     VN_MTEB,
     CoIR,
     MTEB_code,
+    MTEB_MAIN_RU_v1_1,
     MTEB_multilingual_v1,
     MTEB_multilingual_v2,
     RAR_b,
@@ -113,6 +114,7 @@ __all__ = [
     "VISUAL_DOCUMENT_RETRIEVAL",
     "VN_MTEB",
     "CoIR",
+    "MTEB_MAIN_RU_v1_1",
     "MTEB_code",
     "MTEB_multilingual_v1",
     "MTEB_multilingual_v2",
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -185,7 +185,7 @@ We recommend that you use [MTEB(eng, v2)](http://mteb-leaderboard.hf.space/?benc
 
 MTEB_MAIN_RU = Benchmark(
     name="MTEB(rus, v1)",
-    display_name="Russian",
+    display_name="Russian legacy",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
     tasks=MTEBTasks(
         get_tasks(
@@ -240,6 +240,67 @@ MTEB_MAIN_RU = Benchmark(
     year = {2024},
 }
 """,
+    contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
+)
+
+MTEB_MAIN_RU_v1_1 = Benchmark(
+    name="MTEB(rus, v1.1)",
+    display_name="Russian",
+    icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/ru.svg",
+    tasks=MTEBTasks(
+        get_tasks(
+            languages=["rus"],
+            tasks=[
+                # Classification
+                "GeoreviewClassification",
+                "HeadlineClassification",
+                "InappropriatenessClassification",
+                "KinopoiskClassification",
+                "MassiveIntentClassification",
+                "MassiveScenarioClassification",
+                "RuReviewsClassification",
+                "RuSciBenchGRNTIClassification",
+                "RuSciBenchOECDClassification",
+                # Clustering
+                "GeoreviewClusteringP2P",
+                "RuSciBenchGRNTIClusteringP2P",
+                "RuSciBenchOECDClusteringP2P",
+                # MultiLabelClassification
+                "CEDRClassification",
+                "SensitiveTopicsClassification",
+                # PairClassification
+                "TERRa",
+                # Reranking
+                "MIRACLReranking",
+                "RuBQReranking",
+                # Retrieval
+                "MIRACLRetrievalHardNegatives.v2",
+                "RiaNewsRetrievalHardNegatives.v2",
+                "RuBQRetrieval",
+                # STS
+                "RUParaPhraserSTS",
+                "STS22",
+            ],
+        )
+        + get_tasks(
+            tasks=["RuSTSBenchmarkSTS"],
+            eval_splits=["test"],
+        )
+    ),
+    description="A Russian version of the Massive Text Embedding Benchmark covering the task categories of classification, clustering, reranking, pair classification, retrieval, and semantic similarity. In v1.1, MIRACLRetrieval and RiaNewsRetrieval were replaced with their HardNegatives variants for improved time-optimization measurement. MIRACLRetrievalHardNegatives and RiaNewsRetrievalHardNegatives are used in their updated versions (v2), both of which include improved default prompts.",
+    reference="https://aclanthology.org/2023.eacl-main.148/",
+    citation=r"""
+@misc{snegirev2024russianfocusedembeddersexplorationrumteb,
+    archiveprefix = {arXiv},
+    author = {Artem Snegirev and Maria Tikhonova and Anna Maksimova and Alena Fenogenova and Alexander Abramov},
+    eprint = {2408.12503},
+    primaryclass = {cs.CL},
+    title = {The Russian-focused embedders' exploration: ruMTEB benchmark and Russian embedding model design},
+    url = {https://arxiv.org/abs/2408.12503},
+    year = {2024},
+}
+""",
+    contacts=["Samoed", "artemsnegirev", "Drozhzhinastya"],
 )
 
 
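Note: once this version is installed, the new benchmark resolves by name like any other; a quick sketch (tasks are loaded lazily when the benchmark is actually run):

```python
import mteb

# v1.1 swaps MIRACLRetrieval/RiaNewsRetrieval for their HardNegatives v2
# variants; the v1 benchmark stays available as "Russian legacy".
benchmark = mteb.get_benchmark("MTEB(rus, v1.1)")
print(benchmark.display_name)  # "Russian"
print(len(benchmark.tasks))    # number of tasks in the benchmark
```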
mteb/leaderboard/app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
 import time
 import warnings
 from pathlib import Path
-from typing import Literal, get_args
+from typing import Literal
 from urllib.parse import urlencode
 
 import cachetools
@@ -14,7 +14,6 @@ import pandas as pd
 
 import mteb
 from mteb import BenchmarkResults
-from mteb.abstasks.task_metadata import TaskDomain, TaskType
 from mteb.benchmarks.benchmark import RtebBenchmark
 from mteb.cache import ResultCache
 from mteb.leaderboard.benchmark_selector import (
@@ -29,7 +28,6 @@ from mteb.leaderboard.table import (
     apply_summary_styling_from_benchmark,
 )
 from mteb.leaderboard.text_segments import ACKNOWLEDGEMENT, FAQ
-from mteb.types import Modalities
 
 logger = logging.getLogger(__name__)
 
@@ -139,7 +137,10 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
     df["languages"] = df["languages"].map(_format_list)
     df = df.sort_values("name")
     df["domains"] = df["domains"].map(_format_list)
-    df["name"] = f'<a href="{df["reference"]}" target="_blank">{df["name"]}</a>'
+    df["name"] = df.apply(
+        lambda row: f'<a href="{row["reference"]}" target="_blank">{row["name"]}</a>',
+        axis=1,
+    )
     df["modalities"] = df["modalities"].map(_format_list)
     df = df.rename(
         columns={
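Note: this is a genuine bug fix rather than style. Interpolating a whole pandas Series into an f-string evaluates once and embeds the Series' repr, instead of producing one link per row. A minimal illustration (toy DataFrame, not mteb code):

```python
import pandas as pd

df = pd.DataFrame({"name": ["TaskA", "TaskB"], "reference": ["https://a", "https://b"]})

# Broken: the f-string is evaluated once against the whole columns.
broken = f'<a href="{df["reference"]}">{df["name"]}</a>'  # one big string with Series reprs

# Fixed: evaluate per row.
df["name"] = df.apply(
    lambda row: f'<a href="{row["reference"]}" target="_blank">{row["name"]}</a>',
    axis=1,
)
print(df["name"].tolist())
# ['<a href="https://a" target="_blank">TaskA</a>',
#  '<a href="https://b" target="_blank">TaskB</a>']
```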
@@ -155,9 +156,8 @@ def _update_task_info(task_names: str) -> gr.DataFrame:
     df = df.drop(columns="reference")
     return gr.DataFrame(
         df,
-        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),
-        show_copy_button=True,
-        show_fullscreen_button=True,
+        datatype=["markdown"] + ["str"] * (len(df.columns) - 1),  # type: ignore
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
 
@@ -215,6 +215,110 @@ def _should_show_zero_shot_filter(benchmark_name: str) -> bool:
     return True
 
 
+@cachetools.cached(
+    cache={},
+    key=lambda benchmark_name, all_benchmark_results: hash(benchmark_name),
+)
+def _cache_on_benchmark_select(benchmark_name, all_benchmark_results):
+    start_time = time.time()
+    benchmark = mteb.get_benchmark(benchmark_name)
+    languages = [task.languages for task in benchmark.tasks if task.languages]
+    languages = set(itertools.chain.from_iterable(languages))
+    languages = sorted(languages)
+    domains = [
+        task.metadata.domains for task in benchmark.tasks if task.metadata.domains
+    ]
+    domains = set(itertools.chain.from_iterable(domains))
+    types = {task.metadata.type for task in benchmark.tasks if task.metadata.type}
+    modalities = set()
+    for task in benchmark.tasks:
+        modalities.update(task.metadata.modalities)
+    languages, domains, types, modalities = (
+        sorted(languages),
+        sorted(domains),
+        sorted(types),
+        sorted(modalities),
+    )
+    elapsed = time.time() - start_time
+    benchmark_results = all_benchmark_results[benchmark_name]
+    scores = benchmark_results._get_scores(format="long")
+    logger.debug(f"on_benchmark_select callback: {elapsed}s")
+    show_zero_shot = _should_show_zero_shot_filter(benchmark_name)
+
+    # Calculate initial models for this benchmark to avoid race conditions
+    benchmark_tasks = sorted([task.metadata.name for task in benchmark.tasks])
+    all_models_in_scores = list({entry["model_name"] for entry in scores})
+    initial_models = _filter_models(
+        all_models_in_scores,
+        benchmark_tasks,
+        availability=None,
+        compatibility=[],
+        instructions=None,
+        max_model_size=MAX_MODEL_SIZE,
+        zero_shot_setting="allow_all",
+    )
+    # Sort to ensure consistency with update_models
+    initial_models = sorted(initial_models)
+
+    return (
+        languages,
+        domains,
+        types,
+        modalities,
+        benchmark_tasks,
+        scores,
+        show_zero_shot,
+        initial_models,
+    )
+
+
+@cachetools.cached(
+    cache={},
+    key=lambda benchmark_name,
+    type_select,
+    domain_select,
+    lang_select,
+    modality_select: hash(
+        (
+            hash(benchmark_name),
+            hash(tuple(type_select)),
+            hash(tuple(domain_select)),
+            hash(tuple(lang_select)),
+            hash(tuple(modality_select)),
+        )
+    ),
+)
+def _cache_update_task_list(
+    benchmark_name, type_select, domain_select, lang_select, modality_select
+):
+    if not len(lang_select):
+        return []
+    start_time = time.time()
+    benchmark_tasks = []
+    tasks_to_keep = []
+    for task in mteb.get_benchmark(benchmark_name).tasks:
+        benchmark_tasks.append(task.metadata.name)
+        if task.metadata.type not in type_select:
+            continue
+        if task.metadata.domains and not (
+            set(task.metadata.domains) & set(domain_select)
+        ):
+            continue
+        if task.languages and not (set(task.languages) & set(lang_select)):
+            continue
+        if task.metadata.modalities and not (
+            set(task.metadata.modalities) & set(modality_select)
+        ):
+            continue
+        tasks_to_keep.append(task.metadata.name)
+    benchmark_tasks.sort()
+    tasks_to_keep.sort()
+    elapsed = time.time() - start_time
+    logger.debug(f"update_task_list callback: {elapsed}s")
+
+    return benchmark_tasks, tasks_to_keep
+
+
 def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     """Returns a Gradio Blocks app for the MTEB leaderboard."""
     logger.info("Loading all benchmark results")
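Note: hoisting the `@cachetools.cached` helpers to module level means the caches are created once at import time and shared across all sessions, instead of being rebuilt inside each Blocks closure. The underlying pattern, reduced to its essentials (illustrative function, not from mteb):

```python
import cachetools

@cachetools.cached(
    cache={},  # module-level dict: lives for the whole process, shared by all callers
    key=lambda name, big_unhashable: hash(name),  # key only on the cheap argument
)
def expensive_lookup(name, big_unhashable):
    # big_unhashable (e.g. the results mapping above) is deliberately excluded
    # from the cache key, so it never needs to be hashable.
    return big_unhashable[name]

data = {"a": 1}
assert expensive_lookup("a", data) == 1  # computed once
assert expensive_lookup("a", {}) == 1    # served from cache; second arg ignored
```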
@@ -227,6 +331,7 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         benchmark.name: all_results.select_tasks(benchmark.tasks).join_revisions()
         for benchmark in benchmarks
     }
+
     default_benchmark = mteb.get_benchmark(DEFAULT_BENCHMARK_NAME)
     default_results = all_benchmark_results[default_benchmark.name]
     logger.info("Benchmark results loaded")
@@ -257,55 +362,48 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         default_benchmark, filtered_benchmark_results
     )
 
-    lang_select = gr.Dropdown(
-        LANGUAGE,
+    lang_select = gr.CheckboxGroup(
+        sorted(default_results.languages),
         value=sorted(default_results.languages),
-        allow_custom_value=True,
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Language",
         info="Select languages to include.",
     )
-    type_select = gr.Dropdown(
-        sorted(get_args(TaskType)),
+    type_select = gr.CheckboxGroup(
+        sorted(default_results.task_types),
         value=sorted(default_results.task_types),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Task Type",
         info="Select task types to include.",
     )
-    domain_select = gr.Dropdown(
-        sorted(get_args(TaskDomain)),
+    domain_select = gr.CheckboxGroup(
+        sorted(default_results.domains),
         value=sorted(default_results.domains),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
        label="Domain",
        info="Select domains to include.",
     )
-    task_select = gr.Dropdown(
-        sorted(all_results.task_names),
+    task_select = gr.CheckboxGroup(
+        sorted(default_results.task_names),
         value=sorted(default_results.task_names),
-        allow_custom_value=True,
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Task",
         info="Select specific tasks to include",
     )
-    modality_select = gr.Dropdown(
-        sorted(get_args(Modalities)),
+    modality_select = gr.CheckboxGroup(
+        sorted(default_results.modalities),
         value=sorted(default_results.modalities),
-        multiselect=True,
+        show_label=True,
+        show_select_all=True,
         label="Modality",
         info="Select modalities to include.",
     )
 
-    head = """
-    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
-    """
-
-    with gr.Blocks(
-        fill_width=True,
-        theme=gr.themes.Soft(
-            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
-        ),
-        head=head,
-    ) as demo:
+    with gr.Blocks(fill_width=True) as demo:
         with gr.Sidebar(
             position="left",
             label="Benchmark Selection and Customization",
@@ -437,9 +535,6 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
 
             with gr.Tab("Performance per Model Size") as plot_tab:
                 plot = gr.Plot(_performance_size_plot, inputs=[summary_table])
-                gr.Markdown(
-                    "*We only display TOP 5 models that have been run on all tasks in the benchmark*"
-                )
                 plot_tab.select(
                     _performance_size_plot, inputs=[summary_table], outputs=[plot]
                 )
@@ -465,62 +560,25 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         # This sets the benchmark from the URL query parameters
         demo.load(_set_benchmark_on_load, inputs=[], outputs=[benchmark_select])
 
-        @cachetools.cached(
-            cache={},
-            key=lambda benchmark_name: hash(benchmark_name),
-        )
         def on_benchmark_select(benchmark_name):
-            start_time = time.time()
-            benchmark = mteb.get_benchmark(benchmark_name)
-            languages = [task.languages for task in benchmark.tasks if task.languages]
-            languages = set(itertools.chain.from_iterable(languages))
-            languages = sorted(languages)
-            domains = [
-                task.metadata.domains
-                for task in benchmark.tasks
-                if task.metadata.domains
-            ]
-            domains = set(itertools.chain.from_iterable(domains))
-            types = {
-                task.metadata.type for task in benchmark.tasks if task.metadata.type
-            }
-            modalities = set()
-            for task in benchmark.tasks:
-                modalities.update(task.metadata.modalities)
-            languages, domains, types, modalities = (
-                sorted(languages),
-                sorted(domains),
-                sorted(types),
-                sorted(modalities),
-            )
-            elapsed = time.time() - start_time
-            benchmark_results = all_benchmark_results[benchmark_name]
-            scores = benchmark_results._get_scores(format="long")
-            logger.debug(f"on_benchmark_select callback: {elapsed}s")
-            show_zero_shot = _should_show_zero_shot_filter(benchmark_name)
-
-            # Calculate initial models for this benchmark to avoid race conditions
-            benchmark_tasks = sorted([task.metadata.name for task in benchmark.tasks])
-            all_models_in_scores = list({entry["model_name"] for entry in scores})
-            initial_models = _filter_models(
-                all_models_in_scores,
-                benchmark_tasks,
-                availability=None,
-                compatibility=[],
-                instructions=None,
-                max_model_size=MAX_MODEL_SIZE,
-                zero_shot_setting="allow_all",
-            )
-            # Sort to ensure consistency with update_models
-            initial_models = sorted(initial_models)
-
-            return (
+            (
                 languages,
                 domains,
                 types,
                 modalities,
                 benchmark_tasks,
                 scores,
+                show_zero_shot,
+                initial_models,
+            ) = _cache_on_benchmark_select(benchmark_name, all_benchmark_results)
+
+            return (
+                gr.update(choices=languages, value=languages),
+                gr.update(choices=domains, value=domains),
+                gr.update(choices=types, value=types),
+                gr.update(choices=modalities, value=modalities),
+                gr.update(choices=benchmark_tasks, value=benchmark_tasks),
+                scores,
                 gr.update(visible=show_zero_shot),
                 initial_models,
             )
@@ -562,48 +620,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
             outputs=[scores],
         )
 
-        @cachetools.cached(
-            cache={},
-            key=lambda benchmark_name,
-            type_select,
-            domain_select,
-            lang_select,
-            modality_select: hash(
-                (
-                    hash(benchmark_name),
-                    hash(tuple(type_select)),
-                    hash(tuple(domain_select)),
-                    hash(tuple(lang_select)),
-                    hash(tuple(modality_select)),
-                )
-            ),
-        )
         def update_task_list(
             benchmark_name, type_select, domain_select, lang_select, modality_select
         ):
-            if not len(lang_select):
-                return []
-            start_time = time.time()
-            tasks_to_keep = []
-            for task in mteb.get_benchmark(benchmark_name).tasks:
-                if task.metadata.type not in type_select:
-                    continue
-                if task.metadata.domains is not None and not (
-                    set(task.metadata.domains) & set(domain_select)
-                ):
-                    continue
-                if task.languages is not None and not (
-                    set(task.languages) & set(lang_select)
-                ):
-                    continue
-                if task.metadata.modalities and not (
-                    set(task.metadata.modalities) & set(modality_select)
-                ):
-                    continue
-                tasks_to_keep.append(task.metadata.name)
-            elapsed = time.time() - start_time
-            logger.debug(f"update_task_list callback: {elapsed}s")
-            return sorted(tasks_to_keep)
+            benchmark_tasks, tasks_to_keep = _cache_update_task_list(
+                benchmark_name, type_select, domain_select, lang_select, modality_select
+            )
+            return gr.update(choices=benchmark_tasks, value=tasks_to_keep)
 
         type_select.input(
             update_task_list,
@@ -913,4 +936,15 @@ if __name__ == "__main__":
     warnings.filterwarnings("ignore", message="Couldn't get scores for .* due to .*")
 
     app = get_leaderboard_app()
-    app.launch(server_name="0.0.0.0", server_port=7860)
+
+    head = """
+    <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
+    """
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        theme=gr.themes.Soft(
+            font=[gr.themes.GoogleFont("Roboto Mono"), "Arial", "sans-serif"],
+        ),
+        head=head,
+    )
mteb/leaderboard/benchmark_selector.py CHANGED
@@ -75,14 +75,17 @@ GP_BENCHMARK_ENTRIES = [
                 "MTEB(kor, v1)",
                 "MTEB(nld, v1)",
                 "MTEB(pol, v1)",
-                "MTEB(rus, v1)",
+                "MTEB(rus, v1.1)",
                 "MTEB(fas, v2)",
                 "VN-MTEB (vie, v1)",
             ]
         )
         + [
             MenuEntry(
-                "Other", mteb.get_benchmarks(["MTEB(eng, v1)", "MTEB(fas, v1)"])
+                "Other",
+                mteb.get_benchmarks(
+                    ["MTEB(eng, v1)", "MTEB(fas, v1)", "MTEB(rus, v1)"]
+                ),
             )
         ],
     ),
mteb/leaderboard/figures.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import get_args
 
 import numpy as np
@@ -7,6 +8,8 @@ import plotly.graph_objects as go
 
 from mteb.abstasks.task_metadata import TaskType
 
+logger = logging.getLogger(__name__)
+
 
 def _text_plot(text: str):
     """Returns empty scatter plot with text added, this can be great for error messages."""
@@ -29,16 +32,17 @@ def _failsafe_plot(fun):
         try:
             return fun(*args, **kwargs)
         except Exception as e:
+            logger.error(f"Plot generation failed: {e}")
             return _text_plot(f"Couldn't produce plot. Reason: {e}")
 
     return wrapper
 
 
-def _parse_n_params(text: str) -> int:
-    if text.endswith("M"):
-        return float(text[:-1]) * 1e6
-    if text.endswith("B"):
-        return float(text[:-1]) * 1e9
+def _parse_n_params(params: float | None) -> int | float:
+    """Specified in billions."""
+    if params is None or np.isnan(params):
+        return None
+    return int(params * 1e9)
 
 
 def _parse_model_name(name: str) -> str:
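Note: with the table now storing billions as floats, `_parse_n_params` simply inverts `_format_n_parameters` instead of parsing `"7B"`/`"500M"` strings. A sketch of the round trip (both helpers reimplemented locally for illustration):

```python
import numpy as np

def _format_n_parameters(n):  # table side: raw count -> billions
    return round(float(n) / 1e9, 3) if n else None

def _parse_n_params(params):  # plot side: billions -> raw count
    if params is None or np.isnan(params):
        return None
    return int(params * 1e9)

n = 7_000_000_000
assert _parse_n_params(_format_n_parameters(n)) == n  # exact for round values
# Small models round to 3 decimals of a billion, i.e. millions resolution:
assert _parse_n_params(_format_n_parameters(40_800_000)) == 41_000_000
```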
@@ -51,20 +55,14 @@ def _parse_model_name(name: str) -> str:
 
 
 def _parse_float(value) -> float:
-    try:
-        if value == "Infinite":
-            return np.inf
-        else:
-            return float(value)
-    except ValueError:
+    if value is None or np.isnan(value):
         return np.nan
+    return float(value)
 
 
 def _process_max_tokens(x):
-    if pd.isna(x):
+    if pd.isna(x) or x is None or np.isinf(x):
         return "Unknown"
-    if np.isinf(x):
-        return "Infinite"
     return str(int(x))
 
 
@@ -112,7 +110,7 @@ def _add_size_guide(fig: go.Figure):
 @_failsafe_plot
 def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
     df = df.copy()
-    df["Number of Parameters"] = df["Number of Parameters"].map(_parse_n_params)
+    df["Number of Parameters"] = df["Number of Parameters (B)"].map(_parse_n_params)
     df["Model"] = df["Model"].map(_parse_model_name)
     df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "")
     df["Embedding Dimensions"] = df["Embedding Dimensions"].map(_parse_float)
mteb/leaderboard/table.py CHANGED
@@ -120,6 +120,14 @@ def apply_per_task_styling_from_benchmark(
     return _apply_per_task_table_styling(per_task_df)
 
 
+def _style_number_of_parameters(num_params: float) -> str:
+    """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
+    if num_params >= 1:
+        return f"{num_params:.1f}"
+    else:
+        return f"{num_params:.3f}"
+
+
 def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
     """Apply styling to a raw summary DataFrame
 
@@ -130,7 +138,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
         "Rank (Borda)",
         "Rank",
         "Model",
-        "Number of Parameters",
+        "Number of Parameters (B)",
         "Embedding Dimensions",
         "Max Tokens",
         "Memory Usage (MB)",
@@ -156,7 +164,14 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
     joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
 
     joint_table_style = joint_table.style.format(
-        {**dict.fromkeys(score_columns, "{:.2f}"), "Rank (Borda)": "{:.0f}"},
+        {
+            **dict.fromkeys(score_columns, "{:.2f}"),
+            "Rank (Borda)": "{:.0f}",
+            "Memory Usage (MB)": "{:.0f}",
+            "Embedding Dimensions": "{:.0f}",
+            "Max Tokens": "{:.0f}",
+            "Number of Parameters (B)": lambda x: _style_number_of_parameters(x),
+        },
         na_rep="",
     )
     joint_table_style = joint_table_style.highlight_min(
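Note: because the metadata columns are now numeric (with missing values for "unknown"), display formatting moves into the pandas `Styler`: `na_rep=""` renders missing entries as empty cells, and each column gets its own format string or callable. A self-contained sketch of the same idea (pandas `Styler` requires jinja2):

```python
import pandas as pd

def _style_number_of_parameters(num_params: float) -> str:
    # >= 1B: one decimal; < 1B: three decimals (values are stored in billions)
    return f"{num_params:.1f}" if num_params >= 1 else f"{num_params:.3f}"

df = pd.DataFrame(
    {
        "Number of Parameters (B)": [1.712, 0.345, None],
        "Max Tokens": [32768.0, 512.0, None],
    }
)
styled = df.style.format(
    {
        "Number of Parameters (B)": _style_number_of_parameters,
        "Max Tokens": "{:.0f}",
    },
    na_rep="",  # unknown metadata renders as an empty cell instead of "Unknown"
)
print(styled.to_html())  # cells: "1.7", "0.345", "" / "32768", "512", ""
```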
@@ -204,8 +219,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
         pinned_columns=2,
         column_widths=column_widths,
         wrap=True,
-        show_fullscreen_button=True,
-        show_copy_button=True,
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
 
@@ -227,7 +241,6 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
         per_task_style,
         interactive=False,
         pinned_columns=1,
-        show_fullscreen_button=True,
-        show_copy_button=True,
+        buttons=["copy", "fullscreen"],
         show_search="filter",
     )
mteb/models/model_implementations/clips_models.py ADDED
@@ -0,0 +1,97 @@
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+from .e5_models import ME5_TRAINING_DATA, model_prompts
+
+E5_NL_CITATION = """
+@misc{banar2025mtebnle5nlembeddingbenchmark,
+    archiveprefix = {arXiv},
+    author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
+    eprint = {2509.12340},
+    primaryclass = {cs.CL},
+    title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
+    url = {https://arxiv.org/abs/2509.12340},
+    year = {2025},
+}
+"""
+
+e5_nl_small = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-small-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
+    release_date="2025-09-23",
+    n_parameters=40_800_000,
+    memory_usage_mb=78,
+    embed_dim=384,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/clips/e5-small-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    adapted_from="intfloat/multilingual-e5-small",
+    citation=E5_NL_CITATION,
+)
+
+e5_nl_base = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-base-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
+    release_date="2025-09-23",
+    n_parameters=124_400_000,
+    memory_usage_mb=237,
+    embed_dim=768,
+    license="mit",
+    max_tokens=514,
+    reference="https://huggingface.co/clips/e5-base-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    adapted_from="intfloat/multilingual-e5-base",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    citation=E5_NL_CITATION,
+)
+
+e5_nl_large = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs=dict(
+        model_prompts=model_prompts,
+    ),
+    name="clips/e5-large-trm-nl",
+    languages=["nld-Latn"],
+    open_weights=True,
+    revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
+    release_date="2025-09-23",
+    n_parameters=355_000_000,
+    memory_usage_mb=1355,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=514,
+    reference="https://huggingface.co/clips/e5-large-trm-nl",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code="https://github.com/ELotfi/e5-nl",
+    public_training_data="https://huggingface.co/collections/clips/beir-nl",
+    training_datasets=ME5_TRAINING_DATA,  # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
+    adapted_from="intfloat/multilingual-e5-large",
+    citation=E5_NL_CITATION,
+)
mteb/models/model_implementations/cohere_models.py CHANGED
@@ -8,6 +8,7 @@ import torch
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
+from mteb._requires_package import requires_package
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -219,6 +220,8 @@ class CohereTextEmbeddingModel(AbsEncoder):
         output_dimension: int | None = None,
         **kwargs,
     ) -> None:
+        requires_package(self, "cohere", model_name, "pip install 'mteb[cohere]'")
+
         import cohere  # type: ignore
 
         self.model_name = model_name.removeprefix("Cohere/Cohere-")
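Note: `requires_package` is mteb's guard for optional dependencies, failing fast with an actionable install hint before the lazy `import cohere` can raise a bare `ImportError`. The general pattern it implements looks roughly like this (hedged sketch; the real helper lives in `mteb._requires_package` and only its call shape is shown in the diff above):

```python
import importlib.util

def requires_package(obj, package: str, model_name: str, install_instruction: str) -> None:
    """Raise a helpful error if an optional dependency is missing (illustrative)."""
    if importlib.util.find_spec(package) is None:
        raise ImportError(
            f"{type(obj).__name__} for '{model_name}' requires the `{package}` "
            f"package. Install it with: {install_instruction}"
        )
```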
mteb/models/model_implementations/google_models.py CHANGED
@@ -147,7 +147,6 @@ class GoogleTextEmbeddingModel(AbsEncoder):
 google_text_emb_004 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-004",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-embedding-004",
@@ -172,7 +171,6 @@ google_text_emb_004 = ModelMeta(
 google_text_emb_005 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-005",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-embedding-005",
@@ -197,7 +195,6 @@ google_text_emb_005 = ModelMeta(
 google_text_multilingual_emb_002 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="text-embedding-002",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/text-multilingual-embedding-002",
@@ -222,7 +219,6 @@ google_text_multilingual_emb_002 = ModelMeta(
 google_gemini_embedding_001 = ModelMeta(
     loader=GoogleTextEmbeddingModel,  # type: ignore[call-arg]
     loader_kwargs=dict(
-        model_name="gemini-embedding-001",
         model_prompts=MODEL_PROMPTS,
     ),
     name="google/gemini-embedding-001",
mteb/models/model_implementations/kennethenevoldsen_models.py ADDED
@@ -0,0 +1,72 @@
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+from mteb.models.sentence_transformer_wrapper import (
+    sentence_transformers_loader,
+)
+
+dfm_enc_large = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="KennethEnevoldsen/dfm-sentence-encoder-large",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
+    release_date="2023-07-12",
+    n_parameters=355087360,
+    memory_usage_mb=1554,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    superseded_by=None,
+    adapted_from="chcaa/dfm-encoder-large-v1",
+    training_datasets=set(),  # just contrastive pre-training
+    public_training_code="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large#hyperparameters",
+    citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
+    title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
+    shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
+    url = {https://openreview.net/forum?id=pJl_i7HIA72},
+    language = {en},
+    urldate = {2024-04-12},
+    author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
+    month = feb,
+    year = {2024},
+}
+""",
+    public_training_data="https://huggingface.co/datasets/danish-foundation-models/danish-gigaword",  # paragraphs extracted from Danish Gigaword
+)
+
+dfm_enc_med = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="KennethEnevoldsen/dfm-sentence-encoder-medium",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="701bce95d499fa97610d57e8823c54fd1fb79930",
+    release_date="2023-07-12",
+    n_parameters=124445952,
+    memory_usage_mb=475,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-medium",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    superseded_by=None,
+    adapted_from=None,
+    public_training_code=None,
+    training_datasets=set(),  # just contrastive pre-training
+    citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
+    title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
+    shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
+    url = {https://openreview.net/forum?id=pJl_i7HIA72},
+    language = {en},
+    urldate = {2024-04-12},
+    author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
+    month = feb,
+    year = {2024},
+}
+""",
+    public_training_data=None,
+)
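Note: with the metadata registered, the Danish encoders resolve through the normal model registry; a quick usage sketch (assumes `mteb.get_model_meta` / `mteb.get_model` from mteb's public API; loading the model downloads the weights from Hugging Face):

```python
import mteb

# Registered in this release via kennethenevoldsen_models.py
meta = mteb.get_model_meta("KennethEnevoldsen/dfm-sentence-encoder-large")
print(meta.n_parameters, meta.embed_dim)  # 355087360 1024

model = mteb.get_model("KennethEnevoldsen/dfm-sentence-encoder-large")
```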
mteb/models/model_implementations/linq_models.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 
+from mteb.models.instruct_wrapper import instruct_wrapper
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
 from mteb.types import PromptType
 
 from .e5_instruct import E5_MISTRAL_TRAINING_DATA
@@ -22,7 +22,7 @@ def instruction_template(
 
 
 Linq_Embed_Mistral = ModelMeta(
-    loader=SentenceTransformerEncoderWrapper,
+    loader=instruct_wrapper,
     loader_kwargs=dict(
         instruction_template=instruction_template,
         attn="cccc",
mteb/models/model_implementations/ru_sentence_models.py CHANGED
@@ -43,6 +43,10 @@ GIGA_task_prompts = {
         "query": "Given a news title, retrieve relevant news article",
         "document": "",
     },
+    "RiaNewsRetrievalHardNegatives.v2": {
+        "query": "Given a news title, retrieve relevant news article",
+        "document": "",
+    },
     "MIRACLReranking": {
         "query": "Given a question, retrieve Wikipedia passages that answer the question",
         "document": "",
@@ -51,6 +55,10 @@ GIGA_task_prompts = {
         "query": "Given a question, retrieve Wikipedia passages that answer the question",
         "document": "",
     },
+    "MIRACLRetrievalHardNegatives.v2": {
+        "query": "Given a question, retrieve Wikipedia passages that answer the question",
+        "document": "",
+    },
     "ArguAna": {
         "query": "Given a search query, retrieve passages that answer the question",
         "document": "Given a search query, retrieve passages that answer the question",
@@ -755,6 +763,7 @@ frida_prompts = {
     "SensitiveTopicsClassification": "categorize_topic: ",
     "TERRa": "categorize_entailment: ",
     "RiaNewsRetrieval": "categorize: ",
+    "RiaNewsRetrievalHardNegatives.v2": "",
 }
 
 frida_training_datasets = {
mteb-2.3.1.dist-info/METADATA → mteb-2.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.1
+Version: 2.3.3
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -37,7 +37,7 @@ Requires-Dist: torchvision>0.2.1; extra == "image"
 Provides-Extra: codecarbon
 Requires-Dist: codecarbon<3.0.0,>=2.0.0; extra == "codecarbon"
 Provides-Extra: leaderboard
-Requires-Dist: gradio==5.49.1; extra == "leaderboard"
+Requires-Dist: gradio==6.0.1; extra == "leaderboard"
 Requires-Dist: plotly<6.0.0,>=5.24.0; extra == "leaderboard"
 Requires-Dist: cachetools>=5.2.0; extra == "leaderboard"
 Requires-Dist: matplotlib>=3.9.4; extra == "leaderboard"
@@ -108,7 +108,7 @@ Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
 Dynamic: license-file
 
 <h1 align="center">
-    <img src="docs/images/logos/mteb_logo/dots-icon.png" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
+    <img src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/mteb_logo/dots-icon.png?raw=true" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
 </h1>
 
 <h3 align="center" style="border-bottom: none;">Multimodal toolbox for evaluating embeddings and retrieval systems</h3>
@@ -137,7 +137,7 @@ Dynamic: license-file
 
 
 <h3 align="center">
-    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="./docs/images/logos/hf_logo.png" /></a>
+    <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/hf_logo.png?raw=true" /></a>
 </h3>
 
 
mteb-2.3.1.dist-info/RECORD → mteb-2.3.3.dist-info/RECORD CHANGED
@@ -52,11 +52,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=z3iqa5dajLk0DYxEE9EeO1qpR3VJXokg8ZQ2rdUkvdM,20452
+mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
 mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
-mteb/benchmarks/benchmarks/__init__.py,sha256=UD6YjWPDVPSQdUhmD-4rho08Gs5LU9pS_C2jX5eUns0,2102
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=KDJanVYs3BkFn74VHwarZ8HJ2DX6EIgcVYBrlyjbv9I,89956
+mteb/benchmarks/benchmarks/__init__.py,sha256=0ySgD14Mu3Y1nJzazR_eUir81ia3x6E23N57SzQNkF0,2150
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=Ob2cHVXwFk328xbV-2ZmUibiVAMtT2RN1ygGgiP6UNQ,92662
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1424,10 +1424,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
 mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
-mteb/leaderboard/app.py,sha256=EsQ_qoJ26yJbg2qExKFFAx90R8VYOO6GbLtIzFuHGpE,32642
-mteb/leaderboard/benchmark_selector.py,sha256=hnXdo_Kj4UUAruFl6nZkCxAQ88IEfbaH8EADFJMMdVo,7686
-mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
-mteb/leaderboard/table.py,sha256=qs0H_Gt9FzRvzb-AL0YlqEe0YAsdYsVX3QlncfCBEqg,7828
+mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
+mteb/leaderboard/benchmark_selector.py,sha256=uH66SI0iT1J4_fnebViWa83dQwhPi7toBv7PRL_epDw,7784
+mteb/leaderboard/figures.py,sha256=cfOK82rRf-7sCjyP7GBxh4ezhOIt0OhD0_86mKtzLrg,7530
+mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1460,9 +1460,10 @@ mteb/models/model_implementations/bmretriever_models.py,sha256=ABfrACa028Dcujan7
 mteb/models/model_implementations/cadet_models.py,sha256=bDula_VroXOWgSw-tquvNVGcGg7_Z1xHnoTDn6OGOYU,2225
 mteb/models/model_implementations/cde_models.py,sha256=3nNU3nq3VZZcImFqH1VPj57-QJNMU6Ei2C_HCaicuUs,9012
 mteb/models/model_implementations/clip_models.py,sha256=zrfgNmZszu0JMtMNdCMzEohixsrnQ7xFhCqgsiucH_Q,6107
+mteb/models/model_implementations/clips_models.py,sha256=QwwoU4Zu_zwUgUg7Hn2lzpXK-GjXIST0qF_2oRxHm2Y,3410
 mteb/models/model_implementations/codefuse_models.py,sha256=19Y-d_qetVU64quzEvuUJ_K8DHo1JEEKEGqjRR48dFg,9113
 mteb/models/model_implementations/codesage_models.py,sha256=D4CdISGyv5f2GMYq4_efgm5qNq80SWAX5R2u5mjEiXM,2998
-mteb/models/model_implementations/cohere_models.py,sha256=LiYYRT3clhFlh0RE654KyZtO66vnIO22h79HJLmXYwk,13696
+mteb/models/model_implementations/cohere_models.py,sha256=OWFClVAN4phjBoxfGGDyGDmzMu-t2VrjCGFyAIWmz4w,13832
 mteb/models/model_implementations/cohere_v.py,sha256=K6VEw1NkyM2PuMd18kHE6aqPrcByYSwEmAKjvLods_w,15760
 mteb/models/model_implementations/colpali_models.py,sha256=7PJ0SshVXasyncTfZRFIf_ZWzbqxJhhzNKAoGLhNktw,9004
 mteb/models/model_implementations/colqwen_models.py,sha256=6upaxe19V8j5Ayu03Dgj5jPtC8SJBCITK_RionJRMSE,15545
@@ -1480,7 +1481,7 @@ mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXq
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
 mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
 mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
-mteb/models/model_implementations/google_models.py,sha256=ROo83udaUmPx0U_qfFuS55DSrCILVsRZu3oLp_P-srg,9296
+mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
 mteb/models/model_implementations/granite_vision_embedding_models.py,sha256=uqQ5-e_a-ADv3gf3sR9Drk0S4x8Gy8mZkpL-E4X16TM,7241
 mteb/models/model_implementations/gritlm_models.py,sha256=aS_CuioL95JAQMYiaKlGuAWU9wZjabn268Xut3bD8-w,3005
 mteb/models/model_implementations/gte_models.py,sha256=o26Xyu_tucUlP435Q_jB4-bl0xckgj4wtbutTwhYgIo,10073
@@ -1492,9 +1493,10 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
+mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
 mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
 mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
-mteb/models/model_implementations/linq_models.py,sha256=rnW27MybLMQ2Y3OxDyBTMSIsx_hXC0DlMD4kFv7NJV0,1918
+mteb/models/model_implementations/linq_models.py,sha256=EtvUyiNbjU-GJd1kS0Z0gBACkP2pFOjk0KfGMZz4K9Y,1872
 mteb/models/model_implementations/listconranker.py,sha256=pFISrZ91NHsnhc5El5U_ZPsB9cSTuTY8-nDzpoNMC9s,4485
 mteb/models/model_implementations/llm2clip_models.py,sha256=_sqAOb5oSbxn1oaXjWwPXRjTvxLT48xXL_tuabt2Ks0,9265
 mteb/models/model_implementations/llm2vec_models.py,sha256=Og_EqnOXgIfaTcVTl3Lj5BicG83ycnXS_YHNtK63I-A,12638
@@ -1531,7 +1533,7 @@ mteb/models/model_implementations/repllama_models.py,sha256=89HoqEpzkNysHeuf_-Yh
 mteb/models/model_implementations/rerankers_custom.py,sha256=ro73A9-hHudy3_qIMrhP-ja-3Xqu78r_aORm856zHQc,10651
 mteb/models/model_implementations/rerankers_monot5_based.py,sha256=rxVwzapNnHl4gCw79XVCaTXj3-wbToyj7XVL97tpAF4,34302
 mteb/models/model_implementations/richinfoai_models.py,sha256=llvYa0JUjyOOMbuTgOYoJ2qeqZ5rLHX1ZjZIYlYbdvA,989
-mteb/models/model_implementations/ru_sentence_models.py,sha256=Dstx46xFcAOC7giKPclC41GJTtFfmg4t6gLTdAnrxDk,40129
+mteb/models/model_implementations/ru_sentence_models.py,sha256=GuZFwbzaooufvSMGNjIsL0DDLrqHjhdSsAQHHZo5H08,40480
 mteb/models/model_implementations/salesforce_models.py,sha256=KslTK-IKeLvNG-vQir9k6swkaOgjk6eyozm_BOVgTpY,5160
 mteb/models/model_implementations/samilpwc_models.py,sha256=oMwKNwCxoH1jZgCy04oo2oVlBZWu253QMpnEEC6emz8,2021
 mteb/models/model_implementations/searchmap_models.py,sha256=XvVl99emIgnNUCxkTuFQXW6py2R8vgsArfpyHveCugw,1904
@@ -2567,9 +2569,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.3.1.dist-info/METADATA,sha256=AeDGGuksA6YmVR7zGXWB1jbk2mUD3w5tRCgTZjTnZ4U,13798
-mteb-2.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.3.1.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.3.1.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.3.1.dist-info/RECORD,,
+mteb-2.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.3.dist-info/METADATA,sha256=LbvRqywjhaqAK4910G8ueME52YrrqFzvm4NXl2M3MBA,13923
+mteb-2.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.3.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.3.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.3.dist-info/RECORD,,
File without changes