mteb 2.3.2__py3-none-any.whl → 2.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,3 @@
1
- import math
2
1
  import re
3
2
  from collections import defaultdict
4
3
 
@@ -32,26 +31,18 @@ def _split_on_capital(s: str) -> str:
32
31
  return " ".join(re.findall(r"[A-Z]?[a-z]+|[A-Z]+(?=[A-Z]|$)", s))
33
32
 
34
33
 
35
- def _format_n_parameters(n_parameters) -> str:
36
- if (n_parameters is None) or (not int(n_parameters)):
37
- return "Unknown"
38
- n_thousand = int(n_parameters // 1e3)
39
- if n_thousand < 1:
40
- return str(int(n_parameters))
41
- n_zeros = math.log10(n_thousand)
42
- if n_zeros >= 6:
43
- return str(n_thousand // (10**6)) + "B"
44
- if n_zeros >= 3:
45
- return str(n_thousand // (10**3)) + "M"
46
- return str(n_thousand) + "K"
34
+ def _format_n_parameters(n_parameters) -> float | None:
35
+ """Format n_parameters to be in billions with decimals down to 1 million. I.e. 7M -> 0.007B, 1.5B -> 1.5B, None -> None"""
36
+ if n_parameters:
37
+ n_parameters = float(n_parameters)
38
+ return round(n_parameters / 1e9, 3)
39
+ return None
47
40
 
48
41
 
49
- def _format_max_tokens(max_tokens: float | None) -> str:
50
- if max_tokens is None:
51
- return "Unknown"
52
- if max_tokens == np.inf:
53
- return "Infinite"
54
- return str(int(max_tokens))
42
+ def _format_max_tokens(max_tokens: float | None) -> float | None:
43
+ if max_tokens is None or max_tokens == np.inf:
44
+ return None
45
+ return float(max_tokens)
55
46
 
56
47
 
57
48
  def _get_means_per_types(per_task: pd.DataFrame):
@@ -144,18 +135,18 @@ def _create_summary_table_from_benchmark_results(
144
135
  joint_table.insert(
145
136
  1,
146
137
  "Embedding Dimensions",
147
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
138
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
148
139
  )
149
140
  joint_table.insert(
150
141
  1,
151
- "Number of Parameters",
142
+ "Number of Parameters (B)",
152
143
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
153
144
  )
154
145
  joint_table.insert(
155
146
  1,
156
147
  "Memory Usage (MB)",
157
148
  model_metas.map(
158
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
149
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
159
150
  ),
160
151
  )
161
152
 
@@ -323,18 +314,18 @@ def _create_summary_table_mean_public_private(
323
314
  joint_table.insert(
324
315
  1,
325
316
  "Embedding Dimensions",
326
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
317
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
327
318
  )
328
319
  joint_table.insert(
329
320
  1,
330
- "Number of Parameters",
321
+ "Number of Parameters (B)",
331
322
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
332
323
  )
333
324
  joint_table.insert(
334
325
  1,
335
326
  "Memory Usage (MB)",
336
327
  model_metas.map(
337
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
328
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
338
329
  ),
339
330
  )
340
331
 
@@ -445,18 +436,18 @@ def _create_summary_table_mean_subset(
445
436
  joint_table.insert(
446
437
  1,
447
438
  "Embedding Dimensions",
448
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
439
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
449
440
  )
450
441
  joint_table.insert(
451
442
  1,
452
- "Number of Parameters",
443
+ "Number of Parameters (B)",
453
444
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
454
445
  )
455
446
  joint_table.insert(
456
447
  1,
457
448
  "Memory Usage (MB)",
458
449
  model_metas.map(
459
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
450
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
460
451
  ),
461
452
  )
462
453
 
@@ -558,25 +549,23 @@ def _create_summary_table_mean_task_type(
558
549
 
559
550
  # Insert model metadata columns
560
551
  joint_table.insert(
561
- 1,
562
- "Max Tokens",
563
- model_metas.map(lambda m: _format_max_tokens(m.max_tokens)),
552
+ 1, "Max Tokens", model_metas.map(lambda m: _format_max_tokens(m.max_tokens))
564
553
  )
565
554
  joint_table.insert(
566
555
  1,
567
556
  "Embedding Dimensions",
568
- model_metas.map(lambda m: str(int(m.embed_dim)) if m.embed_dim else "Unknown"),
557
+ model_metas.map(lambda m: int(m.embed_dim) if m.embed_dim else None),
569
558
  )
570
559
  joint_table.insert(
571
560
  1,
572
- "Number of Parameters",
561
+ "Number of Parameters (B)",
573
562
  model_metas.map(lambda m: _format_n_parameters(m.n_parameters)),
574
563
  )
575
564
  joint_table.insert(
576
565
  1,
577
566
  "Memory Usage (MB)",
578
567
  model_metas.map(
579
- lambda m: str(int(m.memory_usage_mb)) if m.memory_usage_mb else "Unknown"
568
+ lambda m: int(m.memory_usage_mb) if m.memory_usage_mb else None
580
569
  ),
581
570
  )
582
571
 
mteb/leaderboard/app.py CHANGED
@@ -535,9 +535,6 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
535
535
 
536
536
  with gr.Tab("Performance per Model Size") as plot_tab:
537
537
  plot = gr.Plot(_performance_size_plot, inputs=[summary_table])
538
- gr.Markdown(
539
- "*We only display TOP 5 models that have been run on all tasks in the benchmark*"
540
- )
541
538
  plot_tab.select(
542
539
  _performance_size_plot, inputs=[summary_table], outputs=[plot]
543
540
  )
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from typing import get_args
2
3
 
3
4
  import numpy as np
@@ -7,6 +8,8 @@ import plotly.graph_objects as go
7
8
 
8
9
  from mteb.abstasks.task_metadata import TaskType
9
10
 
11
+ logger = logging.getLogger(__name__)
12
+
10
13
 
11
14
  def _text_plot(text: str):
12
15
  """Returns empty scatter plot with text added, this can be great for error messages."""
@@ -29,16 +32,17 @@ def _failsafe_plot(fun):
29
32
  try:
30
33
  return fun(*args, **kwargs)
31
34
  except Exception as e:
35
+ logger.error(f"Plot generation failed: {e}")
32
36
  return _text_plot(f"Couldn't produce plot. Reason: {e}")
33
37
 
34
38
  return wrapper
35
39
 
36
40
 
37
- def _parse_n_params(text: str) -> int:
38
- if text.endswith("M"):
39
- return float(text[:-1]) * 1e6
40
- if text.endswith("B"):
41
- return float(text[:-1]) * 1e9
41
+ def _parse_n_params(params: float | None) -> int | float:
42
+ """Specified in billions."""
43
+ if params is None or np.isnan(params):
44
+ return None
45
+ return int(params * 1e9)
42
46
 
43
47
 
44
48
  def _parse_model_name(name: str) -> str:
@@ -51,20 +55,14 @@ def _parse_model_name(name: str) -> str:
51
55
 
52
56
 
53
57
  def _parse_float(value) -> float:
54
- try:
55
- if value == "Infinite":
56
- return np.inf
57
- else:
58
- return float(value)
59
- except ValueError:
58
+ if value is None or np.isnan(value):
60
59
  return np.nan
60
+ return float(value)
61
61
 
62
62
 
63
63
  def _process_max_tokens(x):
64
- if pd.isna(x):
64
+ if pd.isna(x) or x is None or np.isinf(x):
65
65
  return "Unknown"
66
- if np.isinf(x):
67
- return "Infinite"
68
66
  return str(int(x))
69
67
 
70
68
 
@@ -112,7 +110,7 @@ def _add_size_guide(fig: go.Figure):
112
110
  @_failsafe_plot
113
111
  def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
114
112
  df = df.copy()
115
- df["Number of Parameters"] = df["Number of Parameters"].map(_parse_n_params)
113
+ df["Number of Parameters"] = df["Number of Parameters (B)"].map(_parse_n_params)
116
114
  df["Model"] = df["Model"].map(_parse_model_name)
117
115
  df["model_text"] = df["Model"].where(df["Model"].isin(models_to_annotate), "")
118
116
  df["Embedding Dimensions"] = df["Embedding Dimensions"].map(_parse_float)
mteb/leaderboard/table.py CHANGED
@@ -120,6 +120,14 @@ def apply_per_task_styling_from_benchmark(
120
120
  return _apply_per_task_table_styling(per_task_df)
121
121
 
122
122
 
123
+ def _style_number_of_parameters(num_params: float) -> str:
124
+ """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
125
+ if num_params >= 1:
126
+ return f"{num_params:.1f}"
127
+ else:
128
+ return f"{num_params:.3f}"
129
+
130
+
123
131
  def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
124
132
  """Apply styling to a raw summary DataFrame
125
133
 
@@ -130,7 +138,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
130
138
  "Rank (Borda)",
131
139
  "Rank",
132
140
  "Model",
133
- "Number of Parameters",
141
+ "Number of Parameters (B)",
134
142
  "Embedding Dimensions",
135
143
  "Max Tokens",
136
144
  "Memory Usage (MB)",
@@ -156,7 +164,14 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
156
164
  joint_table[score_columns] = joint_table[score_columns].map(_format_scores)
157
165
 
158
166
  joint_table_style = joint_table.style.format(
159
- {**dict.fromkeys(score_columns, "{:.2f}"), "Rank (Borda)": "{:.0f}"},
167
+ {
168
+ **dict.fromkeys(score_columns, "{:.2f}"),
169
+ "Rank (Borda)": "{:.0f}",
170
+ "Memory Usage (MB)": "{:.0f}",
171
+ "Embedding Dimensions": "{:.0f}",
172
+ "Max Tokens": "{:.0f}",
173
+ "Number of Parameters (B)": lambda x: _style_number_of_parameters(x),
174
+ },
160
175
  na_rep="",
161
176
  )
162
177
  joint_table_style = joint_table_style.highlight_min(
@@ -0,0 +1,97 @@
1
+ from mteb.models.model_meta import (
2
+ ModelMeta,
3
+ ScoringFunction,
4
+ )
5
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
6
+
7
+ from .e5_models import ME5_TRAINING_DATA, model_prompts
8
+
9
+ E5_NL_CITATION = """
10
+ @misc{banar2025mtebnle5nlembeddingbenchmark,
11
+ archiveprefix = {arXiv},
12
+ author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
13
+ eprint = {2509.12340},
14
+ primaryclass = {cs.CL},
15
+ title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
16
+ url = {https://arxiv.org/abs/2509.12340},
17
+ year = {2025},
18
+ }
19
+ """
20
+
21
+ e5_nl_small = ModelMeta(
22
+ loader=sentence_transformers_loader,
23
+ loader_kwargs=dict(
24
+ model_prompts=model_prompts,
25
+ ),
26
+ name="clips/e5-small-trm-nl",
27
+ languages=["nld-Latn"],
28
+ open_weights=True,
29
+ revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
30
+ release_date="2025-09-23",
31
+ n_parameters=40_800_000,
32
+ memory_usage_mb=78,
33
+ embed_dim=384,
34
+ license="mit",
35
+ max_tokens=512,
36
+ reference="https://huggingface.co/clips/e5-small-trm-nl",
37
+ similarity_fn_name=ScoringFunction.COSINE,
38
+ framework=["Sentence Transformers", "PyTorch"],
39
+ use_instructions=True,
40
+ public_training_code="https://github.com/ELotfi/e5-nl",
41
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
42
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
43
+ adapted_from="intfloat/multilingual-e5-small",
44
+ citation=E5_NL_CITATION,
45
+ )
46
+
47
+ e5_nl_base = ModelMeta(
48
+ loader=sentence_transformers_loader,
49
+ loader_kwargs=dict(
50
+ model_prompts=model_prompts,
51
+ ),
52
+ name="clips/e5-base-trm-nl",
53
+ languages=["nld-Latn"],
54
+ open_weights=True,
55
+ revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
56
+ release_date="2025-09-23",
57
+ n_parameters=124_400_000,
58
+ memory_usage_mb=237,
59
+ embed_dim=768,
60
+ license="mit",
61
+ max_tokens=514,
62
+ reference="https://huggingface.co/clips/e5-base-trm-nl",
63
+ similarity_fn_name=ScoringFunction.COSINE,
64
+ framework=["Sentence Transformers", "PyTorch"],
65
+ use_instructions=True,
66
+ public_training_code="https://github.com/ELotfi/e5-nl",
67
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
68
+ adapted_from="intfloat/multilingual-e5-base",
69
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
70
+ citation=E5_NL_CITATION,
71
+ )
72
+
73
+ e5_nl_large = ModelMeta(
74
+ loader=sentence_transformers_loader,
75
+ loader_kwargs=dict(
76
+ model_prompts=model_prompts,
77
+ ),
78
+ name="clips/e5-large-trm-nl",
79
+ languages=["nld-Latn"],
80
+ open_weights=True,
81
+ revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
82
+ release_date="2025-09-23",
83
+ n_parameters=355_000_000,
84
+ memory_usage_mb=1355,
85
+ embed_dim=1024,
86
+ license="mit",
87
+ max_tokens=514,
88
+ reference="https://huggingface.co/clips/e5-large-trm-nl",
89
+ similarity_fn_name=ScoringFunction.COSINE,
90
+ framework=["Sentence Transformers", "PyTorch"],
91
+ use_instructions=True,
92
+ public_training_code="https://github.com/ELotfi/e5-nl",
93
+ public_training_data="https://huggingface.co/collections/clips/beir-nl",
94
+ training_datasets=ME5_TRAINING_DATA, # mMARCO-NL, HotpotQA-NL, FEVER-NL, and LLM generated data
95
+ adapted_from="intfloat/multilingual-e5-large",
96
+ citation=E5_NL_CITATION,
97
+ )
@@ -8,6 +8,7 @@ import torch
8
8
  from torch.utils.data import DataLoader
9
9
  from tqdm.auto import tqdm
10
10
 
11
+ from mteb._requires_package import requires_package
11
12
  from mteb.abstasks.task_metadata import TaskMetadata
12
13
  from mteb.models.abs_encoder import AbsEncoder
13
14
  from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -219,6 +220,8 @@ class CohereTextEmbeddingModel(AbsEncoder):
219
220
  output_dimension: int | None = None,
220
221
  **kwargs,
221
222
  ) -> None:
223
+ requires_package(self, "cohere", model_name, "pip install 'mteb[cohere]'")
224
+
222
225
  import cohere # type: ignore
223
226
 
224
227
  self.model_name = model_name.removeprefix("Cohere/Cohere-")
@@ -147,7 +147,6 @@ class GoogleTextEmbeddingModel(AbsEncoder):
147
147
  google_text_emb_004 = ModelMeta(
148
148
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
149
149
  loader_kwargs=dict(
150
- model_name="text-embedding-004",
151
150
  model_prompts=MODEL_PROMPTS,
152
151
  ),
153
152
  name="google/text-embedding-004",
@@ -172,7 +171,6 @@ google_text_emb_004 = ModelMeta(
172
171
  google_text_emb_005 = ModelMeta(
173
172
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
174
173
  loader_kwargs=dict(
175
- model_name="text-embedding-005",
176
174
  model_prompts=MODEL_PROMPTS,
177
175
  ),
178
176
  name="google/text-embedding-005",
@@ -197,7 +195,6 @@ google_text_emb_005 = ModelMeta(
197
195
  google_text_multilingual_emb_002 = ModelMeta(
198
196
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
199
197
  loader_kwargs=dict(
200
- model_name="text-embedding-002",
201
198
  model_prompts=MODEL_PROMPTS,
202
199
  ),
203
200
  name="google/text-multilingual-embedding-002",
@@ -222,7 +219,6 @@ google_text_multilingual_emb_002 = ModelMeta(
222
219
  google_gemini_embedding_001 = ModelMeta(
223
220
  loader=GoogleTextEmbeddingModel, # type: ignore[call-arg]
224
221
  loader_kwargs=dict(
225
- model_name="gemini-embedding-001",
226
222
  model_prompts=MODEL_PROMPTS,
227
223
  ),
228
224
  name="google/gemini-embedding-001",
@@ -0,0 +1,72 @@
1
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
2
+ from mteb.models.sentence_transformer_wrapper import (
3
+ sentence_transformers_loader,
4
+ )
5
+
6
+ dfm_enc_large = ModelMeta(
7
+ loader=sentence_transformers_loader, # type: ignore
8
+ name="KennethEnevoldsen/dfm-sentence-encoder-large",
9
+ languages=["dan-Latn"],
10
+ open_weights=True,
11
+ revision="132c53391e7a780dc6a2f9a03724d0158fe7122c",
12
+ release_date="2023-07-12",
13
+ n_parameters=355087360,
14
+ memory_usage_mb=1554,
15
+ embed_dim=1024,
16
+ license="mit",
17
+ max_tokens=512,
18
+ reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large",
19
+ similarity_fn_name=ScoringFunction.COSINE,
20
+ framework=["Sentence Transformers", "PyTorch"],
21
+ use_instructions=False,
22
+ superseded_by=None,
23
+ adapted_from="chcaa/dfm-encoder-large-v1",
24
+ training_datasets=set(), # just contrastive pre-training
25
+ public_training_code="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-large#hyperparameters",
26
+ citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
27
+ title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
28
+ shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
29
+ url = {https://openreview.net/forum?id=pJl_i7HIA72},
30
+ language = {en},
31
+ urldate = {2024-04-12},
32
+ author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
33
+ month = feb,
34
+ year = {2024},
35
+ }
36
+ """,
37
+ public_training_data="https://huggingface.co/datasets/danish-foundation-models/danish-gigaword", # paragraphs extracted from Danish Gigaword
38
+ )
39
+
40
+ dfm_enc_med = ModelMeta(
41
+ loader=sentence_transformers_loader, # type: ignore
42
+ name="KennethEnevoldsen/dfm-sentence-encoder-medium",
43
+ languages=["dan-Latn"],
44
+ open_weights=True,
45
+ revision="701bce95d499fa97610d57e8823c54fd1fb79930",
46
+ release_date="2023-07-12",
47
+ n_parameters=124445952,
48
+ memory_usage_mb=475,
49
+ embed_dim=768,
50
+ license="mit",
51
+ max_tokens=512,
52
+ reference="https://huggingface.co/KennethEnevoldsen/dfm-sentence-encoder-medium",
53
+ similarity_fn_name=ScoringFunction.COSINE,
54
+ framework=["Sentence Transformers", "PyTorch"],
55
+ use_instructions=False,
56
+ superseded_by=None,
57
+ adapted_from=None,
58
+ public_training_code=None,
59
+ training_datasets=set(), # just contrastive pre-training
60
+ citation="""@article{enevoldsenScandinavianEmbeddingBenchmarks2024,
61
+ title = {The {Scandinavian} {Embedding} {Benchmarks}: {Comprehensive} {Assessment} of {Multilingual} and {Monolingual} {Text} {Embedding}},
62
+ shorttitle = {The {Scandinavian} {Embedding} {Benchmarks}},
63
+ url = {https://openreview.net/forum?id=pJl_i7HIA72},
64
+ language = {en},
65
+ urldate = {2024-04-12},
66
+ author = {Enevoldsen, Kenneth and Kardos, Márton and Muennighoff, Niklas and Nielbo, Kristoffer},
67
+ month = feb,
68
+ year = {2024},
69
+ }
70
+ """,
71
+ public_training_data=None,
72
+ )
@@ -1,7 +1,7 @@
1
1
  import torch
2
2
 
3
+ from mteb.models.instruct_wrapper import instruct_wrapper
3
4
  from mteb.models.model_meta import ModelMeta, ScoringFunction
4
- from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
5
5
  from mteb.types import PromptType
6
6
 
7
7
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA
@@ -22,7 +22,7 @@ def instruction_template(
22
22
 
23
23
 
24
24
  Linq_Embed_Mistral = ModelMeta(
25
- loader=SentenceTransformerEncoderWrapper,
25
+ loader=instruct_wrapper,
26
26
  loader_kwargs=dict(
27
27
  instruction_template=instruction_template,
28
28
  attn="cccc",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.3.2
3
+ Version: 2.3.3
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -108,7 +108,7 @@ Requires-Dist: qwen_vl_utils>=0.0.14; extra == "eager-embed"
108
108
  Dynamic: license-file
109
109
 
110
110
  <h1 align="center">
111
- <img src="docs/images/logos/mteb_logo/dots-icon.png" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
111
+ <img src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/mteb_logo/dots-icon.png?raw=true" alt="MTEB" width="28" style="vertical-align: middle; margin-right: 10px;"/> MTEB
112
112
  </h1>
113
113
 
114
114
  <h3 align="center" style="border-bottom: none;">Multimodal toolbox for evaluating embeddings and retrieval systems</h3>
@@ -137,7 +137,7 @@ Dynamic: license-file
137
137
 
138
138
 
139
139
  <h3 align="center">
140
- <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="./docs/images/logos/hf_logo.png" /></a>
140
+ <a href="https://huggingface.co/spaces/mteb/leaderboard"><img style="float: middle; padding: 10px 10px 10px 10px;" width="60" height="55" src="https://github.com/embeddings-benchmark/mteb/blob/main/docs/images/logos/hf_logo.png?raw=true" /></a>
141
141
  </h3>
142
142
 
143
143
 
@@ -52,7 +52,7 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
52
52
  mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
53
53
  mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
54
54
  mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
55
- mteb/benchmarks/_create_table.py,sha256=z3iqa5dajLk0DYxEE9EeO1qpR3VJXokg8ZQ2rdUkvdM,20452
55
+ mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
56
56
  mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
57
57
  mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
58
58
  mteb/benchmarks/benchmarks/__init__.py,sha256=0ySgD14Mu3Y1nJzazR_eUir81ia3x6E23N57SzQNkF0,2150
@@ -1424,10 +1424,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
1424
1424
  mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
1425
1425
  mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
1426
1426
  mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
1427
- mteb/leaderboard/app.py,sha256=rwU3sHxx8YP3kFOvFNAF8izgBd5zgv6lrvO4mZcEmfA,33255
1427
+ mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
1428
1428
  mteb/leaderboard/benchmark_selector.py,sha256=uH66SI0iT1J4_fnebViWa83dQwhPi7toBv7PRL_epDw,7784
1429
- mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
1430
- mteb/leaderboard/table.py,sha256=ZBCW8JDk5gLbi06FA6zuGESQ5Xri0XZIO0uK-aWb2us,7772
1429
+ mteb/leaderboard/figures.py,sha256=cfOK82rRf-7sCjyP7GBxh4ezhOIt0OhD0_86mKtzLrg,7530
1430
+ mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
1431
1431
  mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
1432
1432
  mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
1433
1433
  mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1460,9 +1460,10 @@ mteb/models/model_implementations/bmretriever_models.py,sha256=ABfrACa028Dcujan7
1460
1460
  mteb/models/model_implementations/cadet_models.py,sha256=bDula_VroXOWgSw-tquvNVGcGg7_Z1xHnoTDn6OGOYU,2225
1461
1461
  mteb/models/model_implementations/cde_models.py,sha256=3nNU3nq3VZZcImFqH1VPj57-QJNMU6Ei2C_HCaicuUs,9012
1462
1462
  mteb/models/model_implementations/clip_models.py,sha256=zrfgNmZszu0JMtMNdCMzEohixsrnQ7xFhCqgsiucH_Q,6107
1463
+ mteb/models/model_implementations/clips_models.py,sha256=QwwoU4Zu_zwUgUg7Hn2lzpXK-GjXIST0qF_2oRxHm2Y,3410
1463
1464
  mteb/models/model_implementations/codefuse_models.py,sha256=19Y-d_qetVU64quzEvuUJ_K8DHo1JEEKEGqjRR48dFg,9113
1464
1465
  mteb/models/model_implementations/codesage_models.py,sha256=D4CdISGyv5f2GMYq4_efgm5qNq80SWAX5R2u5mjEiXM,2998
1465
- mteb/models/model_implementations/cohere_models.py,sha256=LiYYRT3clhFlh0RE654KyZtO66vnIO22h79HJLmXYwk,13696
1466
+ mteb/models/model_implementations/cohere_models.py,sha256=OWFClVAN4phjBoxfGGDyGDmzMu-t2VrjCGFyAIWmz4w,13832
1466
1467
  mteb/models/model_implementations/cohere_v.py,sha256=K6VEw1NkyM2PuMd18kHE6aqPrcByYSwEmAKjvLods_w,15760
1467
1468
  mteb/models/model_implementations/colpali_models.py,sha256=7PJ0SshVXasyncTfZRFIf_ZWzbqxJhhzNKAoGLhNktw,9004
1468
1469
  mteb/models/model_implementations/colqwen_models.py,sha256=6upaxe19V8j5Ayu03Dgj5jPtC8SJBCITK_RionJRMSE,15545
@@ -1480,7 +1481,7 @@ mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXq
1480
1481
  mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
1481
1482
  mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
1482
1483
  mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
1483
- mteb/models/model_implementations/google_models.py,sha256=ROo83udaUmPx0U_qfFuS55DSrCILVsRZu3oLp_P-srg,9296
1484
+ mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
1484
1485
  mteb/models/model_implementations/granite_vision_embedding_models.py,sha256=uqQ5-e_a-ADv3gf3sR9Drk0S4x8Gy8mZkpL-E4X16TM,7241
1485
1486
  mteb/models/model_implementations/gritlm_models.py,sha256=aS_CuioL95JAQMYiaKlGuAWU9wZjabn268Xut3bD8-w,3005
1486
1487
  mteb/models/model_implementations/gte_models.py,sha256=o26Xyu_tucUlP435Q_jB4-bl0xckgj4wtbutTwhYgIo,10073
@@ -1492,9 +1493,10 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
1492
1493
  mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
1493
1494
  mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
1494
1495
  mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
1496
+ mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
1495
1497
  mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
1496
1498
  mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
1497
- mteb/models/model_implementations/linq_models.py,sha256=rnW27MybLMQ2Y3OxDyBTMSIsx_hXC0DlMD4kFv7NJV0,1918
1499
+ mteb/models/model_implementations/linq_models.py,sha256=EtvUyiNbjU-GJd1kS0Z0gBACkP2pFOjk0KfGMZz4K9Y,1872
1498
1500
  mteb/models/model_implementations/listconranker.py,sha256=pFISrZ91NHsnhc5El5U_ZPsB9cSTuTY8-nDzpoNMC9s,4485
1499
1501
  mteb/models/model_implementations/llm2clip_models.py,sha256=_sqAOb5oSbxn1oaXjWwPXRjTvxLT48xXL_tuabt2Ks0,9265
1500
1502
  mteb/models/model_implementations/llm2vec_models.py,sha256=Og_EqnOXgIfaTcVTl3Lj5BicG83ycnXS_YHNtK63I-A,12638
@@ -2567,9 +2569,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2567
2569
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2568
2570
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2569
2571
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2570
- mteb-2.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2571
- mteb-2.3.2.dist-info/METADATA,sha256=LEbGSbNtHSdIf03wLQKaayWlIbr0sGHRfUCvlO4Voe0,13797
2572
- mteb-2.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2573
- mteb-2.3.2.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2574
- mteb-2.3.2.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2575
- mteb-2.3.2.dist-info/RECORD,,
2572
+ mteb-2.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2573
+ mteb-2.3.3.dist-info/METADATA,sha256=LbvRqywjhaqAK4910G8ueME52YrrqFzvm4NXl2M3MBA,13923
2574
+ mteb-2.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2575
+ mteb-2.3.3.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2576
+ mteb-2.3.3.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2577
+ mteb-2.3.3.dist-info/RECORD,,
File without changes