mteb 2.3.8__py3-none-any.whl → 2.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mteb/benchmarks/_create_table.py CHANGED
@@ -1,5 +1,6 @@
 import re
 from collections import defaultdict
+from typing import Literal

 import numpy as np
 import pandas as pd
@@ -241,6 +242,65 @@ def _create_per_task_table_from_benchmark_results(
     return per_task


+def _create_per_language_table_from_benchmark_results(
+    benchmark_results: BenchmarkResults,
+    language_view: list[str] | Literal["all"],
+) -> pd.DataFrame:
+    """Create per-language table from BenchmarkResults.
+
+    Returns a DataFrame with one row per model and one column per language.
+
+    Args:
+        benchmark_results: BenchmarkResults object containing model results
+        language_view: List of languages to include in the per-language table, or "all" for all languages present in the results
+    Returns:
+        DataFrame with per-language scores, ready for styling in the leaderboard
+    """
+    if language_view != "all" and not isinstance(language_view, list):
+        raise ValueError("language_view must be a list of languages or 'all'")
+
+    data = benchmark_results.to_dataframe(aggregation_level="language", format="long")
+
+    if data.empty:
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    if language_view != "all":
+        data = data[data["language"].isin(language_view)]
+
+    per_language = data.pivot_table(
+        index="model_name", columns="language", values="score", aggfunc="mean"
+    )
+
+    to_remove = per_language.isna().all(axis="columns")
+    if to_remove.all():
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    models_to_remove = list(per_language[to_remove].index)
+    per_language = per_language.drop(models_to_remove, axis=0)
+
+    per_language["borda_rank"] = _get_borda_rank(per_language)
+    per_language = per_language.sort_values("borda_rank", ascending=True)
+    per_language = per_language.drop(columns=["borda_rank"])
+    per_language = per_language.reset_index()
+
+    per_language["model_name"] = per_language["model_name"].map(
+        lambda name: name.split("/")[-1]
+    )
+    per_language = per_language.rename(
+        columns={
+            "model_name": "Model",
+        }
+    )
+
+    return per_language
+
+
 def _create_summary_table_mean_public_private(
     benchmark_results: BenchmarkResults,
 ) -> pd.DataFrame:
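
The heart of the new helper is the long-to-wide pivot. A minimal, self-contained sketch of just that step, using toy model names and scores rather than real mteb output:

```python
# Toy long-format rows: one (model, language, score) per row, as produced
# by to_dataframe(aggregation_level="language", format="long").
import pandas as pd

data = pd.DataFrame(
    {
        "model_name": ["org/model-a", "org/model-a", "org/model-b", "org/model-b"],
        "language": ["dan-Latn", "swe-Latn", "dan-Latn", "swe-Latn"],
        "score": [0.61, 0.58, 0.66, 0.55],
    }
)

# One row per model, one column per language; the mean handles a language
# that appears in several tasks.
per_language = data.pivot_table(
    index="model_name", columns="language", values="score", aggfunc="mean"
)
print(per_language)
# language     dan-Latn  swe-Latn
# model_name
# org/model-a      0.61      0.58
# org/model-b      0.66      0.55
```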
mteb/benchmarks/benchmark.py CHANGED
@@ -1,10 +1,11 @@
 from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Literal

 import pandas as pd

 from mteb.benchmarks._create_table import (
+    _create_per_language_table_from_benchmark_results,
    _create_per_task_table_from_benchmark_results,
    _create_summary_table_from_benchmark_results,
    _create_summary_table_mean_public_private,
@@ -50,6 +51,7 @@ class Benchmark:
     display_on_leaderboard: bool = True
     icon: str | None = None
     display_name: str | None = None
+    language_view: list[str] | Literal["all"] = field(default_factory=list)

     def __iter__(self) -> Iterable["AbsTask"]:
         return iter(self.tasks)
@@ -80,6 +82,28 @@ class Benchmark:
         """
         return _create_per_task_table_from_benchmark_results(benchmark_results)

+    def _create_per_language_table(
+        self, benchmark_results: BenchmarkResults
+    ) -> pd.DataFrame:
+        """Create per-language table. Called by the leaderboard app.
+
+        Returns:
+            A pandas DataFrame representing the per-language results.
+        """
+        if self.language_view == "all" or len(self.language_view) > 0:
+            return _create_per_language_table_from_benchmark_results(
+                benchmark_results, self.language_view
+            )
+        else:
+            no_results_frame = pd.DataFrame(
+                {
+                    "No results": [
+                        "The per-language table is not available for this benchmark."
+                    ]
+                }
+            )
+            return no_results_frame
+

 class RtebBenchmark(Benchmark):
     """Wrapper for RTEB benchmark."""
mteb/benchmarks/benchmarks/benchmarks.py CHANGED
@@ -471,6 +471,7 @@ SEB = Benchmark(
     name="MTEB(Scandinavian, v1)",
     display_name="Scandinavian",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
+    language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
     tasks=get_tasks(
         tasks=[
             # Bitext
@@ -953,6 +954,28 @@ MTEB_multilingual_v1 = Benchmark(
 MTEB_multilingual_v2 = Benchmark(
     name="MTEB(Multilingual, v2)",
     display_name="Multilingual",
+    language_view=[
+        "eng-Latn",  # English
+        "zho-Hans",  # Chinese (Simplified)
+        "hin-Deva",  # Hindi
+        "spa-Latn",  # Spanish
+        "fra-Latn",  # French
+        "ara-Arab",  # Arabic
+        "ben-Beng",  # Bengali
+        "rus-Cyrl",  # Russian
+        "por-Latn",  # Portuguese
+        "urd-Arab",  # Urdu
+        "ind-Latn",  # Indonesian
+        "deu-Latn",  # German
+        "jpn-Jpan",  # Japanese
+        "swa-Latn",  # Swahili
+        "mar-Deva",  # Marathi
+        "tel-Telu",  # Telugu
+        "tur-Latn",  # Turkish
+        "tam-Taml",  # Tamil
+        "vie-Latn",  # Vietnamese
+        "kor-Hang",  # Korean
+    ],
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-globe.svg",
     tasks=mteb_multilingual_tasks,
     description="A large-scale multilingual expansion of MTEB, driven mainly by highly-curated community contributions covering 250+ languages. ",
@@ -2283,6 +2306,14 @@ VIDORE_V2 = Benchmark(
 VIDORE_V3 = VidoreBenchmark(
     name="ViDoRe(v3)",
     display_name="ViDoRe V3",
+    language_view=[
+        "deu-Latn",
+        "eng-Latn",
+        "fra-Latn",
+        "ita-Latn",
+        "por-Latn",
+        "spa-Latn",
+    ],
     icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
     tasks=get_tasks(
         tasks=[
mteb/leaderboard/app.py CHANGED
@@ -24,6 +24,7 @@ from mteb.leaderboard.benchmark_selector import (
 )
 from mteb.leaderboard.figures import _performance_size_plot, _radar_chart
 from mteb.leaderboard.table import (
+    apply_per_language_styling_from_benchmark,
     apply_per_task_styling_from_benchmark,
     apply_summary_styling_from_benchmark,
 )
@@ -361,6 +362,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     per_task_table = apply_per_task_styling_from_benchmark(
         default_benchmark, filtered_benchmark_results
     )
+    per_language_table = apply_per_language_styling_from_benchmark(
+        default_benchmark,
+        filtered_benchmark_results,
+    )
+
+    # Check if this benchmark displays per-language results
+    display_language_table = len(default_benchmark.language_view) > 0

     lang_select = gr.CheckboxGroup(
         sorted(default_results.languages),
@@ -554,6 +562,16 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
                 download_per_task.click(
                     _download_table, inputs=[per_task_table], outputs=[download_per_task]
                 )
+            with gr.Tab(
+                "Performance per language", visible=display_language_table
+            ) as language_tab:
+                per_language_table.render()
+                download_per_language = gr.DownloadButton("Download Table")
+                download_per_language.click(
+                    _download_table,
+                    inputs=[per_language_table],
+                    outputs=[download_per_language],
+                )
             with gr.Tab("Task information"):
                 task_info_table = gr.DataFrame(_update_task_info, inputs=[task_select])  # noqa: F841

@@ -879,9 +897,18 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
        per_task = apply_per_task_styling_from_benchmark(
            benchmark, filtered_benchmark_results
        )
+        per_language = apply_per_language_styling_from_benchmark(
+            benchmark,
+            filtered_benchmark_results,
+        )
        elapsed = time.time() - start_time
        logger.debug(f"update_tables callback: {elapsed}s")
-        return summary, per_task
+        return (
+            summary,
+            per_task,
+            per_language,
+            gr.update(visible=len(benchmark.language_view) > 0),
+        )

    # Only update tables when models change, not when scores/tasks change directly
    # This avoids redundant updates since scores/tasks changes trigger update_models
@@ -890,7 +917,12 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
        item.change(
            update_tables,
            inputs=[scores, task_select, models, benchmark_select],
-            outputs=[summary_table, per_task_table],
+            outputs=[
+                summary_table,
+                per_task_table,
+                per_language_table,
+                language_tab,
+            ],
        )

    gr.Markdown(ACKNOWLEDGEMENT, elem_id="ack_markdown")
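
The tab wiring relies on two Gradio features: a `Tab` can be created hidden, and a callback can return `gr.update(visible=...)` targeting it. A minimal sketch of that pattern (component names are illustrative; exact behavior may vary slightly across Gradio 4/5 releases):

```python
import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show per-language tab", value=False)
    # Created hidden, like the leaderboard's language tab.
    with gr.Tab("Performance per language", visible=False) as language_tab:
        gr.Markdown("Per-language scores would render here.")
    # Returning gr.update(visible=...) from the callback toggles the tab.
    show.change(lambda v: gr.update(visible=v), inputs=[show], outputs=[language_tab])

# demo.launch()
```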
mteb/leaderboard/table.py CHANGED
@@ -120,6 +120,31 @@ def apply_per_task_styling_from_benchmark(
     return _apply_per_task_table_styling(per_task_df)


+def apply_per_language_styling_from_benchmark(
+    benchmark_instance: Benchmark, benchmark_results: BenchmarkResults
+) -> gr.DataFrame:
+    """Apply styling to the per-language table created by the benchmark instance's _create_per_language_table method.
+
+    This supports polymorphism - different benchmark classes can have different table generation logic.
+
+    Args:
+        benchmark_instance: The benchmark instance
+        benchmark_results: BenchmarkResults object containing model results (may be pre-filtered)
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    # Use the instance method to support polymorphism
+    per_language_df = benchmark_instance._create_per_language_table(benchmark_results)
+
+    # If it's a no-results DataFrame, return it as-is
+    if "No results" in per_language_df.columns:
+        return gr.DataFrame(per_language_df)
+
+    # Apply the styling
+    return _apply_per_language_table_styling(per_language_df)
+
+
 def _style_number_of_parameters(num_params: float) -> str:
     """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
     if num_params >= 1:
@@ -237,10 +262,47 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
         "{:.2f}", subset=task_score_columns, na_rep=""
     ).highlight_max(subset=task_score_columns, props="font-weight: bold")

+    # setting task name column width to 250px
+    column_widths = _get_column_widths(per_task_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
     return gr.DataFrame(
         per_task_style,
         interactive=False,
         pinned_columns=1,
+        column_widths=column_widths,
+        buttons=["copy", "fullscreen"],
+        show_search="filter",
+    )
+
+
+def _apply_per_language_table_styling(per_language: pd.DataFrame) -> gr.DataFrame:
+    """Apply styling to a raw per-language DataFrame
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    language_score_columns = per_language.select_dtypes("number").columns
+    per_language[language_score_columns] *= 100
+
+    if len(per_language.columns) > 100:  # Avoid gradio error on very wide tables
+        per_language_style = per_language.round(2)
+    else:
+        per_language_style = per_language.style.format(
+            "{:.2f}", subset=language_score_columns, na_rep=""
+        ).highlight_max(subset=language_score_columns, props="font-weight: bold")
+
+    # setting model name column width to 250px
+    column_widths = _get_column_widths(per_language_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
+    return gr.DataFrame(
+        per_language_style,
+        interactive=False,
+        pinned_columns=1,
+        column_widths=column_widths,
         buttons=["copy", "fullscreen"],
         show_search="filter",
     )
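
The styling path is plain pandas `Styler` before the result is handed to `gr.DataFrame`. A self-contained sketch of the format-and-bold step, on toy data with scores already scaled to 0-100 as above:

```python
import pandas as pd

df = pd.DataFrame(
    {"Model": ["model-a", "model-b"], "dan-Latn": [61.0, 66.2], "swe-Latn": [58.1, 55.0]}
)
score_cols = df.select_dtypes("number").columns

# Two decimals, blanks for NaN, and a bold face on each column's maximum.
styled = df.style.format("{:.2f}", subset=score_cols, na_rep="").highlight_max(
    subset=score_cols, props="font-weight: bold"
)
# styled.to_html() now bolds 66.20 (dan-Latn) and 58.10 (swe-Latn).
```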
mteb/models/model_implementations/e5_models.py CHANGED
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
 from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
 from mteb.types import PromptType

+from .facebookai import XLMR_LANGUAGES
+
 E5_PAPER_RELEASE_DATE = "2024-02-08"
-XLMR_LANGUAGES = [
-    "afr-Latn",
-    "amh-Latn",
-    "ara-Latn",
-    "asm-Latn",
-    "aze-Latn",
-    "bel-Latn",
-    "bul-Latn",
-    "ben-Latn",
-    "ben-Beng",
-    "bre-Latn",
-    "bos-Latn",
-    "cat-Latn",
-    "ces-Latn",
-    "cym-Latn",
-    "dan-Latn",
-    "deu-Latn",
-    "ell-Latn",
-    "eng-Latn",
-    "epo-Latn",
-    "spa-Latn",
-    "est-Latn",
-    "eus-Latn",
-    "fas-Latn",
-    "fin-Latn",
-    "fra-Latn",
-    "fry-Latn",
-    "gle-Latn",
-    "gla-Latn",
-    "glg-Latn",
-    "guj-Latn",
-    "hau-Latn",
-    "heb-Latn",
-    "hin-Latn",
-    "hin-Deva",
-    "hrv-Latn",
-    "hun-Latn",
-    "hye-Latn",
-    "ind-Latn",
-    "isl-Latn",
-    "ita-Latn",
-    "jpn-Latn",
-    "jav-Latn",
-    "kat-Latn",
-    "kaz-Latn",
-    "khm-Latn",
-    "kan-Latn",
-    "kor-Latn",
-    "kur-Latn",
-    "kir-Latn",
-    "lat-Latn",
-    "lao-Latn",
-    "lit-Latn",
-    "lav-Latn",
-    "mlg-Latn",
-    "mkd-Latn",
-    "mal-Latn",
-    "mon-Latn",
-    "mar-Latn",
-    "msa-Latn",
-    "mya-Latn",
-    "nep-Latn",
-    "nld-Latn",
-    "nob-Latn",
-    "orm-Latn",
-    "ori-Latn",
-    "pan-Latn",
-    "pol-Latn",
-    "pus-Latn",
-    "por-Latn",
-    "ron-Latn",
-    "rus-Latn",
-    "san-Latn",
-    "snd-Latn",
-    "sin-Latn",
-    "slk-Latn",
-    "slv-Latn",
-    "som-Latn",
-    "sqi-Latn",
-    "srp-Latn",
-    "sun-Latn",
-    "swe-Latn",
-    "swa-Latn",
-    "tam-Latn",
-    "tam-Taml",
-    "tel-Latn",
-    "tel-Telu",
-    "tha-Latn",
-    "tgl-Latn",
-    "tur-Latn",
-    "uig-Latn",
-    "ukr-Latn",
-    "urd-Latn",
-    "urd-Arab",
-    "uzb-Latn",
-    "vie-Latn",
-    "xho-Latn",
-    "yid-Latn",
-    "zho-Hant",
-    "zho-Hans",
-]
+

 MULTILINGUAL_E5_CITATION = """
 @article{wang2024multilingual,
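
Moving `XLMR_LANGUAGES` out of `e5_models.py` is a pure relocation: the new `from .facebookai import XLMR_LANGUAGES` line re-exposes the constant under its old module, so existing imports should keep working. A hedged check (untested sketch, assuming the package layout shown in RECORD below):

```python
# Both import paths are expected to resolve to the same list object
# after this change, since one module re-exports the other's binding.
from mteb.models.model_implementations.e5_models import XLMR_LANGUAGES as via_e5
from mteb.models.model_implementations.facebookai import XLMR_LANGUAGES as via_fb

assert via_e5 is via_fb
assert "dan-Latn" in via_fb
```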
mteb/models/model_implementations/facebookai.py ADDED
@@ -0,0 +1,147 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+XLMR_LANGUAGES = [
+    "afr-Latn",
+    "amh-Latn",
+    "ara-Latn",
+    "asm-Latn",
+    "aze-Latn",
+    "bel-Latn",
+    "bul-Latn",
+    "ben-Latn",
+    "ben-Beng",
+    "bre-Latn",
+    "bos-Latn",
+    "cat-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Latn",
+    "eng-Latn",
+    "epo-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Latn",
+    "fin-Latn",
+    "fra-Latn",
+    "fry-Latn",
+    "gle-Latn",
+    "gla-Latn",
+    "glg-Latn",
+    "guj-Latn",
+    "hau-Latn",
+    "heb-Latn",
+    "hin-Latn",
+    "hin-Deva",
+    "hrv-Latn",
+    "hun-Latn",
+    "hye-Latn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Latn",
+    "jav-Latn",
+    "kat-Latn",
+    "kaz-Latn",
+    "khm-Latn",
+    "kan-Latn",
+    "kor-Latn",
+    "kur-Latn",
+    "kir-Latn",
+    "lat-Latn",
+    "lao-Latn",
+    "lit-Latn",
+    "lav-Latn",
+    "mlg-Latn",
+    "mkd-Latn",
+    "mal-Latn",
+    "mon-Latn",
+    "mar-Latn",
+    "msa-Latn",
+    "mya-Latn",
+    "nep-Latn",
+    "nld-Latn",
+    "nob-Latn",
+    "orm-Latn",
+    "ori-Latn",
+    "pan-Latn",
+    "pol-Latn",
+    "pus-Latn",
+    "por-Latn",
+    "ron-Latn",
+    "rus-Latn",
+    "san-Latn",
+    "snd-Latn",
+    "sin-Latn",
+    "slk-Latn",
+    "slv-Latn",
+    "som-Latn",
+    "sqi-Latn",
+    "srp-Latn",
+    "sun-Latn",
+    "swe-Latn",
+    "swa-Latn",
+    "tam-Latn",
+    "tam-Taml",
+    "tel-Latn",
+    "tel-Telu",
+    "tha-Latn",
+    "tgl-Latn",
+    "tur-Latn",
+    "uig-Latn",
+    "ukr-Latn",
+    "urd-Latn",
+    "urd-Arab",
+    "uzb-Latn",
+    "vie-Latn",
+    "xho-Latn",
+    "yid-Latn",
+    "zho-Hant",
+    "zho-Hans",
+]
+
+
+xlmr_base = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-base",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=278043648,
+    memory_usage_mb=1064,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
+
+xlmr_large = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-large",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=559890432,
+    memory_usage_mb=2141,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
mteb/models/model_implementations/kblab.py ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+sbert_swedish = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KBLab/sentence-bert-swedish-cased",
+    languages=["swe-Latn"],
+    open_weights=True,
+    revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
+    release_date="2023-01-11",
+    n_parameters=124690944,
+    memory_usage_mb=476,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=384,
+    reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="sentence-transformers/all-mpnet-base-v2",
+)
mteb/models/model_implementations/kfst.py ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+xlmr_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KFST/XLMRoberta-en-da-sv-nb",
+    languages=["swe-Latn", "nob-Latn", "nno-Latn", "dan-Latn", "eng-Latn"],
+    open_weights=True,
+    revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
+    release_date="2022-02-22",
+    n_parameters=278043648,
+    memory_usage_mb=1061,
+    embed_dim=768,
+    license="not specified",
+    max_tokens=512,
+    reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="FacebookAI/xlm-roberta-base",
+)
mteb/models/model_implementations/pawan_models.py ADDED
@@ -0,0 +1,38 @@
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+PAWAN_EMBD_CITATION = """@misc{medhi2025pawanembd,
+  title={PawanEmbd-68M: Distilled Embedding Model},
+  author={Medhi, D.},
+  year={2025},
+  url={https://huggingface.co/dmedhi/PawanEmbd-68M}
+}"""
+
+pawan_embd_68m = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="dmedhi/PawanEmbd-68M",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
+    release_date="2025-12-08",
+    n_parameters=68_000_000,
+    memory_usage_mb=260,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/dmedhi/PawanEmbd-68M",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    adapted_from="ibm-granite/granite-embedding-278m-multilingual",
+    superseded_by=None,
+    public_training_code=None,
+    public_training_data=None,
+    use_instructions=False,
+    training_datasets={
+        "AllNLI",
+    },
+    citation=PAWAN_EMBD_CITATION,
+)
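
These new files are static `ModelMeta` registrations; once shipped, the metadata should be retrievable by model name. A hedged lookup sketch (mteb exposes `get_model_meta`, though the exact accessor surface can vary across versions):

```python
import mteb

# Look up one of the newly registered models by its HF-style name.
meta = mteb.get_model_meta("KBLab/sentence-bert-swedish-cased")
print(meta.languages)     # expected: ["swe-Latn"]
print(meta.n_parameters)  # expected: 124690944
```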
mteb/results/benchmark_results.py CHANGED
@@ -296,7 +296,7 @@ class BenchmarkResults(BaseModel):

     def to_dataframe(
         self,
-        aggregation_level: Literal["subset", "split", "task"] = "task",
+        aggregation_level: Literal["subset", "split", "task", "language"] = "task",
         aggregation_fn: Callable[[list[Score]], Any] | None = None,
         include_model_revision: bool = False,
         format: Literal["wide", "long"] = "wide",
@@ -321,6 +321,7 @@ class BenchmarkResults(BaseModel):
                 - "subset"/None: No aggregation will be done. The DataFrame will have one row per model, task, split and subset.
                 - "split": Aggregates the scores by split. The DataFrame will have one row per model, task and split.
                 - "task": Aggregates the scores by task. The DataFrame will have one row per model and task.
+                - "language": Aggregates the scores by language. The DataFrame will have one row per model and language.
             aggregation_fn: The function to use for aggregation. If None, the mean will be used.
             include_model_revision: If True, the model revision will be included in the DataFrame. If False, it will be excluded.
                 If there are multiple revisions for the same model, they will be joined using the `join_revisions` method.
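
With the new aggregation level, per-language scores come straight out of `to_dataframe`. A hedged usage sketch (`results` is assumed to be an already-loaded `BenchmarkResults` instance):

```python
# Long format: one row per (model_name, language), with the score
# averaged across every task/split/subset covering that language.
df = results.to_dataframe(aggregation_level="language", format="long")
print(df.columns.tolist())  # roughly: ["model_name", "language", "score"]
```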
mteb/results/model_result.py CHANGED
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
 def _aggregate_and_pivot(
     df: pd.DataFrame,
     columns: list[str],
-    aggregation_level: Literal["subset", "split", "task"],
+    aggregation_level: Literal["subset", "split", "task", "language"],
     format: Literal["wide", "long"],
     aggregation_fn: Callable[[list[Score]], Any] | None,
 ) -> pd.DataFrame:
@@ -43,6 +43,12 @@ def _aggregate_and_pivot(
     elif aggregation_level == "task":
         index_columns = ["task_name"]

+    elif aggregation_level == "language":
+        index_columns = ["language"]
+        df = df.explode("language").reset_index(
+            drop=True
+        )  # each language in its own row before aggregation
+
     # perform aggregation
     if aggregation_fn is None:
         aggregation_fn = np.mean
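
The `explode` step is what makes language aggregation work on rows whose `language` cell is a list (one task can cover several languages). A small, self-contained demonstration, with the caveat that a multilingual task's score is then counted once per language it covers:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "task_name": ["toy_bitext_task"],        # illustrative name
        "language": [["dan-Latn", "eng-Latn"]],  # list-valued cell
        "score": [0.42],
    }
)
print(df.explode("language").reset_index(drop=True))
#          task_name  language  score
# 0  toy_bitext_task  dan-Latn   0.42
# 1  toy_bitext_task  eng-Latn   0.42
```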
@@ -227,7 +233,7 @@ class ModelResult(BaseModel):
         )
         return entries

-    def _get_score_for_table(self) -> list[dict[str, str | float]]:
+    def _get_score_for_table(self) -> list[dict[str, str | float | list[str]]]:
         scores_data = []
         model_name = self.model_name
         for task_result in self.task_results:
@@ -239,10 +245,10 @@ class ModelResult(BaseModel):
                     "model_revision": self.model_revision,
                     "task_name": task_name,
                     "split": split,
+                    "language": score_item.get("languages", ["Unknown"]),
                     "subset": score_item.get("hf_subset", "default"),
                     "score": score_item.get("main_score", None),
                 }
-
                 scores_data.append(row)

         return scores_data
mteb-2.3.8.dist-info/METADATA → mteb-2.3.10.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.8
+Version: 2.3.10
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
mteb-2.3.8.dist-info/RECORD → mteb-2.3.10.dist-info/RECORD RENAMED
@@ -56,11 +56,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
-mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
+mteb/benchmarks/_create_table.py,sha256=b2RqGqi0ZonKbHecEcZiF4pkfE96smFRIzxOI82ETA8,22304
+mteb/benchmarks/benchmark.py,sha256=UEllUtZQ0L10SNnxRyKbiv4wLCMcNF2nUPhBDKY3nz8,5097
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
 mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=vWX6QZgqF9iKAE1tIQwaXw9f8q_WiBtdgo8yj4_CHFI,94767
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=mZQ56KBQwnBj2qLSQFOv39Av0HBNpH9HXYsDoFmqvu4,95640
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1430,10 +1430,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
 mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
-mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
+mteb/leaderboard/app.py,sha256=-sBAkZ9JTr9czhsYEbSm92MfTmB8BOQ17WDkQ1dsP90,34282
 mteb/leaderboard/benchmark_selector.py,sha256=qd-2L20RQ4ACke01UlytkhZok1dkWgfUlXzfET52kGc,7956
 mteb/leaderboard/figures.py,sha256=mPO0go_23QEhAm1RJdLiBxPFCoUiA74_ztyl6yimc7k,7553
-mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
+mteb/leaderboard/table.py,sha256=NxXAUkQRWtxjJwfIiO9yvdvw9do3ogzqmAn6az01SSc,10609
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1477,7 +1477,7 @@ mteb/models/model_implementations/colsmol_models.py,sha256=O2M7Ksydh94M_Iax4KytH
 mteb/models/model_implementations/conan_models.py,sha256=G-s7xo9VtNX-f7lWKtYVGHHiMMN0Xp44PlNIp7E0LAo,6502
 mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2W7qHhmF6KdXkvHrQNEI,16380
 mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
-mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
+mteb/models/model_implementations/e5_models.py,sha256=ZLRgzx2uEBc_yWY6DwcJFUNKG6RHpWSEVp1_jaEURhs,9373
 mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
 mteb/models/model_implementations/eagerworks_models.py,sha256=NOQkCUqn9jLSpf9p6KyaIHnJxYV1MNlr2z7hO2AcRSc,5744
 mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
@@ -1485,6 +1485,7 @@ mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQB
 mteb/models/model_implementations/euler_models.py,sha256=fZoXYeDjSRN2Qj1Pf-ROi8xok03PjhYi4FLEZKjMPkk,905
 mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
+mteb/models/model_implementations/facebookai.py,sha256=uhE6rB1YgxE0SIc7u8heE1U62qRFFA23IMgpjxBq_Ok,3116
 mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
 mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
 mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
@@ -1499,7 +1500,9 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
+mteb/models/model_implementations/kblab.py,sha256=DDh8gDEI6YPjS4_yGYWC4HatE0mFf7vhGDU83zzV7V0,866
 mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
+mteb/models/model_implementations/kfst.py,sha256=BQj0fxMJwyA6NOdK26NDYVL3z2PW1_F-lTTVImxEWZQ,892
 mteb/models/model_implementations/kowshik24_models.py,sha256=HoQpybjhquK2XSnawlq0aiSWFI5M7l6N4DNY4MQ-P10,976
 mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
 mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
@@ -1526,6 +1529,7 @@ mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXk
 mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
 mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
 mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=qGXv71qRjNCIFluZOwvfBlFlKKyN2bXBokwUPk4KHmM,1066
+mteb/models/model_implementations/pawan_models.py,sha256=rV2ePGIuYroocvwqDXm4VU369Y_Vr67CyAE-08K5B9c,1151
 mteb/models/model_implementations/piccolo_models.py,sha256=d8Dtkv_ZTUOCmJLLOuwquq-gX-2UfKvAtl_LvAS0Xi0,2113
 mteb/models/model_implementations/promptriever_models.py,sha256=S7uWes_P74p3OZR_KBJHJN_ezlvvRx2__46DMCWqV5M,6328
 mteb/models/model_implementations/pylate_models.py,sha256=yINGQL97S4xjj74-FTWpO4KHX-E9NDOEeyQWyRmmnaE,14772
@@ -1573,8 +1577,8 @@ mteb/models/search_encoder_index/search_backend_protocol.py,sha256=TSjlx88stJcMl
 mteb/models/search_encoder_index/search_indexes/__init__.py,sha256=Wm60_oUemUpFsvrCMW111dcPH2L2rt1iZrXMskXmG7o,88
 mteb/models/search_encoder_index/search_indexes/faiss_search_index.py,sha256=WMs3QbbYV13fRuT3dakmdVMZLFdc_9ZzSupS3QxlbVQ,5555
 mteb/results/__init__.py,sha256=EXQqK4Am5eIYzD52dpcGAFSdqnC38oE6JHN302oidHc,158
-mteb/results/benchmark_results.py,sha256=OWqeBxbNsPmOKRhxY980N5CikpdJXToDGJGTXUe64Lw,18209
-mteb/results/model_result.py,sha256=pTyGFTLg6l1wmc3Ul1CJK6ESBqWJAuU4aeT8iFygAdU,13746
+mteb/results/benchmark_results.py,sha256=b_g0QmTbwue9ZpWTtyPfgf_nyavckZHUgTVE6zqqtzM,18342
+mteb/results/model_result.py,sha256=Y6b_xfJlw8EFZq464ZVhyw0Rryv111hvMjnXbEZJpXk,14059
 mteb/results/task_result.py,sha256=DgmAw6akotjp8m8E6gE8QP9mQMxUvyzu1hnZ5o01GkU,32303
 mteb/tasks/__init__.py,sha256=izAxU0ip1F_YUwx0dFCuN35BaktdmePh6vlDiHC0kLo,503
 mteb/tasks/aggregated_tasks/__init__.py,sha256=Ufgbh1AirxCQkojO3AUhUFWM8zQG10cfdVTkj_PeyLI,104
@@ -2578,9 +2582,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.3.8.dist-info/METADATA,sha256=QMpRmhMLXi45L0d29kCoNcEugCwDl8IWCc3wE_r-fb4,13923
-mteb-2.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.3.8.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.3.8.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.3.8.dist-info/RECORD,,
+mteb-2.3.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.10.dist-info/METADATA,sha256=IPpkXC-YeiZU0BtiAnv-e9aS8X99_uAsGYxCCIz7nr4,13924
+mteb-2.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.10.dist-info/RECORD,,