mteb 2.3.8__py3-none-any.whl → 2.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/_create_table.py +60 -0
- mteb/benchmarks/benchmark.py +26 -2
- mteb/benchmarks/benchmarks/benchmarks.py +31 -0
- mteb/leaderboard/app.py +34 -2
- mteb/leaderboard/table.py +62 -0
- mteb/models/model_implementations/e5_models.py +3 -101
- mteb/models/model_implementations/facebookai.py +147 -0
- mteb/models/model_implementations/kblab.py +24 -0
- mteb/models/model_implementations/kfst.py +24 -0
- mteb/models/model_implementations/pawan_models.py +38 -0
- mteb/results/benchmark_results.py +2 -1
- mteb/results/model_result.py +9 -3
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/METADATA +1 -1
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/RECORD +18 -14
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/WHEEL +0 -0
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/entry_points.txt +0 -0
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/top_level.txt +0 -0
mteb/benchmarks/_create_table.py
CHANGED
@@ -1,5 +1,6 @@
 import re
 from collections import defaultdict
+from typing import Literal
 
 import numpy as np
 import pandas as pd
@@ -241,6 +242,65 @@ def _create_per_task_table_from_benchmark_results(
     return per_task
 
 
+def _create_per_language_table_from_benchmark_results(
+    benchmark_results: BenchmarkResults,
+    language_view: list[str] | Literal["all"],
+) -> pd.DataFrame:
+    """Create per-language table from BenchmarkResults.
+
+    Returns a DataFrame with one row per model and one column per language.
+
+    Args:
+        benchmark_results: BenchmarkResults object containing model results
+        language_view: List of languages to include in the per-language table, or "all" for all languages present in the results
+
+    Returns:
+        DataFrame with per-language scores, ready for styling in the leaderboard
+    """
+    if language_view != "all" and not isinstance(language_view, list):
+        raise ValueError("language_view must be a list of languages or 'all'")
+
+    data = benchmark_results.to_dataframe(aggregation_level="language", format="long")
+
+    if data.empty:
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    if language_view != "all":
+        data = data[data["language"].isin(language_view)]
+
+    per_language = data.pivot_table(
+        index="model_name", columns="language", values="score", aggfunc="mean"
+    )
+
+    to_remove = per_language.isna().all(axis="columns")
+    if to_remove.all():
+        no_results_frame = pd.DataFrame(
+            {"No results": ["You can try relaxing your criteria"]}
+        )
+        return no_results_frame
+
+    models_to_remove = list(per_language[to_remove].index)
+    per_language = per_language.drop(models_to_remove, axis=0)
+
+    per_language["borda_rank"] = _get_borda_rank(per_language)
+    per_language = per_language.sort_values("borda_rank", ascending=True)
+    per_language = per_language.drop(columns=["borda_rank"])
+    per_language = per_language.reset_index()
+
+    per_language["model_name"] = per_language["model_name"].map(
+        lambda name: name.split("/")[-1]
+    )
+    per_language = per_language.rename(
+        columns={
+            "model_name": "Model",
+        }
+    )
+
+    return per_language
+
+
 def _create_summary_table_mean_public_private(
     benchmark_results: BenchmarkResults,
 ) -> pd.DataFrame:
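To make the resulting table shape concrete, here is a minimal, self-contained pandas sketch of the pivot-and-prune logic in _create_per_language_table_from_benchmark_results; the toy scores are illustrative only, and the BenchmarkResults machinery and _get_borda_rank ranking are left out:

import pandas as pd

# Toy long-format scores, shaped like to_dataframe(aggregation_level="language", format="long")
data = pd.DataFrame(
    {
        "model_name": ["org/model-a", "org/model-a", "org/model-b"],
        "language": ["dan-Latn", "swe-Latn", "dan-Latn"],
        "score": [0.71, 0.68, 0.74],
    }
)

# One row per model, one column per language; mean over tasks sharing a language
per_language = data.pivot_table(
    index="model_name", columns="language", values="score", aggfunc="mean"
)

# Drop models with no scores in any selected language
per_language = per_language[~per_language.isna().all(axis="columns")]
print(per_language)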
mteb/benchmarks/benchmark.py
CHANGED
@@ -1,10 +1,11 @@
 from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
-from typing import TYPE_CHECKING
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Literal
 
 import pandas as pd
 
 from mteb.benchmarks._create_table import (
+    _create_per_language_table_from_benchmark_results,
     _create_per_task_table_from_benchmark_results,
     _create_summary_table_from_benchmark_results,
     _create_summary_table_mean_public_private,
@@ -50,6 +51,7 @@ class Benchmark:
     display_on_leaderboard: bool = True
     icon: str | None = None
    display_name: str | None = None
+    language_view: list[str] | Literal["all"] = field(default_factory=list)
 
     def __iter__(self) -> Iterable["AbsTask"]:
         return iter(self.tasks)
@@ -80,6 +82,28 @@ class Benchmark:
         """
         return _create_per_task_table_from_benchmark_results(benchmark_results)
 
+    def _create_per_language_table(
+        self, benchmark_results: BenchmarkResults
+    ) -> pd.DataFrame:
+        """Create per-language table. Called by the leaderboard app.
+
+        Returns:
+            A pandas DataFrame representing the per-language results.
+        """
+        if self.language_view == "all" or len(self.language_view) > 0:
+            return _create_per_language_table_from_benchmark_results(
+                benchmark_results, self.language_view
+            )
+        else:
+            no_results_frame = pd.DataFrame(
+                {
+                    "No results": [
+                        "The per-language table is not available for this benchmark."
+                    ]
+                }
+            )
+            return no_results_frame
+
 
 class RtebBenchmark(Benchmark):
     """Wrapper for RTEB benchmark."""
mteb/benchmarks/benchmarks/benchmarks.py
CHANGED
@@ -471,6 +471,7 @@ SEB = Benchmark(
     name="MTEB(Scandinavian, v1)",
     display_name="Scandinavian",
     icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/dk.svg",
+    language_view=["dan-Latn", "swe-Latn", "nno-Latn", "nob-Latn"],
     tasks=get_tasks(
         tasks=[
             # Bitext
@@ -953,6 +954,28 @@ MTEB_multilingual_v1 = Benchmark(
 MTEB_multilingual_v2 = Benchmark(
     name="MTEB(Multilingual, v2)",
     display_name="Multilingual",
+    language_view=[
+        "eng-Latn",  # English
+        "zho-Hans",  # Chinese (Simplified)
+        "hin-Deva",  # Hindi
+        "spa-Latn",  # Spanish
+        "fra-Latn",  # French
+        "ara-Arab",  # Arabic
+        "ben-Beng",  # Bengali
+        "rus-Cyrl",  # Russian
+        "por-Latn",  # Portuguese
+        "urd-Arab",  # Urdu
+        "ind-Latn",  # Indonesian
+        "deu-Latn",  # German
+        "jpn-Jpan",  # Japanese
+        "swa-Latn",  # Swahili
+        "mar-Deva",  # Marathi
+        "tel-Telu",  # Telugu
+        "tur-Latn",  # Turkish
+        "tam-Taml",  # Tamil
+        "vie-Latn",  # Vietnamese
+        "kor-Hang",  # Korean
+    ],
     icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-globe.svg",
     tasks=mteb_multilingual_tasks,
     description="A large-scale multilingual expansion of MTEB, driven mainly by highly-curated community contributions covering 250+ languages. ",
@@ -2283,6 +2306,14 @@ VIDORE_V2 = Benchmark(
 VIDORE_V3 = VidoreBenchmark(
     name="ViDoRe(v3)",
     display_name="ViDoRe V3",
+    language_view=[
+        "deu-Latn",
+        "eng-Latn",
+        "fra-Latn",
+        "ita-Latn",
+        "por-Latn",
+        "spa-Latn",
+    ],
     icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
     tasks=get_tasks(
         tasks=[
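As a usage sketch, a benchmark opts into the per-language leaderboard tab simply by listing language codes (or passing "all"). The snippet below mirrors the SEB definition above; it is hypothetical and assumes name, tasks, and description are the only fields a custom Benchmark needs:

from mteb import get_tasks
from mteb.benchmarks.benchmark import Benchmark

# Hypothetical benchmark; BornholmBitextMining is a real Scandinavian task in mteb.
nordic_bench = Benchmark(
    name="MyBench(Nordic)",
    description="Toy Scandinavian benchmark.",
    tasks=get_tasks(tasks=["BornholmBitextMining"]),
    language_view=["dan-Latn", "swe-Latn"],  # or "all" to show every language in the results
)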
mteb/leaderboard/app.py
CHANGED
@@ -24,6 +24,7 @@ from mteb.leaderboard.benchmark_selector import (
 )
 from mteb.leaderboard.figures import _performance_size_plot, _radar_chart
 from mteb.leaderboard.table import (
+    apply_per_language_styling_from_benchmark,
     apply_per_task_styling_from_benchmark,
     apply_summary_styling_from_benchmark,
 )
@@ -361,6 +362,13 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     per_task_table = apply_per_task_styling_from_benchmark(
         default_benchmark, filtered_benchmark_results
     )
+    per_language_table = apply_per_language_styling_from_benchmark(
+        default_benchmark,
+        filtered_benchmark_results,
+    )
+
+    # Check if this benchmark displays per-language results
+    display_language_table = len(default_benchmark.language_view) > 0
 
     lang_select = gr.CheckboxGroup(
         sorted(default_results.languages),
@@ -554,6 +562,16 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
             download_per_task.click(
                 _download_table, inputs=[per_task_table], outputs=[download_per_task]
             )
+        with gr.Tab(
+            "Performance per language", visible=display_language_table
+        ) as language_tab:
+            per_language_table.render()
+            download_per_language = gr.DownloadButton("Download Table")
+            download_per_language.click(
+                _download_table,
+                inputs=[per_language_table],
+                outputs=[download_per_language],
+            )
         with gr.Tab("Task information"):
            task_info_table = gr.DataFrame(_update_task_info, inputs=[task_select])  # noqa: F841
 
@@ -879,9 +897,18 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
         per_task = apply_per_task_styling_from_benchmark(
             benchmark, filtered_benchmark_results
         )
+        per_language = apply_per_language_styling_from_benchmark(
+            benchmark,
+            filtered_benchmark_results,
+        )
         elapsed = time.time() - start_time
         logger.debug(f"update_tables callback: {elapsed}s")
-        return
+        return (
+            summary,
+            per_task,
+            per_language,
+            gr.update(visible=len(benchmark.language_view) > 0),
+        )
 
     # Only update tables when models change, not when scores/tasks change directly
     # This avoids redundant updates since scores/tasks changes trigger update_models
@@ -890,7 +917,12 @@ def get_leaderboard_app(cache: ResultCache = ResultCache()) -> gr.Blocks:
     item.change(
         update_tables,
         inputs=[scores, task_select, models, benchmark_select],
-        outputs=[
+        outputs=[
+            summary_table,
+            per_task_table,
+            per_language_table,
+            language_tab,
+        ],
     )
 
     gr.Markdown(ACKNOWLEDGEMENT, elem_id="ack_markdown")
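The tab itself is toggled by routing gr.update(visible=...) into the Tab object, the same pattern the last two hunks wire into update_tables. A minimal standalone sketch, assuming a gradio version where gr.Tab accepts visible and can be an event output (as this diff relies on):

import gradio as gr

with gr.Blocks() as demo:
    show = gr.Checkbox(label="Show per-language tab", value=False)
    with gr.Tab("Performance per language", visible=False) as language_tab:
        gr.Markdown("per-language table goes here")
    # Returning gr.update(visible=...) for a Tab output toggles its visibility.
    show.change(lambda s: gr.update(visible=s), inputs=[show], outputs=[language_tab])

demo.launch()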
mteb/leaderboard/table.py
CHANGED
@@ -120,6 +120,31 @@ def apply_per_task_styling_from_benchmark(
     return _apply_per_task_table_styling(per_task_df)
 
 
+def apply_per_language_styling_from_benchmark(
+    benchmark_instance: Benchmark, benchmark_results: BenchmarkResults
+) -> gr.DataFrame:
+    """Apply styling to the per-language table created by the benchmark instance's _create_per_language_table method.
+
+    This supports polymorphism - different benchmark classes can have different table generation logic.
+
+    Args:
+        benchmark_instance: The benchmark instance
+        benchmark_results: BenchmarkResults object containing model results (may be pre-filtered)
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    # Use the instance method to support polymorphism
+    per_language_df = benchmark_instance._create_per_language_table(benchmark_results)
+
+    # If it's a no-results DataFrame, return it as-is
+    if "No results" in per_language_df.columns:
+        return gr.DataFrame(per_language_df)
+
+    # Apply the styling
+    return _apply_per_language_table_styling(per_language_df)
+
+
 def _style_number_of_parameters(num_params: float) -> str:
     """Anything bigger than 1B is shown in billions with 1 decimal (e.g. 1.712 > 1.7) while anything smaller as 0.xxx B (e.g. 0.345 remains 0.345)"""
     if num_params >= 1:
@@ -237,10 +262,47 @@ def _apply_per_task_table_styling(per_task: pd.DataFrame) -> gr.DataFrame:
         "{:.2f}", subset=task_score_columns, na_rep=""
     ).highlight_max(subset=task_score_columns, props="font-weight: bold")
 
+    # setting task name column width to 250px
+    column_widths = _get_column_widths(per_task_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
     return gr.DataFrame(
         per_task_style,
         interactive=False,
         pinned_columns=1,
+        column_widths=column_widths,
+        buttons=["copy", "fullscreen"],
+        show_search="filter",
+    )
+
+
+def _apply_per_language_table_styling(per_language: pd.DataFrame) -> gr.DataFrame:
+    """Apply styling to a raw per-language DataFrame
+
+    Returns:
+        Styled gr.DataFrame ready for display in the leaderboard
+    """
+    language_score_columns = per_language.select_dtypes("number").columns
+    per_language[language_score_columns] *= 100
+
+    if len(per_language.columns) > 100:  # Avoid gradio error on very wide tables
+        per_language_style = per_language.round(2)
+    else:
+        per_language_style = per_language.style.format(
+            "{:.2f}", subset=language_score_columns, na_rep=""
+        ).highlight_max(subset=language_score_columns, props="font-weight: bold")
+
+    # setting model name column width to 250px
+    column_widths = _get_column_widths(per_language_style.data)
+    if len(column_widths) > 0:
+        column_widths[0] = "250px"
+
+    return gr.DataFrame(
+        per_language_style,
+        interactive=False,
+        pinned_columns=1,
+        column_widths=column_widths,
         buttons=["copy", "fullscreen"],
         show_search="filter",
     )
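The styling step is plain pandas: scores are scaled to percentages, then a Styler formats and bolds the per-column maxima. A self-contained sketch of that pattern with toy data:

import pandas as pd

per_language = pd.DataFrame(
    {"Model": ["model-a", "model-b"], "dan-Latn": [0.71, 0.74], "swe-Latn": [0.68, None]}
)

score_cols = per_language.select_dtypes("number").columns
per_language[score_cols] *= 100  # display scores as percentages

styled = per_language.style.format(
    "{:.2f}", subset=score_cols, na_rep=""
).highlight_max(subset=score_cols, props="font-weight: bold")
print(styled.to_html())  # the leaderboard hands the Styler to gr.DataFrame instead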
mteb/models/model_implementations/e5_models.py
CHANGED
@@ -5,108 +5,10 @@ from mteb.models.model_meta import (
 from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
 from mteb.types import PromptType
 
+from .facebookai import XLMR_LANGUAGES
+
 E5_PAPER_RELEASE_DATE = "2024-02-08"
-XLMR_LANGUAGES = [
-    "afr-Latn",
-    "amh-Latn",
-    "ara-Latn",
-    "asm-Latn",
-    "aze-Latn",
-    "bel-Latn",
-    "bul-Latn",
-    "ben-Latn",
-    "ben-Beng",
-    "bre-Latn",
-    "bos-Latn",
-    "cat-Latn",
-    "ces-Latn",
-    "cym-Latn",
-    "dan-Latn",
-    "deu-Latn",
-    "ell-Latn",
-    "eng-Latn",
-    "epo-Latn",
-    "spa-Latn",
-    "est-Latn",
-    "eus-Latn",
-    "fas-Latn",
-    "fin-Latn",
-    "fra-Latn",
-    "fry-Latn",
-    "gle-Latn",
-    "gla-Latn",
-    "glg-Latn",
-    "guj-Latn",
-    "hau-Latn",
-    "heb-Latn",
-    "hin-Latn",
-    "hin-Deva",
-    "hrv-Latn",
-    "hun-Latn",
-    "hye-Latn",
-    "ind-Latn",
-    "isl-Latn",
-    "ita-Latn",
-    "jpn-Latn",
-    "jav-Latn",
-    "kat-Latn",
-    "kaz-Latn",
-    "khm-Latn",
-    "kan-Latn",
-    "kor-Latn",
-    "kur-Latn",
-    "kir-Latn",
-    "lat-Latn",
-    "lao-Latn",
-    "lit-Latn",
-    "lav-Latn",
-    "mlg-Latn",
-    "mkd-Latn",
-    "mal-Latn",
-    "mon-Latn",
-    "mar-Latn",
-    "msa-Latn",
-    "mya-Latn",
-    "nep-Latn",
-    "nld-Latn",
-    "nob-Latn",
-    "orm-Latn",
-    "ori-Latn",
-    "pan-Latn",
-    "pol-Latn",
-    "pus-Latn",
-    "por-Latn",
-    "ron-Latn",
-    "rus-Latn",
-    "san-Latn",
-    "snd-Latn",
-    "sin-Latn",
-    "slk-Latn",
-    "slv-Latn",
-    "som-Latn",
-    "sqi-Latn",
-    "srp-Latn",
-    "sun-Latn",
-    "swe-Latn",
-    "swa-Latn",
-    "tam-Latn",
-    "tam-Taml",
-    "tel-Latn",
-    "tel-Telu",
-    "tha-Latn",
-    "tgl-Latn",
-    "tur-Latn",
-    "uig-Latn",
-    "ukr-Latn",
-    "urd-Latn",
-    "urd-Arab",
-    "uzb-Latn",
-    "vie-Latn",
-    "xho-Latn",
-    "yid-Latn",
-    "zho-Hant",
-    "zho-Hans",
-]
+
 
 MULTILINGUAL_E5_CITATION = """
 @article{wang2024multilingual,
mteb/models/model_implementations/facebookai.py
ADDED
@@ -0,0 +1,147 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+XLMR_LANGUAGES = [
+    "afr-Latn",
+    "amh-Latn",
+    "ara-Latn",
+    "asm-Latn",
+    "aze-Latn",
+    "bel-Latn",
+    "bul-Latn",
+    "ben-Latn",
+    "ben-Beng",
+    "bre-Latn",
+    "bos-Latn",
+    "cat-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Latn",
+    "eng-Latn",
+    "epo-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Latn",
+    "fin-Latn",
+    "fra-Latn",
+    "fry-Latn",
+    "gle-Latn",
+    "gla-Latn",
+    "glg-Latn",
+    "guj-Latn",
+    "hau-Latn",
+    "heb-Latn",
+    "hin-Latn",
+    "hin-Deva",
+    "hrv-Latn",
+    "hun-Latn",
+    "hye-Latn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Latn",
+    "jav-Latn",
+    "kat-Latn",
+    "kaz-Latn",
+    "khm-Latn",
+    "kan-Latn",
+    "kor-Latn",
+    "kur-Latn",
+    "kir-Latn",
+    "lat-Latn",
+    "lao-Latn",
+    "lit-Latn",
+    "lav-Latn",
+    "mlg-Latn",
+    "mkd-Latn",
+    "mal-Latn",
+    "mon-Latn",
+    "mar-Latn",
+    "msa-Latn",
+    "mya-Latn",
+    "nep-Latn",
+    "nld-Latn",
+    "nob-Latn",
+    "orm-Latn",
+    "ori-Latn",
+    "pan-Latn",
+    "pol-Latn",
+    "pus-Latn",
+    "por-Latn",
+    "ron-Latn",
+    "rus-Latn",
+    "san-Latn",
+    "snd-Latn",
+    "sin-Latn",
+    "slk-Latn",
+    "slv-Latn",
+    "som-Latn",
+    "sqi-Latn",
+    "srp-Latn",
+    "sun-Latn",
+    "swe-Latn",
+    "swa-Latn",
+    "tam-Latn",
+    "tam-Taml",
+    "tel-Latn",
+    "tel-Telu",
+    "tha-Latn",
+    "tgl-Latn",
+    "tur-Latn",
+    "uig-Latn",
+    "ukr-Latn",
+    "urd-Latn",
+    "urd-Arab",
+    "uzb-Latn",
+    "vie-Latn",
+    "xho-Latn",
+    "yid-Latn",
+    "zho-Hant",
+    "zho-Hans",
+]
+
+
+xlmr_base = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-base",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="e73636d4f797dec63c3081bb6ed5c7b0bb3f2089",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=278043648,
+    memory_usage_mb=1064,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-base",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
+
+xlmr_large = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="FacebookAI/xlm-roberta-large",
+    languages=XLMR_LANGUAGES,
+    open_weights=True,
+    revision="c23d21b0620b635a76227c604d44e43a9f0ee389",
+    release_date="2019-11-05",  # arxiv paper release
+    n_parameters=559890432,
+    memory_usage_mb=2141,
+    embed_dim=1024,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/FacebookAI/xlm-roberta-large",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=set(),
+)
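The metadata marks both checkpoints as Sentence Transformers models, so they can be exercised directly with that library as a quick sanity check (a sketch; SentenceTransformer wraps plain HF checkpoints with mean pooling automatically, and the expected shape follows from embed_dim=768 above):

from sentence_transformers import SentenceTransformer

# Plain XLM-R checkpoint; SentenceTransformer adds a pooling layer automatically.
model = SentenceTransformer("FacebookAI/xlm-roberta-base")
embeddings = model.encode(["Hej verden", "Hello world"])
print(embeddings.shape)  # (2, 768)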
mteb/models/model_implementations/kblab.py
ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+sbert_swedish = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KBLab/sentence-bert-swedish-cased",
+    languages=["swe-Latn"],
+    open_weights=True,
+    revision="6b5e83cd29c03729cfdc33d13b1423399b0efb5c",
+    release_date="2023-01-11",
+    n_parameters=124690944,
+    memory_usage_mb=476,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=384,
+    reference="https://huggingface.co/KBLab/sentence-bert-swedish-cased",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="sentence-transformers/all-mpnet-base-v2",
+)
mteb/models/model_implementations/kfst.py
ADDED
@@ -0,0 +1,24 @@
+from mteb.models import sentence_transformers_loader
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+
+xlmr_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore[arg-type]
+    name="KFST/XLMRoberta-en-da-sv-nb",
+    languages=["swe-Latn", "nob-Latn", "nno-Latn", "dan-Latn", "eng-Latn"],
+    open_weights=True,
+    revision="d40c10ca7b1e68b5a8372f2d112dac9eb3279df1",
+    release_date="2022-02-22",
+    n_parameters=278043648,
+    memory_usage_mb=1061,
+    embed_dim=768,
+    license="not specified",
+    max_tokens=512,
+    reference="https://huggingface.co/KFST/XLMRoberta-en-da-sv-nb",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=None,
+    adapted_from="FacebookAI/xlm-roberta-base",
+)
mteb/models/model_implementations/pawan_models.py
ADDED
@@ -0,0 +1,38 @@
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+PAWAN_EMBD_CITATION = """@misc{medhi2025pawanembd,
+    title={PawanEmbd-68M: Distilled Embedding Model},
+    author={Medhi, D.},
+    year={2025},
+    url={https://huggingface.co/dmedhi/PawanEmbd-68M}
+}"""
+
+pawan_embd_68m = ModelMeta(
+    loader=sentence_transformers_loader,
+    name="dmedhi/PawanEmbd-68M",
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="32f295145802bdbd65699ad65fd27d2a5b69a909",
+    release_date="2025-12-08",
+    n_parameters=68_000_000,
+    memory_usage_mb=260,
+    embed_dim=768,
+    license="apache-2.0",
+    max_tokens=512,
+    reference="https://huggingface.co/dmedhi/PawanEmbd-68M",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["Sentence Transformers", "PyTorch"],
+    adapted_from="ibm-granite/granite-embedding-278m-multilingual",
+    superseded_by=None,
+    public_training_code=None,
+    public_training_data=None,
+    use_instructions=False,
+    training_datasets={
+        "AllNLI",
+    },
+    citation=PAWAN_EMBD_CITATION,
+)
mteb/results/benchmark_results.py
CHANGED
@@ -296,7 +296,7 @@ class BenchmarkResults(BaseModel):
 
     def to_dataframe(
         self,
-        aggregation_level: Literal["subset", "split", "task"] = "task",
+        aggregation_level: Literal["subset", "split", "task", "language"] = "task",
         aggregation_fn: Callable[[list[Score]], Any] | None = None,
         include_model_revision: bool = False,
         format: Literal["wide", "long"] = "wide",
@@ -321,6 +321,7 @@ class BenchmarkResults(BaseModel):
                 - "subset"/None: No aggregation will be done. The DataFrame will have one row per model, task, split and subset.
                 - "split": Aggregates the scores by split. The DataFrame will have one row per model, task and split.
                 - "task": Aggregates the scores by task. The DataFrame will have one row per model and task.
+                - "language": Aggregates the scores by language. The DataFrame will have one row per model and language.
             aggregation_fn: The function to use for aggregation. If None, the mean will be used.
             include_model_revision: If True, the model revision will be included in the DataFrame. If False, it will be excluded.
                 If there are multiple revisions for the same model, they will be joined using the `join_revisions` method.
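From the caller's side, the new aggregation level is reached through the same to_dataframe entry point; a sketch, assuming mteb.load_results() is the usual way to obtain a BenchmarkResults object:

import mteb

# Load cached leaderboard results, then aggregate one score per model/language.
results = mteb.load_results()
long_scores = results.to_dataframe(aggregation_level="language", format="long")
print(long_scores.head())  # expected columns: model_name, language, score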
mteb/results/model_result.py
CHANGED
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
 def _aggregate_and_pivot(
     df: pd.DataFrame,
     columns: list[str],
-    aggregation_level: Literal["subset", "split", "task"],
+    aggregation_level: Literal["subset", "split", "task", "language"],
     format: Literal["wide", "long"],
     aggregation_fn: Callable[[list[Score]], Any] | None,
 ) -> pd.DataFrame:
@@ -43,6 +43,12 @@ def _aggregate_and_pivot(
     elif aggregation_level == "task":
         index_columns = ["task_name"]
 
+    elif aggregation_level == "language":
+        index_columns = ["language"]
+        df = df.explode("language").reset_index(
+            drop=True
+        )  # each language in its own row before aggregation
+
     # perform aggregation
     if aggregation_fn is None:
         aggregation_fn = np.mean
@@ -227,7 +233,7 @@ class ModelResult(BaseModel):
         )
         return entries
 
-    def _get_score_for_table(self) -> list[dict[str, str | float]]:
+    def _get_score_for_table(self) -> list[dict[str, str | float | list[str]]]:
         scores_data = []
         model_name = self.model_name
         for task_result in self.task_results:
@@ -239,10 +245,10 @@ class ModelResult(BaseModel):
                     "model_revision": self.model_revision,
                     "task_name": task_name,
                     "split": split,
+                    "language": score_item.get("languages", ["Unknown"]),
                     "subset": score_item.get("hf_subset", "default"),
                     "score": score_item.get("main_score", None),
                 }
-
                 scores_data.append(row)
 
         return scores_data
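The explode step above is what turns a task-level row tagged with several languages into one row per language before the mean is taken. A minimal standalone illustration with toy data:

import pandas as pd

rows = pd.DataFrame(
    {
        "task_name": ["BornholmBitextMining"],
        "language": [["dan-Latn", "swe-Latn"]],  # one task scored over two languages
        "score": [0.7],
    }
)

# One row per (task, language); the score is repeated, then averaged per language later.
exploded = rows.explode("language").reset_index(drop=True)
print(exploded)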
{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.3.8
+Version: 2.3.10
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/RECORD
CHANGED
@@ -56,11 +56,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=
-mteb/benchmarks/benchmark.py,sha256=
+mteb/benchmarks/_create_table.py,sha256=b2RqGqi0ZonKbHecEcZiF4pkfE96smFRIzxOI82ETA8,22304
+mteb/benchmarks/benchmark.py,sha256=UEllUtZQ0L10SNnxRyKbiv4wLCMcNF2nUPhBDKY3nz8,5097
 mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
 mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=mZQ56KBQwnBj2qLSQFOv39Av0HBNpH9HXYsDoFmqvu4,95640
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1430,10 +1430,10 @@ mteb/languages/language_family.json,sha256=OUGcHeOIPcZPb2FWmYLhxTS0JxjK5y3Fo6x0P
 mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432w81V8,3998
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
-mteb/leaderboard/app.py,sha256
+mteb/leaderboard/app.py,sha256=-sBAkZ9JTr9czhsYEbSm92MfTmB8BOQ17WDkQ1dsP90,34282
 mteb/leaderboard/benchmark_selector.py,sha256=qd-2L20RQ4ACke01UlytkhZok1dkWgfUlXzfET52kGc,7956
 mteb/leaderboard/figures.py,sha256=mPO0go_23QEhAm1RJdLiBxPFCoUiA74_ztyl6yimc7k,7553
-mteb/leaderboard/table.py,sha256=
+mteb/leaderboard/table.py,sha256=NxXAUkQRWtxjJwfIiO9yvdvw9do3ogzqmAn6az01SSc,10609
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
 mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
 mteb/models/abs_encoder.py,sha256=m0JkRfRPMYadDgBR9eozRloI31ZSWkSzDFINpwbfLZk,16533
@@ -1477,7 +1477,7 @@ mteb/models/model_implementations/colsmol_models.py,sha256=O2M7Ksydh94M_Iax4KytH
 mteb/models/model_implementations/conan_models.py,sha256=G-s7xo9VtNX-f7lWKtYVGHHiMMN0Xp44PlNIp7E0LAo,6502
 mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2W7qHhmF6KdXkvHrQNEI,16380
 mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
-mteb/models/model_implementations/e5_models.py,sha256=
+mteb/models/model_implementations/e5_models.py,sha256=ZLRgzx2uEBc_yWY6DwcJFUNKG6RHpWSEVp1_jaEURhs,9373
 mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
 mteb/models/model_implementations/eagerworks_models.py,sha256=NOQkCUqn9jLSpf9p6KyaIHnJxYV1MNlr2z7hO2AcRSc,5744
 mteb/models/model_implementations/emillykkejensen_models.py,sha256=QdhGqCm_1-AURkrniZj2S1MjwwIVOPMzLvpgfJq-3EQ,2779
@@ -1485,6 +1485,7 @@ mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQB
 mteb/models/model_implementations/euler_models.py,sha256=fZoXYeDjSRN2Qj1Pf-ROi8xok03PjhYi4FLEZKjMPkk,905
 mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
+mteb/models/model_implementations/facebookai.py,sha256=uhE6rB1YgxE0SIc7u8heE1U62qRFFA23IMgpjxBq_Ok,3116
 mteb/models/model_implementations/geogpt_models.py,sha256=Juv86SwhgQX80lVLjAFtim2aSiJT1AcgjniyyiKyk1Q,1923
 mteb/models/model_implementations/gme_v_models.py,sha256=NkfgR3_UdZzoBt1NnalVou6LOR-F7qXM4by9EbAVrys,13568
 mteb/models/model_implementations/google_models.py,sha256=7QfsaJ5JNDRQxFl7Zh2AtiR2PR7PZcfeCBgviuOFBCo,9130
@@ -1499,7 +1500,9 @@ mteb/models/model_implementations/jasper_models.py,sha256=ZY7qRRpBpD3eVryQb4rLs5
 mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWPk6uLn4cuwydQHFTMo,5118
 mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
 mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
+mteb/models/model_implementations/kblab.py,sha256=DDh8gDEI6YPjS4_yGYWC4HatE0mFf7vhGDU83zzV7V0,866
 mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
+mteb/models/model_implementations/kfst.py,sha256=BQj0fxMJwyA6NOdK26NDYVL3z2PW1_F-lTTVImxEWZQ,892
 mteb/models/model_implementations/kowshik24_models.py,sha256=HoQpybjhquK2XSnawlq0aiSWFI5M7l6N4DNY4MQ-P10,976
 mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
 mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
@@ -1526,6 +1529,7 @@ mteb/models/model_implementations/openclip_models.py,sha256=W8XcokgLU1nSmMaWpYXk
 mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=fuxIjOx_kPoDps5C7LW3JllG-AZj4ktqeTNgJESHZh4,8351
 mteb/models/model_implementations/ops_moa_models.py,sha256=luWw1j2iTMx1z1ydLCjvCI89E9Yvge7ruEawivJTmfE,2413
 mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=qGXv71qRjNCIFluZOwvfBlFlKKyN2bXBokwUPk4KHmM,1066
+mteb/models/model_implementations/pawan_models.py,sha256=rV2ePGIuYroocvwqDXm4VU369Y_Vr67CyAE-08K5B9c,1151
 mteb/models/model_implementations/piccolo_models.py,sha256=d8Dtkv_ZTUOCmJLLOuwquq-gX-2UfKvAtl_LvAS0Xi0,2113
 mteb/models/model_implementations/promptriever_models.py,sha256=S7uWes_P74p3OZR_KBJHJN_ezlvvRx2__46DMCWqV5M,6328
 mteb/models/model_implementations/pylate_models.py,sha256=yINGQL97S4xjj74-FTWpO4KHX-E9NDOEeyQWyRmmnaE,14772
@@ -1573,8 +1577,8 @@ mteb/models/search_encoder_index/search_backend_protocol.py,sha256=TSjlx88stJcMl
 mteb/models/search_encoder_index/search_indexes/__init__.py,sha256=Wm60_oUemUpFsvrCMW111dcPH2L2rt1iZrXMskXmG7o,88
 mteb/models/search_encoder_index/search_indexes/faiss_search_index.py,sha256=WMs3QbbYV13fRuT3dakmdVMZLFdc_9ZzSupS3QxlbVQ,5555
 mteb/results/__init__.py,sha256=EXQqK4Am5eIYzD52dpcGAFSdqnC38oE6JHN302oidHc,158
-mteb/results/benchmark_results.py,sha256=
-mteb/results/model_result.py,sha256=
+mteb/results/benchmark_results.py,sha256=b_g0QmTbwue9ZpWTtyPfgf_nyavckZHUgTVE6zqqtzM,18342
+mteb/results/model_result.py,sha256=Y6b_xfJlw8EFZq464ZVhyw0Rryv111hvMjnXbEZJpXk,14059
 mteb/results/task_result.py,sha256=DgmAw6akotjp8m8E6gE8QP9mQMxUvyzu1hnZ5o01GkU,32303
 mteb/tasks/__init__.py,sha256=izAxU0ip1F_YUwx0dFCuN35BaktdmePh6vlDiHC0kLo,503
 mteb/tasks/aggregated_tasks/__init__.py,sha256=Ufgbh1AirxCQkojO3AUhUFWM8zQG10cfdVTkj_PeyLI,104
@@ -2578,9 +2582,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.3.
-mteb-2.3.
-mteb-2.3.
-mteb-2.3.
-mteb-2.3.
-mteb-2.3.
+mteb-2.3.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.3.10.dist-info/METADATA,sha256=IPpkXC-YeiZU0BtiAnv-e9aS8X99_uAsGYxCCIz7nr4,13924
+mteb-2.3.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.3.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.3.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.3.10.dist-info/RECORD,,
{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/WHEEL
File without changes

{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/entry_points.txt
File without changes

{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/licenses/LICENSE
File without changes

{mteb-2.3.8.dist-info → mteb-2.3.10.dist-info}/top_level.txt
File without changes