mteb 2.1.8__py3-none-any.whl → 2.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/_create_table.py +1 -3
- mteb/benchmarks/benchmark.py +11 -3
- mteb/benchmarks/benchmarks/benchmarks.py +5 -6
- mteb/benchmarks/get_benchmark.py +2 -0
- mteb/leaderboard/benchmark_selector.py +3 -2
- mteb/models/model_implementations/emillykkejensen_models.py +70 -0
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/METADATA +1 -1
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/RECORD +12 -11
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/WHEEL +0 -0
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/top_level.txt +0 -0
mteb/benchmarks/_create_table.py
CHANGED
@@ -358,9 +358,7 @@ def _create_summary_table_mean_public_private(
         "mean(public)": "Mean (Public)",
         "mean(private)": "Mean (Private)",
     }
-
-    if "Retrieval" in joint_table.columns:
-        rename_dict["Retrieval"] = "Mean (Task)"
+
     joint_table = joint_table.rename(columns=rename_dict)
 
     # Move borda rank to front
mteb/benchmarks/benchmark.py
CHANGED
@@ -87,7 +87,10 @@ class RtebBenchmark(Benchmark):
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
-
+        joint_table = _create_summary_table_mean_public_private(benchmark_results)
+        # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
+        joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
+        return joint_table
 
 
 class HUMEBenchmark(Benchmark):
@@ -108,10 +111,15 @@ class MIEBBenchmark(Benchmark):
         return _create_summary_table_mean_task_type(benchmark_results)
 
 
-class
+class VidoreBenchmark(Benchmark):
     """Wrapper for Vidore3 benchmark."""
 
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
-
+        joint_table = _create_summary_table_mean_public_private(benchmark_results)
+        # For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
+        joint_table = joint_table.rename(
+            columns={"Document Understanding": "Mean (Task)"}
+        )
+        return joint_table
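Both new `_create_summary_table` overrides lean on `pandas.DataFrame.rename`, which silently ignores column names that are not present, so applying a benchmark-specific mapping is safe even when a results table lacks that task-type column. A minimal sketch of that behaviour (toy data; only the column names come from the diff above):

import pandas as pd

# Toy leaderboard-style summary table; the values are illustrative only.
joint_table = pd.DataFrame(
    {
        "Mean (Public)": [0.61, 0.58],
        "Mean (Private)": [0.57, 0.55],
        "Retrieval": [0.59, 0.56],
    },
    index=["model-a", "model-b"],
)

# The rename RtebBenchmark now performs: "Retrieval" becomes "Mean (Task)".
print(joint_table.rename(columns={"Retrieval": "Mean (Task)"}).columns.tolist())
# ['Mean (Public)', 'Mean (Private)', 'Mean (Task)']

# A mapping whose key is absent is a no-op, so the VidoreBenchmark mapping
# would leave this retrieval-style table untouched.
print(joint_table.rename(columns={"Document Understanding": "Mean (Task)"}).columns.tolist())
# ['Mean (Public)', 'Mean (Private)', 'Retrieval']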
mteb/benchmarks/benchmarks/benchmarks.py
CHANGED
@@ -2,7 +2,7 @@ from mteb.benchmarks.benchmark import (
     Benchmark,
     HUMEBenchmark,
     MIEBBenchmark,
-
+    VidoreBenchmark,
 )
 from mteb.get_tasks import MTEBTasks, get_task, get_tasks
 
@@ -2219,7 +2219,7 @@ VIDORE_V2 = Benchmark(
     """,
 )
 
-VIDORE_V3 =
+VIDORE_V3 = VidoreBenchmark(
     name="ViDoRe(v3)",
     display_name="ViDoRe V3",
     icon="https://cdn-uploads.huggingface.co/production/uploads/66e16a677c2eb2da5109fb5c/x99xqw__fl2UaPbiIdC_f.png",
@@ -2253,10 +2253,9 @@ VIDORE_V3 = Vidore3Benchmark(
     """,
 )
 
-VISUAL_DOCUMENT_RETRIEVAL =
-    name="
-    display_name="
-    icon="https://github.com/DennisSuitters/LibreICONS/raw/2d2172d15e3c6ca03c018629d60050e4b99e5c55/svg-color/libre-gui-picture.svg",
+VISUAL_DOCUMENT_RETRIEVAL = VidoreBenchmark(
+    name="ViDoRe(v1&v2)",
+    display_name="ViDoRe (V1&V2)",
     tasks=get_tasks(
         tasks=[
             # v1
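After the move to `VidoreBenchmark`, the two benchmark objects defined above should still be fetchable by the names they register. A minimal usage sketch (benchmark names taken from the diff; everything else is standard `mteb` usage and assumed):

import mteb

vidore_v3 = mteb.get_benchmark("ViDoRe(v3)")
vidore_v1_v2 = mteb.get_benchmark("ViDoRe(v1&v2)")

print(vidore_v3.display_name)   # "ViDoRe V3"
print(len(vidore_v1_v2.tasks))  # number of v1 + v2 tasks bundled above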
mteb/benchmarks/get_benchmark.py
CHANGED
@@ -39,6 +39,7 @@ def _get_previous_benchmark_names() -> dict[str, str]:
         MTEB_RETRIEVAL_MEDICAL,
         MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
         SEB,
+        VISUAL_DOCUMENT_RETRIEVAL,
         MTEB_code,
         MTEB_multilingual_v2,
     )
@@ -63,6 +64,7 @@ def _get_previous_benchmark_names() -> dict[str, str]:
         "MTEB(Chinese)": C_MTEB.name,
         "FaMTEB(fas, beta)": FA_MTEB.name,
         "BRIGHT(long)": BRIGHT_LONG.name,
+        "VisualDocumentRetrieval": VISUAL_DOCUMENT_RETRIEVAL.name,
     }
     return previous_benchmark_names
 
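The extra alias entry keeps scripts and results that still reference the old `"VisualDocumentRetrieval"` name working; `get_benchmark` presumably consults `_get_previous_benchmark_names()` and resolves the old name to the renamed benchmark. A hedged sketch of the expected resolution:

import mteb

# Pre-2.1.9 name should resolve via the alias table extended above.
bench = mteb.get_benchmark("VisualDocumentRetrieval")
print(bench.name)  # expected: "ViDoRe(v1&v2)"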
mteb/leaderboard/benchmark_selector.py
CHANGED
@@ -110,10 +110,11 @@ R_BENCHMARK_ENTRIES = [
     MenuEntry(
         "Image",
         description=None,
-        open=
+        open=True,
         benchmarks=[
-            mteb.get_benchmark("
+            mteb.get_benchmark("ViDoRe(v3)"),
             mteb.get_benchmark("JinaVDR"),
+            MenuEntry("Other", [mteb.get_benchmark("ViDoRe(v1&v2)")]),
         ],
     ),
     MenuEntry(
mteb/models/model_implementations/emillykkejensen_models.py
ADDED
@@ -0,0 +1,70 @@
+from mteb.models.model_meta import ModelMeta
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+embedding_gemma_300m_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="emillykkejensen/EmbeddingGemma-Scandi-300m",
+    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
+    open_weights=True,
+    revision="9f3307b9f601db564a9190cb475324d128dcfe86",
+    release_date="2025-10-17",
+    n_parameters=307_581_696,
+    embed_dim=768,
+    max_tokens=2048,
+    license="apache-2.0",
+    reference="https://huggingface.co/emillykkejensen/EmbeddingGemma-Scandi-300m",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    training_datasets=set(),
+    similarity_fn_name="cosine",  # type: ignore[arg-type]
+    adapted_from="emillykkejensen/EmbeddingGemma-Scandi-300m",
+    memory_usage_mb=578,
+)
+
+
+qwen_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
+    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
+    open_weights=True,
+    revision="cf1e7ba36ebd3d605549d8f02930a18e17b54513",
+    release_date="2025-10-17",
+    n_parameters=595776512,
+    memory_usage_mb=2272,
+    embed_dim=1024,
+    max_tokens=32768,
+    license="apache-2.0",
+    reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    training_datasets=set(),
+    similarity_fn_name="cosine",  # type: ignore[arg-type]
+    adapted_from="Qwen/Qwen3-Embedding-0.6B",
+)
+
+
+mmbert_scandi = ModelMeta(
+    loader=sentence_transformers_loader,  # type: ignore
+    name="emillykkejensen/mmBERTscandi-base-embedding",
+    languages=["dan-Latn", "swe-Latn", "nor-Latn", "nob-Latn", "nno-Latn"],
+    open_weights=True,
+    revision="82d74c7a5d8e1ddf31b132865df2d16b2b0294ee",
+    release_date="2025-10-17",
+    n_parameters=306939648,
+    memory_usage_mb=1171,
+    embed_dim=768,
+    max_tokens=8192,
+    license="apache-2.0",
+    reference="https://huggingface.co/emillykkejensen/Qwen3-Embedding-Scandi-0.6B",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+    training_datasets=set(),
+    similarity_fn_name="cosine",  # type: ignore[arg-type]
+    adapted_from="jonasaise/scandmmBERT-base-scandinavian",
+)
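All three new `ModelMeta` entries wrap ordinary Sentence Transformers checkpoints, so they can also be exercised directly with the `sentence-transformers` library. A minimal sketch, assuming that package is installed and the Hugging Face repos are reachable (the instruction/prompt handling implied by `use_instructions=True` is omitted):

from sentence_transformers import SentenceTransformer

# One of the newly registered models, pinned to the revision listed in its ModelMeta.
model = SentenceTransformer(
    "emillykkejensen/EmbeddingGemma-Scandi-300m",
    revision="9f3307b9f601db564a9190cb475324d128dcfe86",
)

sentences = ["Hvor ligger det nærmeste apotek?", "Var ligger närmaste apotek?"]
embeddings = model.encode(sentences, normalize_embeddings=True)
print(embeddings.shape)  # (2, 768), matching embed_dim=768 in the metadata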
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.1.8
+Version: 2.1.9
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/RECORD
CHANGED
@@ -52,11 +52,11 @@ mteb/abstasks/text/bitext_mining.py,sha256=8m86XHJ3TxguC9itxZRq2Bt_p0NYojojS2Btk
 mteb/abstasks/text/reranking.py,sha256=rfRGRBeSjZLgkh8pneMgRm-vd9NHr5jSFH92YfOHfmU,7776
 mteb/abstasks/text/summarization.py,sha256=KYEb8gh4JjpSsrvGUmQ2VlrVdzzVxIWcitXOJUaHhO4,6954
 mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,225
-mteb/benchmarks/_create_table.py,sha256=
-mteb/benchmarks/benchmark.py,sha256=
-mteb/benchmarks/get_benchmark.py,sha256
+mteb/benchmarks/_create_table.py,sha256=z3iqa5dajLk0DYxEE9EeO1qpR3VJXokg8ZQ2rdUkvdM,20452
+mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
+mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
 mteb/benchmarks/benchmarks/__init__.py,sha256=UD6YjWPDVPSQdUhmD-4rho08Gs5LU9pS_C2jX5eUns0,2102
-mteb/benchmarks/benchmarks/benchmarks.py,sha256=
+mteb/benchmarks/benchmarks/benchmarks.py,sha256=v7n2fPGOC66zzBhS1nfSthY55DQnGNg-hGa4XfT21Vg,89941
 mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
 mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
 mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -1423,7 +1423,7 @@ mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432
 mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
 mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
 mteb/leaderboard/app.py,sha256=xvOcK_ICmAwl1Mo5muI7fmCjYeSt1ztSgsQDrPF6OvM,32575
-mteb/leaderboard/benchmark_selector.py,sha256=
+mteb/leaderboard/benchmark_selector.py,sha256=hnXdo_Kj4UUAruFl6nZkCxAQ88IEfbaH8EADFJMMdVo,7686
 mteb/leaderboard/figures.py,sha256=Rq20LFpaUhQD4tuKp7P7ExQtAjonMLibgO3ud0ykMag,7491
 mteb/leaderboard/table.py,sha256=qs0H_Gt9FzRvzb-AL0YlqEe0YAsdYsVX3QlncfCBEqg,7828
 mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
@@ -1469,6 +1469,7 @@ mteb/models/model_implementations/dino_models.py,sha256=QFgaFHR5YKrylqJGSljXCBn2
 mteb/models/model_implementations/e5_instruct.py,sha256=9R4GoSFicgqNDCh3HhTN_8L1qhzuEKvatjHYn3T9zlU,7676
 mteb/models/model_implementations/e5_models.py,sha256=vsqkmm6XzZn9ROj_OUR0j2KiN75MEuQsOPeoyc1AeYg,10937
 mteb/models/model_implementations/e5_v.py,sha256=_9W7I0ryIzx_H9eCkzwdm8iHdGX1LIjKGXkhSh_zNv8,6690
+mteb/models/model_implementations/emillykkejensen_models.py,sha256=1DEAGdSZZXDFbbw0YH-vkLm9Y-wthgbOJCRTIpV3Jeo,2795
 mteb/models/model_implementations/en_code_retriever.py,sha256=leZ-0M6LrunocY3XQBYZU1uevDRopeyR5ujIhwqBbd8,1043
 mteb/models/model_implementations/evaclip_models.py,sha256=cPMGYLDIq4s8zJxb4vPXqJ-rqwPaq7KOh2QZSO6cDas,8000
 mteb/models/model_implementations/fa_models.py,sha256=WGal70_ezITWoNdjcMdbOCTSCtoaXzuPadYstLVXxhg,7478
@@ -2553,9 +2554,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.1.
-mteb-2.1.
-mteb-2.1.
-mteb-2.1.
-mteb-2.1.
-mteb-2.1.
+mteb-2.1.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.1.9.dist-info/METADATA,sha256=yUOXi6O_wkyskXKnHDcDB6SqLZg5Q5Nc_a_qK7Pngpc,13573
+mteb-2.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.1.9.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.1.9.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.1.9.dist-info/RECORD,,
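For context on the RECORD rows above: each entry has the form `path,sha256=<digest>,<size>`, where the digest is the urlsafe base64 encoding of the file's SHA-256 hash with the trailing `=` padding stripped, as specified for wheel RECORD files. A small sketch of how such a row could be recomputed locally (the file path is illustrative):

import base64
import hashlib
from pathlib import Path

def record_row(path: str) -> str:
    # path,sha256=<urlsafe b64 digest, no padding>,<size in bytes>
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Hypothetical local check against an entry listed above:
# print(record_row("mteb/benchmarks/benchmark.py"))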
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/WHEEL
File without changes
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/entry_points.txt
File without changes
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/licenses/LICENSE
File without changes
{mteb-2.1.8.dist-info → mteb-2.1.9.dist-info}/top_level.txt
File without changes