mteb 2.3.11__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +57 -0
- mteb/deprecated_evaluator.py +8 -13
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/evaluate.py +2 -33
- mteb/leaderboard/figures.py +1 -1
- mteb/leaderboard/table.py +1 -11
- mteb/models/abs_encoder.py +21 -17
- mteb/models/get_model_meta.py +3 -123
- mteb/models/instruct_wrapper.py +2 -1
- mteb/models/model_implementations/bica_model.py +34 -0
- mteb/models/model_implementations/google_models.py +10 -0
- mteb/models/model_implementations/mod_models.py +204 -0
- mteb/models/model_implementations/nomic_models.py +142 -4
- mteb/models/model_meta.py +396 -19
- mteb/models/sentence_transformer_wrapper.py +2 -7
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/METADATA +1 -1
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/RECORD +36 -22
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/WHEEL +0 -0
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.3.11.dist-info → mteb-2.4.1.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
|
|
|
12
12
|
FA_MTEB_2,
|
|
13
13
|
HUME,
|
|
14
14
|
JINA_VDR,
|
|
15
|
+
JMTEB_LITE_V1,
|
|
15
16
|
JMTEB_V2,
|
|
16
17
|
LONG_EMBED,
|
|
17
18
|
MIEB_ENG,
|
|
@@ -76,6 +77,7 @@ __all__ = [
|
|
|
76
77
|
"HUME",
|
|
77
78
|
"HUME",
|
|
78
79
|
"JINA_VDR",
|
|
80
|
+
"JMTEB_LITE_V1",
|
|
79
81
|
"JMTEB_V2",
|
|
80
82
|
"LONG_EMBED",
|
|
81
83
|
"MIEB_ENG",
|
|
@@ -2650,3 +2650,60 @@ JMTEB_V2 = Benchmark(
|
|
|
2650
2650
|
""",
|
|
2651
2651
|
contacts=["lsz05"],
|
|
2652
2652
|
)
|
|
2653
|
+
|
|
2654
|
+
JMTEB_LITE_V1 = Benchmark(
|
|
2655
|
+
name="JMTEB-lite(v1)",
|
|
2656
|
+
display_name="Japanese",
|
|
2657
|
+
icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
|
|
2658
|
+
tasks=get_tasks(
|
|
2659
|
+
languages=["jpn"],
|
|
2660
|
+
tasks=[
|
|
2661
|
+
# Clustering (3)
|
|
2662
|
+
"LivedoorNewsClustering.v2",
|
|
2663
|
+
"MewsC16JaClustering",
|
|
2664
|
+
"SIB200ClusteringS2S",
|
|
2665
|
+
# Classification (7)
|
|
2666
|
+
"AmazonReviewsClassification",
|
|
2667
|
+
"AmazonCounterfactualClassification",
|
|
2668
|
+
"MassiveIntentClassification",
|
|
2669
|
+
"MassiveScenarioClassification",
|
|
2670
|
+
"JapaneseSentimentClassification",
|
|
2671
|
+
"SIB200Classification",
|
|
2672
|
+
"WRIMEClassification",
|
|
2673
|
+
# STS (2)
|
|
2674
|
+
"JSTS",
|
|
2675
|
+
"JSICK",
|
|
2676
|
+
# Retrieval (11)
|
|
2677
|
+
"JaqketRetrievalLite",
|
|
2678
|
+
"MrTyDiJaRetrievalLite",
|
|
2679
|
+
"JaGovFaqsRetrieval",
|
|
2680
|
+
"NLPJournalTitleAbsRetrieval.V2",
|
|
2681
|
+
"NLPJournalTitleIntroRetrieval.V2",
|
|
2682
|
+
"NLPJournalAbsIntroRetrieval.V2",
|
|
2683
|
+
"NLPJournalAbsArticleRetrieval.V2",
|
|
2684
|
+
"JaCWIRRetrievalLite",
|
|
2685
|
+
"MIRACLJaRetrievalLite",
|
|
2686
|
+
"MintakaRetrieval",
|
|
2687
|
+
"MultiLongDocRetrieval",
|
|
2688
|
+
# Reranking (5)
|
|
2689
|
+
"ESCIReranking",
|
|
2690
|
+
"JQaRARerankingLite",
|
|
2691
|
+
"JaCWIRRerankingLite",
|
|
2692
|
+
"MIRACLReranking",
|
|
2693
|
+
"MultiLongDocReranking",
|
|
2694
|
+
],
|
|
2695
|
+
),
|
|
2696
|
+
description="JMTEB-lite is a lightweight version of JMTEB. It makes agile evaluation possible by reaching an average of 5x faster evaluation comparing with JMTEB, as 6 heavy datasets in JMTEB are optimized with hard negative pooling strategy, making them much smaller. The result of JMTEB-lite is proved to be highly relevant with that of JMTEB, making it a faithful preview of JMTEB.",
|
|
2697
|
+
reference="https://huggingface.co/datasets/sbintuitions/JMTEB-lite",
|
|
2698
|
+
citation=r"""
|
|
2699
|
+
@article{li2025jmteb,
|
|
2700
|
+
author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
|
|
2701
|
+
issue = {3},
|
|
2702
|
+
journal = {Vol.2025-NL-265,No.3,1-15},
|
|
2703
|
+
month = {sep},
|
|
2704
|
+
title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
|
|
2705
|
+
year = {2025},
|
|
2706
|
+
}
|
|
2707
|
+
""",
|
|
2708
|
+
contacts=["lsz05"],
|
|
2709
|
+
)
|
mteb/deprecated_evaluator.py
CHANGED
|
@@ -13,21 +13,11 @@ from pathlib import Path
|
|
|
13
13
|
from time import time
|
|
14
14
|
from typing import TYPE_CHECKING, Any
|
|
15
15
|
|
|
16
|
-
from mteb.abstasks.task_metadata import TaskCategory, TaskType
|
|
17
|
-
from mteb.models.get_model_meta import (
|
|
18
|
-
_model_meta_from_cross_encoder,
|
|
19
|
-
_model_meta_from_sentence_transformers,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
if sys.version_info >= (3, 13):
|
|
23
|
-
from warnings import deprecated
|
|
24
|
-
else:
|
|
25
|
-
from typing_extensions import deprecated
|
|
26
|
-
|
|
27
16
|
import datasets
|
|
28
17
|
|
|
29
18
|
import mteb
|
|
30
19
|
from mteb.abstasks import AbsTask
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskCategory, TaskType
|
|
31
21
|
from mteb.benchmarks import Benchmark
|
|
32
22
|
from mteb.models import (
|
|
33
23
|
CrossEncoderWrapper,
|
|
@@ -39,6 +29,11 @@ from mteb.models import (
|
|
|
39
29
|
from mteb.results import TaskResult
|
|
40
30
|
from mteb.types import ScoresDict
|
|
41
31
|
|
|
32
|
+
if sys.version_info >= (3, 13):
|
|
33
|
+
from warnings import deprecated
|
|
34
|
+
else:
|
|
35
|
+
from typing_extensions import deprecated
|
|
36
|
+
|
|
42
37
|
if TYPE_CHECKING:
|
|
43
38
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
44
39
|
|
|
@@ -669,9 +664,9 @@ class MTEB:
|
|
|
669
664
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
670
665
|
|
|
671
666
|
if isinstance(model, CrossEncoder):
|
|
672
|
-
meta =
|
|
667
|
+
meta = ModelMeta.from_cross_encoder(model)
|
|
673
668
|
elif isinstance(model, SentenceTransformer):
|
|
674
|
-
meta =
|
|
669
|
+
meta = ModelMeta.from_sentence_transformer_model(model)
|
|
675
670
|
else:
|
|
676
671
|
meta = ModelMeta(
|
|
677
672
|
loader=None,
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 91353,
|
|
4
|
+
"number_of_characters": 21318247,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 21231812,
|
|
7
|
+
"min_text_length": 14,
|
|
8
|
+
"average_text_length": 236.73496420846064,
|
|
9
|
+
"max_text_length": 438,
|
|
10
|
+
"unique_texts": 89683
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 86435,
|
|
15
|
+
"min_text_length": 16,
|
|
16
|
+
"average_text_length": 51.850629874025195,
|
|
17
|
+
"max_text_length": 118,
|
|
18
|
+
"unique_texts": 1667
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 16204,
|
|
23
|
+
"min_relevant_docs_per_query": 51,
|
|
24
|
+
"average_relevant_docs_per_query": 9.720455908818236,
|
|
25
|
+
"max_relevant_docs_per_query": 78,
|
|
26
|
+
"unique_relevant_docs": 89686
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 98941,
|
|
30
|
+
"min_top_ranked_per_query": 51,
|
|
31
|
+
"average_top_ranked_per_query": 59.35272945410918,
|
|
32
|
+
"max_top_ranked_per_query": 78
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 161744,
|
|
4
|
+
"number_of_characters": 29754484,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 29612965,
|
|
7
|
+
"min_text_length": 142,
|
|
8
|
+
"average_text_length": 187.72799599350847,
|
|
9
|
+
"max_text_length": 252,
|
|
10
|
+
"unique_texts": 156741
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 141519,
|
|
15
|
+
"min_text_length": 9,
|
|
16
|
+
"average_text_length": 35.37975,
|
|
17
|
+
"max_text_length": 176,
|
|
18
|
+
"unique_texts": 3993
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 3998,
|
|
23
|
+
"min_relevant_docs_per_query": 50,
|
|
24
|
+
"average_relevant_docs_per_query": 0.9995,
|
|
25
|
+
"max_relevant_docs_per_query": 51,
|
|
26
|
+
"unique_relevant_docs": 157744
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 204000,
|
|
30
|
+
"min_top_ranked_per_query": 51,
|
|
31
|
+
"average_top_ranked_per_query": 51.0,
|
|
32
|
+
"max_top_ranked_per_query": 51
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 306638,
|
|
4
|
+
"number_of_characters": 56607519,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 56466000,
|
|
7
|
+
"min_text_length": 142,
|
|
8
|
+
"average_text_length": 186.57934562084074,
|
|
9
|
+
"max_text_length": 252,
|
|
10
|
+
"unique_texts": 299096
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 141519,
|
|
15
|
+
"min_text_length": 9,
|
|
16
|
+
"average_text_length": 35.37975,
|
|
17
|
+
"max_text_length": 176,
|
|
18
|
+
"unique_texts": 3993
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 4000,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 4000
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 66799,
|
|
4
|
+
"number_of_characters": 280024895,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 279974341,
|
|
7
|
+
"min_text_length": 8,
|
|
8
|
+
"average_text_length": 4254.799869304884,
|
|
9
|
+
"max_text_length": 188424,
|
|
10
|
+
"unique_texts": 65802
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 50554,
|
|
15
|
+
"min_text_length": 16,
|
|
16
|
+
"average_text_length": 50.70611835506519,
|
|
17
|
+
"max_text_length": 98,
|
|
18
|
+
"unique_texts": 997
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 997,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 989
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 105924,
|
|
4
|
+
"number_of_characters": 20818958,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 20803724,
|
|
7
|
+
"min_text_length": 4,
|
|
8
|
+
"average_text_length": 198.01001294449097,
|
|
9
|
+
"max_text_length": 13231,
|
|
10
|
+
"unique_texts": 104988
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 15234,
|
|
15
|
+
"min_text_length": 7,
|
|
16
|
+
"average_text_length": 17.71395348837209,
|
|
17
|
+
"max_text_length": 48,
|
|
18
|
+
"unique_texts": 860
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 1790,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 2.0813953488372094,
|
|
25
|
+
"max_relevant_docs_per_query": 11,
|
|
26
|
+
"unique_relevant_docs": 1728
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 94102,
|
|
4
|
+
"number_of_characters": 17949014,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 17935995,
|
|
7
|
+
"min_text_length": 4,
|
|
8
|
+
"average_text_length": 192.07122357627807,
|
|
9
|
+
"max_text_length": 10778,
|
|
10
|
+
"unique_texts": 93122
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 13019,
|
|
15
|
+
"min_text_length": 6,
|
|
16
|
+
"average_text_length": 18.081944444444446,
|
|
17
|
+
"max_text_length": 44,
|
|
18
|
+
"unique_texts": 720
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 923,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.2819444444444446,
|
|
25
|
+
"max_relevant_docs_per_query": 3,
|
|
26
|
+
"unique_relevant_docs": 880
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
mteb/evaluate.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from collections.abc import Iterable
|
|
5
|
-
from copy import deepcopy
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from time import time
|
|
8
7
|
from typing import TYPE_CHECKING, Any, cast
|
|
@@ -53,36 +52,6 @@ class OverwriteStrategy(HelpfulStrEnum):
|
|
|
53
52
|
ONLY_CACHE = "only-cache"
|
|
54
53
|
|
|
55
54
|
|
|
56
|
-
_empty_model_meta = ModelMeta(
|
|
57
|
-
loader=None,
|
|
58
|
-
name=None,
|
|
59
|
-
revision=None,
|
|
60
|
-
release_date=None,
|
|
61
|
-
languages=None,
|
|
62
|
-
framework=[],
|
|
63
|
-
similarity_fn_name=None,
|
|
64
|
-
n_parameters=None,
|
|
65
|
-
memory_usage_mb=None,
|
|
66
|
-
max_tokens=None,
|
|
67
|
-
embed_dim=None,
|
|
68
|
-
license=None,
|
|
69
|
-
open_weights=None,
|
|
70
|
-
public_training_code=None,
|
|
71
|
-
public_training_data=None,
|
|
72
|
-
use_instructions=None,
|
|
73
|
-
training_datasets=None,
|
|
74
|
-
modalities=[],
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def _create_empty_model_meta() -> ModelMeta:
|
|
79
|
-
logger.warning("Model metadata is missing. Using empty metadata.")
|
|
80
|
-
meta = deepcopy(_empty_model_meta)
|
|
81
|
-
meta.revision = "no_revision_available"
|
|
82
|
-
meta.name = "no_model_name_available"
|
|
83
|
-
return meta
|
|
84
|
-
|
|
85
|
-
|
|
86
55
|
def _sanitize_model(
|
|
87
56
|
model: ModelMeta | MTEBModels | SentenceTransformer | CrossEncoder,
|
|
88
57
|
) -> tuple[MTEBModels | ModelMeta, ModelMeta, ModelName, Revision]:
|
|
@@ -101,9 +70,9 @@ def _sanitize_model(
|
|
|
101
70
|
elif hasattr(model, "mteb_model_meta"):
|
|
102
71
|
meta = model.mteb_model_meta # type: ignore[attr-defined]
|
|
103
72
|
if not isinstance(meta, ModelMeta):
|
|
104
|
-
meta =
|
|
73
|
+
meta = ModelMeta.from_hub(None)
|
|
105
74
|
else:
|
|
106
|
-
meta =
|
|
75
|
+
meta = ModelMeta.from_hub(None) if not isinstance(model, ModelMeta) else model
|
|
107
76
|
|
|
108
77
|
model_name = cast(str, meta.name)
|
|
109
78
|
model_revision = cast(str, meta.revision)
|
mteb/leaderboard/figures.py
CHANGED
|
@@ -117,7 +117,7 @@ def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
|
|
|
117
117
|
df["Max Tokens"] = df["Max Tokens"].map(_parse_float)
|
|
118
118
|
df["Log(Tokens)"] = np.log10(df["Max Tokens"])
|
|
119
119
|
df["Mean (Task)"] = df["Mean (Task)"].map(_parse_float)
|
|
120
|
-
df = df
|
|
120
|
+
df = df.dropna(
|
|
121
121
|
subset=["Mean (Task)", "Number of Parameters", "Embedding Dimensions"]
|
|
122
122
|
)
|
|
123
123
|
if not len(df.index):
|
mteb/leaderboard/table.py
CHANGED
|
@@ -26,16 +26,6 @@ def _format_scores(score: float) -> float:
|
|
|
26
26
|
return round(score * 100, 2)
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def _get_column_types(df: pd.DataFrame) -> list[str]:
|
|
30
|
-
types = []
|
|
31
|
-
for column_name in df.columns:
|
|
32
|
-
if is_numeric_dtype(df[column_name]):
|
|
33
|
-
types.append("number")
|
|
34
|
-
else:
|
|
35
|
-
types.append("str")
|
|
36
|
-
return types
|
|
37
|
-
|
|
38
|
-
|
|
39
29
|
def _get_column_widths(df: pd.DataFrame) -> list[str]:
|
|
40
30
|
# Please do not remove this function when refactoring.
|
|
41
31
|
# Column width calculation seemingly changes regularly with Gradio releases,
|
|
@@ -226,7 +216,7 @@ def _apply_summary_table_styling(joint_table: pd.DataFrame) -> gr.DataFrame:
|
|
|
226
216
|
gmap=gmap_values.loc[mask],
|
|
227
217
|
)
|
|
228
218
|
|
|
229
|
-
column_types =
|
|
219
|
+
column_types = ["auto" for _ in joint_table_style.data.columns]
|
|
230
220
|
# setting model name column to markdown
|
|
231
221
|
if len(column_types) > 1:
|
|
232
222
|
column_types[1] = "markdown"
|
mteb/models/abs_encoder.py
CHANGED
|
@@ -54,11 +54,11 @@ class AbsEncoder(ABC):
|
|
|
54
54
|
"""A wrapper function around the model.encode method that handles the prompt_name argument and standardizes the output to a numpy array.
|
|
55
55
|
|
|
56
56
|
The order of priorities for prompt selection are:
|
|
57
|
-
1. Composed prompt of task name + prompt type
|
|
57
|
+
1. Composed prompt of task name + prompt type
|
|
58
58
|
2. Specific task prompt
|
|
59
|
-
3. Composed prompt of task type + prompt type
|
|
59
|
+
3. Composed prompt of task type + prompt type
|
|
60
60
|
4. Specific task type prompt
|
|
61
|
-
5. Specific prompt type
|
|
61
|
+
5. Specific prompt type
|
|
62
62
|
|
|
63
63
|
Args:
|
|
64
64
|
task_metadata: The task name to use for building the encoding prompt
|
|
@@ -105,7 +105,7 @@ class AbsEncoder(ABC):
|
|
|
105
105
|
|
|
106
106
|
Args:
|
|
107
107
|
task_metadata: The metadata of the task.
|
|
108
|
-
prompt_type: The name type of prompt.
|
|
108
|
+
prompt_type: The name type of prompt.
|
|
109
109
|
"""
|
|
110
110
|
if not self.model_prompts:
|
|
111
111
|
return None
|
|
@@ -210,13 +210,11 @@ class AbsEncoder(ABC):
|
|
|
210
210
|
task_metadata: The metadata of the task. Sentence-transformers uses this to
|
|
211
211
|
determine which prompt to use from a specified dictionary.
|
|
212
212
|
The order of priorities for prompt selection are:
|
|
213
|
-
1.
|
|
214
|
-
2. Specific task prompt
|
|
215
|
-
3.
|
|
216
|
-
4.
|
|
217
|
-
|
|
218
|
-
6. Default prompt from the task definition
|
|
219
|
-
prompt_type: The name type of prompt. (query or passage)
|
|
213
|
+
1. Specific task prompt
|
|
214
|
+
2. Specific task type prompt
|
|
215
|
+
3. Specific prompt type
|
|
216
|
+
4. Default prompt from the task definition
|
|
217
|
+
prompt_type: The name type of prompt.
|
|
220
218
|
|
|
221
219
|
Returns:
|
|
222
220
|
The instruction/prompt to be used for encoding sentences.
|
|
@@ -224,6 +222,12 @@ class AbsEncoder(ABC):
|
|
|
224
222
|
prompt = task_metadata.prompt
|
|
225
223
|
if self.prompts_dict and task_metadata.name in self.prompts_dict:
|
|
226
224
|
prompt = self.prompts_dict[task_metadata.name]
|
|
225
|
+
elif self.prompts_dict and task_metadata.type in self.prompts_dict:
|
|
226
|
+
prompt = self.prompts_dict[task_metadata.type]
|
|
227
|
+
elif (
|
|
228
|
+
self.prompts_dict and prompt_type and prompt_type.value in self.prompts_dict
|
|
229
|
+
):
|
|
230
|
+
prompt = self.prompts_dict[prompt_type.value]
|
|
227
231
|
|
|
228
232
|
if isinstance(prompt, dict) and prompt_type:
|
|
229
233
|
if prompt.get(prompt_type.value):
|
|
@@ -246,7 +250,7 @@ class AbsEncoder(ABC):
|
|
|
246
250
|
|
|
247
251
|
Args:
|
|
248
252
|
instruction: The instruction to be formatted.
|
|
249
|
-
prompt_type: The name type of prompt.
|
|
253
|
+
prompt_type: The name type of prompt.
|
|
250
254
|
"""
|
|
251
255
|
if self.instruction_template is None:
|
|
252
256
|
raise ValueError(
|
|
@@ -269,7 +273,7 @@ class AbsEncoder(ABC):
|
|
|
269
273
|
|
|
270
274
|
Args:
|
|
271
275
|
task_metadata: The metadata of the task
|
|
272
|
-
prompt_type: The name type of prompt.
|
|
276
|
+
prompt_type: The name type of prompt.
|
|
273
277
|
|
|
274
278
|
Returns:
|
|
275
279
|
The instruction to be used for encoding sentences.
|
|
@@ -373,14 +377,14 @@ class AbsEncoder(ABC):
|
|
|
373
377
|
task_metadata: The metadata of the task. Sentence-transformers uses this to
|
|
374
378
|
determine which prompt to use from a specified dictionary.
|
|
375
379
|
The order of priorities for prompt selection are:
|
|
376
|
-
1. Composed prompt of task name + prompt type
|
|
380
|
+
1. Composed prompt of task name + prompt type
|
|
377
381
|
2. Specific task prompt
|
|
378
|
-
3. Composed prompt of task type + prompt type
|
|
382
|
+
3. Composed prompt of task type + prompt type
|
|
379
383
|
4. Specific task type prompt
|
|
380
|
-
5. Specific prompt type
|
|
384
|
+
5. Specific prompt type
|
|
381
385
|
hf_split: Split of current task
|
|
382
386
|
hf_subset: Subset of current task
|
|
383
|
-
prompt_type: The name type of prompt.
|
|
387
|
+
prompt_type: The name type of prompt.
|
|
384
388
|
**kwargs: Additional arguments to pass to the encoder.
|
|
385
389
|
|
|
386
390
|
Returns:
|
mteb/models/get_model_meta.py
CHANGED
|
@@ -1,26 +1,15 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
1
|
import difflib
|
|
4
2
|
import logging
|
|
5
|
-
import warnings
|
|
6
3
|
from collections.abc import Iterable
|
|
7
|
-
from typing import
|
|
8
|
-
|
|
9
|
-
from huggingface_hub import ModelCard
|
|
10
|
-
from huggingface_hub.errors import RepositoryNotFoundError
|
|
4
|
+
from typing import Any
|
|
11
5
|
|
|
12
6
|
from mteb.abstasks import AbsTask
|
|
13
7
|
from mteb.models import (
|
|
14
|
-
CrossEncoderWrapper,
|
|
15
8
|
ModelMeta,
|
|
16
9
|
MTEBModels,
|
|
17
|
-
sentence_transformers_loader,
|
|
18
10
|
)
|
|
19
11
|
from mteb.models.model_implementations import MODEL_REGISTRY
|
|
20
12
|
|
|
21
|
-
if TYPE_CHECKING:
|
|
22
|
-
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
23
|
-
|
|
24
13
|
logger = logging.getLogger(__name__)
|
|
25
14
|
|
|
26
15
|
|
|
@@ -101,24 +90,9 @@ def get_model(
|
|
|
101
90
|
Returns:
|
|
102
91
|
A model object
|
|
103
92
|
"""
|
|
104
|
-
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
105
|
-
|
|
106
93
|
meta = get_model_meta(model_name, revision)
|
|
107
94
|
model = meta.load_model(**kwargs)
|
|
108
95
|
|
|
109
|
-
# If revision not available in the modelmeta, try to extract it from sentence-transformers
|
|
110
|
-
if hasattr(model, "model") and isinstance(model.model, SentenceTransformer): # type: ignore
|
|
111
|
-
_meta = _model_meta_from_sentence_transformers(model.model) # type: ignore
|
|
112
|
-
if meta.revision is None:
|
|
113
|
-
meta.revision = _meta.revision if _meta.revision else meta.revision
|
|
114
|
-
if not meta.similarity_fn_name:
|
|
115
|
-
meta.similarity_fn_name = _meta.similarity_fn_name
|
|
116
|
-
|
|
117
|
-
elif isinstance(model, CrossEncoder):
|
|
118
|
-
_meta = _model_meta_from_cross_encoder(model.model)
|
|
119
|
-
if meta.revision is None:
|
|
120
|
-
meta.revision = _meta.revision if _meta.revision else meta.revision
|
|
121
|
-
|
|
122
96
|
model.mteb_model_meta = meta # type: ignore
|
|
123
97
|
return model
|
|
124
98
|
|
|
@@ -148,12 +122,8 @@ def get_model_meta(
|
|
|
148
122
|
logger.info(
|
|
149
123
|
"Model not found in model registry. Attempting to extract metadata by loading the model ({model_name}) using HuggingFace."
|
|
150
124
|
)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
meta.revision = revision
|
|
154
|
-
return meta
|
|
155
|
-
except RepositoryNotFoundError:
|
|
156
|
-
pass
|
|
125
|
+
meta = ModelMeta.from_hub(model_name, revision)
|
|
126
|
+
return meta
|
|
157
127
|
|
|
158
128
|
not_found_msg = f"Model '{model_name}' not found in MTEB registry"
|
|
159
129
|
not_found_msg += " nor on the Huggingface Hub." if fetch_from_hf else "."
|
|
@@ -171,93 +141,3 @@ def get_model_meta(
|
|
|
171
141
|
suggestion = f" Did you mean: '{close_matches[0]}'?"
|
|
172
142
|
|
|
173
143
|
raise KeyError(not_found_msg + suggestion)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def _model_meta_from_hf_hub(model_name: str) -> ModelMeta:
|
|
177
|
-
card = ModelCard.load(model_name)
|
|
178
|
-
card_data = card.data.to_dict()
|
|
179
|
-
frameworks = ["PyTorch"]
|
|
180
|
-
loader = None
|
|
181
|
-
if card_data.get("library_name", None) == "sentence-transformers":
|
|
182
|
-
frameworks.append("Sentence Transformers")
|
|
183
|
-
loader = sentence_transformers_loader
|
|
184
|
-
else:
|
|
185
|
-
msg = (
|
|
186
|
-
"Model library not recognized, defaulting to Sentence Transformers loader."
|
|
187
|
-
)
|
|
188
|
-
logger.warning(msg)
|
|
189
|
-
warnings.warn(msg)
|
|
190
|
-
loader = sentence_transformers_loader
|
|
191
|
-
|
|
192
|
-
revision = card_data.get("base_model_revision", None)
|
|
193
|
-
license = card_data.get("license", None)
|
|
194
|
-
return ModelMeta(
|
|
195
|
-
loader=loader,
|
|
196
|
-
name=model_name,
|
|
197
|
-
revision=revision,
|
|
198
|
-
release_date=None,
|
|
199
|
-
languages=None,
|
|
200
|
-
license=license,
|
|
201
|
-
framework=frameworks, # type: ignore
|
|
202
|
-
training_datasets=None,
|
|
203
|
-
similarity_fn_name=None,
|
|
204
|
-
n_parameters=None,
|
|
205
|
-
memory_usage_mb=None,
|
|
206
|
-
max_tokens=None,
|
|
207
|
-
embed_dim=None,
|
|
208
|
-
open_weights=True,
|
|
209
|
-
public_training_code=None,
|
|
210
|
-
public_training_data=None,
|
|
211
|
-
use_instructions=None,
|
|
212
|
-
)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
def _model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta:
|
|
216
|
-
return ModelMeta(
|
|
217
|
-
loader=CrossEncoderWrapper,
|
|
218
|
-
name=model.model.name_or_path,
|
|
219
|
-
revision=model.config._commit_hash,
|
|
220
|
-
release_date=None,
|
|
221
|
-
languages=None,
|
|
222
|
-
framework=["Sentence Transformers"],
|
|
223
|
-
similarity_fn_name=None,
|
|
224
|
-
n_parameters=None,
|
|
225
|
-
memory_usage_mb=None,
|
|
226
|
-
max_tokens=None,
|
|
227
|
-
embed_dim=None,
|
|
228
|
-
license=None,
|
|
229
|
-
open_weights=True,
|
|
230
|
-
public_training_code=None,
|
|
231
|
-
public_training_data=None,
|
|
232
|
-
use_instructions=None,
|
|
233
|
-
training_datasets=None,
|
|
234
|
-
)
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
def _model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta:
|
|
238
|
-
name: str | None = (
|
|
239
|
-
model.model_card_data.model_name
|
|
240
|
-
if model.model_card_data.model_name
|
|
241
|
-
else model.model_card_data.base_model
|
|
242
|
-
)
|
|
243
|
-
embeddings_dim = model.get_sentence_embedding_dimension()
|
|
244
|
-
meta = ModelMeta(
|
|
245
|
-
loader=sentence_transformers_loader,
|
|
246
|
-
name=name,
|
|
247
|
-
revision=model.model_card_data.base_model_revision,
|
|
248
|
-
release_date=None,
|
|
249
|
-
languages=None,
|
|
250
|
-
framework=["Sentence Transformers"],
|
|
251
|
-
similarity_fn_name=None,
|
|
252
|
-
n_parameters=None,
|
|
253
|
-
memory_usage_mb=None,
|
|
254
|
-
max_tokens=None,
|
|
255
|
-
embed_dim=embeddings_dim,
|
|
256
|
-
license=None,
|
|
257
|
-
open_weights=True,
|
|
258
|
-
public_training_code=None,
|
|
259
|
-
public_training_data=None,
|
|
260
|
-
use_instructions=None,
|
|
261
|
-
training_datasets=None,
|
|
262
|
-
)
|
|
263
|
-
return meta
|
mteb/models/instruct_wrapper.py
CHANGED
|
@@ -122,7 +122,8 @@ class InstructSentenceTransformerModel(AbsEncoder):
|
|
|
122
122
|
apply_instruction_to_passages: Whether to apply the instruction template to the passages.
|
|
123
123
|
padding_side: Padding side. If None, the padding side will be read from the model config.
|
|
124
124
|
add_eos_token: Whether to add the eos token to each input example.
|
|
125
|
-
prompts_dict: Dictionary of task names to prompt names. If
|
|
125
|
+
prompts_dict: Dictionary of task names to prompt names. If task name is missing in the dict or prompts dict is None, prompt from task metadata or
|
|
126
|
+
AbsTask.abstask_prompt will be used.
|
|
126
127
|
**kwargs: Kwargs for Sentence Transformer model.
|
|
127
128
|
"""
|
|
128
129
|
from sentence_transformers import SentenceTransformer
|