mteb 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +27 -16
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +9 -4
- mteb/models/model_implementations/bedrock_models.py +16 -6
- mteb/models/model_implementations/blip2_models.py +9 -4
- mteb/models/model_implementations/blip_models.py +9 -4
- mteb/models/model_implementations/bm25.py +15 -10
- mteb/models/model_implementations/bmretriever_models.py +6 -2
- mteb/models/model_implementations/cde_models.py +9 -5
- mteb/models/model_implementations/clip_models.py +9 -4
- mteb/models/model_implementations/cohere_models.py +10 -4
- mteb/models/model_implementations/cohere_v.py +9 -4
- mteb/models/model_implementations/colpali_models.py +4 -3
- mteb/models/model_implementations/colqwen_models.py +10 -31
- mteb/models/model_implementations/colsmol_models.py +1 -1
- mteb/models/model_implementations/conan_models.py +10 -4
- mteb/models/model_implementations/dino_models.py +9 -4
- mteb/models/model_implementations/e5_v.py +9 -4
- mteb/models/model_implementations/eagerworks_models.py +10 -4
- mteb/models/model_implementations/evaclip_models.py +9 -4
- mteb/models/model_implementations/gme_v_models.py +5 -3
- mteb/models/model_implementations/google_models.py +10 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
- mteb/models/model_implementations/hinvec_models.py +5 -1
- mteb/models/model_implementations/jasper_models.py +12 -5
- mteb/models/model_implementations/jina_clip.py +9 -4
- mteb/models/model_implementations/jina_models.py +10 -5
- mteb/models/model_implementations/kalm_models.py +18 -12
- mteb/models/model_implementations/linq_models.py +6 -1
- mteb/models/model_implementations/listconranker.py +9 -4
- mteb/models/model_implementations/llm2clip_models.py +9 -4
- mteb/models/model_implementations/llm2vec_models.py +12 -6
- mteb/models/model_implementations/mcinext_models.py +5 -2
- mteb/models/model_implementations/moco_models.py +9 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +10 -4
- mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
- mteb/models/model_implementations/nomic_models.py +10 -4
- mteb/models/model_implementations/nomic_models_vision.py +4 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
- mteb/models/model_implementations/nvidia_models.py +12 -4
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/openai_models.py +9 -4
- mteb/models/model_implementations/openclip_models.py +9 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
- mteb/models/model_implementations/ops_moa_models.py +7 -2
- mteb/models/model_implementations/promptriever_models.py +12 -6
- mteb/models/model_implementations/pylate_models.py +19 -13
- mteb/models/model_implementations/qwen3_models.py +8 -1
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/repllama_models.py +13 -6
- mteb/models/model_implementations/rerankers_custom.py +10 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
- mteb/models/model_implementations/salesforce_models.py +7 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
- mteb/models/model_implementations/seed_models.py +1 -1
- mteb/models/model_implementations/siglip_models.py +9 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/uae_models.py +9 -4
- mteb/models/model_implementations/vdr_models.py +7 -1
- mteb/models/model_implementations/vista_models.py +9 -4
- mteb/models/model_implementations/vlm2vec_models.py +9 -4
- mteb/models/model_implementations/voyage_models.py +10 -4
- mteb/models/model_implementations/voyage_v.py +10 -6
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +12 -7
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +15 -9
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
mteb/benchmarks/benchmarks/rteb_benchmarks.py
CHANGED

@@ -10,6 +10,8 @@ RTEB_CITATION = r"""@article{rteb2025,
   year = {2025},
 }"""
 
+removal_note = "\n\nNote: We have temporarily removed the 'Private' column to read more about this decision out the [announcement](https://github.com/embeddings-benchmark/mteb/issues/3934)."
+
 RTEB_MAIN = RtebBenchmark(
     name="RTEB(beta)",
     display_name="RTEB Multilingual",
@@ -48,7 +50,8 @@ RTEB_MAIN = RtebBenchmark(
            "JapaneseLegal1Retrieval",
        ],
    ),
-    description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB (ReTrieval Embedding Benchmark) is a comprehensive benchmark for evaluating text retrieval models across multiple specialized domains including legal, finance, code, and healthcare. It contains diverse retrieval tasks designed to test models' ability to understand domain-specific terminology and retrieve relevant documents in specialized contexts across multiple languages. The dataset includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -83,7 +86,8 @@ RTEB_ENGLISH = RtebBenchmark(
        ],
        languages=["eng"],
    ),
-    description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB English is a subset of RTEB containing retrieval tasks in English across legal, finance, code, and healthcare domains. Includes diverse tasks covering specialized domains such as healthcare and finance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -101,7 +105,8 @@ RTEB_FRENCH = RtebBenchmark(
        ],
        languages=["fra"],
    ),
-    description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB French is a subset of RTEB containing retrieval tasks in French across legal and general knowledge domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -119,7 +124,8 @@ RTEB_GERMAN = RtebBenchmark(
            "GermanLegal1Retrieval",
        ],
    ),
-    description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB German is a subset of RTEB containing retrieval tasks in German across legal, healthcare, and business domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -135,7 +141,8 @@ RTEB_JAPANESE = RtebBenchmark(
            "JapaneseLegal1Retrieval",
        ],
    ),
-    description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Japanese is a subset of RTEB containing retrieval tasks in Japanese across legal and code domains. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -156,7 +163,8 @@ RTEB_FINANCE = RtebBenchmark(
            "EnglishFinance4Retrieval",
        ],
    ),
-    description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Finance is a subset of RTEB containing retrieval tasks specifically focused on financial domain including finance benchmarks, Q&A, financial document retrieval, and corporate governance. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -177,7 +185,8 @@ RTEB_LEGAL = RtebBenchmark(
            "JapaneseLegal1Retrieval",
        ],
    ),
-    description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Legal is a subset of RTEB containing retrieval tasks specifically focused on legal domain including case documents, statutes, legal summarization, and multilingual legal Q&A. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -199,7 +208,8 @@ RTEB_CODE = RtebBenchmark(
            "JapaneseCode1Retrieval",
        ],
    ),
-    description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Code is a subset of RTEB containing retrieval tasks specifically focused on programming and code domains including algorithmic problems, data science tasks, code evaluation, SQL retrieval, and multilingual code retrieval. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
@@ -217,7 +227,8 @@ RTEB_HEALTHCARE = RtebBenchmark(
            "GermanHealthcare1Retrieval",
        ],
    ),
-    description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues).",
+    description="RTEB Healthcare is a subset of RTEB containing retrieval tasks specifically focused on healthcare and medical domains including medical Q&A, healthcare information retrieval, cross-lingual medical retrieval, and multilingual medical consultation. The benchmark includes both open and closed datasets, providing a robust evaluation framework for real-world applications. To submit results on private tasks, please create [open an issue](https://github.com/embeddings-benchmark/mteb/issues)."
+    + removal_note,
     citation=RTEB_CITATION,
     contacts=["fzowl"],
 )
mteb/cache.py
CHANGED

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import gzip
 import io
 import json
@@ -7,9 +9,8 @@ import shutil
 import subprocess
 import warnings
 from collections import defaultdict
-from collections.abc import Iterable, Sequence
 from pathlib import Path
-from typing import cast
+from typing import TYPE_CHECKING, cast
 
 import requests
 from pydantic import ValidationError
@@ -19,7 +20,11 @@ from mteb.abstasks import AbsTask
 from mteb.benchmarks.benchmark import Benchmark
 from mteb.models import ModelMeta
 from mteb.results import BenchmarkResults, ModelResult, TaskResult
-
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from mteb.types import ModelName, Revision
 
 logger = logging.getLogger(__name__)
 
@@ -584,7 +589,7 @@ class ResultCache:
 
         first_model = next(iter(models))
         if isinstance(first_model, ModelMeta):
-            models = cast(Iterable[ModelMeta], models)
+            models = cast("Iterable[ModelMeta]", models)
             name_and_revision = {
                 (m.model_name_as_path(), m.revision or "no_revision_available")
                 for m in models
@@ -595,7 +600,7 @@ class ResultCache:
             if (p.parent.parent.name, p.parent.name) in name_and_revision
         ]
 
-        str_models = cast(Sequence[str], models)
+        str_models = cast("Sequence[str]", models)
         model_names = {m.replace("/", "__").replace(" ", "_") for m in str_models}
         return [p for p in paths if p.parent.parent.name in model_names]
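The same refactor repeats across most of the files in this release: imports that are only needed for type annotations move into an `if TYPE_CHECKING:` block, and the type argument passed to `typing.cast` is written as a string so nothing is imported or evaluated at runtime. The following minimal sketch is not taken from the mteb codebase; it only illustrates the pattern with hypothetical names.

from __future__ import annotations

from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
    # Only imported while type checking (mypy, pyright); absent at runtime.
    from collections.abc import Iterable, Sequence


def model_dirs(models: Iterable[str] | Sequence[str]) -> list[str]:
    # The quoted type argument is documentation for the type checker only;
    # cast() returns its second argument unchanged at runtime.
    str_models = cast("Sequence[str]", models)
    return [m.replace("/", "__") for m in str_models]


if __name__ == "__main__":
    print(model_dirs(["org/model-a", "org/model-b"]))

Because of `from __future__ import annotations`, the annotation `Iterable[str] | Sequence[str]` is stored as a string and never evaluated, so the names under TYPE_CHECKING do not need to exist at runtime.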
mteb/cli/_display_tasks.py
CHANGED

@@ -1,9 +1,15 @@
-from
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
 
-from mteb.abstasks import AbsTask
-from mteb.benchmarks import Benchmark
 from mteb.get_tasks import MTEBTasks
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from mteb.abstasks import AbsTask
+    from mteb.benchmarks import Benchmark
+
 
 def _display_benchmarks(benchmarks: Sequence[Benchmark]) -> None:
     """Get all benchmarks available in the MTEB."""
mteb/cli/build_cli.py
CHANGED

@@ -3,17 +3,20 @@ import logging
 import os
 import warnings
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import torch
 from rich.logging import RichHandler
 
 import mteb
-from mteb.abstasks.abstask import AbsTask
 from mteb.cache import ResultCache
 from mteb.cli._display_tasks import _display_benchmarks, _display_tasks
 from mteb.cli.generate_model_card import generate_model_card
 from mteb.evaluate import OverwriteStrategy
-
+
+if TYPE_CHECKING:
+    from mteb.abstasks.abstask import AbsTask
+    from mteb.types import EncodeKwargs
 
 logger = logging.getLogger(__name__)
 
mteb/cli/generate_model_card.py
CHANGED

@@ -1,14 +1,21 @@
+from __future__ import annotations
+
 import logging
 import warnings
-from collections.abc import Sequence
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from huggingface_hub import ModelCard, ModelCardData, repo_exists
 
 from mteb.abstasks.abstask import AbsTask
-from mteb.benchmarks.benchmark import Benchmark
 from mteb.cache import ResultCache
 
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from mteb.abstasks.abstask import AbsTask
+    from mteb.benchmarks.benchmark import Benchmark
+
 logger = logging.getLogger(__name__)
 
 
mteb/deprecated_evaluator.py
CHANGED

@@ -6,7 +6,6 @@ import os
 import sys
 import traceback
 import warnings
-from collections.abc import Iterable, Sequence
 from copy import deepcopy
 from datetime import datetime
 from itertools import chain
@@ -18,26 +17,31 @@ import datasets
 
 import mteb
 from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate
-from mteb.abstasks.task_metadata import TaskCategory, TaskType
 from mteb.benchmarks import Benchmark
 from mteb.models import (
     CrossEncoderWrapper,
     ModelMeta,
-    MTEBModels,
     SentenceTransformerEncoderWrapper,
 )
 from mteb.results import TaskResult
-
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from sentence_transformers import CrossEncoder, SentenceTransformer
+
+    from mteb.abstasks.aggregated_task import AbsTaskAggregate
+    from mteb.abstasks.task_metadata import TaskCategory, TaskType
+    from mteb.models import (
+        MTEBModels,
+    )
+    from mteb.types import EncodeKwargs, ScoresDict
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
 else:
     from typing_extensions import deprecated
 
-if TYPE_CHECKING:
-    from sentence_transformers import CrossEncoder, SentenceTransformer
-
 logger = logging.getLogger(__name__)
 
 
@@ -66,9 +70,9 @@ class MTEB:
         """
         if isinstance(next(iter(tasks)), Benchmark):
             self.benchmarks = tasks
-            self.tasks = list(chain.from_iterable(cast(Iterable[Benchmark], tasks)))
+            self.tasks = list(chain.from_iterable(cast("Iterable[Benchmark]", tasks)))
         elif isinstance(next(iter(tasks)), AbsTask):
-            self.tasks = list(cast(Iterable[AbsTask], tasks))
+            self.tasks = list(cast("Iterable[AbsTask]", tasks))
 
         self.err_logs_path = Path(err_logs_path)
         self._last_evaluated_splits: dict[str, list[str]] = {}
@@ -313,7 +317,7 @@
         elif isinstance(model, CrossEncoder):
             mteb_model = CrossEncoderWrapper(model)
         else:
-            mteb_model = cast(MTEBModels, model)
+            mteb_model = cast("MTEBModels", model)
 
         meta = self.create_model_meta(mteb_model)
         output_path = self._create_output_folder(meta, output_folder)
@@ -346,7 +350,7 @@
             )
 
             if task.is_aggregate:
-                aggregated_task = cast(AbsTaskAggregate, task)
+                aggregated_task = cast("AbsTaskAggregate", task)
                 self_ = MTEB(tasks=aggregated_task.metadata.tasks)
                 aggregated_task_results = self_.run(
                     mteb_model,
mteb/evaluate.py
CHANGED

@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import logging
 import warnings
-from collections.abc import Iterable
 from pathlib import Path
 from time import time
 from typing import TYPE_CHECKING, cast
@@ -17,22 +16,25 @@ from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.benchmarks.benchmark import Benchmark
 from mteb.cache import ResultCache
 from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import (
-    MTEBModels,
-)
 from mteb.models.sentence_transformer_wrapper import (
     CrossEncoderWrapper,
     SentenceTransformerEncoderWrapper,
 )
 from mteb.results import ModelResult, TaskResult
 from mteb.results.task_result import TaskError
-from mteb.types import
-from mteb.types._encoder_io import EncodeKwargs
-from mteb.types._metadata import ModelName, Revision
+from mteb.types import PromptType
 
 if TYPE_CHECKING:
+    from collections.abc import Iterable
+
     from sentence_transformers import CrossEncoder, SentenceTransformer
 
+    from mteb.models.models_protocols import (
+        MTEBModels,
+    )
+    from mteb.types import EncodeKwargs, HFSubset, SplitName
+    from mteb.types._metadata import ModelName, Revision
+
 
 logger = logging.getLogger(__name__)
 
@@ -69,13 +71,13 @@ def _sanitize_model(
         meta = getattr(model, "mteb_model_meta")
         if not isinstance(meta, ModelMeta):
             meta = ModelMeta._from_hub(None)
-        wrapped_model = cast(MTEBModels | ModelMeta, model)
+        wrapped_model = cast("MTEBModels | ModelMeta", model)
     else:
         meta = ModelMeta._from_hub(None) if not isinstance(model, ModelMeta) else model
         wrapped_model = meta
 
-    model_name = cast(str, meta.name)
-    model_revision = cast(str, meta.revision)
+    model_name = cast("str", meta.name)
+    model_revision = cast("str", meta.revision)
 
     return wrapped_model, meta, model_name, model_revision
 
@@ -132,8 +134,8 @@ def _evaluate_task(
 
     task.check_if_dataset_is_superseded()
 
-
-    if not
+    data_preloaded = task.data_loaded
+    if not data_preloaded:
         try:
             task.load_data()
         except DatasetNotFoundError as e:
@@ -176,7 +178,7 @@ def _evaluate_task(
         kg_co2_emissions=None,
     )
 
-    if
+    if not data_preloaded:  # only unload if we loaded the data
         task.unload_data()
 
     return result
@@ -202,10 +204,10 @@ def _check_model_modalities(
     if isinstance(tasks, AbsTask):
         check_tasks = [tasks]
     elif isinstance(tasks, Benchmark):
-        benchmark = cast(Benchmark, tasks)
+        benchmark = cast("Benchmark", tasks)
         check_tasks = benchmark.tasks
     else:
-        check_tasks = cast(Iterable[AbsTask], tasks)
+        check_tasks = cast("Iterable[AbsTask]", tasks)
 
     warnings, errors = [], []
 
@@ -298,7 +300,7 @@ def evaluate(
             changed.
         - "only-cache": Only load the results from the cache folder and do not run the task. Useful if you just want to load the results from the
             cache.
-        prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be
+        prediction_folder: Optional folder in which to save model predictions for the task. Predictions of the tasks will be saved in `prediction_folder/{task_name}_predictions.json`
         show_progress_bar: Whether to show a progress bar when running the evaluation. Default is True. Setting this to False will also set the
             `encode_kwargs['show_progress_bar']` to False if encode_kwargs is unspecified.
         public_only: Run only public tasks. If None, it will attempt to run the private task.
@@ -342,7 +344,7 @@
 
     # AbsTaskAggregate is a special case where we have to run multiple tasks and combine the results
     if isinstance(tasks, AbsTaskAggregate):
-        aggregated_task = cast(AbsTaskAggregate, tasks)
+        aggregated_task = cast("AbsTaskAggregate", tasks)
         results = evaluate(
             model,
             aggregated_task.metadata.tasks,
@@ -365,7 +367,7 @@
     if isinstance(tasks, AbsTask):
         task = tasks
     else:
-        tasks = cast(Iterable[AbsTask], tasks)
+        tasks = cast("Iterable[AbsTask]", tasks)
         evaluate_results = []
         exceptions = []
         tasks_tqdm = tqdm(
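One behavioural change in the `_evaluate_task` hunks above is worth spelling out: the task's dataset is now only unloaded at the end of the call if the call itself loaded it, so data preloaded by the caller survives the evaluation. A rough sketch of that guard, using a stand-in class rather than mteb's AbsTask:

class _Task:
    # Stand-in for an mteb task; only mirrors the load/unload flags used above.
    def __init__(self) -> None:
        self.data_loaded = False

    def load_data(self) -> None:
        self.data_loaded = True

    def unload_data(self) -> None:
        self.data_loaded = False


def _evaluate(task: _Task) -> str:
    data_preloaded = task.data_loaded
    if not data_preloaded:
        task.load_data()

    result = "scores"  # placeholder for the real evaluation loop

    if not data_preloaded:  # only unload if we loaded the data
        task.unload_data()
    return result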
mteb/filter_tasks.py
CHANGED

@@ -1,19 +1,24 @@
 """This script contains functions that are used to get an overview of the MTEB benchmark."""
 
+from __future__ import annotations
+
 import logging
-from
-from typing import overload
+from typing import TYPE_CHECKING, overload
 
-from mteb.abstasks import (
-    AbsTask,
-)
 from mteb.abstasks.aggregated_task import AbsTaskAggregate
-from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
 from mteb.languages import (
     ISO_TO_LANGUAGE,
     ISO_TO_SCRIPT,
 )
-
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from mteb.abstasks import (
+        AbsTask,
+    )
+    from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
+    from mteb.types import Modalities
 
 logger = logging.getLogger(__name__)
 
mteb/get_tasks.py
CHANGED

@@ -1,20 +1,25 @@
 """This script contains functions that are used to get an overview of the MTEB benchmark."""
 
+from __future__ import annotations
+
 import difflib
 import logging
 import warnings
 from collections import Counter, defaultdict
-from
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import pandas as pd
 
 from mteb.abstasks import (
     AbsTask,
 )
-from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
 from mteb.filter_tasks import filter_tasks
-
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from mteb.abstasks.task_metadata import TaskCategory, TaskDomain, TaskType
+    from mteb.types import Modalities
 
 logger = logging.getLogger(__name__)
 
mteb/languages/language_scripts.py
CHANGED

@@ -1,10 +1,15 @@
-from
-from dataclasses import dataclass
+from __future__ import annotations
 
-from
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
 
 from mteb.languages.check_language_code import check_language_code
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+    from typing_extensions import Self
+
 
 @dataclass
 class LanguageScripts:
mteb/leaderboard/app.py
CHANGED

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import itertools
 import json
 import logging
@@ -5,15 +7,14 @@ import tempfile
 import time
 import warnings
 from pathlib import Path
-from typing import Literal, get_args
+from typing import TYPE_CHECKING, Literal, get_args
 from urllib.parse import urlencode
 
 import cachetools
 import gradio as gr
-import pandas as pd
+import pandas as pd  # noqa: TC002 # gradio tries to validate typehints
 
 import mteb
-from mteb import BenchmarkResults
 from mteb.benchmarks.benchmark import RtebBenchmark
 from mteb.cache import ResultCache
 from mteb.leaderboard.benchmark_selector import (
@@ -31,6 +32,9 @@ from mteb.leaderboard.table import (
 from mteb.leaderboard.text_segments import ACKNOWLEDGEMENT, FAQ
 from mteb.models.model_meta import MODEL_TYPES
 
+if TYPE_CHECKING:
+    from mteb import BenchmarkResults
+
 logger = logging.getLogger(__name__)
 
 
mteb/leaderboard/table.py
CHANGED

@@ -1,3 +1,7 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
@@ -5,8 +9,9 @@ import pandas as pd
 from matplotlib.colors import LinearSegmentedColormap
 from pandas.api.types import is_numeric_dtype
 
-
-from mteb.
+if TYPE_CHECKING:
+    from mteb.benchmarks.benchmark import Benchmark
+    from mteb.results.benchmark_results import BenchmarkResults
 
 
 def _borda_count(scores: pd.Series) -> pd.Series:
mteb/load_results.py
CHANGED

@@ -1,13 +1,19 @@
+from __future__ import annotations
+
 import json
 import logging
 import sys
-from
-from pathlib import Path
+from typing import TYPE_CHECKING
 
 from mteb.abstasks.abstask import AbsTask
 from mteb.models.model_meta import ModelMeta
 from mteb.results import BenchmarkResults, ModelResult, TaskResult
-
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+    from pathlib import Path
+
+    from mteb.types import ModelName, Revision
 
 if sys.version_info >= (3, 13):
     from warnings import deprecated
mteb/models/abs_encoder.py
CHANGED

@@ -1,14 +1,12 @@
+from __future__ import annotations
+
 import logging
 import warnings
 from abc import ABC, abstractmethod
-from
-from typing import Any, Literal, cast, get_args, overload
-
-from torch.utils.data import DataLoader
-from typing_extensions import Unpack
+from typing import TYPE_CHECKING, Any, Literal, cast, get_args, overload
 
 import mteb
-from mteb.abstasks.task_metadata import
+from mteb.abstasks.task_metadata import TaskType
 from mteb.similarity_functions import (
     cos_sim,
     dot_score,
@@ -18,13 +16,25 @@ from mteb.similarity_functions import (
     pairwise_max_sim,
 )
 from mteb.types import (
-    Array,
-    BatchedInput,
-    EncodeKwargs,
     PromptType,
 )
 
-from .model_meta import
+from .model_meta import ScoringFunction
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Sequence
+
+    from torch.utils.data import DataLoader
+    from typing_extensions import Unpack
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import (
+        Array,
+        BatchedInput,
+        EncodeKwargs,
+    )
+
+    from .model_meta import ModelMeta
 
 logger = logging.getLogger(__name__)
 
@@ -314,7 +324,7 @@ class AbsEncoder(ABC):
         ):
             arr = self.model.similarity(embeddings1, embeddings2)
             # We assume that the model returns an Array-like object:
-            arr = cast(Array, arr)
+            arr = cast("Array", arr)
             return arr
         return cos_sim(embeddings1, embeddings2)
         if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
@@ -352,7 +362,7 @@
         ):
             arr = self.model.similarity_pairwise(embeddings1, embeddings2)
             # We assume that the model returns an Array-like object:
-            arr = cast(Array, arr)
+            arr = cast("Array", arr)
             return arr
         return pairwise_cos_sim(embeddings1, embeddings2)
         if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
mteb/models/cache_wrappers/cache_backend_protocol.py
CHANGED

@@ -1,9 +1,11 @@
 from __future__ import annotations
 
-from
-from typing import Any, Protocol, runtime_checkable
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
 
-
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    import numpy as np
 
 
 @runtime_checkable
mteb/models/cache_wrappers/cache_backends/_hash_utils.py
CHANGED

@@ -1,6 +1,12 @@
+from __future__ import annotations
+
 import hashlib
-from
-
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+    from PIL import Image
 
 
 def _hash_item(item: Mapping[str, Any]) -> str:
@@ -10,8 +16,6 @@ def _hash_item(item: Mapping[str, Any]) -> str:
     item_hash = hashlib.sha256(item_text.encode()).hexdigest()
 
     if "image" in item:
-        from PIL import Image
-
         image: Image.Image = item["image"]
         item_hash += hashlib.sha256(image.tobytes()).hexdigest()
 
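For context, the `_hash_item` hunks above only show the hashing lines; the construction of `item_text` is not part of this diff. A self-contained sketch of the same idea, hashing the text fields and appending a second digest for an optional PIL image, where the JSON serialisation of the text fields is an assumption rather than mteb's actual code:

from __future__ import annotations

import hashlib
import json
from typing import Any


def hash_item(item: dict[str, Any]) -> str:
    # Assumed serialisation of the non-image fields; only the two sha256 calls
    # below appear in the diff itself.
    item_text = json.dumps({k: v for k, v in item.items() if k != "image"}, sort_keys=True)
    item_hash = hashlib.sha256(item_text.encode()).hexdigest()

    if "image" in item:
        # PIL.Image.Image.tobytes() yields the raw pixel data, so the resulting
        # cache key changes whenever the image content changes.
        item_hash += hashlib.sha256(item["image"].tobytes()).hexdigest()
    return item_hash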