mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +27 -16
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +20 -14
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +9 -4
- mteb/models/model_implementations/bedrock_models.py +16 -6
- mteb/models/model_implementations/blip2_models.py +9 -4
- mteb/models/model_implementations/blip_models.py +9 -4
- mteb/models/model_implementations/bm25.py +15 -10
- mteb/models/model_implementations/bmretriever_models.py +6 -2
- mteb/models/model_implementations/cde_models.py +9 -5
- mteb/models/model_implementations/clip_models.py +9 -4
- mteb/models/model_implementations/cohere_models.py +10 -4
- mteb/models/model_implementations/cohere_v.py +9 -4
- mteb/models/model_implementations/colpali_models.py +4 -3
- mteb/models/model_implementations/colqwen_models.py +10 -31
- mteb/models/model_implementations/colsmol_models.py +1 -1
- mteb/models/model_implementations/conan_models.py +10 -4
- mteb/models/model_implementations/dino_models.py +9 -4
- mteb/models/model_implementations/e5_v.py +9 -4
- mteb/models/model_implementations/eagerworks_models.py +10 -4
- mteb/models/model_implementations/evaclip_models.py +9 -4
- mteb/models/model_implementations/gme_v_models.py +5 -3
- mteb/models/model_implementations/google_models.py +10 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
- mteb/models/model_implementations/hinvec_models.py +5 -1
- mteb/models/model_implementations/jasper_models.py +12 -5
- mteb/models/model_implementations/jina_clip.py +9 -4
- mteb/models/model_implementations/jina_models.py +10 -5
- mteb/models/model_implementations/kalm_models.py +18 -12
- mteb/models/model_implementations/linq_models.py +6 -1
- mteb/models/model_implementations/listconranker.py +9 -4
- mteb/models/model_implementations/llm2clip_models.py +9 -4
- mteb/models/model_implementations/llm2vec_models.py +12 -6
- mteb/models/model_implementations/mcinext_models.py +5 -2
- mteb/models/model_implementations/mdbr_models.py +3 -1
- mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
- mteb/models/model_implementations/moco_models.py +9 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +10 -4
- mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
- mteb/models/model_implementations/nomic_models.py +10 -4
- mteb/models/model_implementations/nomic_models_vision.py +4 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
- mteb/models/model_implementations/nvidia_models.py +12 -4
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/openai_models.py +9 -4
- mteb/models/model_implementations/openclip_models.py +9 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
- mteb/models/model_implementations/ops_moa_models.py +7 -2
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -6
- mteb/models/model_implementations/pylate_models.py +19 -13
- mteb/models/model_implementations/qwen3_models.py +8 -1
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/repllama_models.py +13 -6
- mteb/models/model_implementations/rerankers_custom.py +10 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
- mteb/models/model_implementations/salesforce_models.py +7 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
- mteb/models/model_implementations/seed_models.py +1 -1
- mteb/models/model_implementations/siglip_models.py +9 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/uae_models.py +9 -4
- mteb/models/model_implementations/vdr_models.py +7 -1
- mteb/models/model_implementations/vista_models.py +9 -4
- mteb/models/model_implementations/vlm2vec_models.py +9 -4
- mteb/models/model_implementations/voyage_models.py +10 -4
- mteb/models/model_implementations/voyage_v.py +10 -6
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +12 -7
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +41 -10
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
|
|
4
4
|
STS17MultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_sts17
|
|
7
|
+
task_list_sts17 = [
|
|
8
8
|
STS17MultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["eng"], hf_subsets=["en-en"]
|
|
10
10
|
)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
|
|
4
4
|
STSBenchmarkMultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_stsb
|
|
7
|
+
task_list_stsb = [
|
|
8
8
|
STSBenchmarkMultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["eng"], hf_subsets=["en"]
|
|
10
10
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrievalFa,
|
|
5
5
|
CQADupstackEnglishRetrievalFa,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrievalFa,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrievalFa(),
|
|
20
20
|
CQADupstackEnglishRetrievalFa(),
|
|
21
21
|
CQADupstackGamingRetrievalFa(),
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.classification import (
|
|
4
4
|
SynPerChatbotConvSAAnger,
|
|
5
5
|
SynPerChatbotConvSAFear,
|
|
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
|
|
|
12
12
|
SynPerChatbotConvSASurprise,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
task_list_cqa
|
|
15
|
+
task_list_cqa = [
|
|
16
16
|
SynPerChatbotConvSAAnger(),
|
|
17
17
|
SynPerChatbotConvSASatisfaction(),
|
|
18
18
|
SynPerChatbotConvSAFriendship(),
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
|
|
4
4
|
STS17MultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_sts17_multi
|
|
7
|
+
task_list_sts17_multi = [
|
|
8
8
|
STS17MultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["ara", "eng", "spa", "kor"],
|
|
10
10
|
hf_subsets=[
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
|
|
4
4
|
STSBenchmarkMultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_multi
|
|
7
|
+
task_list_multi = [
|
|
8
8
|
STSBenchmarkMultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=[
|
|
10
10
|
"deu",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidNLRetrieval,
|
|
5
5
|
CQADupstackEnglishNLRetrieval,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressNLRetrieval,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidNLRetrieval(),
|
|
20
20
|
CQADupstackEnglishNLRetrieval(),
|
|
21
21
|
CQADupstackGamingNLRetrieval(),
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrievalPL,
|
|
5
5
|
CQADupstackEnglishRetrievalPL,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrievalPL,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrievalPL(),
|
|
20
20
|
CQADupstackEnglishRetrievalPL(),
|
|
21
21
|
CQADupstackGamingRetrievalPL(),
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import random
|
|
2
|
-
from collections.abc import Iterable
|
|
3
4
|
from itertools import islice
|
|
4
|
-
from typing import TypeVar
|
|
5
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
5
6
|
|
|
6
7
|
import datasets
|
|
7
8
|
|
|
8
9
|
from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
|
|
9
10
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterable
|
|
14
|
+
|
|
15
|
+
|
|
11
16
|
T = TypeVar("T")
|
|
12
17
|
|
|
13
18
|
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import random
|
|
2
|
-
from collections.abc import Iterable
|
|
3
4
|
from itertools import islice
|
|
4
|
-
from typing import TypeVar
|
|
5
|
+
from typing import TYPE_CHECKING, TypeVar
|
|
5
6
|
|
|
6
7
|
import datasets
|
|
7
8
|
|
|
8
9
|
from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
|
|
9
10
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterable
|
|
14
|
+
|
|
15
|
+
|
|
11
16
|
T = TypeVar("T")
|
|
12
17
|
|
|
13
18
|
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
2
4
|
|
|
3
5
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
4
6
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
5
7
|
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
|
|
6
11
|
_CITATION = """
|
|
7
12
|
@misc{weller2025theoreticallimit,
|
|
8
13
|
archiveprefix = {arXiv},
|
|
@@ -30,15 +30,15 @@ def load_ruscibench_data(
|
|
|
30
30
|
|
|
31
31
|
for lang in langs:
|
|
32
32
|
lang_corpus = cast(
|
|
33
|
-
datasets.Dataset,
|
|
33
|
+
"datasets.Dataset",
|
|
34
34
|
datasets.load_dataset(path, f"corpus-{lang}", revision=revision),
|
|
35
35
|
)["corpus"]
|
|
36
36
|
lang_queries = cast(
|
|
37
|
-
datasets.Dataset,
|
|
37
|
+
"datasets.Dataset",
|
|
38
38
|
datasets.load_dataset(path, f"queries-{lang}", revision=revision),
|
|
39
39
|
)["queries"]
|
|
40
40
|
lang_qrels = cast(
|
|
41
|
-
datasets.Dataset,
|
|
41
|
+
"datasets.Dataset",
|
|
42
42
|
datasets.load_dataset(path, f"{lang}", revision=revision),
|
|
43
43
|
)["test"]
|
|
44
44
|
corpus[lang] = {
|
mteb/types/_encoder_io.py
CHANGED
|
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, TypedDict
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import torch
|
|
9
9
|
from datasets import Dataset
|
|
10
|
-
from typing_extensions import NotRequired
|
|
11
10
|
|
|
12
11
|
if TYPE_CHECKING:
|
|
13
12
|
from PIL import Image
|
|
13
|
+
from typing_extensions import NotRequired
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class EncodeKwargs(TypedDict):
|
mteb/types/statistics.py
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from typing_extensions import TypedDict
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from typing_extensions import NotRequired
|
|
9
|
+
|
|
10
|
+
from mteb.types import HFSubset
|
|
4
11
|
|
|
5
12
|
|
|
6
13
|
class SplitDescriptiveStatistics(TypedDict):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.7.1
|
|
3
|
+
Version: 2.7.3
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|