mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +27 -16
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +20 -14
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +9 -4
- mteb/models/model_implementations/bedrock_models.py +16 -6
- mteb/models/model_implementations/blip2_models.py +9 -4
- mteb/models/model_implementations/blip_models.py +9 -4
- mteb/models/model_implementations/bm25.py +15 -10
- mteb/models/model_implementations/bmretriever_models.py +6 -2
- mteb/models/model_implementations/cde_models.py +9 -5
- mteb/models/model_implementations/clip_models.py +9 -4
- mteb/models/model_implementations/cohere_models.py +10 -4
- mteb/models/model_implementations/cohere_v.py +9 -4
- mteb/models/model_implementations/colpali_models.py +4 -3
- mteb/models/model_implementations/colqwen_models.py +10 -31
- mteb/models/model_implementations/colsmol_models.py +1 -1
- mteb/models/model_implementations/conan_models.py +10 -4
- mteb/models/model_implementations/dino_models.py +9 -4
- mteb/models/model_implementations/e5_v.py +9 -4
- mteb/models/model_implementations/eagerworks_models.py +10 -4
- mteb/models/model_implementations/evaclip_models.py +9 -4
- mteb/models/model_implementations/gme_v_models.py +5 -3
- mteb/models/model_implementations/google_models.py +10 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
- mteb/models/model_implementations/hinvec_models.py +5 -1
- mteb/models/model_implementations/jasper_models.py +12 -5
- mteb/models/model_implementations/jina_clip.py +9 -4
- mteb/models/model_implementations/jina_models.py +10 -5
- mteb/models/model_implementations/kalm_models.py +18 -12
- mteb/models/model_implementations/linq_models.py +6 -1
- mteb/models/model_implementations/listconranker.py +9 -4
- mteb/models/model_implementations/llm2clip_models.py +9 -4
- mteb/models/model_implementations/llm2vec_models.py +12 -6
- mteb/models/model_implementations/mcinext_models.py +5 -2
- mteb/models/model_implementations/mdbr_models.py +3 -1
- mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
- mteb/models/model_implementations/moco_models.py +9 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +10 -4
- mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
- mteb/models/model_implementations/nomic_models.py +10 -4
- mteb/models/model_implementations/nomic_models_vision.py +4 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
- mteb/models/model_implementations/nvidia_models.py +12 -4
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/openai_models.py +9 -4
- mteb/models/model_implementations/openclip_models.py +9 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
- mteb/models/model_implementations/ops_moa_models.py +7 -2
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -6
- mteb/models/model_implementations/pylate_models.py +19 -13
- mteb/models/model_implementations/qwen3_models.py +8 -1
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/repllama_models.py +13 -6
- mteb/models/model_implementations/rerankers_custom.py +10 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
- mteb/models/model_implementations/salesforce_models.py +7 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
- mteb/models/model_implementations/seed_models.py +1 -1
- mteb/models/model_implementations/siglip_models.py +9 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/uae_models.py +9 -4
- mteb/models/model_implementations/vdr_models.py +7 -1
- mteb/models/model_implementations/vista_models.py +9 -4
- mteb/models/model_implementations/vlm2vec_models.py +9 -4
- mteb/models/model_implementations/voyage_models.py +10 -4
- mteb/models/model_implementations/voyage_v.py +10 -6
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +12 -7
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +41 -10
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
8
|
from mteb._requires_package import requires_image_dependencies
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
10
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
12
17
|
|
|
13
18
|
VISTA_CITATION = """@article{zhou2024vista,
|
|
14
19
|
title={VISTA: Visualized Text Embedding For Universal Multi-Modal Retrieval},
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
3
5
|
|
|
4
6
|
import torch
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
9
|
from mteb._requires_package import (
|
|
@@ -10,10 +11,14 @@ from mteb._requires_package import (
|
|
|
10
11
|
requires_package,
|
|
11
12
|
suggest_package,
|
|
12
13
|
)
|
|
13
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
14
14
|
from mteb.models.abs_encoder import AbsEncoder
|
|
15
15
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
16
|
-
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from torch.utils.data import DataLoader
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
17
22
|
|
|
18
23
|
logger = logging.getLogger(__name__)
|
|
19
24
|
|
|
@@ -1,16 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import time
|
|
2
4
|
from functools import wraps
|
|
3
|
-
from typing import Any, Literal
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
|
-
from torch.utils.data import DataLoader
|
|
7
8
|
from tqdm.auto import tqdm
|
|
8
9
|
|
|
9
10
|
from mteb._requires_package import requires_package
|
|
10
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
11
|
from mteb.models.abs_encoder import AbsEncoder
|
|
12
12
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
13
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from torch.utils.data import DataLoader
|
|
17
|
+
|
|
18
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
|
+
from mteb.types import Array, BatchedInput
|
|
14
20
|
|
|
15
21
|
VOYAGE_TRAINING_DATA = set(
|
|
16
22
|
# Self-reported (message from VoyageAI member)
|
|
@@ -4,17 +4,19 @@ import logging
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Literal
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
7
|
from tqdm.auto import tqdm
|
|
9
8
|
|
|
10
9
|
from mteb._requires_package import requires_image_dependencies, requires_package
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
10
|
from mteb.models.abs_encoder import AbsEncoder
|
|
13
11
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
-
from mteb.types import
|
|
12
|
+
from mteb.types import PromptType
|
|
15
13
|
|
|
16
14
|
if TYPE_CHECKING:
|
|
17
15
|
from PIL import Image
|
|
16
|
+
from torch.utils.data import DataLoader
|
|
17
|
+
|
|
18
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
|
+
from mteb.types import Array, BatchedInput
|
|
18
20
|
|
|
19
21
|
logger = logging.getLogger(__name__)
|
|
20
22
|
|
|
@@ -27,6 +29,8 @@ def _downsample_image(
|
|
|
27
29
|
Returns:
|
|
28
30
|
The downsampled image.
|
|
29
31
|
"""
|
|
32
|
+
from PIL.Image import Resampling
|
|
33
|
+
|
|
30
34
|
width, height = image.size
|
|
31
35
|
pixels = width * height
|
|
32
36
|
|
|
@@ -42,15 +46,15 @@ def _downsample_image(
|
|
|
42
46
|
logger.info(
|
|
43
47
|
f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
|
|
44
48
|
)
|
|
45
|
-
return image.resize(new_size,
|
|
49
|
+
return image.resize(new_size, Resampling.LANCZOS)
|
|
46
50
|
if width > height:
|
|
47
51
|
if width > 10000:
|
|
48
52
|
logger.error("Processing extremely wide images.")
|
|
49
|
-
return image.resize((10000, height),
|
|
53
|
+
return image.resize((10000, height), Resampling.LANCZOS)
|
|
50
54
|
else:
|
|
51
55
|
if height > 10000:
|
|
52
56
|
logger.error("Processing extremely high images.")
|
|
53
|
-
return image.resize((width, 10000),
|
|
57
|
+
return image.resize((width, 10000), Resampling.LANCZOS)
|
|
54
58
|
return image
|
|
55
59
|
|
|
56
60
|
|
mteb/models/model_meta.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
import logging
|
|
5
5
|
import warnings
|
|
6
|
-
from collections.abc import Callable
|
|
6
|
+
from collections.abc import Callable
|
|
7
7
|
from dataclasses import field
|
|
8
8
|
from enum import Enum
|
|
9
9
|
from functools import partial
|
|
@@ -11,9 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
12
12
|
|
|
13
13
|
from huggingface_hub import (
|
|
14
|
-
GitCommitInfo,
|
|
15
14
|
ModelCard,
|
|
16
|
-
ModelCardData,
|
|
17
15
|
get_safetensors_metadata,
|
|
18
16
|
hf_hub_download,
|
|
19
17
|
list_repo_commits,
|
|
@@ -30,17 +28,24 @@ from huggingface_hub.errors import (
|
|
|
30
28
|
)
|
|
31
29
|
from pydantic import BaseModel, ConfigDict, field_validator, model_validator
|
|
32
30
|
from transformers import AutoConfig
|
|
33
|
-
from typing_extensions import Self
|
|
34
31
|
|
|
35
32
|
from mteb._helpful_enum import HelpfulStrEnum
|
|
36
33
|
from mteb.languages import check_language_code
|
|
37
|
-
from mteb.models.models_protocols import
|
|
34
|
+
from mteb.models.models_protocols import MTEBModels
|
|
38
35
|
from mteb.types import ISOLanguageScript, Licenses, Modalities, StrDate, StrURL
|
|
39
36
|
|
|
40
37
|
if TYPE_CHECKING:
|
|
38
|
+
from collections.abc import Sequence
|
|
39
|
+
|
|
40
|
+
from huggingface_hub import (
|
|
41
|
+
GitCommitInfo,
|
|
42
|
+
ModelCardData,
|
|
43
|
+
)
|
|
41
44
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
45
|
+
from typing_extensions import Self
|
|
42
46
|
|
|
43
47
|
from mteb.abstasks import AbsTask
|
|
48
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
44
49
|
|
|
45
50
|
|
|
46
51
|
logger = logging.getLogger(__name__)
|
|
@@ -479,7 +484,7 @@ class ModelMeta(BaseModel):
|
|
|
479
484
|
if isinstance(tasks[0], str):
|
|
480
485
|
benchmark_datasets = set(tasks)
|
|
481
486
|
else:
|
|
482
|
-
tasks = cast(Sequence[
|
|
487
|
+
tasks = cast("Sequence[AbsTask]", tasks)
|
|
483
488
|
benchmark_datasets = set()
|
|
484
489
|
for task in tasks:
|
|
485
490
|
benchmark_datasets.add(task.metadata.name)
|
|
@@ -534,7 +539,7 @@ class ModelMeta(BaseModel):
|
|
|
534
539
|
if isinstance(tasks[0], str):
|
|
535
540
|
benchmark_datasets = set(tasks)
|
|
536
541
|
else:
|
|
537
|
-
tasks = cast(Sequence[
|
|
542
|
+
tasks = cast("Sequence[AbsTask]", tasks)
|
|
538
543
|
benchmark_datasets = {task.metadata.name for task in tasks}
|
|
539
544
|
overlap = training_datasets & benchmark_datasets
|
|
540
545
|
perc_overlap = 100 * (len(overlap) / len(benchmark_datasets))
|
mteb/models/models_protocols.py
CHANGED
|
@@ -1,22 +1,23 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
4
|
-
from typing_extensions import Unpack
|
|
5
|
-
|
|
6
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
7
|
-
from mteb.types import (
|
|
8
|
-
Array,
|
|
9
|
-
BatchedInput,
|
|
10
|
-
CorpusDatasetType,
|
|
11
|
-
EncodeKwargs,
|
|
12
|
-
PromptType,
|
|
13
|
-
QueryDatasetType,
|
|
14
|
-
RetrievalOutputType,
|
|
15
|
-
TopRankedDocumentsType,
|
|
16
|
-
)
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
17
4
|
|
|
18
5
|
if TYPE_CHECKING:
|
|
6
|
+
from torch.utils.data import DataLoader
|
|
7
|
+
from typing_extensions import Unpack
|
|
8
|
+
|
|
9
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
10
|
from mteb.models.model_meta import ModelMeta
|
|
11
|
+
from mteb.types import (
|
|
12
|
+
Array,
|
|
13
|
+
BatchedInput,
|
|
14
|
+
CorpusDatasetType,
|
|
15
|
+
EncodeKwargs,
|
|
16
|
+
PromptType,
|
|
17
|
+
QueryDatasetType,
|
|
18
|
+
RetrievalOutputType,
|
|
19
|
+
TopRankedDocumentsType,
|
|
20
|
+
)
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
@runtime_checkable
|
|
@@ -72,7 +73,7 @@ class SearchProtocol(Protocol):
|
|
|
72
73
|
...
|
|
73
74
|
|
|
74
75
|
@property
|
|
75
|
-
def mteb_model_meta(self) ->
|
|
76
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
76
77
|
"""Metadata of the model"""
|
|
77
78
|
...
|
|
78
79
|
|
|
@@ -177,7 +178,7 @@ class EncoderProtocol(Protocol):
|
|
|
177
178
|
...
|
|
178
179
|
|
|
179
180
|
@property
|
|
180
|
-
def mteb_model_meta(self) ->
|
|
181
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
181
182
|
"""Metadata of the model"""
|
|
182
183
|
...
|
|
183
184
|
|
|
@@ -236,7 +237,7 @@ class CrossEncoderProtocol(Protocol):
|
|
|
236
237
|
...
|
|
237
238
|
|
|
238
239
|
@property
|
|
239
|
-
def mteb_model_meta(self) ->
|
|
240
|
+
def mteb_model_meta(self) -> ModelMeta:
|
|
240
241
|
"""Metadata of the model"""
|
|
241
242
|
...
|
|
242
243
|
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import Protocol
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
from
|
|
3
|
+
from typing import TYPE_CHECKING, Protocol
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
|
|
8
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
5
9
|
|
|
6
10
|
|
|
7
11
|
class IndexEncoderSearchProtocol(Protocol):
|
|
@@ -1,14 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import warnings
|
|
3
|
-
from
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
8
|
import torch
|
|
7
9
|
|
|
8
10
|
from mteb._requires_package import requires_package
|
|
9
11
|
from mteb.models.model_meta import ScoringFunction
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
|
|
16
|
+
import faiss
|
|
17
|
+
|
|
18
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
19
|
+
from mteb.types import Array, TopRankedDocumentsType
|
|
20
|
+
|
|
12
21
|
|
|
13
22
|
logger = logging.getLogger(__name__)
|
|
14
23
|
|
|
@@ -33,7 +42,6 @@ class FaissSearchIndex:
|
|
|
33
42
|
install_instruction="pip install mteb[faiss-cpu]",
|
|
34
43
|
)
|
|
35
44
|
|
|
36
|
-
import faiss
|
|
37
45
|
from faiss import IndexFlatIP, IndexFlatL2
|
|
38
46
|
|
|
39
47
|
# https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
|
mteb/models/search_wrappers.py
CHANGED
|
@@ -1,28 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import heapq
|
|
2
4
|
import logging
|
|
3
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
4
6
|
|
|
5
7
|
import torch
|
|
6
8
|
from datasets import Dataset
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
9
|
|
|
9
10
|
from mteb._create_dataloaders import (
|
|
10
11
|
create_dataloader,
|
|
11
12
|
)
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.types import (
|
|
14
|
-
Array,
|
|
15
|
-
BatchedInput,
|
|
16
|
-
CorpusDatasetType,
|
|
17
|
-
EncodeKwargs,
|
|
18
14
|
PromptType,
|
|
19
|
-
QueryDatasetType,
|
|
20
|
-
RetrievalOutputType,
|
|
21
|
-
TopRankedDocumentsType,
|
|
22
15
|
)
|
|
23
16
|
|
|
24
|
-
|
|
25
|
-
from .
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from torch.utils.data import DataLoader
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.types import (
|
|
22
|
+
Array,
|
|
23
|
+
BatchedInput,
|
|
24
|
+
CorpusDatasetType,
|
|
25
|
+
EncodeKwargs,
|
|
26
|
+
QueryDatasetType,
|
|
27
|
+
RetrievalOutputType,
|
|
28
|
+
TopRankedDocumentsType,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from .models_protocols import CrossEncoderProtocol, EncoderProtocol
|
|
32
|
+
from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
|
|
26
33
|
|
|
27
34
|
logger = logging.getLogger(__name__)
|
|
28
35
|
|
|
@@ -7,19 +7,20 @@ from typing import TYPE_CHECKING, Any
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import torch
|
|
9
9
|
from packaging.version import Version
|
|
10
|
-
from torch.utils.data import DataLoader
|
|
11
|
-
from typing_extensions import Unpack
|
|
12
10
|
|
|
13
11
|
from mteb._log_once import LogOnce
|
|
14
12
|
from mteb.models import ModelMeta
|
|
15
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
from .abs_encoder import AbsEncoder
|
|
18
16
|
|
|
19
17
|
if TYPE_CHECKING:
|
|
20
18
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
|
19
|
+
from torch.utils.data import DataLoader
|
|
20
|
+
from typing_extensions import Unpack
|
|
21
21
|
|
|
22
22
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
23
|
+
from mteb.types import Array, BatchedInput, EncodeKwargs
|
|
23
24
|
|
|
24
25
|
logger = logging.getLogger(__name__)
|
|
25
26
|
|
mteb/models/vllm_wrapper.py
CHANGED
|
@@ -4,23 +4,25 @@ import atexit
|
|
|
4
4
|
import gc
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
|
-
from collections.abc import Callable
|
|
8
7
|
from typing import TYPE_CHECKING, Any, Literal
|
|
9
8
|
|
|
10
9
|
import numpy as np
|
|
11
10
|
import torch
|
|
12
|
-
from torch.utils.data import DataLoader
|
|
13
11
|
|
|
14
12
|
from mteb._requires_package import requires_package
|
|
15
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
13
|
from mteb.models import ModelMeta
|
|
17
14
|
from mteb.models.abs_encoder import AbsEncoder
|
|
18
|
-
from mteb.types import
|
|
15
|
+
from mteb.types import PromptType
|
|
19
16
|
|
|
20
17
|
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
|
|
20
|
+
from torch.utils.data import DataLoader
|
|
21
21
|
from vllm.config import PoolerConfig # type: ignore[import-not-found]
|
|
22
|
-
|
|
23
|
-
|
|
22
|
+
|
|
23
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
24
|
+
from mteb.types import Array, BatchedInput
|
|
25
|
+
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
26
28
|
|
|
@@ -4,34 +4,39 @@ import functools
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
import warnings
|
|
7
|
-
from collections.abc import Callable, Iterable, Iterator
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import Any, Literal, cast
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
10
9
|
|
|
11
10
|
import pandas as pd
|
|
12
11
|
from packaging.version import InvalidVersion, Version
|
|
13
12
|
from pydantic import BaseModel, ConfigDict
|
|
14
|
-
from typing_extensions import Self
|
|
15
13
|
|
|
16
|
-
from mteb.abstasks.abstask import AbsTask
|
|
17
|
-
from mteb.abstasks.task_metadata import (
|
|
18
|
-
TaskDomain,
|
|
19
|
-
TaskType,
|
|
20
|
-
)
|
|
21
14
|
from mteb.benchmarks.benchmark import Benchmark
|
|
22
15
|
from mteb.models import ModelMeta
|
|
23
16
|
from mteb.models.get_model_meta import get_model_metas
|
|
24
|
-
from mteb.types import (
|
|
25
|
-
ISOLanguage,
|
|
26
|
-
ISOLanguageScript,
|
|
27
|
-
Modalities,
|
|
28
|
-
Score,
|
|
29
|
-
ScoresDict,
|
|
30
|
-
SplitName,
|
|
31
|
-
)
|
|
32
17
|
|
|
33
18
|
from .model_result import ModelResult, _aggregate_and_pivot
|
|
34
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from collections.abc import Callable, Iterable, Iterator
|
|
22
|
+
|
|
23
|
+
from typing_extensions import Self
|
|
24
|
+
|
|
25
|
+
from mteb.abstasks.abstask import AbsTask
|
|
26
|
+
from mteb.abstasks.task_metadata import (
|
|
27
|
+
TaskDomain,
|
|
28
|
+
TaskType,
|
|
29
|
+
)
|
|
30
|
+
from mteb.types import (
|
|
31
|
+
ISOLanguage,
|
|
32
|
+
ISOLanguageScript,
|
|
33
|
+
Modalities,
|
|
34
|
+
Score,
|
|
35
|
+
ScoresDict,
|
|
36
|
+
SplitName,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
35
40
|
logger = logging.getLogger(__name__)
|
|
36
41
|
|
|
37
42
|
|
|
@@ -144,7 +149,7 @@ class BenchmarkResults(BaseModel):
|
|
|
144
149
|
raise ValueError("name in ModelMeta is None. It must be a string.")
|
|
145
150
|
name_rev[name.name] = name.revision
|
|
146
151
|
else:
|
|
147
|
-
name_ = cast(str, name)
|
|
152
|
+
name_ = cast("str", name)
|
|
148
153
|
name_rev[name_] = revision
|
|
149
154
|
|
|
150
155
|
for model_res in self.model_results:
|
mteb/results/model_result.py
CHANGED
|
@@ -2,30 +2,36 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import warnings
|
|
5
|
-
from
|
|
6
|
-
from typing import Any, Literal, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
7
6
|
|
|
8
7
|
import numpy as np
|
|
9
8
|
import pandas as pd
|
|
10
9
|
from pydantic import BaseModel, ConfigDict, Field
|
|
11
10
|
from typing_extensions import overload
|
|
12
11
|
|
|
13
|
-
from mteb.abstasks.abstask import AbsTask
|
|
14
|
-
from mteb.abstasks.task_metadata import (
|
|
15
|
-
TaskDomain,
|
|
16
|
-
TaskType,
|
|
17
|
-
)
|
|
18
12
|
from mteb.types import (
|
|
19
|
-
ISOLanguage,
|
|
20
|
-
ISOLanguageScript,
|
|
21
13
|
Modalities,
|
|
22
|
-
Score,
|
|
23
|
-
ScoresDict,
|
|
24
|
-
SplitName,
|
|
25
14
|
)
|
|
26
15
|
|
|
27
16
|
from .task_result import TaskError, TaskResult
|
|
28
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Callable, Iterable
|
|
20
|
+
|
|
21
|
+
from mteb.abstasks.abstask import AbsTask
|
|
22
|
+
from mteb.abstasks.task_metadata import (
|
|
23
|
+
TaskDomain,
|
|
24
|
+
TaskType,
|
|
25
|
+
)
|
|
26
|
+
from mteb.types import (
|
|
27
|
+
ISOLanguage,
|
|
28
|
+
ISOLanguageScript,
|
|
29
|
+
Score,
|
|
30
|
+
ScoresDict,
|
|
31
|
+
SplitName,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
29
35
|
logger = logging.getLogger(__name__)
|
|
30
36
|
|
|
31
37
|
|
|
@@ -83,7 +89,7 @@ class ModelResult(BaseModel):
|
|
|
83
89
|
model_revision: str | None
|
|
84
90
|
task_results: list[TaskResult]
|
|
85
91
|
default_modalities: list[Modalities] = Field(
|
|
86
|
-
default_factory=lambda: [cast(Modalities, "text")], alias="modalities"
|
|
92
|
+
default_factory=lambda: [cast("Modalities", "text")], alias="modalities"
|
|
87
93
|
)
|
|
88
94
|
model_config = (
|
|
89
95
|
ConfigDict( # to free up the name model_* which is otherwise protected
|
|
@@ -202,8 +208,8 @@ class ModelResult(BaseModel):
|
|
|
202
208
|
aggregation = aggregation if aggregation is not None else np.mean
|
|
203
209
|
else:
|
|
204
210
|
use_fast = True
|
|
205
|
-
aggregation = cast(Callable[[list[Score]], Any], aggregation)
|
|
206
|
-
getter = cast(Callable[[ScoresDict], Score], getter)
|
|
211
|
+
aggregation = cast("Callable[[list[Score]], Any]", aggregation)
|
|
212
|
+
getter = cast("Callable[[ScoresDict], Score]", getter)
|
|
207
213
|
|
|
208
214
|
if format == "wide":
|
|
209
215
|
scores = {}
|
mteb/results/task_result.py
CHANGED
|
@@ -4,34 +4,40 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import warnings
|
|
6
6
|
from collections import defaultdict
|
|
7
|
-
from collections.abc import Callable, Iterable, Mapping
|
|
8
7
|
from functools import cached_property
|
|
9
8
|
from importlib.metadata import version
|
|
10
|
-
from
|
|
11
|
-
from typing import Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
12
10
|
|
|
13
11
|
import numpy as np
|
|
14
12
|
from huggingface_hub import EvalResult
|
|
15
13
|
from packaging.version import Version
|
|
16
14
|
from pydantic import BaseModel, field_validator
|
|
17
|
-
from typing_extensions import Self
|
|
18
15
|
|
|
19
16
|
from mteb import TaskMetadata
|
|
20
17
|
from mteb._helpful_enum import HelpfulStrEnum
|
|
21
18
|
from mteb.abstasks import AbsTaskClassification
|
|
22
19
|
from mteb.abstasks.abstask import AbsTask
|
|
23
|
-
from mteb.abstasks.task_metadata import TaskDomain
|
|
24
20
|
from mteb.languages import LanguageScripts
|
|
25
21
|
from mteb.models.model_meta import ScoringFunction
|
|
26
22
|
from mteb.types import (
|
|
27
|
-
HFSubset,
|
|
28
|
-
ISOLanguage,
|
|
29
|
-
ISOLanguageScript,
|
|
30
|
-
Score,
|
|
31
23
|
ScoresDict,
|
|
32
24
|
SplitName,
|
|
33
25
|
)
|
|
34
26
|
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from typing_extensions import Self
|
|
32
|
+
|
|
33
|
+
from mteb.abstasks.task_metadata import TaskDomain
|
|
34
|
+
from mteb.types import (
|
|
35
|
+
HFSubset,
|
|
36
|
+
ISOLanguage,
|
|
37
|
+
ISOLanguageScript,
|
|
38
|
+
Score,
|
|
39
|
+
)
|
|
40
|
+
|
|
35
41
|
logger = logging.getLogger(__name__)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -610,7 +616,10 @@ class TaskResult(BaseModel):
|
|
|
610
616
|
new_res = {**self.to_dict(), "scores": new_scores}
|
|
611
617
|
return TaskResult.from_validated(**new_res)
|
|
612
618
|
|
|
613
|
-
def validate_and_filter_scores(
|
|
619
|
+
def validate_and_filter_scores(
|
|
620
|
+
self,
|
|
621
|
+
task: AbsTask | None = None,
|
|
622
|
+
) -> TaskResult:
|
|
614
623
|
"""Validate and filter the scores against the task metadata.
|
|
615
624
|
|
|
616
625
|
This ensures that the scores are correct for the given task, by removing any splits besides those specified in the task metadata.
|
|
@@ -658,11 +667,33 @@ class TaskResult(BaseModel):
|
|
|
658
667
|
msg = f"{task.metadata.name}: Missing subsets {missing_subsets_str} for split {split}"
|
|
659
668
|
logger.warning(msg)
|
|
660
669
|
warnings.warn(msg)
|
|
670
|
+
for missing_subset in missing_subsets:
|
|
671
|
+
new_scores[split].append(
|
|
672
|
+
{
|
|
673
|
+
"hf_subset": missing_subset,
|
|
674
|
+
"main_score": np.nan,
|
|
675
|
+
"languages": task.metadata.hf_subsets_to_langscripts.get(
|
|
676
|
+
missing_subset, []
|
|
677
|
+
),
|
|
678
|
+
}
|
|
679
|
+
)
|
|
661
680
|
seen_splits.add(split)
|
|
662
681
|
if seen_splits != set(splits):
|
|
663
682
|
msg = f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
|
|
664
683
|
logger.warning(msg)
|
|
665
684
|
warnings.warn(msg)
|
|
685
|
+
for missing_split in set(splits) - seen_splits:
|
|
686
|
+
new_scores[missing_split] = []
|
|
687
|
+
for missing_subset in hf_subsets:
|
|
688
|
+
new_scores[missing_split].append(
|
|
689
|
+
{
|
|
690
|
+
"hf_subset": missing_subset,
|
|
691
|
+
"main_score": np.nan,
|
|
692
|
+
"languages": task.metadata.hf_subsets_to_langscripts.get(
|
|
693
|
+
missing_subset, []
|
|
694
|
+
),
|
|
695
|
+
}
|
|
696
|
+
)
|
|
666
697
|
data = self.model_dump()
|
|
667
698
|
data["scores"] = new_scores
|
|
668
699
|
return type(self).model_construct(**data)
|
mteb/similarity_functions.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
import torch
|
|
2
6
|
|
|
3
|
-
from mteb.models import EncoderProtocol
|
|
4
7
|
from mteb.models.model_meta import ScoringFunction
|
|
5
|
-
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from mteb.models import EncoderProtocol
|
|
11
|
+
from mteb.types import Array
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
def _use_torch_compile():
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrieval,
|
|
5
5
|
CQADupstackEnglishRetrieval,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrieval,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrieval(),
|
|
20
20
|
CQADupstackEnglishRetrieval(),
|
|
21
21
|
CQADupstackGamingRetrieval(),
|