mteb-2.7.1-py3-none-any.whl → mteb-2.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +27 -16
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +20 -14
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +9 -4
- mteb/models/model_implementations/bedrock_models.py +16 -6
- mteb/models/model_implementations/blip2_models.py +9 -4
- mteb/models/model_implementations/blip_models.py +9 -4
- mteb/models/model_implementations/bm25.py +15 -10
- mteb/models/model_implementations/bmretriever_models.py +6 -2
- mteb/models/model_implementations/cde_models.py +9 -5
- mteb/models/model_implementations/clip_models.py +9 -4
- mteb/models/model_implementations/cohere_models.py +10 -4
- mteb/models/model_implementations/cohere_v.py +9 -4
- mteb/models/model_implementations/colpali_models.py +4 -3
- mteb/models/model_implementations/colqwen_models.py +10 -31
- mteb/models/model_implementations/colsmol_models.py +1 -1
- mteb/models/model_implementations/conan_models.py +10 -4
- mteb/models/model_implementations/dino_models.py +9 -4
- mteb/models/model_implementations/e5_v.py +9 -4
- mteb/models/model_implementations/eagerworks_models.py +10 -4
- mteb/models/model_implementations/evaclip_models.py +9 -4
- mteb/models/model_implementations/gme_v_models.py +5 -3
- mteb/models/model_implementations/google_models.py +10 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
- mteb/models/model_implementations/hinvec_models.py +5 -1
- mteb/models/model_implementations/jasper_models.py +12 -5
- mteb/models/model_implementations/jina_clip.py +9 -4
- mteb/models/model_implementations/jina_models.py +10 -5
- mteb/models/model_implementations/kalm_models.py +18 -12
- mteb/models/model_implementations/linq_models.py +6 -1
- mteb/models/model_implementations/listconranker.py +9 -4
- mteb/models/model_implementations/llm2clip_models.py +9 -4
- mteb/models/model_implementations/llm2vec_models.py +12 -6
- mteb/models/model_implementations/mcinext_models.py +5 -2
- mteb/models/model_implementations/mdbr_models.py +3 -1
- mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
- mteb/models/model_implementations/moco_models.py +9 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +10 -4
- mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
- mteb/models/model_implementations/nomic_models.py +10 -4
- mteb/models/model_implementations/nomic_models_vision.py +4 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
- mteb/models/model_implementations/nvidia_models.py +12 -4
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/openai_models.py +9 -4
- mteb/models/model_implementations/openclip_models.py +9 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
- mteb/models/model_implementations/ops_moa_models.py +7 -2
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -6
- mteb/models/model_implementations/pylate_models.py +19 -13
- mteb/models/model_implementations/qwen3_models.py +8 -1
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/repllama_models.py +13 -6
- mteb/models/model_implementations/rerankers_custom.py +10 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
- mteb/models/model_implementations/salesforce_models.py +7 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
- mteb/models/model_implementations/seed_models.py +1 -1
- mteb/models/model_implementations/siglip_models.py +9 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/uae_models.py +9 -4
- mteb/models/model_implementations/vdr_models.py +7 -1
- mteb/models/model_implementations/vista_models.py +9 -4
- mteb/models/model_implementations/vlm2vec_models.py +9 -4
- mteb/models/model_implementations/voyage_models.py +10 -4
- mteb/models/model_implementations/voyage_v.py +10 -6
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +12 -7
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +41 -10
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
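The per-module diffs shown below largely follow one refactoring pattern: `from __future__ import annotations` is added at the top of each module, imports that are only needed for type annotations are moved under an `if TYPE_CHECKING:` block, and `typing.cast()` targets are written as strings so the referenced types are no longer required at runtime. A minimal, self-contained sketch of that pattern (the module and function names here are illustrative, not taken from mteb):

```python
from __future__ import annotations  # annotations are stored as strings, not evaluated

from typing import TYPE_CHECKING, cast

if TYPE_CHECKING:
    # Seen only by static type checkers; never imported at runtime,
    # which avoids import cost and circular-import problems.
    from datasets import Dataset


def first_text(dataset: Dataset) -> str:
    # cast() with a quoted type works even though Dataset is not a runtime name here;
    # at runtime cast() simply returns its second argument unchanged.
    texts = cast("list[str]", dataset["text"])
    return texts[0]
```

At runtime the only observable effect is fewer eager imports; type checkers still resolve `Dataset` and `"list[str]"` exactly as before.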
mteb/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from importlib.metadata import version
 from mteb import types
 from mteb.abstasks import AbsTask
 from mteb.abstasks.task_metadata import TaskMetadata
+from mteb.cache import ResultCache
 from mteb.deprecated_evaluator import MTEB
 from mteb.evaluate import evaluate
 from mteb.filter_tasks import filter_tasks
@@ -33,6 +34,7 @@ __all__ = [
     "CrossEncoderProtocol",
     "EncoderProtocol",
     "IndexEncoderSearchProtocol",
+    "ResultCache",
     "SearchProtocol",
     "SentenceTransformerEncoderWrapper",
     "TaskMetadata",
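Aside from version metadata, the change to the package root is the re-export of `ResultCache` shown above. Assuming `ResultCache` already lived in `mteb.cache` in 2.7.1 (that module is only modified in this release, not added), the class becomes importable directly from the top level:

```python
# New in 2.7.3: ResultCache is re-exported from the package root.
from mteb import ResultCache

# The fully qualified path continues to work as well.
from mteb.cache import ResultCache
```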
mteb/_create_dataloaders.py
CHANGED
@@ -1,21 +1,28 @@
+from __future__ import annotations
+
 import logging
 import warnings
-from collections.abc import Callable
-from typing import Any, cast
+from typing import TYPE_CHECKING, Any, cast
 
 import torch
 from datasets import Dataset, Image
 from torch.utils.data import DataLoader, default_collate
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.types import (
-    BatchedInput,
-    Conversation,
     ConversationTurn,
     PromptType,
-    QueryDatasetType,
 )
-
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import (
+        BatchedInput,
+        Conversation,
+        QueryDatasetType,
+    )
+    from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
 
 logger = logging.getLogger(__name__)
 
@@ -128,7 +135,7 @@ def _convert_conv_history_to_query(
     conversation = row["text"]
     # if it's a list of strings, just join them
     if isinstance(conversation, list) and isinstance(conversation[0], str):
-        conversation_ = cast(list[str], conversation)
+        conversation_ = cast("list[str]", conversation)
         conv_str = "; ".join(conversation_)
         current_conversation = [
             ConversationTurn(role="user", content=message) for message in conversation_
@@ -173,7 +180,7 @@ def _convert_conv_history_to_query(
 
     row["text"] = conv_str
     row["conversation"] = current_conversation
-    return cast(dict[str, str | list[ConversationTurn]], row)
+    return cast("dict[str, str | list[ConversationTurn]]", row)
 
 
 def _create_dataloader_for_queries_conversation(
mteb/_evaluators/any_sts_evaluator.py
CHANGED
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import logging
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
 
-from datasets import Dataset
 from sklearn.metrics.pairwise import (
     paired_cosine_distances,
     paired_euclidean_distances,
@@ -9,13 +10,17 @@ from sklearn.metrics.pairwise import (
 )
 
 from mteb._create_dataloaders import create_dataloader
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
 from mteb.similarity_functions import compute_pairwise_similarity
-from mteb.types import EncodeKwargs, PromptType
 
 from .evaluator import Evaluator
 
+if TYPE_CHECKING:
+    from datasets import Dataset
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import EncodeKwargs, PromptType
+
 logger = logging.getLogger(__name__)
 
 
mteb/_evaluators/clustering_evaluator.py
CHANGED
@@ -1,15 +1,21 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
-from datasets import Dataset
 from sklearn import cluster
 
 from mteb._create_dataloaders import create_dataloader
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
-from mteb.types import EncodeKwargs
 
 from .evaluator import Evaluator
 
+if TYPE_CHECKING:
+    from datasets import Dataset
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import EncodeKwargs
+
 logger = logging.getLogger(__name__)
 
 
mteb/_evaluators/evaluator.py
CHANGED
@@ -1,10 +1,15 @@
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
-from collections.abc import Iterable, Mapping
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from mteb.abstasks.abstask import _set_seed
-from mteb.models import EncoderProtocol
-from mteb.types import EncodeKwargs
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Mapping
+
+    from mteb.models import EncoderProtocol
+    from mteb.types import EncodeKwargs
 
 
 class Evaluator(ABC):
mteb/_evaluators/image/imagetext_pairclassification_evaluator.py
CHANGED
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import logging
-from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any
 
 import torch
@@ -14,13 +13,16 @@ from mteb._create_dataloaders import (
 )
 from mteb._evaluators.evaluator import Evaluator
 from mteb._requires_package import requires_image_dependencies
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models.models_protocols import EncoderProtocol
-from mteb.types import EncodeKwargs
 
 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
     from PIL.Image import Image
 
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.models_protocols import EncoderProtocol
+    from mteb.types import EncodeKwargs
+
 
 logger = logging.getLogger(__name__)
 
mteb/_evaluators/pair_classification_evaluator.py
CHANGED
@@ -1,8 +1,9 @@
+from __future__ import annotations
+
 import logging
-from typing import Any, TypedDict
+from typing import TYPE_CHECKING, Any, TypedDict
 
 import numpy as np
-from datasets import Dataset
 from sklearn.metrics.pairwise import (
     paired_cosine_distances,
     paired_euclidean_distances,
@@ -11,10 +12,14 @@ from sklearn.metrics.pairwise import (
 
 from mteb._create_dataloaders import _create_dataloader_from_texts, create_dataloader
 from mteb._evaluators.evaluator import Evaluator
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
 from mteb.similarity_functions import compute_pairwise_similarity
-
+
+if TYPE_CHECKING:
+    from datasets import Dataset
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import EncodeKwargs, PromptType
 
 logger = logging.getLogger(__name__)
 
mteb/_evaluators/retrieval_evaluator.py
CHANGED
@@ -1,23 +1,29 @@
-import logging
-from collections.abc import Sequence
+from __future__ import annotations
 
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import SearchProtocol
-from mteb.types import (
-    CorpusDatasetType,
-    EncodeKwargs,
-    QueryDatasetType,
-    RelevantDocumentsType,
-    RetrievalEvaluationResult,
-    RetrievalOutputType,
-    TopRankedDocumentsType,
-)
+import logging
+from typing import TYPE_CHECKING
 
 from .evaluator import Evaluator
 from .retrieval_metrics import (
     calculate_retrieval_scores,
 )
 
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import SearchProtocol
+    from mteb.types import (
+        CorpusDatasetType,
+        EncodeKwargs,
+        QueryDatasetType,
+        RelevantDocumentsType,
+        RetrievalEvaluationResult,
+        RetrievalOutputType,
+        TopRankedDocumentsType,
+    )
+
+
 logger = logging.getLogger(__name__)
 
 
mteb/_evaluators/retrieval_metrics.py
CHANGED
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import logging
 from collections import defaultdict
-from collections.abc import Mapping
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
@@ -9,7 +10,12 @@ import pytrec_eval
 from packaging.version import Version
 from sklearn.metrics import auc
 
-from mteb.types import RelevantDocumentsType, RetrievalEvaluationResult
+from mteb.types import RetrievalEvaluationResult
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+    from mteb.types import RelevantDocumentsType
 
 logger = logging.getLogger(__name__)
 
mteb/_evaluators/sklearn_evaluator.py
CHANGED
@@ -1,18 +1,22 @@
-import logging
-from typing import Any, Protocol, cast
+from __future__ import annotations
 
-import numpy as np
-from datasets import Dataset
-from torch.utils.data import DataLoader
-from typing_extensions import Self
+import logging
+from typing import TYPE_CHECKING, Any, Protocol, cast
 
 from mteb._create_dataloaders import create_dataloader
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
-from mteb.types import Array, BatchedInput, EncodeKwargs
 
 from .evaluator import Evaluator
 
+if TYPE_CHECKING:
+    import numpy as np
+    from datasets import Dataset
+    from torch.utils.data import DataLoader
+    from typing_extensions import Self
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import Array, BatchedInput, EncodeKwargs
+
 logger = logging.getLogger(__name__)
 
 
@@ -104,7 +108,7 @@ class SklearnEvaluator(Evaluator):
             hf_subset=self.hf_subset,
             **encode_kwargs,
         )
-        test_cache = cast(Array, test_cache)
+        test_cache = cast("Array", test_cache)
 
         logger.info("Running - Fitting classifier...")
         y_train = self.train_dataset[self.label_column_name]
mteb/_evaluators/text/bitext_mining_evaluator.py
CHANGED
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
 import torch
 from datasets import Dataset
@@ -6,9 +9,11 @@ from tqdm.auto import tqdm
 
 from mteb._create_dataloaders import _create_dataloader_from_texts
 from mteb._evaluators.evaluator import Evaluator
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
-from mteb.types import Array, EncodeKwargs
+
+if TYPE_CHECKING:
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import Array, EncodeKwargs
 
 logger = logging.getLogger(__name__)
 
mteb/_evaluators/text/summarization_evaluator.py
CHANGED
@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 import logging
 import sys
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
 
 import numpy as np
 import torch
@@ -9,10 +11,12 @@ from tqdm.auto import tqdm
 
 from mteb._create_dataloaders import _create_dataloader_from_texts
 from mteb._evaluators.evaluator import Evaluator
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
 from mteb.similarity_functions import cos_sim, dot_score
-
+
+if TYPE_CHECKING:
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import EncodeKwargs
 
 # if later than python 3.13 use typing module
 if sys.version_info >= (3, 13):
mteb/_evaluators/zeroshot_classification_evaluator.py
CHANGED
@@ -1,4 +1,7 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
 from datasets import Dataset
 
@@ -6,13 +9,17 @@ from mteb._create_dataloaders import (
     _create_dataloader_from_texts,
     create_dataloader,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models import EncoderProtocol
 from mteb.similarity_functions import similarity
-from mteb.types import Array, EncodeKwargs
 
 from .evaluator import Evaluator
 
+if TYPE_CHECKING:
+    from datasets import Dataset
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import EncoderProtocol
+    from mteb.types import Array, EncodeKwargs
+
 logger = logging.getLogger(__name__)
 
 
mteb/_helpful_enum.py
CHANGED
mteb/abstasks/_data_filter/filters.py
CHANGED
@@ -1,12 +1,18 @@
 """Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
 
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
 import datasets
 import pandas as pd
-from datasets import Dataset, DatasetDict
+from datasets import DatasetDict
+
+if TYPE_CHECKING:
+    from datasets import Dataset
 
-from mteb import TaskMetadata
+    from mteb import TaskMetadata
 
 logger = logging.getLogger(__name__)
 
mteb/abstasks/_data_filter/task_pipelines.py
CHANGED
@@ -1,9 +1,10 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
 from datasets import DatasetDict
 
-from mteb import TaskMetadata
-from mteb.abstasks import AbsTaskClassification
 from mteb.abstasks._data_filter.filters import (
     deduplicate,
     filter_empty,
@@ -13,6 +14,10 @@ from mteb.abstasks._data_filter.filters import (
     split_train_test,
 )
 
+if TYPE_CHECKING:
+    from mteb import TaskMetadata
+    from mteb.abstasks import AbsTaskClassification
+
 logger = logging.getLogger(__name__)
 
 
mteb/abstasks/_statistics_calculation.py
CHANGED
@@ -2,10 +2,8 @@ from __future__ import annotations
 
 import hashlib
 from collections import Counter
-from collections.abc import Mapping
 from typing import TYPE_CHECKING, cast
 
-from mteb.types import TopRankedDocumentsType
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -16,8 +14,12 @@ from mteb.types.statistics import (
 )
 
 if TYPE_CHECKING:
+    from collections.abc import Mapping
+
     from PIL import Image
 
+    from mteb.types import TopRankedDocumentsType
+
 
 def calculate_text_statistics(texts: list[str]) -> TextStatistics:
     """Calculate descriptive statistics for a list of texts.
@@ -87,13 +89,13 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
 
     if not isinstance(labels[0], list):
         # single label classification
-        single_label = cast(list[int], labels)
+        single_label = cast("list[int]", labels)
         label_len = [1] * len(single_label)
         total_label_len = len(single_label)
         total_labels.extend(single_label)
     elif isinstance(labels[0], list):
         # multilabel classification
-        multilabel_labels = cast(list[list[int]], labels)
+        multilabel_labels = cast("list[list[int]]", labels)
         label_len = [len(l) for l in multilabel_labels]
         total_label_len = sum(label_len)
         for l in multilabel_labels:
mteb/abstasks/abstask.py
CHANGED
@@ -1,30 +1,38 @@
+from __future__ import annotations
+
 import json
 import logging
 import warnings
 from abc import ABC, abstractmethod
-from collections.abc import Mapping, Sequence
+from collections.abc import Sequence
 from copy import copy
 from pathlib import Path
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 import numpy as np
 from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
 from sklearn.preprocessing import MultiLabelBinarizer
 from tqdm.auto import tqdm
-from typing_extensions import Self
 
 from mteb._set_seed import _set_seed
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.languages import LanguageScripts
 from mteb.models import (
     CrossEncoderProtocol,
     EncoderProtocol,
-    MTEBModels,
     SearchProtocol,
 )
-
-from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
-from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+    from typing_extensions import Self
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models import (
+        MTEBModels,
+    )
+    from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
+    from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
 
 logger = logging.getLogger(__name__)
 
@@ -163,7 +171,7 @@ class AbsTask(ABC):
         if not self.data_loaded:
             self.load_data()
 
-        self.dataset = cast(dict[HFSubset, DatasetDict], self.dataset)
+        self.dataset = cast("dict[HFSubset, DatasetDict]", self.dataset)
 
         scores = {}
         if self.hf_subsets is None:
mteb/abstasks/aggregate_task_metadata.py
CHANGED
@@ -1,28 +1,39 @@
+from __future__ import annotations
+
 import logging
 from datetime import datetime
+from typing import TYPE_CHECKING
 
 from pydantic import ConfigDict, Field, model_validator
-from typing_extensions import Self
 
 from mteb.types import (
-    ISOLanguageScript,
     Languages,
-    Licenses,
-    Modalities,
-    StrDate,
 )
 
 from .abstask import AbsTask
 from .task_metadata import (
-    AnnotatorType,
     MetadataDatasetDict,
-    SampleCreationMethod,
-    TaskDomain,
     TaskMetadata,
-    TaskSubtype,
     TaskType,
 )
 
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from mteb.types import (
+        ISOLanguageScript,
+        Licenses,
+        Modalities,
+        StrDate,
+    )
+
+    from .task_metadata import (
+        AnnotatorType,
+        SampleCreationMethod,
+        TaskDomain,
+        TaskSubtype,
+    )
+
 logger = logging.getLogger(__name__)
 
 
mteb/abstasks/aggregated_task.py
CHANGED
@@ -1,19 +1,26 @@
+from __future__ import annotations
+
 import logging
 import warnings
-from collections.abc import Mapping
-from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
-from datasets import Dataset, DatasetDict
 
-from mteb.models.models_protocols import MTEBModels
 from mteb.results.task_result import TaskResult
-from mteb.types import EncodeKwargs, HFSubset, ScoresDict
-from mteb.types.statistics import DescriptiveStatistics
 
 from .abstask import AbsTask
-from .aggregate_task_metadata import AggregateTaskMetadata
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+    from pathlib import Path
+
+    from datasets import Dataset, DatasetDict
+
+    from mteb.models.models_protocols import MTEBModels
+    from mteb.types import EncodeKwargs, HFSubset, ScoresDict
+    from mteb.types.statistics import DescriptiveStatistics
+
+    from .aggregate_task_metadata import AggregateTaskMetadata
 
 logger = logging.getLogger(__name__)
 
mteb/abstasks/classification.py
CHANGED
@@ -1,7 +1,8 @@
+from __future__ import annotations
+
 import logging
 from collections import defaultdict
-from pathlib import Path
-from typing import Any, TypedDict
+from typing import TYPE_CHECKING, Any, TypedDict
 
 import numpy as np
 from datasets import Dataset, DatasetDict
@@ -16,12 +17,8 @@ from sklearn.metrics import (
 
 from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import (
-    ImageStatistics,
-    LabelStatistics,
     SplitDescriptiveStatistics,
-    TextStatistics,
 )
 
 from ._statistics_calculation import (
@@ -31,6 +28,18 @@ from ._statistics_calculation import (
 )
 from .abstask import AbsTask
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
+    from mteb.models import MTEBModels
+    from mteb.types import EncodeKwargs, HFSubset, ScoresDict
+    from mteb.types.statistics import (
+        ImageStatistics,
+        LabelStatistics,
+        TextStatistics,
+    )
+
 logger = logging.getLogger(__name__)
 
 