mteb 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +27 -16
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +9 -4
- mteb/models/model_implementations/bedrock_models.py +16 -6
- mteb/models/model_implementations/blip2_models.py +9 -4
- mteb/models/model_implementations/blip_models.py +9 -4
- mteb/models/model_implementations/bm25.py +15 -10
- mteb/models/model_implementations/bmretriever_models.py +6 -2
- mteb/models/model_implementations/cde_models.py +9 -5
- mteb/models/model_implementations/clip_models.py +9 -4
- mteb/models/model_implementations/cohere_models.py +10 -4
- mteb/models/model_implementations/cohere_v.py +9 -4
- mteb/models/model_implementations/colpali_models.py +4 -3
- mteb/models/model_implementations/colqwen_models.py +10 -31
- mteb/models/model_implementations/colsmol_models.py +1 -1
- mteb/models/model_implementations/conan_models.py +10 -4
- mteb/models/model_implementations/dino_models.py +9 -4
- mteb/models/model_implementations/e5_v.py +9 -4
- mteb/models/model_implementations/eagerworks_models.py +10 -4
- mteb/models/model_implementations/evaclip_models.py +9 -4
- mteb/models/model_implementations/gme_v_models.py +5 -3
- mteb/models/model_implementations/google_models.py +10 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
- mteb/models/model_implementations/hinvec_models.py +5 -1
- mteb/models/model_implementations/jasper_models.py +12 -5
- mteb/models/model_implementations/jina_clip.py +9 -4
- mteb/models/model_implementations/jina_models.py +10 -5
- mteb/models/model_implementations/kalm_models.py +18 -12
- mteb/models/model_implementations/linq_models.py +6 -1
- mteb/models/model_implementations/listconranker.py +9 -4
- mteb/models/model_implementations/llm2clip_models.py +9 -4
- mteb/models/model_implementations/llm2vec_models.py +12 -6
- mteb/models/model_implementations/mcinext_models.py +5 -2
- mteb/models/model_implementations/moco_models.py +9 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +10 -4
- mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
- mteb/models/model_implementations/nomic_models.py +10 -4
- mteb/models/model_implementations/nomic_models_vision.py +4 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
- mteb/models/model_implementations/nvidia_models.py +12 -4
- mteb/models/model_implementations/octen_models.py +1 -1
- mteb/models/model_implementations/openai_models.py +9 -4
- mteb/models/model_implementations/openclip_models.py +9 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
- mteb/models/model_implementations/ops_moa_models.py +7 -2
- mteb/models/model_implementations/promptriever_models.py +12 -6
- mteb/models/model_implementations/pylate_models.py +19 -13
- mteb/models/model_implementations/qwen3_models.py +8 -1
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/repllama_models.py +13 -6
- mteb/models/model_implementations/rerankers_custom.py +10 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
- mteb/models/model_implementations/salesforce_models.py +7 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
- mteb/models/model_implementations/seed_models.py +1 -1
- mteb/models/model_implementations/siglip_models.py +9 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/uae_models.py +9 -4
- mteb/models/model_implementations/vdr_models.py +7 -1
- mteb/models/model_implementations/vista_models.py +9 -4
- mteb/models/model_implementations/vlm2vec_models.py +9 -4
- mteb/models/model_implementations/voyage_models.py +10 -4
- mteb/models/model_implementations/voyage_v.py +10 -6
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +12 -7
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +15 -9
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
import warnings
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import Any
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
6
8
|
|
|
7
9
|
import numpy as np
|
|
8
10
|
|
|
9
11
|
from mteb._requires_package import requires_package
|
|
10
|
-
from mteb.types import BatchedInput
|
|
11
12
|
|
|
12
13
|
from ._hash_utils import _hash_item
|
|
13
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import faiss
|
|
17
|
+
|
|
18
|
+
from mteb.types import BatchedInput
|
|
19
|
+
|
|
14
20
|
logger = logging.getLogger(__name__)
|
|
15
21
|
|
|
16
22
|
|
|
@@ -24,7 +30,6 @@ class FaissCache:
|
|
|
24
30
|
"FAISS-based vector cache",
|
|
25
31
|
install_instruction="pip install mteb[faiss-cpu]",
|
|
26
32
|
)
|
|
27
|
-
import faiss
|
|
28
33
|
|
|
29
34
|
self.directory = Path(directory)
|
|
30
35
|
self.directory.mkdir(parents=True, exist_ok=True)
|
|
@@ -1,21 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from pathlib import Path
|
|
3
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
8
|
import torch
|
|
7
9
|
from datasets import Dataset
|
|
8
|
-
from torch.utils.data import DataLoader
|
|
9
10
|
|
|
10
11
|
from mteb._create_dataloaders import create_dataloader
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
|
-
from mteb.models.cache_wrappers.cache_backend_protocol import (
|
|
13
|
-
CacheBackendProtocol,
|
|
14
|
-
)
|
|
15
12
|
from mteb.models.cache_wrappers.cache_backends.numpy_cache import NumpyCache
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
from
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from torch.utils.data import DataLoader
|
|
16
|
+
|
|
17
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
18
|
+
from mteb.models.cache_wrappers.cache_backend_protocol import (
|
|
19
|
+
CacheBackendProtocol,
|
|
20
|
+
)
|
|
21
|
+
from mteb.models.model_meta import ModelMeta
|
|
22
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
23
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
19
24
|
|
|
20
25
|
logger = logging.getLogger(__name__)
|
|
21
26
|
|
mteb/models/get_model_meta.py
CHANGED
|
@@ -1,15 +1,22 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import difflib
|
|
2
4
|
import logging
|
|
3
|
-
from
|
|
4
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
5
6
|
|
|
6
|
-
from mteb.abstasks import AbsTask
|
|
7
7
|
from mteb.models import (
|
|
8
8
|
ModelMeta,
|
|
9
|
-
MTEBModels,
|
|
10
9
|
)
|
|
11
10
|
from mteb.models.model_implementations import MODEL_REGISTRY
|
|
12
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterable
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks import AbsTask
|
|
16
|
+
from mteb.models import (
|
|
17
|
+
MTEBModels,
|
|
18
|
+
)
|
|
19
|
+
|
|
13
20
|
logger = logging.getLogger(__name__)
|
|
14
21
|
|
|
15
22
|
|
mteb/models/instruct_wrapper.py
CHANGED
|
@@ -1,16 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
4
5
|
|
|
5
6
|
import torch
|
|
6
|
-
from torch.utils.data import DataLoader
|
|
7
7
|
|
|
8
8
|
from mteb._requires_package import requires_package
|
|
9
|
-
from mteb.
|
|
10
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
9
|
+
from mteb.types import PromptType
|
|
11
10
|
|
|
12
11
|
from .abs_encoder import AbsEncoder
|
|
13
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
|
|
16
|
+
from torch.utils.data import DataLoader
|
|
17
|
+
|
|
18
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
19
|
+
from mteb.types import Array, BatchedInput
|
|
20
|
+
|
|
21
|
+
|
|
14
22
|
logger = logging.getLogger(__name__)
|
|
15
23
|
|
|
16
24
|
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
8
8
|
from mteb.models.abs_encoder import AbsEncoder
|
|
9
9
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from torch.utils.data import DataLoader
|
|
13
|
+
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
class ALIGNModel(AbsEncoder):
|
|
@@ -1,20 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
import re
|
|
4
|
-
from typing import Any
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
9
|
from tqdm.auto import tqdm
|
|
9
10
|
|
|
10
11
|
from mteb._requires_package import requires_package
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
12
|
from mteb.models.abs_encoder import AbsEncoder
|
|
13
13
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
15
14
|
|
|
16
|
-
from .cohere_models import
|
|
17
|
-
|
|
15
|
+
from .cohere_models import (
|
|
16
|
+
model_prompts as cohere_model_prompts,
|
|
17
|
+
)
|
|
18
|
+
from .cohere_models import (
|
|
19
|
+
supported_languages as cohere_supported_languages,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from torch.utils.data import DataLoader
|
|
24
|
+
|
|
25
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
26
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
27
|
+
|
|
18
28
|
|
|
19
29
|
logger = logging.getLogger(__name__)
|
|
20
30
|
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
8
|
from mteb._requires_package import requires_package
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
10
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
12
17
|
|
|
13
18
|
BLIP2_CITATION = """@inproceedings{li2023blip2,
|
|
14
19
|
title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
6
|
from torch.nn.functional import normalize
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
10
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
12
17
|
|
|
13
18
|
BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
|
|
14
19
|
doi = {10.48550/ARXIV.2201.12086},
|
|
@@ -1,18 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
2
5
|
|
|
3
6
|
from mteb._create_dataloaders import _create_text_queries_dataloader
|
|
4
7
|
from mteb._requires_package import requires_package
|
|
5
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
6
8
|
from mteb.models.model_meta import ModelMeta
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
|
+
from mteb.models.models_protocols import SearchProtocol
|
|
13
|
+
from mteb.types import (
|
|
14
|
+
CorpusDatasetType,
|
|
15
|
+
EncodeKwargs,
|
|
16
|
+
InstructionDatasetType,
|
|
17
|
+
QueryDatasetType,
|
|
18
|
+
RetrievalOutputType,
|
|
19
|
+
TopRankedDocumentsType,
|
|
20
|
+
)
|
|
16
21
|
|
|
17
22
|
logger = logging.getLogger(__name__)
|
|
18
23
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
3
4
|
|
|
4
5
|
import torch
|
|
5
6
|
from sentence_transformers import SentenceTransformer
|
|
@@ -9,6 +10,9 @@ from mteb.models import ModelMeta
|
|
|
9
10
|
from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
|
|
10
11
|
from mteb.types import PromptType
|
|
11
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Callable
|
|
15
|
+
|
|
12
16
|
|
|
13
17
|
def instruction_template(
|
|
14
18
|
instruction: str, prompt_type: PromptType | None = None
|
|
@@ -1,27 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from collections.abc import Sequence
|
|
3
4
|
from typing import TYPE_CHECKING, Any
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
import torch
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
8
|
|
|
9
9
|
import mteb
|
|
10
10
|
from mteb._create_dataloaders import _corpus_to_dict
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
11
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
13
|
-
from mteb.models.models_protocols import PromptType
|
|
14
12
|
from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
|
|
15
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
from .bge_models import bge_full_data
|
|
18
16
|
|
|
19
17
|
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
|
|
20
|
+
from torch.utils.data import DataLoader
|
|
21
|
+
|
|
20
22
|
from mteb.abstasks import (
|
|
21
23
|
AbsTaskClassification,
|
|
22
24
|
AbsTaskRetrieval,
|
|
23
25
|
AbsTaskSummarization,
|
|
24
26
|
)
|
|
27
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
28
|
+
from mteb.types import Array, BatchedInput
|
|
25
29
|
logger = logging.getLogger(__name__)
|
|
26
30
|
|
|
27
31
|
CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
8
8
|
from mteb.models.abs_encoder import AbsEncoder
|
|
9
9
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from torch.utils.data import DataLoader
|
|
13
|
+
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
class CLIPModel(AbsEncoder):
|
|
@@ -1,18 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import time
|
|
3
5
|
from functools import wraps
|
|
4
|
-
from typing import Any, Literal, get_args
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Literal, get_args
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
9
|
import torch
|
|
8
|
-
from torch.utils.data import DataLoader
|
|
9
10
|
from tqdm.auto import tqdm
|
|
10
11
|
|
|
11
12
|
from mteb._requires_package import requires_package
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.models.abs_encoder import AbsEncoder
|
|
14
14
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
15
|
-
from mteb.types import
|
|
15
|
+
from mteb.types import PromptType
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from torch.utils.data import DataLoader
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.types import Array, BatchedInput
|
|
16
22
|
|
|
17
23
|
logger = logging.getLogger(__name__)
|
|
18
24
|
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import base64
|
|
2
4
|
import io
|
|
3
5
|
import os
|
|
4
6
|
import time
|
|
5
|
-
from typing import Any, Literal, get_args
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Literal, get_args
|
|
6
8
|
|
|
7
9
|
import torch
|
|
8
|
-
from torch.utils.data import DataLoader
|
|
9
10
|
from tqdm.auto import tqdm
|
|
10
11
|
|
|
11
12
|
from mteb._requires_package import requires_image_dependencies, requires_package
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.models import ModelMeta
|
|
14
14
|
from mteb.models.abs_encoder import AbsEncoder
|
|
15
15
|
from mteb.models.model_implementations.cohere_models import (
|
|
@@ -18,7 +18,12 @@ from mteb.models.model_implementations.cohere_models import (
|
|
|
18
18
|
retry_with_rate_limit,
|
|
19
19
|
)
|
|
20
20
|
from mteb.models.model_meta import ScoringFunction
|
|
21
|
-
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from torch.utils.data import DataLoader
|
|
24
|
+
|
|
25
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
26
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
def _post_process_embeddings(
|
|
@@ -4,20 +4,21 @@ import logging
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
|
-
from torch.utils.data import DataLoader
|
|
8
7
|
from tqdm.auto import tqdm
|
|
9
8
|
|
|
10
9
|
from mteb._requires_package import (
|
|
11
10
|
requires_image_dependencies,
|
|
12
11
|
requires_package,
|
|
13
12
|
)
|
|
14
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
13
|
from mteb.models.abs_encoder import AbsEncoder
|
|
16
14
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
17
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
18
15
|
|
|
19
16
|
if TYPE_CHECKING:
|
|
20
17
|
from PIL import Image
|
|
18
|
+
from torch.utils.data import DataLoader
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
21
22
|
|
|
22
23
|
logger = logging.getLogger(__name__)
|
|
23
24
|
|
|
@@ -1,18 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
3
5
|
|
|
4
6
|
import torch
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
9
|
from mteb._requires_package import (
|
|
9
10
|
requires_image_dependencies,
|
|
10
11
|
requires_package,
|
|
11
12
|
)
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.models.abs_encoder import AbsEncoder
|
|
14
14
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
15
|
-
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
16
21
|
|
|
17
22
|
from .colpali_models import (
|
|
18
23
|
COLPALI_CITATION,
|
|
@@ -329,32 +334,6 @@ colqwen3_4b = ModelMeta(
|
|
|
329
334
|
citation=TOMORO_CITATION,
|
|
330
335
|
)
|
|
331
336
|
|
|
332
|
-
colnomic_7b = ModelMeta(
|
|
333
|
-
loader=ColQwen2_5Wrapper,
|
|
334
|
-
loader_kwargs=dict(
|
|
335
|
-
torch_dtype=torch.float16,
|
|
336
|
-
),
|
|
337
|
-
name="nomic-ai/colnomic-embed-multimodal-7b",
|
|
338
|
-
model_type=["late-interaction"],
|
|
339
|
-
languages=["eng-Latn"],
|
|
340
|
-
revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
|
|
341
|
-
release_date="2025-03-31",
|
|
342
|
-
modalities=["image", "text"],
|
|
343
|
-
n_parameters=7_000_000_000,
|
|
344
|
-
memory_usage_mb=14400,
|
|
345
|
-
max_tokens=128000,
|
|
346
|
-
embed_dim=128,
|
|
347
|
-
license="apache-2.0",
|
|
348
|
-
open_weights=True,
|
|
349
|
-
public_training_code="https://github.com/nomic-ai/colpali",
|
|
350
|
-
public_training_data="https://huggingface.co/datasets/vidore/colpali_train_set",
|
|
351
|
-
framework=["ColPali", "safetensors"],
|
|
352
|
-
reference="https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b",
|
|
353
|
-
similarity_fn_name="MaxSim",
|
|
354
|
-
use_instructions=True,
|
|
355
|
-
training_datasets=COLPALI_TRAINING_DATA,
|
|
356
|
-
citation=COLPALI_CITATION,
|
|
357
|
-
)
|
|
358
337
|
|
|
359
338
|
COLNOMIC_CITATION = """
|
|
360
339
|
@misc{nomicembedmultimodal2025,
|
|
@@ -402,7 +381,7 @@ colnomic_3b = ModelMeta(
|
|
|
402
381
|
)
|
|
403
382
|
|
|
404
383
|
colnomic_7b = ModelMeta(
|
|
405
|
-
loader=
|
|
384
|
+
loader=ColQwen2_5Wrapper,
|
|
406
385
|
loader_kwargs=dict(
|
|
407
386
|
torch_dtype=torch.float16,
|
|
408
387
|
),
|
|
@@ -56,7 +56,7 @@ colsmol_256m = ModelMeta(
|
|
|
56
56
|
name="vidore/colSmol-256M",
|
|
57
57
|
model_type=["late-interaction"],
|
|
58
58
|
languages=["eng-Latn"],
|
|
59
|
-
revision="
|
|
59
|
+
revision="a59110fdf114638b8018e6c9a018907e12f14855",
|
|
60
60
|
release_date="2025-01-22",
|
|
61
61
|
modalities=["image", "text"],
|
|
62
62
|
n_parameters=256_000_000,
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import json
|
|
3
5
|
import logging
|
|
@@ -5,20 +7,24 @@ import os
|
|
|
5
7
|
import random
|
|
6
8
|
import string
|
|
7
9
|
import time
|
|
8
|
-
from typing import Any
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
9
11
|
|
|
10
12
|
import numpy as np
|
|
11
13
|
import requests
|
|
12
|
-
from torch.utils.data import DataLoader
|
|
13
14
|
|
|
14
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
15
|
from mteb.models.abs_encoder import AbsEncoder
|
|
16
16
|
from mteb.models.model_meta import ModelMeta
|
|
17
|
-
from mteb.types import Array, BatchedInput, PromptType
|
|
18
17
|
|
|
19
18
|
from .bge_models import bge_full_data
|
|
20
19
|
from .e5_instruct import E5_MISTRAL_TRAINING_DATA
|
|
21
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from torch.utils.data import DataLoader
|
|
23
|
+
|
|
24
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
25
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
26
|
+
|
|
27
|
+
|
|
22
28
|
conan_zh_datasets = {
|
|
23
29
|
"BQ",
|
|
24
30
|
"LCQMC",
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
8
8
|
from mteb.models.abs_encoder import AbsEncoder
|
|
9
9
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from torch.utils.data import DataLoader
|
|
13
|
+
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
class DINOModel(AbsEncoder):
|
|
@@ -1,14 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
6
|
from packaging import version
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
9
|
from mteb.models.abs_encoder import AbsEncoder
|
|
10
10
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
11
|
-
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
12
17
|
|
|
13
18
|
E5_V_TRANSFORMERS_VERSION = (
|
|
14
19
|
"4.44.2" # Issue 1647: Only works with transformers==4.44.2.
|
|
@@ -1,17 +1,23 @@
|
|
|
1
|
-
from
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
2
4
|
|
|
3
5
|
import torch
|
|
4
|
-
from torch.utils.data import DataLoader
|
|
5
6
|
from tqdm.auto import tqdm
|
|
6
7
|
|
|
7
8
|
from mteb._requires_package import (
|
|
8
9
|
requires_image_dependencies,
|
|
9
10
|
requires_package,
|
|
10
11
|
)
|
|
11
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
12
|
from mteb.models.abs_encoder import AbsEncoder
|
|
13
13
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
14
|
-
from mteb.types import
|
|
14
|
+
from mteb.types import PromptType
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput
|
|
15
21
|
|
|
16
22
|
|
|
17
23
|
class EagerEmbedV1Wrapper(AbsEncoder):
|
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from pathlib import Path
|
|
2
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
3
5
|
|
|
4
6
|
import torch
|
|
5
|
-
from torch.utils.data import DataLoader
|
|
6
7
|
from tqdm.auto import tqdm
|
|
7
8
|
|
|
8
9
|
from mteb._requires_package import requires_image_dependencies
|
|
9
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
10
|
from mteb.models.abs_encoder import AbsEncoder
|
|
11
11
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
12
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from torch.utils.data import DataLoader
|
|
15
|
+
|
|
16
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
17
|
+
from mteb.types import Array, BatchedInput, PromptType
|
|
13
18
|
|
|
14
19
|
EVA_CLIP_CITATION = """@article{EVA-CLIP,
|
|
15
20
|
title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
|
|
@@ -6,16 +6,18 @@ import warnings
|
|
|
6
6
|
from typing import TYPE_CHECKING, Any
|
|
7
7
|
|
|
8
8
|
import torch
|
|
9
|
-
from torch.utils.data import DataLoader
|
|
10
9
|
from tqdm.autonotebook import tqdm
|
|
11
10
|
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
11
|
from mteb.models.abs_encoder import AbsEncoder
|
|
14
12
|
from mteb.models.model_meta import ModelMeta, ScoringFunction
|
|
15
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
if TYPE_CHECKING:
|
|
18
16
|
from PIL import Image
|
|
17
|
+
from torch.utils.data import DataLoader
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import Array, BatchedInput
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|