mteb 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +27 -16
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  37. mteb/cache.py +10 -5
  38. mteb/cli/_display_tasks.py +9 -3
  39. mteb/cli/build_cli.py +5 -2
  40. mteb/cli/generate_model_card.py +9 -2
  41. mteb/deprecated_evaluator.py +16 -12
  42. mteb/evaluate.py +20 -18
  43. mteb/filter_tasks.py +12 -7
  44. mteb/get_tasks.py +9 -4
  45. mteb/languages/language_scripts.py +8 -3
  46. mteb/leaderboard/app.py +7 -3
  47. mteb/leaderboard/table.py +7 -2
  48. mteb/load_results.py +9 -3
  49. mteb/models/abs_encoder.py +22 -12
  50. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  51. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  52. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  53. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  54. mteb/models/get_model_meta.py +11 -4
  55. mteb/models/instruct_wrapper.py +13 -5
  56. mteb/models/model_implementations/align_models.py +9 -4
  57. mteb/models/model_implementations/bedrock_models.py +16 -6
  58. mteb/models/model_implementations/blip2_models.py +9 -4
  59. mteb/models/model_implementations/blip_models.py +9 -4
  60. mteb/models/model_implementations/bm25.py +15 -10
  61. mteb/models/model_implementations/bmretriever_models.py +6 -2
  62. mteb/models/model_implementations/cde_models.py +9 -5
  63. mteb/models/model_implementations/clip_models.py +9 -4
  64. mteb/models/model_implementations/cohere_models.py +10 -4
  65. mteb/models/model_implementations/cohere_v.py +9 -4
  66. mteb/models/model_implementations/colpali_models.py +4 -3
  67. mteb/models/model_implementations/colqwen_models.py +10 -31
  68. mteb/models/model_implementations/colsmol_models.py +1 -1
  69. mteb/models/model_implementations/conan_models.py +10 -4
  70. mteb/models/model_implementations/dino_models.py +9 -4
  71. mteb/models/model_implementations/e5_v.py +9 -4
  72. mteb/models/model_implementations/eagerworks_models.py +10 -4
  73. mteb/models/model_implementations/evaclip_models.py +9 -4
  74. mteb/models/model_implementations/gme_v_models.py +5 -3
  75. mteb/models/model_implementations/google_models.py +10 -4
  76. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  77. mteb/models/model_implementations/hinvec_models.py +5 -1
  78. mteb/models/model_implementations/jasper_models.py +12 -5
  79. mteb/models/model_implementations/jina_clip.py +9 -4
  80. mteb/models/model_implementations/jina_models.py +10 -5
  81. mteb/models/model_implementations/kalm_models.py +18 -12
  82. mteb/models/model_implementations/linq_models.py +6 -1
  83. mteb/models/model_implementations/listconranker.py +9 -4
  84. mteb/models/model_implementations/llm2clip_models.py +9 -4
  85. mteb/models/model_implementations/llm2vec_models.py +12 -6
  86. mteb/models/model_implementations/mcinext_models.py +5 -2
  87. mteb/models/model_implementations/moco_models.py +9 -4
  88. mteb/models/model_implementations/mod_models.py +1 -1
  89. mteb/models/model_implementations/model2vec_models.py +10 -4
  90. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  91. mteb/models/model_implementations/nomic_models.py +10 -4
  92. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  93. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  94. mteb/models/model_implementations/nvidia_models.py +12 -4
  95. mteb/models/model_implementations/octen_models.py +1 -1
  96. mteb/models/model_implementations/openai_models.py +9 -4
  97. mteb/models/model_implementations/openclip_models.py +9 -4
  98. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  99. mteb/models/model_implementations/ops_moa_models.py +7 -2
  100. mteb/models/model_implementations/promptriever_models.py +12 -6
  101. mteb/models/model_implementations/pylate_models.py +19 -13
  102. mteb/models/model_implementations/qwen3_models.py +8 -1
  103. mteb/models/model_implementations/random_baseline.py +4 -3
  104. mteb/models/model_implementations/repllama_models.py +13 -6
  105. mteb/models/model_implementations/rerankers_custom.py +10 -4
  106. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  107. mteb/models/model_implementations/salesforce_models.py +7 -1
  108. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  109. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  110. mteb/models/model_implementations/seed_models.py +1 -1
  111. mteb/models/model_implementations/siglip_models.py +9 -4
  112. mteb/models/model_implementations/slm_models.py +7 -4
  113. mteb/models/model_implementations/uae_models.py +9 -4
  114. mteb/models/model_implementations/vdr_models.py +7 -1
  115. mteb/models/model_implementations/vista_models.py +9 -4
  116. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  117. mteb/models/model_implementations/voyage_models.py +10 -4
  118. mteb/models/model_implementations/voyage_v.py +10 -6
  119. mteb/models/model_implementations/yuan_models_en.py +1 -1
  120. mteb/models/model_meta.py +12 -7
  121. mteb/models/models_protocols.py +19 -18
  122. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  123. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  124. mteb/models/search_wrappers.py +19 -12
  125. mteb/models/sentence_transformer_wrapper.py +4 -3
  126. mteb/models/vllm_wrapper.py +8 -6
  127. mteb/results/benchmark_results.py +22 -17
  128. mteb/results/model_result.py +21 -15
  129. mteb/results/task_result.py +15 -9
  130. mteb/similarity_functions.py +8 -2
  131. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  132. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  133. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  134. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  135. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  136. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  137. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  138. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  139. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  140. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  141. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  142. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  143. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  144. mteb/types/_encoder_io.py +1 -1
  145. mteb/types/statistics.py +9 -2
  146. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  147. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
  148. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  149. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  150. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  151. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
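
Nearly every module touched in this release follows the same pattern: imports used only in type annotations (e.g. torch.utils.data.DataLoader, TaskMetadata, the mteb.types aliases) are moved under an `if TYPE_CHECKING:` guard, and `from __future__ import annotations` is added so the annotations are evaluated lazily as strings at runtime. A minimal sketch of the pattern follows; the helper name below is illustrative and not taken from any specific mteb module:

from __future__ import annotations  # annotations become strings, evaluated lazily

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by static type checkers (mypy, pyright); skipped at runtime,
    # so importing the package stays fast and heavy optional deps stay optional.
    from torch.utils.data import DataLoader


def count_batches(loader: DataLoader) -> int:  # hypothetical helper, for illustration
    """Count batches; DataLoader is needed only for the annotation."""
    return sum(1 for _ in loader)

The diffs below show this refactor applied file by file, plus a few unrelated metadata fixes (a duplicate colnomic_7b ModelMeta removed and a colSmol revision pin corrected).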
mteb/models/cache_wrappers/cache_backends/faiss_cache.py
@@ -1,16 +1,22 @@
+from __future__ import annotations
+
 import json
 import logging
 import warnings
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 
 from mteb._requires_package import requires_package
-from mteb.types import BatchedInput
 
 from ._hash_utils import _hash_item
 
+if TYPE_CHECKING:
+    import faiss
+
+    from mteb.types import BatchedInput
+
 logger = logging.getLogger(__name__)
 
 
@@ -24,7 +30,6 @@ class FaissCache:
             "FAISS-based vector cache",
             install_instruction="pip install mteb[faiss-cpu]",
         )
-        import faiss
 
         self.directory = Path(directory)
         self.directory.mkdir(parents=True, exist_ok=True)
mteb/models/cache_wrappers/cache_wrapper.py
@@ -1,21 +1,26 @@
+from __future__ import annotations
+
 import logging
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import torch
 from datasets import Dataset
-from torch.utils.data import DataLoader
 
 from mteb._create_dataloaders import create_dataloader
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.models.cache_wrappers.cache_backend_protocol import (
-    CacheBackendProtocol,
-)
 from mteb.models.cache_wrappers.cache_backends.numpy_cache import NumpyCache
-from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import EncoderProtocol
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.cache_wrappers.cache_backend_protocol import (
+        CacheBackendProtocol,
+    )
+    from mteb.models.model_meta import ModelMeta
+    from mteb.models.models_protocols import EncoderProtocol
+    from mteb.types import Array, BatchedInput, PromptType
 
 logger = logging.getLogger(__name__)
 
mteb/models/get_model_meta.py
@@ -1,15 +1,22 @@
+from __future__ import annotations
+
 import difflib
 import logging
-from collections.abc import Iterable
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
-from mteb.abstasks import AbsTask
 from mteb.models import (
     ModelMeta,
-    MTEBModels,
 )
 from mteb.models.model_implementations import MODEL_REGISTRY
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from mteb.abstasks import AbsTask
+    from mteb.models import (
+        MTEBModels,
+    )
+
 logger = logging.getLogger(__name__)
 
mteb/models/instruct_wrapper.py
@@ -1,16 +1,24 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Callable
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
 
 from .abs_encoder import AbsEncoder
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
+
+
 logger = logging.getLogger(__name__)
 
mteb/models/model_implementations/align_models.py
@@ -1,13 +1,18 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 
 class ALIGNModel(AbsEncoder):
mteb/models/model_implementations/bedrock_models.py
@@ -1,20 +1,30 @@
+from __future__ import annotations
+
 import json
 import logging
 import re
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
 
-from .cohere_models import model_prompts as cohere_model_prompts
-from .cohere_models import supported_languages as cohere_supported_languages
+from .cohere_models import (
+    model_prompts as cohere_model_prompts,
+)
+from .cohere_models import (
+    supported_languages as cohere_supported_languages,
+)
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
 
 logger = logging.getLogger(__name__)
 
mteb/models/model_implementations/blip2_models.py
@@ -1,14 +1,19 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 BLIP2_CITATION = """@inproceedings{li2023blip2,
   title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
mteb/models/model_implementations/blip_models.py
@@ -1,14 +1,19 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
 from torch.nn.functional import normalize
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
   doi = {10.48550/ARXIV.2201.12086},
mteb/models/model_implementations/bm25.py
@@ -1,18 +1,23 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING
 
 from mteb._create_dataloaders import _create_text_queries_dataloader
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import SearchProtocol
-from mteb.types import (
-    CorpusDatasetType,
-    EncodeKwargs,
-    InstructionDatasetType,
-    QueryDatasetType,
-    RetrievalOutputType,
-    TopRankedDocumentsType,
-)
+
+if TYPE_CHECKING:
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.models_protocols import SearchProtocol
+    from mteb.types import (
+        CorpusDatasetType,
+        EncodeKwargs,
+        InstructionDatasetType,
+        QueryDatasetType,
+        RetrievalOutputType,
+        TopRankedDocumentsType,
+    )
 
 logger = logging.getLogger(__name__)
 
mteb/models/model_implementations/bmretriever_models.py
@@ -1,5 +1,6 @@
-from collections.abc import Callable
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
 from sentence_transformers import SentenceTransformer
@@ -9,6 +10,9 @@ from mteb.models import ModelMeta
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.types import PromptType
 
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
 
 def instruction_template(
     instruction: str, prompt_type: PromptType | None = None
mteb/models/model_implementations/cde_models.py
@@ -1,27 +1,31 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import torch
-from torch.utils.data import DataLoader
 
 import mteb
 from mteb._create_dataloaders import _corpus_to_dict
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.models_protocols import PromptType
 from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
-from mteb.types import Array, BatchedInput
+from mteb.types import PromptType
 
 from .bge_models import bge_full_data
 
 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from torch.utils.data import DataLoader
+
     from mteb.abstasks import (
         AbsTaskClassification,
         AbsTaskRetrieval,
         AbsTaskSummarization,
     )
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 logger = logging.getLogger(__name__)
 
 CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
mteb/models/model_implementations/clip_models.py
@@ -1,13 +1,18 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 
 class CLIPModel(AbsEncoder):
mteb/models/model_implementations/cohere_models.py
@@ -1,18 +1,24 @@
+from __future__ import annotations
+
 import logging
 import time
 from functools import wraps
-from typing import Any, Literal, get_args
+from typing import TYPE_CHECKING, Any, Literal, get_args
 
 import numpy as np
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 
 logger = logging.getLogger(__name__)
 
mteb/models/model_implementations/cohere_v.py
@@ -1,15 +1,15 @@
+from __future__ import annotations
+
 import base64
 import io
 import os
 import time
-from typing import Any, Literal, get_args
+from typing import TYPE_CHECKING, Any, Literal, get_args
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_image_dependencies, requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import ModelMeta
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_implementations.cohere_models import (
@@ -18,7 +18,12 @@ from mteb.models.model_implementations.cohere_models import (
     retry_with_rate_limit,
 )
 from mteb.models.model_meta import ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 
 def _post_process_embeddings(
mteb/models/model_implementations/colpali_models.py
@@ -4,20 +4,21 @@ import logging
 from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
 
 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 logger = logging.getLogger(__name__)
 
mteb/models/model_implementations/colqwen_models.py
@@ -1,18 +1,23 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 from .colpali_models import (
     COLPALI_CITATION,
@@ -329,32 +334,6 @@ colqwen3_4b = ModelMeta(
     citation=TOMORO_CITATION,
 )
 
-colnomic_7b = ModelMeta(
-    loader=ColQwen2_5Wrapper,
-    loader_kwargs=dict(
-        torch_dtype=torch.float16,
-    ),
-    name="nomic-ai/colnomic-embed-multimodal-7b",
-    model_type=["late-interaction"],
-    languages=["eng-Latn"],
-    revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
-    release_date="2025-03-31",
-    modalities=["image", "text"],
-    n_parameters=7_000_000_000,
-    memory_usage_mb=14400,
-    max_tokens=128000,
-    embed_dim=128,
-    license="apache-2.0",
-    open_weights=True,
-    public_training_code="https://github.com/nomic-ai/colpali",
-    public_training_data="https://huggingface.co/datasets/vidore/colpali_train_set",
-    framework=["ColPali", "safetensors"],
-    reference="https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b",
-    similarity_fn_name="MaxSim",
-    use_instructions=True,
-    training_datasets=COLPALI_TRAINING_DATA,
-    citation=COLPALI_CITATION,
-)
 
 COLNOMIC_CITATION = """
 @misc{nomicembedmultimodal2025,
@@ -402,7 +381,7 @@ colnomic_3b = ModelMeta(
 )
 
 colnomic_7b = ModelMeta(
-    loader=ColQwen2Wrapper,
+    loader=ColQwen2_5Wrapper,
     loader_kwargs=dict(
         torch_dtype=torch.float16,
mteb/models/model_implementations/colsmol_models.py
@@ -56,7 +56,7 @@ colsmol_256m = ModelMeta(
     name="vidore/colSmol-256M",
     model_type=["late-interaction"],
     languages=["eng-Latn"],
-    revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
+    revision="a59110fdf114638b8018e6c9a018907e12f14855",
     release_date="2025-01-22",
     modalities=["image", "text"],
     n_parameters=256_000_000,
mteb/models/model_implementations/conan_models.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import hashlib
 import json
 import logging
@@ -5,20 +7,24 @@ import os
 import random
 import string
 import time
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import requests
-from torch.utils.data import DataLoader
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
 
 from .bge_models import bge_full_data
 from .e5_instruct import E5_MISTRAL_TRAINING_DATA
 
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
+
 conan_zh_datasets = {
     "BQ",
     "LCQMC",
mteb/models/model_implementations/dino_models.py
@@ -1,13 +1,18 @@
-from typing import Any, Literal
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Literal
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 
 class DINOModel(AbsEncoder):
mteb/models/model_implementations/e5_v.py
@@ -1,14 +1,19 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
 from packaging import version
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 E5_V_TRANSFORMERS_VERSION = (
     "4.44.2" # Issue 1647: Only works with transformers==4.44.2.
mteb/models/model_implementations/eagerworks_models.py
@@ -1,17 +1,23 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 
 
 class EagerEmbedV1Wrapper(AbsEncoder):
mteb/models/model_implementations/evaclip_models.py
@@ -1,15 +1,20 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 
 from mteb._requires_package import requires_image_dependencies
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
 
 EVA_CLIP_CITATION = """@article{EVA-CLIP,
   title={EVA-CLIP: Improved Training Techniques for CLIP at Scale},
mteb/models/model_implementations/gme_v_models.py
@@ -6,16 +6,18 @@ import warnings
 from typing import TYPE_CHECKING, Any
 
 import torch
-from torch.utils.data import DataLoader
 from tqdm.autonotebook import tqdm
 
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
 
 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 
 logger = logging.getLogger(__name__)
 