mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.
Files changed (155)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +16 -9
  3. mteb/_evaluators/any_sts_evaluator.py +10 -5
  4. mteb/_evaluators/clustering_evaluator.py +10 -4
  5. mteb/_evaluators/evaluator.py +9 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  8. mteb/_evaluators/retrieval_evaluator.py +19 -13
  9. mteb/_evaluators/retrieval_metrics.py +9 -3
  10. mteb/_evaluators/sklearn_evaluator.py +14 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  12. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +8 -2
  16. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  17. mteb/abstasks/_statistics_calculation.py +6 -4
  18. mteb/abstasks/abstask.py +17 -9
  19. mteb/abstasks/aggregate_task_metadata.py +20 -9
  20. mteb/abstasks/aggregated_task.py +15 -8
  21. mteb/abstasks/classification.py +15 -6
  22. mteb/abstasks/clustering.py +17 -8
  23. mteb/abstasks/clustering_legacy.py +14 -6
  24. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  25. mteb/abstasks/multilabel_classification.py +11 -5
  26. mteb/abstasks/pair_classification.py +19 -9
  27. mteb/abstasks/regression.py +14 -6
  28. mteb/abstasks/retrieval.py +27 -16
  29. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  30. mteb/abstasks/sts.py +19 -10
  31. mteb/abstasks/task_metadata.py +17 -8
  32. mteb/abstasks/text/bitext_mining.py +14 -7
  33. mteb/abstasks/text/summarization.py +17 -7
  34. mteb/abstasks/zeroshot_classification.py +15 -7
  35. mteb/benchmarks/_create_table.py +13 -3
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  38. mteb/cache.py +20 -14
  39. mteb/cli/_display_tasks.py +9 -3
  40. mteb/cli/build_cli.py +5 -2
  41. mteb/cli/generate_model_card.py +9 -2
  42. mteb/deprecated_evaluator.py +16 -12
  43. mteb/evaluate.py +20 -18
  44. mteb/filter_tasks.py +12 -7
  45. mteb/get_tasks.py +9 -4
  46. mteb/languages/language_scripts.py +8 -3
  47. mteb/leaderboard/app.py +7 -3
  48. mteb/leaderboard/table.py +7 -2
  49. mteb/load_results.py +9 -3
  50. mteb/models/abs_encoder.py +22 -12
  51. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  52. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  53. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  54. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  55. mteb/models/get_model_meta.py +11 -4
  56. mteb/models/instruct_wrapper.py +13 -5
  57. mteb/models/model_implementations/align_models.py +9 -4
  58. mteb/models/model_implementations/bedrock_models.py +16 -6
  59. mteb/models/model_implementations/blip2_models.py +9 -4
  60. mteb/models/model_implementations/blip_models.py +9 -4
  61. mteb/models/model_implementations/bm25.py +15 -10
  62. mteb/models/model_implementations/bmretriever_models.py +6 -2
  63. mteb/models/model_implementations/cde_models.py +9 -5
  64. mteb/models/model_implementations/clip_models.py +9 -4
  65. mteb/models/model_implementations/cohere_models.py +10 -4
  66. mteb/models/model_implementations/cohere_v.py +9 -4
  67. mteb/models/model_implementations/colpali_models.py +4 -3
  68. mteb/models/model_implementations/colqwen_models.py +10 -31
  69. mteb/models/model_implementations/colsmol_models.py +1 -1
  70. mteb/models/model_implementations/conan_models.py +10 -4
  71. mteb/models/model_implementations/dino_models.py +9 -4
  72. mteb/models/model_implementations/e5_v.py +9 -4
  73. mteb/models/model_implementations/eagerworks_models.py +10 -4
  74. mteb/models/model_implementations/evaclip_models.py +9 -4
  75. mteb/models/model_implementations/gme_v_models.py +5 -3
  76. mteb/models/model_implementations/google_models.py +10 -4
  77. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  78. mteb/models/model_implementations/hinvec_models.py +5 -1
  79. mteb/models/model_implementations/jasper_models.py +12 -5
  80. mteb/models/model_implementations/jina_clip.py +9 -4
  81. mteb/models/model_implementations/jina_models.py +10 -5
  82. mteb/models/model_implementations/kalm_models.py +18 -12
  83. mteb/models/model_implementations/linq_models.py +6 -1
  84. mteb/models/model_implementations/listconranker.py +9 -4
  85. mteb/models/model_implementations/llm2clip_models.py +9 -4
  86. mteb/models/model_implementations/llm2vec_models.py +12 -6
  87. mteb/models/model_implementations/mcinext_models.py +5 -2
  88. mteb/models/model_implementations/mdbr_models.py +3 -1
  89. mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
  90. mteb/models/model_implementations/moco_models.py +9 -4
  91. mteb/models/model_implementations/mod_models.py +1 -1
  92. mteb/models/model_implementations/model2vec_models.py +10 -4
  93. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  94. mteb/models/model_implementations/nomic_models.py +10 -4
  95. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  96. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  97. mteb/models/model_implementations/nvidia_models.py +12 -4
  98. mteb/models/model_implementations/octen_models.py +1 -1
  99. mteb/models/model_implementations/openai_models.py +9 -4
  100. mteb/models/model_implementations/openclip_models.py +9 -4
  101. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  102. mteb/models/model_implementations/ops_moa_models.py +7 -2
  103. mteb/models/model_implementations/pixie_models.py +56 -0
  104. mteb/models/model_implementations/promptriever_models.py +12 -6
  105. mteb/models/model_implementations/pylate_models.py +19 -13
  106. mteb/models/model_implementations/qwen3_models.py +8 -1
  107. mteb/models/model_implementations/random_baseline.py +4 -3
  108. mteb/models/model_implementations/repllama_models.py +13 -6
  109. mteb/models/model_implementations/rerankers_custom.py +10 -4
  110. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  111. mteb/models/model_implementations/salesforce_models.py +7 -1
  112. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  113. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  114. mteb/models/model_implementations/seed_models.py +1 -1
  115. mteb/models/model_implementations/siglip_models.py +9 -4
  116. mteb/models/model_implementations/slm_models.py +7 -4
  117. mteb/models/model_implementations/uae_models.py +9 -4
  118. mteb/models/model_implementations/vdr_models.py +7 -1
  119. mteb/models/model_implementations/vista_models.py +9 -4
  120. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  121. mteb/models/model_implementations/voyage_models.py +10 -4
  122. mteb/models/model_implementations/voyage_v.py +10 -6
  123. mteb/models/model_implementations/yuan_models_en.py +1 -1
  124. mteb/models/model_meta.py +12 -7
  125. mteb/models/models_protocols.py +19 -18
  126. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  127. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  128. mteb/models/search_wrappers.py +19 -12
  129. mteb/models/sentence_transformer_wrapper.py +4 -3
  130. mteb/models/vllm_wrapper.py +8 -6
  131. mteb/results/benchmark_results.py +22 -17
  132. mteb/results/model_result.py +21 -15
  133. mteb/results/task_result.py +41 -10
  134. mteb/similarity_functions.py +8 -2
  135. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  136. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  137. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  138. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  139. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  140. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  141. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  142. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  143. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  144. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  145. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  146. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  147. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  148. mteb/types/_encoder_io.py +1 -1
  149. mteb/types/statistics.py +9 -2
  150. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  151. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
  152. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  153. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  154. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  155. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,12 @@
+ from __future__ import annotations
+
  import logging
  import warnings
  from abc import ABC, abstractmethod
- from collections.abc import Callable, Sequence
- from typing import Any, Literal, cast, get_args, overload
-
- from torch.utils.data import DataLoader
- from typing_extensions import Unpack
+ from typing import TYPE_CHECKING, Any, Literal, cast, get_args, overload

  import mteb
- from mteb.abstasks.task_metadata import TaskMetadata, TaskType
+ from mteb.abstasks.task_metadata import TaskType
  from mteb.similarity_functions import (
  cos_sim,
  dot_score,
@@ -18,13 +16,25 @@ from mteb.similarity_functions import (
  pairwise_max_sim,
  )
  from mteb.types import (
- Array,
- BatchedInput,
- EncodeKwargs,
  PromptType,
  )

- from .model_meta import ModelMeta, ScoringFunction
+ from .model_meta import ScoringFunction
+
+ if TYPE_CHECKING:
+ from collections.abc import Callable, Sequence
+
+ from torch.utils.data import DataLoader
+ from typing_extensions import Unpack
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import (
+ Array,
+ BatchedInput,
+ EncodeKwargs,
+ )
+
+ from .model_meta import ModelMeta

  logger = logging.getLogger(__name__)

@@ -314,7 +324,7 @@ class AbsEncoder(ABC):
  ):
  arr = self.model.similarity(embeddings1, embeddings2)
  # We assume that the model returns an Array-like object:
- arr = cast(Array, arr)
+ arr = cast("Array", arr)
  return arr
  return cos_sim(embeddings1, embeddings2)
  if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
@@ -352,7 +362,7 @@ class AbsEncoder(ABC):
  ):
  arr = self.model.similarity_pairwise(embeddings1, embeddings2)
  # We assume that the model returns an Array-like object:
- arr = cast(Array, arr)
+ arr = cast("Array", arr)
  return arr
  return pairwise_cos_sim(embeddings1, embeddings2)
  if self.mteb_model_meta.similarity_fn_name is ScoringFunction.COSINE:
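Most of the import churn in this release follows a single pattern, visible in the AbsEncoder hunks above and repeated across the model implementations below: each module adds `from __future__ import annotations` and moves imports that are needed only for type annotations (`DataLoader`, `TaskMetadata`, `Array`, `BatchedInput`, ...) under an `if TYPE_CHECKING:` block, so they are no longer executed at import time. Because those names are then unbound at runtime, `typing.cast` switches to its string form, as in `cast("Array", arr)`. A minimal sketch of the idiom; the alias and function names below are illustrative, not taken from mteb:

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Any, cast

if TYPE_CHECKING:
    # Evaluated only by static type checkers, never at runtime.
    from collections.abc import Sequence

    # Hypothetical alias standing in for typing-only names such as mteb's Array.
    Vector = Sequence[float]


def normalize(raw: Any) -> Vector:
    # With `from __future__ import annotations`, the annotation above is stored
    # as a string (PEP 563), so Vector does not need to exist at runtime.
    values = [float(x) for x in raw]
    total = sum(values) or 1.0
    # cast() accepts the type as a string, which is why the diff rewrites
    # cast(Array, arr) as cast("Array", arr) once Array becomes typing-only.
    return cast("Vector", [v / total for v in values])


print(normalize([1, 1, 2]))  # [0.25, 0.25, 0.5]
```

The intent appears to be lighter import-time work and fewer circular-import hazards; runtime behaviour is unchanged because the deferred names are used only in annotations and string-form casts.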
@@ -1,9 +1,11 @@
  from __future__ import annotations

- from pathlib import Path
- from typing import Any, Protocol, runtime_checkable
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

- import numpy as np
+ if TYPE_CHECKING:
+ from pathlib import Path
+
+ import numpy as np


  @runtime_checkable
@@ -1,6 +1,12 @@
+ from __future__ import annotations
+
  import hashlib
- from collections.abc import Mapping
- from typing import Any
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+ from collections.abc import Mapping
+
+ from PIL import Image


  def _hash_item(item: Mapping[str, Any]) -> str:
@@ -10,8 +16,6 @@ def _hash_item(item: Mapping[str, Any]) -> str:
  item_hash = hashlib.sha256(item_text.encode()).hexdigest()

  if "image" in item:
- from PIL import Image
-
  image: Image.Image = item["image"]
  item_hash += hashlib.sha256(image.tobytes()).hexdigest()
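The `_hash_item` hunk above also shows how cache keys are derived for multimodal items: the textual part of the item is hashed with SHA-256, and if the item carries a PIL image, a hash of its raw pixel bytes is appended. A small, self-contained sketch of that idea (the helper name and item layout are illustrative, not mteb's exact code):

```python
import hashlib

from PIL import Image


def hash_item(item: dict) -> str:
    """Stable cache key for a text/image item (illustrative sketch only)."""
    # Hash the textual part of the item first.
    key = hashlib.sha256(str(item.get("text", "")).encode()).hexdigest()
    # If an image is present, append a hash of its raw pixel bytes,
    # mirroring the image.tobytes() hashing in the hunk above.
    if "image" in item:
        image: Image.Image = item["image"]
        key += hashlib.sha256(image.tobytes()).hexdigest()
    return key


print(hash_item({"text": "a photo of a cat"}))
print(hash_item({"text": "a photo of a cat", "image": Image.new("RGB", (2, 2))}))
```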
@@ -1,16 +1,22 @@
+ from __future__ import annotations
+
  import json
  import logging
  import warnings
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np

  from mteb._requires_package import requires_package
- from mteb.types import BatchedInput

  from ._hash_utils import _hash_item

+ if TYPE_CHECKING:
+ import faiss
+
+ from mteb.types import BatchedInput
+
  logger = logging.getLogger(__name__)


@@ -24,7 +30,6 @@ class FaissCache:
  "FAISS-based vector cache",
  install_instruction="pip install mteb[faiss-cpu]",
  )
- import faiss

  self.directory = Path(directory)
  self.directory.mkdir(parents=True, exist_ok=True)
@@ -1,21 +1,26 @@
+ from __future__ import annotations
+
  import logging
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  import torch
  from datasets import Dataset
- from torch.utils.data import DataLoader

  from mteb._create_dataloaders import create_dataloader
- from mteb.abstasks.task_metadata import TaskMetadata
- from mteb.models.cache_wrappers.cache_backend_protocol import (
- CacheBackendProtocol,
- )
  from mteb.models.cache_wrappers.cache_backends.numpy_cache import NumpyCache
- from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import EncoderProtocol
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.models.cache_wrappers.cache_backend_protocol import (
+ CacheBackendProtocol,
+ )
+ from mteb.models.model_meta import ModelMeta
+ from mteb.models.models_protocols import EncoderProtocol
+ from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,15 +1,22 @@
+ from __future__ import annotations
+
  import difflib
  import logging
- from collections.abc import Iterable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

- from mteb.abstasks import AbsTask
  from mteb.models import (
  ModelMeta,
- MTEBModels,
  )
  from mteb.models.model_implementations import MODEL_REGISTRY

+ if TYPE_CHECKING:
+ from collections.abc import Iterable
+
+ from mteb.abstasks import AbsTask
+ from mteb.models import (
+ MTEBModels,
+ )
+
  logger = logging.getLogger(__name__)

@@ -1,16 +1,24 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType

  from .abs_encoder import AbsEncoder

+ if TYPE_CHECKING:
+ from collections.abc import Callable
+
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput
+
+
  logger = logging.getLogger(__name__)

@@ -1,13 +1,18 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType


  class ALIGNModel(AbsEncoder):
@@ -1,20 +1,30 @@
+ from __future__ import annotations
+
  import json
  import logging
  import re
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType

- from .cohere_models import model_prompts as cohere_model_prompts
- from .cohere_models import supported_languages as cohere_supported_languages
+ from .cohere_models import (
+ model_prompts as cohere_model_prompts,
+ )
+ from .cohere_models import (
+ supported_languages as cohere_supported_languages,
+ )
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType
+

  logger = logging.getLogger(__name__)

@@ -1,14 +1,19 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType

  BLIP2_CITATION = """@inproceedings{li2023blip2,
  title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
@@ -1,14 +1,19 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
  from torch.nn.functional import normalize
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType

  BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
  doi = {10.48550/ARXIV.2201.12086},
@@ -1,18 +1,23 @@
+ from __future__ import annotations
+
  import logging
+ from typing import TYPE_CHECKING

  from mteb._create_dataloaders import _create_text_queries_dataloader
  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import SearchProtocol
- from mteb.types import (
- CorpusDatasetType,
- EncodeKwargs,
- InstructionDatasetType,
- QueryDatasetType,
- RetrievalOutputType,
- TopRankedDocumentsType,
- )
+
+ if TYPE_CHECKING:
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.models.models_protocols import SearchProtocol
+ from mteb.types import (
+ CorpusDatasetType,
+ EncodeKwargs,
+ InstructionDatasetType,
+ QueryDatasetType,
+ RetrievalOutputType,
+ TopRankedDocumentsType,
+ )

  logger = logging.getLogger(__name__)

@@ -1,5 +1,6 @@
- from collections.abc import Callable
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
  from sentence_transformers import SentenceTransformer
@@ -9,6 +10,9 @@ from mteb.models import ModelMeta
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.types import PromptType

+ if TYPE_CHECKING:
+ from collections.abc import Callable
+

  def instruction_template(
  instruction: str, prompt_type: PromptType | None = None
@@ -1,27 +1,31 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Sequence
  from typing import TYPE_CHECKING, Any

  import numpy as np
  import torch
- from torch.utils.data import DataLoader

  import mteb
  from mteb._create_dataloaders import _corpus_to_dict
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.models.models_protocols import PromptType
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
- from mteb.types import Array, BatchedInput
+ from mteb.types import PromptType

  from .bge_models import bge_full_data

  if TYPE_CHECKING:
+ from collections.abc import Sequence
+
+ from torch.utils.data import DataLoader
+
  from mteb.abstasks import (
  AbsTaskClassification,
  AbsTaskRetrieval,
  AbsTaskSummarization,
  )
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput
  logger = logging.getLogger(__name__)

  CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
@@ -1,13 +1,18 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType


  class CLIPModel(AbsEncoder):
@@ -1,18 +1,24 @@
+ from __future__ import annotations
+
  import logging
  import time
  from functools import wraps
- from typing import Any, Literal, get_args
+ from typing import TYPE_CHECKING, Any, Literal, get_args

  import numpy as np
  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -1,15 +1,15 @@
+ from __future__ import annotations
+
  import base64
  import io
  import os
  import time
- from typing import Any, Literal, get_args
+ from typing import TYPE_CHECKING, Any, Literal, get_args

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies, requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models import ModelMeta
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_implementations.cohere_models import (
@@ -18,7 +18,12 @@ from mteb.models.model_implementations.cohere_models import (
  retry_with_rate_limit,
  )
  from mteb.models.model_meta import ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType


  def _post_process_embeddings(
@@ -4,20 +4,21 @@ import logging
  from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import (
  requires_image_dependencies,
  requires_package,
  )
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType

  if TYPE_CHECKING:
  from PIL import Image
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,18 +1,23 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import (
  requires_image_dependencies,
  requires_package,
  )
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType

  from .colpali_models import (
  COLPALI_CITATION,
@@ -329,32 +334,6 @@ colqwen3_4b = ModelMeta(
  citation=TOMORO_CITATION,
  )

- colnomic_7b = ModelMeta(
- loader=ColQwen2_5Wrapper,
- loader_kwargs=dict(
- torch_dtype=torch.float16,
- ),
- name="nomic-ai/colnomic-embed-multimodal-7b",
- model_type=["late-interaction"],
- languages=["eng-Latn"],
- revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
- release_date="2025-03-31",
- modalities=["image", "text"],
- n_parameters=7_000_000_000,
- memory_usage_mb=14400,
- max_tokens=128000,
- embed_dim=128,
- license="apache-2.0",
- open_weights=True,
- public_training_code="https://github.com/nomic-ai/colpali",
- public_training_data="https://huggingface.co/datasets/vidore/colpali_train_set",
- framework=["ColPali", "safetensors"],
- reference="https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b",
- similarity_fn_name="MaxSim",
- use_instructions=True,
- training_datasets=COLPALI_TRAINING_DATA,
- citation=COLPALI_CITATION,
- )

  COLNOMIC_CITATION = """
  @misc{nomicembedmultimodal2025,
@@ -402,7 +381,7 @@ colnomic_3b = ModelMeta(
  )

  colnomic_7b = ModelMeta(
- loader=ColQwen2Wrapper,
+ loader=ColQwen2_5Wrapper,
  loader_kwargs=dict(
  torch_dtype=torch.float16,
  ),
@@ -56,7 +56,7 @@ colsmol_256m = ModelMeta(
  name="vidore/colSmol-256M",
  model_type=["late-interaction"],
  languages=["eng-Latn"],
- revision="530094e83a40ca4edcb5c9e5ddfa61a4b5ea0d2f",
+ revision="a59110fdf114638b8018e6c9a018907e12f14855",
  release_date="2025-01-22",
  modalities=["image", "text"],
  n_parameters=256_000_000,
@@ -1,3 +1,5 @@
+ from __future__ import annotations
+
  import hashlib
  import json
  import logging
@@ -5,20 +7,24 @@ import os
  import random
  import string
  import time
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  import requests
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType

  from .bge_models import bge_full_data
  from .e5_instruct import E5_MISTRAL_TRAINING_DATA

+ if TYPE_CHECKING:
+ from torch.utils.data import DataLoader
+
+ from mteb.abstasks.task_metadata import TaskMetadata
+ from mteb.types import Array, BatchedInput, PromptType
+
+
  conan_zh_datasets = {
  "BQ",
  "LCQMC",