mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +16 -9
  3. mteb/_evaluators/any_sts_evaluator.py +10 -5
  4. mteb/_evaluators/clustering_evaluator.py +10 -4
  5. mteb/_evaluators/evaluator.py +9 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  8. mteb/_evaluators/retrieval_evaluator.py +19 -13
  9. mteb/_evaluators/retrieval_metrics.py +9 -3
  10. mteb/_evaluators/sklearn_evaluator.py +14 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  12. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +8 -2
  16. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  17. mteb/abstasks/_statistics_calculation.py +6 -4
  18. mteb/abstasks/abstask.py +17 -9
  19. mteb/abstasks/aggregate_task_metadata.py +20 -9
  20. mteb/abstasks/aggregated_task.py +15 -8
  21. mteb/abstasks/classification.py +15 -6
  22. mteb/abstasks/clustering.py +17 -8
  23. mteb/abstasks/clustering_legacy.py +14 -6
  24. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  25. mteb/abstasks/multilabel_classification.py +11 -5
  26. mteb/abstasks/pair_classification.py +19 -9
  27. mteb/abstasks/regression.py +14 -6
  28. mteb/abstasks/retrieval.py +27 -16
  29. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  30. mteb/abstasks/sts.py +19 -10
  31. mteb/abstasks/task_metadata.py +17 -8
  32. mteb/abstasks/text/bitext_mining.py +14 -7
  33. mteb/abstasks/text/summarization.py +17 -7
  34. mteb/abstasks/zeroshot_classification.py +15 -7
  35. mteb/benchmarks/_create_table.py +13 -3
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  38. mteb/cache.py +20 -14
  39. mteb/cli/_display_tasks.py +9 -3
  40. mteb/cli/build_cli.py +5 -2
  41. mteb/cli/generate_model_card.py +9 -2
  42. mteb/deprecated_evaluator.py +16 -12
  43. mteb/evaluate.py +20 -18
  44. mteb/filter_tasks.py +12 -7
  45. mteb/get_tasks.py +9 -4
  46. mteb/languages/language_scripts.py +8 -3
  47. mteb/leaderboard/app.py +7 -3
  48. mteb/leaderboard/table.py +7 -2
  49. mteb/load_results.py +9 -3
  50. mteb/models/abs_encoder.py +22 -12
  51. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  52. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  53. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  54. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  55. mteb/models/get_model_meta.py +11 -4
  56. mteb/models/instruct_wrapper.py +13 -5
  57. mteb/models/model_implementations/align_models.py +9 -4
  58. mteb/models/model_implementations/bedrock_models.py +16 -6
  59. mteb/models/model_implementations/blip2_models.py +9 -4
  60. mteb/models/model_implementations/blip_models.py +9 -4
  61. mteb/models/model_implementations/bm25.py +15 -10
  62. mteb/models/model_implementations/bmretriever_models.py +6 -2
  63. mteb/models/model_implementations/cde_models.py +9 -5
  64. mteb/models/model_implementations/clip_models.py +9 -4
  65. mteb/models/model_implementations/cohere_models.py +10 -4
  66. mteb/models/model_implementations/cohere_v.py +9 -4
  67. mteb/models/model_implementations/colpali_models.py +4 -3
  68. mteb/models/model_implementations/colqwen_models.py +10 -31
  69. mteb/models/model_implementations/colsmol_models.py +1 -1
  70. mteb/models/model_implementations/conan_models.py +10 -4
  71. mteb/models/model_implementations/dino_models.py +9 -4
  72. mteb/models/model_implementations/e5_v.py +9 -4
  73. mteb/models/model_implementations/eagerworks_models.py +10 -4
  74. mteb/models/model_implementations/evaclip_models.py +9 -4
  75. mteb/models/model_implementations/gme_v_models.py +5 -3
  76. mteb/models/model_implementations/google_models.py +10 -4
  77. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  78. mteb/models/model_implementations/hinvec_models.py +5 -1
  79. mteb/models/model_implementations/jasper_models.py +12 -5
  80. mteb/models/model_implementations/jina_clip.py +9 -4
  81. mteb/models/model_implementations/jina_models.py +10 -5
  82. mteb/models/model_implementations/kalm_models.py +18 -12
  83. mteb/models/model_implementations/linq_models.py +6 -1
  84. mteb/models/model_implementations/listconranker.py +9 -4
  85. mteb/models/model_implementations/llm2clip_models.py +9 -4
  86. mteb/models/model_implementations/llm2vec_models.py +12 -6
  87. mteb/models/model_implementations/mcinext_models.py +5 -2
  88. mteb/models/model_implementations/mdbr_models.py +3 -1
  89. mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
  90. mteb/models/model_implementations/moco_models.py +9 -4
  91. mteb/models/model_implementations/mod_models.py +1 -1
  92. mteb/models/model_implementations/model2vec_models.py +10 -4
  93. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  94. mteb/models/model_implementations/nomic_models.py +10 -4
  95. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  96. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  97. mteb/models/model_implementations/nvidia_models.py +12 -4
  98. mteb/models/model_implementations/octen_models.py +1 -1
  99. mteb/models/model_implementations/openai_models.py +9 -4
  100. mteb/models/model_implementations/openclip_models.py +9 -4
  101. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  102. mteb/models/model_implementations/ops_moa_models.py +7 -2
  103. mteb/models/model_implementations/pixie_models.py +56 -0
  104. mteb/models/model_implementations/promptriever_models.py +12 -6
  105. mteb/models/model_implementations/pylate_models.py +19 -13
  106. mteb/models/model_implementations/qwen3_models.py +8 -1
  107. mteb/models/model_implementations/random_baseline.py +4 -3
  108. mteb/models/model_implementations/repllama_models.py +13 -6
  109. mteb/models/model_implementations/rerankers_custom.py +10 -4
  110. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  111. mteb/models/model_implementations/salesforce_models.py +7 -1
  112. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  113. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  114. mteb/models/model_implementations/seed_models.py +1 -1
  115. mteb/models/model_implementations/siglip_models.py +9 -4
  116. mteb/models/model_implementations/slm_models.py +7 -4
  117. mteb/models/model_implementations/uae_models.py +9 -4
  118. mteb/models/model_implementations/vdr_models.py +7 -1
  119. mteb/models/model_implementations/vista_models.py +9 -4
  120. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  121. mteb/models/model_implementations/voyage_models.py +10 -4
  122. mteb/models/model_implementations/voyage_v.py +10 -6
  123. mteb/models/model_implementations/yuan_models_en.py +1 -1
  124. mteb/models/model_meta.py +12 -7
  125. mteb/models/models_protocols.py +19 -18
  126. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  127. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  128. mteb/models/search_wrappers.py +19 -12
  129. mteb/models/sentence_transformer_wrapper.py +4 -3
  130. mteb/models/vllm_wrapper.py +8 -6
  131. mteb/results/benchmark_results.py +22 -17
  132. mteb/results/model_result.py +21 -15
  133. mteb/results/task_result.py +41 -10
  134. mteb/similarity_functions.py +8 -2
  135. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  136. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  137. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  138. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  139. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  140. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  141. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  142. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  143. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  144. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  145. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  146. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  147. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  148. mteb/types/_encoder_io.py +1 -1
  149. mteb/types/statistics.py +9 -2
  150. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  151. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
  152. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  153. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  154. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  155. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,22 @@
-from collections.abc import Generator
+from __future__ import annotations
+
 from itertools import islice
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import numpy as np
 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput


 # https://docs.python.org/3/library/itertools.html#itertools.batched
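The same import reorganization recurs through most of the hunks below: imports used only in type annotations move under an `if TYPE_CHECKING:` guard, and `from __future__ import annotations` is added so the guarded names can still appear in annotations at runtime. A minimal sketch of that pattern, using placeholder names rather than mteb's own modules:

    # Sketch of the TYPE_CHECKING import pattern; names here are illustrative.
    from __future__ import annotations  # annotations are stored as strings, never evaluated

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Imported only by static type checkers, not at runtime,
        # which avoids import cost and circular-import problems.
        from collections.abc import Generator


    def consume(items: Generator[int, None, None]) -> int:
        # Generator is undefined at runtime, but the annotation is fine
        # because it is never evaluated.
        return sum(1 for _ in items)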
@@ -1,15 +1,21 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
 import torch.nn.functional as F
 from packaging.version import Version
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput

 logger = logging.getLogger(__name__)

@@ -4,17 +4,18 @@ from typing import TYPE_CHECKING, Any

 import torch
 import torch.nn.functional as F
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType

 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 NOMIC_EMBED_VISION_CITATION = """@article{nussbaum2024nomicembedvision,
 title={Nomic Embed Vision: Expanding the Latent Space},
@@ -1,14 +1,18 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
 from packaging.version import Version
 from torch.utils.data import DataLoader
 from transformers import __version__ as transformers_version

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 LLAMA_NEMORETRIEVER_CITATION = """@misc{xu2025llamanemoretrievercolembedtopperforming,
 title={Llama Nemoretriever Colembed: Top-Performing Text-Image Retrieval Model},
@@ -1,11 +1,11 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Callable
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
 import torch.nn.functional as F
 from packaging.version import Version
-from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModel, AutoTokenizer
 from transformers import __version__ as transformers_version
@@ -16,7 +16,15 @@ from mteb.models import CrossEncoderWrapper
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from torch.utils.data import DataLoader
+
+    from mteb import TaskMetadata
+    from mteb.types import Array, BatchedInput

 logger = logging.getLogger(__name__)

@@ -1,6 +1,6 @@
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import PromptType
+from mteb.types import PromptType


 def instruction_template(
@@ -1,15 +1,20 @@
+from __future__ import annotations
+
 import logging
-from typing import Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar

 import numpy as np
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 logger = logging.getLogger(__name__)

@@ -1,14 +1,19 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_image_dependencies, requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 OPENCLIP_CITATION = """@inproceedings{cherti2023reproducible,
 title={Reproducible scaling laws for contrastive language-image learning},
@@ -1,12 +1,18 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput

 v2_training_data = {
     "MSMARCO",
@@ -1,8 +1,13 @@
-import numpy as np
+from __future__ import annotations
+
+from typing import TYPE_CHECKING

 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta

+if TYPE_CHECKING:
+    from mteb.types import Array
+

 class OPSWrapper(AbsEncoder):
     def __init__(self, model_name: str, revision: str):
@@ -15,7 +20,7 @@ class OPSWrapper(AbsEncoder):
         )
         self.output_dim = 1536

-    def encode(self, sentences: list[str], **kwargs) -> np.ndarray:
+    def encode(self, sentences: list[str], **kwargs) -> Array:
         embeddings = self.model.encode(sentences, **kwargs)
         return embeddings[:, : self.output_dim]

@@ -0,0 +1,56 @@
+from mteb.models.model_implementations.arctic_models import (
+    ARCTIC_V2_CITATION,
+    LANGUAGES_V2_0,
+    arctic_v2_training_datasets,
+)
+from mteb.models.model_meta import (
+    ModelMeta,
+    ScoringFunction,
+)
+from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
+
+PIXIE_RUNE_V1_CITATION = """@misc{TelePIX-PIXIE-Rune-v1.0,
+title = {PIXIE-Rune-v1.0},
+author = {TelePIX AI Research Team and Bongmin Kim},
+year = {2026},
+howpublished = {Hugging Face model card},
+url = {https://huggingface.co/telepix/PIXIE-Rune-v1.0}
+}"""
+
+PIXIE_RUNE_V1_PROMPTS = {
+    "query": "query: ",
+    "document": "",
+}
+
+# it is further fine-tuned on TelePIX proprietary IR data (not public).
+pixie_rune_v1_training_datasets = set(arctic_v2_training_datasets) | {
+    "TelePIX-Proprietary-IR-Triplets",
+}
+
+pixie_rune_v1_0 = ModelMeta(
+    loader=sentence_transformers_loader,
+    loader_kwargs={
+        "model_prompts": PIXIE_RUNE_V1_PROMPTS,
+    },
+    name="telepix/PIXIE-Rune-v1.0",
+    model_type=["dense"],
+    revision="b2486496da71191626666a88f9bfec844933a134",
+    release_date="2026-01-15",
+    languages=LANGUAGES_V2_0,
+    open_weights=True,
+    framework=["Sentence Transformers", "PyTorch", "safetensors"],
+    n_parameters=567754752,
+    memory_usage_mb=2166,
+    max_tokens=6144,
+    embed_dim=1024,
+    license="apache-2.0",
+    reference="https://huggingface.co/telepix/PIXIE-Rune-v1.0",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    adapted_from="Snowflake/snowflake-arctic-embed-l-v2.0",
+    superseded_by=None,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=pixie_rune_v1_training_datasets,
+    citation=PIXIE_RUNE_V1_CITATION + "\n\n" + ARCTIC_V2_CITATION,
+)
@@ -1,15 +1,21 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Callable
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.models_protocols import EncoderProtocol
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.models_protocols import EncoderProtocol
+    from mteb.types import Array, BatchedInput, PromptType

 from .repllama_models import RepLLaMAModel, model_prompts

@@ -1,30 +1,36 @@
+from __future__ import annotations
+
 import heapq
 import logging
 import shutil
 import tempfile
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

 from mteb._create_dataloaders import (
     create_dataloader,
 )
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import (
-    Array,
-    BatchedInput,
-    CorpusDatasetType,
-    EncodeKwargs,
-    PromptType,
-    QueryDatasetType,
-    RetrievalOutputType,
-    TopRankedDocumentsType,
-)
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import (
+        Array,
+        BatchedInput,
+        CorpusDatasetType,
+        EncodeKwargs,
+        QueryDatasetType,
+        RetrievalOutputType,
+        TopRankedDocumentsType,
+    )
+

 logger = logging.getLogger(__name__)

@@ -1,6 +1,13 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import EncoderProtocol, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from mteb.models.models_protocols import EncoderProtocol


 def instruction_template(
@@ -5,18 +5,19 @@ from typing import TYPE_CHECKING, Any, Literal

 import numpy as np
 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta
 from mteb.similarity_functions import (
     select_pairwise_similarity,
     select_similarity,
 )
-from mteb.types._encoder_io import Array, BatchedInput, PromptType

 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types._encoder_io import Array, BatchedInput, PromptType


 def _string_to_vector(text: str | None, size: int) -> np.ndarray:
@@ -1,22 +1,29 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Callable
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import numpy as np
 import torch
 import torch.nn.functional as F
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import (
     ModelMeta,
     ScoringFunction,
 )
-from mteb.models.models_protocols import EncoderProtocol
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.models_protocols import EncoderProtocol
+    from mteb.types import Array, BatchedInput

 logger = logging.getLogger(__name__)

@@ -1,16 +1,22 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType

 from .bge_models import bge_m3_training_data

+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
+
 logger = logging.getLogger(__name__)

@@ -1,15 +1,21 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType

 from .rerankers_custom import RerankerWrapper

+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType
+
+
 logger = logging.getLogger(__name__)

@@ -1,12 +1,18 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 from mteb.models.instruct_wrapper import (
     InstructSentenceTransformerModel,
     instruct_wrapper,
 )
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import PromptType

 from .e5_instruct import E5_MISTRAL_TRAINING_DATA

+if TYPE_CHECKING:
+    from mteb.types import PromptType
+

 def instruction_template(
     instruction: str, prompt_type: PromptType | None = None
@@ -13,16 +13,18 @@ import torch
 from torch.utils.data import DataLoader

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_implementations.bge_models import bge_chinese_training_data
 from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType

 if TYPE_CHECKING:
     from PIL import Image

+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
+

 logger = logging.getLogger(__name__)

@@ -15,15 +15,18 @@ from torch.utils.data import DataLoader
 from tqdm import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_implementations.bge_models import bge_chinese_training_data
 from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
 from mteb.models.model_meta import ModelMeta
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType

 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput


 logger = logging.getLogger(__name__)
@@ -9,7 +9,7 @@ from tqdm.auto import tqdm
 from mteb._requires_package import requires_package
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta
-from mteb.models.models_protocols import PromptType
+from mteb.types import PromptType

 from .bge_models import bge_chinese_training_data
 from .nvidia_models import nvidia_training_datasets
@@ -1,13 +1,18 @@
-from typing import Any
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 SIGLIP_CITATION = """@misc{zhai2023sigmoid,
 title={Sigmoid Loss for Language Image Pre-Training},
@@ -13,24 +13,27 @@ Based on:
 from __future__ import annotations

 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_implementations.colpali_models import (
     COLPALI_CITATION,
     COLPALI_TRAINING_DATA,
 )
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 logger = logging.getLogger(__name__)

@@ -1,13 +1,18 @@
+from __future__ import annotations
+
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
-from mteb.types import Array, BatchedInput, PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 logger = logging.getLogger(__name__)

@@ -1,6 +1,12 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from mteb.types import PromptType


 def instruction_template(