mteb 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +27 -16
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  37. mteb/cache.py +10 -5
  38. mteb/cli/_display_tasks.py +9 -3
  39. mteb/cli/build_cli.py +5 -2
  40. mteb/cli/generate_model_card.py +9 -2
  41. mteb/deprecated_evaluator.py +16 -12
  42. mteb/evaluate.py +20 -18
  43. mteb/filter_tasks.py +12 -7
  44. mteb/get_tasks.py +9 -4
  45. mteb/languages/language_scripts.py +8 -3
  46. mteb/leaderboard/app.py +7 -3
  47. mteb/leaderboard/table.py +7 -2
  48. mteb/load_results.py +9 -3
  49. mteb/models/abs_encoder.py +22 -12
  50. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  51. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  52. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  53. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  54. mteb/models/get_model_meta.py +11 -4
  55. mteb/models/instruct_wrapper.py +13 -5
  56. mteb/models/model_implementations/align_models.py +9 -4
  57. mteb/models/model_implementations/bedrock_models.py +16 -6
  58. mteb/models/model_implementations/blip2_models.py +9 -4
  59. mteb/models/model_implementations/blip_models.py +9 -4
  60. mteb/models/model_implementations/bm25.py +15 -10
  61. mteb/models/model_implementations/bmretriever_models.py +6 -2
  62. mteb/models/model_implementations/cde_models.py +9 -5
  63. mteb/models/model_implementations/clip_models.py +9 -4
  64. mteb/models/model_implementations/cohere_models.py +10 -4
  65. mteb/models/model_implementations/cohere_v.py +9 -4
  66. mteb/models/model_implementations/colpali_models.py +4 -3
  67. mteb/models/model_implementations/colqwen_models.py +10 -31
  68. mteb/models/model_implementations/colsmol_models.py +1 -1
  69. mteb/models/model_implementations/conan_models.py +10 -4
  70. mteb/models/model_implementations/dino_models.py +9 -4
  71. mteb/models/model_implementations/e5_v.py +9 -4
  72. mteb/models/model_implementations/eagerworks_models.py +10 -4
  73. mteb/models/model_implementations/evaclip_models.py +9 -4
  74. mteb/models/model_implementations/gme_v_models.py +5 -3
  75. mteb/models/model_implementations/google_models.py +10 -4
  76. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  77. mteb/models/model_implementations/hinvec_models.py +5 -1
  78. mteb/models/model_implementations/jasper_models.py +12 -5
  79. mteb/models/model_implementations/jina_clip.py +9 -4
  80. mteb/models/model_implementations/jina_models.py +10 -5
  81. mteb/models/model_implementations/kalm_models.py +18 -12
  82. mteb/models/model_implementations/linq_models.py +6 -1
  83. mteb/models/model_implementations/listconranker.py +9 -4
  84. mteb/models/model_implementations/llm2clip_models.py +9 -4
  85. mteb/models/model_implementations/llm2vec_models.py +12 -6
  86. mteb/models/model_implementations/mcinext_models.py +5 -2
  87. mteb/models/model_implementations/moco_models.py +9 -4
  88. mteb/models/model_implementations/mod_models.py +1 -1
  89. mteb/models/model_implementations/model2vec_models.py +10 -4
  90. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  91. mteb/models/model_implementations/nomic_models.py +10 -4
  92. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  93. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  94. mteb/models/model_implementations/nvidia_models.py +12 -4
  95. mteb/models/model_implementations/octen_models.py +1 -1
  96. mteb/models/model_implementations/openai_models.py +9 -4
  97. mteb/models/model_implementations/openclip_models.py +9 -4
  98. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  99. mteb/models/model_implementations/ops_moa_models.py +7 -2
  100. mteb/models/model_implementations/promptriever_models.py +12 -6
  101. mteb/models/model_implementations/pylate_models.py +19 -13
  102. mteb/models/model_implementations/qwen3_models.py +8 -1
  103. mteb/models/model_implementations/random_baseline.py +4 -3
  104. mteb/models/model_implementations/repllama_models.py +13 -6
  105. mteb/models/model_implementations/rerankers_custom.py +10 -4
  106. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  107. mteb/models/model_implementations/salesforce_models.py +7 -1
  108. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  109. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  110. mteb/models/model_implementations/seed_models.py +1 -1
  111. mteb/models/model_implementations/siglip_models.py +9 -4
  112. mteb/models/model_implementations/slm_models.py +7 -4
  113. mteb/models/model_implementations/uae_models.py +9 -4
  114. mteb/models/model_implementations/vdr_models.py +7 -1
  115. mteb/models/model_implementations/vista_models.py +9 -4
  116. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  117. mteb/models/model_implementations/voyage_models.py +10 -4
  118. mteb/models/model_implementations/voyage_v.py +10 -6
  119. mteb/models/model_implementations/yuan_models_en.py +1 -1
  120. mteb/models/model_meta.py +12 -7
  121. mteb/models/models_protocols.py +19 -18
  122. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  123. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  124. mteb/models/search_wrappers.py +19 -12
  125. mteb/models/sentence_transformer_wrapper.py +4 -3
  126. mteb/models/vllm_wrapper.py +8 -6
  127. mteb/results/benchmark_results.py +22 -17
  128. mteb/results/model_result.py +21 -15
  129. mteb/results/task_result.py +15 -9
  130. mteb/similarity_functions.py +8 -2
  131. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  132. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  133. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  134. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  135. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  136. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  137. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  138. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  139. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  140. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  141. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  142. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  143. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  144. mteb/types/_encoder_io.py +1 -1
  145. mteb/types/statistics.py +9 -2
  146. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  147. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
  148. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  149. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  150. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  151. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
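
Most of the hunks shown below apply the same refactoring: typing-only imports (DataLoader, TaskMetadata, Array, BatchedInput, PromptType) are moved into an if TYPE_CHECKING: block and from __future__ import annotations is added, so these modules are no longer imported at runtime. A minimal sketch of the pattern follows; the class and parameter names are illustrative only and are not copied from any mteb module.

    from __future__ import annotations

    from typing import TYPE_CHECKING, Any

    if TYPE_CHECKING:
        # Only evaluated by static type checkers, never imported at runtime.
        from torch.utils.data import DataLoader

        from mteb.abstasks.task_metadata import TaskMetadata
        from mteb.types import Array, BatchedInput, PromptType


    class ExampleEncoder:  # hypothetical class, used only for this sketch
        def encode(
            self,
            inputs: DataLoader[BatchedInput],
            *,
            task_metadata: TaskMetadata,
            prompt_type: PromptType | None = None,
            **kwargs: Any,
        ) -> Array:
            # PEP 563 (postponed evaluation) keeps these annotations as strings,
            # so torch and the mteb typing modules are not loaded when this
            # module is imported.
            ...

Beyond that pattern, the hunks also show two small fixes: PromptType is now imported from mteb.types rather than mteb.models.models_protocols, and OPSWrapper.encode is annotated with the Array type instead of np.ndarray.
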
@@ -1,17 +1,23 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  from packaging.version import Version
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm
  from transformers import __version__ as transformers_version

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models import sentence_transformers_loader
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  MULTILINGUAL_EVALUATED_LANGUAGES = [
      "arb-Arab",
@@ -4,20 +4,21 @@ import logging
  from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import (
      requires_image_dependencies,
  )
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType
-
- logger = logging.getLogger(__name__)

  if TYPE_CHECKING:
      from PIL import Image
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType
+
+ logger = logging.getLogger(__name__)


  class GraniteVisionEmbeddingWrapper:
@@ -1,9 +1,13 @@
+ from __future__ import annotations
+
  import logging
+ from typing import TYPE_CHECKING

  from mteb.models.model_meta import ModelMeta
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
- from mteb.types import PromptType

+ if TYPE_CHECKING:
+     from mteb.types import PromptType
  logger = logging.getLogger(__name__)


@@ -1,11 +1,10 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_implementations.bge_models import (
@@ -17,7 +16,15 @@ from mteb.models.model_implementations.e5_instruct import E5_MISTRAL_TRAINING_DA
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
  from mteb.models.model_implementations.qzhou_models import qzhou_training_data
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -1,15 +1,20 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_implementations.colpali_models import COLPALI_TRAINING_DATA
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  JINA_CLIP_CITATION = """@article{koukounas2024jinaclip,
  title={Jina CLIP: Your CLIP Model Is Also Your Text Retriever},
@@ -1,14 +1,13 @@
+ from __future__ import annotations
+
  import logging
  from collections import defaultdict
- from typing import Any, ClassVar
+ from typing import TYPE_CHECKING, Any, ClassVar

  import numpy as np
  import torch
- from sentence_transformers import CrossEncoder
- from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.languages import PROGRAMMING_LANGS
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
@@ -16,7 +15,13 @@ from mteb.models.sentence_transformer_wrapper import (
      CrossEncoderWrapper,
      SentenceTransformerEncoderWrapper,
  )
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from sentence_transformers import CrossEncoder
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,14 +1,20 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta
  from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -907,23 +913,23 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
      adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
      superseded_by=None,
      citation="""@misc{zhao2025kalmembeddingv2,
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
  year={2025},
  eprint={2506.20923},
  archivePrefix={arXiv},
  primaryClass={cs.CL},
- url={https://arxiv.org/abs/2506.20923},
+ url={https://arxiv.org/abs/2506.20923},
  }

  @misc{hu2025kalmembedding,
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
  year={2025},
  eprint={2501.01028},
  archivePrefix={arXiv},
  primaryClass={cs.CL},
- url={https://arxiv.org/abs/2501.01028},
+ url={https://arxiv.org/abs/2501.01028},
  }""",
  )

@@ -954,22 +960,22 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
      public_training_data=None,
      training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
      citation="""@misc{zhao2025kalmembeddingv2,
- title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
+ title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
  author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
  year={2025},
  eprint={2506.20923},
  archivePrefix={arXiv},
  primaryClass={cs.CL},
- url={https://arxiv.org/abs/2506.20923},
+ url={https://arxiv.org/abs/2506.20923},
  }

  @misc{hu2025kalmembedding,
- title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
+ title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
  author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
  year={2025},
  eprint={2501.01028},
  archivePrefix={arXiv},
  primaryClass={cs.CL},
- url={https://arxiv.org/abs/2501.01028},
+ url={https://arxiv.org/abs/2501.01028},
  }""",
  )
@@ -1,11 +1,16 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
  import torch

  from mteb.models.instruct_wrapper import instruct_wrapper
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import PromptType

  from .e5_instruct import E5_MISTRAL_TRAINING_DATA

+ if TYPE_CHECKING:
+     from mteb.types import PromptType
  LINQ_EMBED_MISTRAL_CITATION = """@misc{LinqAIResearch2024,
  title={Linq-Embed-Mistral:Elevating Text Retrieval with Improved GPT Data Through Task-Specific Control and Quality Refinement},
  author={Junseong Kim and Seolhwa Lee and Jihoon Kwon and Sangmo Gu and Yejin Kim and Minkyung Cho and Jy-yong Sohn and Chanyeol Choi},
@@ -1,14 +1,19 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
- from mteb.types import BatchedInput, PromptType

  from .rerankers_custom import RerankerWrapper

+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import BatchedInput, PromptType
+
  LISTCONRANKER_CITATION = """@article{liu2025listconranker,
  title={ListConRanker: A Contrastive Text Reranker with Listwise Encoding},
  author={Liu, Junlong and Ma, Yue and Zhao, Ruihui and Zheng, Junhao and Ma, Qianli and Kang, Yangyang},
@@ -1,15 +1,20 @@
+ from __future__ import annotations
+
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies, requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  LLM2CLIP_CITATION = """@misc{huang2024llm2clippowerfullanguagemodel,
  title={LLM2CLIP: Powerful Language Model Unlock Richer Visual Representation},
@@ -1,16 +1,22 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package, suggest_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.models.models_protocols import EncoderProtocol
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.models.models_protocols import EncoderProtocol
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,16 +1,19 @@
+ from __future__ import annotations
+
  import logging
  import os
  import time
  import warnings
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  import requests

  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
- from mteb.types import PromptType

+ if TYPE_CHECKING:
+     from mteb.types import PromptType
  logger = logging.getLogger(__name__)

  HAKIM_CITATION = """@article{sarmadi2025hakim,
@@ -1,14 +1,19 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies, requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  MOCOV3_CITATION = """@Article{chen2021mocov3,
  author = {Xinlei Chen* and Saining Xie* and Kaiming He},
@@ -1,6 +1,6 @@
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import PromptType
+ from mteb.types import PromptType


  def instruction_template(
@@ -1,17 +1,23 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
- from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType

  from .bge_models import bge_training_data

+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType
+
+
  logger = logging.getLogger(__name__)

  MODEL2VEC_CITATION = """@software{minishlab2024model2vec,
@@ -1,15 +1,22 @@
- from collections.abc import Generator
+ from __future__ import annotations
+
  from itertools import islice
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Generator
+
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput


  # https://docs.python.org/3/library/itertools.html#itertools.batched
@@ -1,15 +1,21 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
  import torch.nn.functional as F
  from packaging.version import Version
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta, ScoringFunction
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -4,17 +4,18 @@ from typing import TYPE_CHECKING, Any

  import torch
  import torch.nn.functional as F
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType

  if TYPE_CHECKING:
      from PIL import Image
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  NOMIC_EMBED_VISION_CITATION = """@article{nussbaum2024nomicembedvision,
  title={Nomic Embed Vision: Expanding the Latent Space},
@@ -1,14 +1,18 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
  from packaging.version import Version
  from torch.utils.data import DataLoader
  from transformers import __version__ as transformers_version

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  LLAMA_NEMORETRIEVER_CITATION = """@misc{xu2025llamanemoretrievercolembedtopperforming,
  title={Llama Nemoretriever Colembed: Top-Performing Text-Image Retrieval Model},
@@ -1,11 +1,11 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
  import torch.nn.functional as F
  from packaging.version import Version
- from torch.utils.data import DataLoader
  from tqdm import tqdm
  from transformers import AutoModel, AutoTokenizer
  from transformers import __version__ as transformers_version
@@ -16,7 +16,15 @@ from mteb.models import CrossEncoderWrapper
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from torch.utils.data import DataLoader
+
+     from mteb import TaskMetadata
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -1,6 +1,6 @@
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import PromptType
+ from mteb.types import PromptType


  def instruction_template(
@@ -1,15 +1,20 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any, ClassVar
+ from typing import TYPE_CHECKING, Any, ClassVar

  import numpy as np
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,14 +1,19 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies, requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  OPENCLIP_CITATION = """@inproceedings{cherti2023reproducible,
  title={Reproducible scaling laws for contrastive language-image learning},
@@ -1,12 +1,18 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  v2_training_data = {
      "MSMARCO",
@@ -1,8 +1,13 @@
- import numpy as np
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING

  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta

+ if TYPE_CHECKING:
+     from mteb.types import Array
+

  class OPSWrapper(AbsEncoder):
      def __init__(self, model_name: str, revision: str):
@@ -15,7 +20,7 @@ class OPSWrapper(AbsEncoder):
          )
          self.output_dim = 1536

-     def encode(self, sentences: list[str], **kwargs) -> np.ndarray:
+     def encode(self, sentences: list[str], **kwargs) -> Array:
          embeddings = self.model.encode(sentences, **kwargs)
          return embeddings[:, : self.output_dim]