mteb-2.7.2-py3-none-any.whl → mteb-2.7.3-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
Files changed (151)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +27 -16
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  37. mteb/cache.py +10 -5
  38. mteb/cli/_display_tasks.py +9 -3
  39. mteb/cli/build_cli.py +5 -2
  40. mteb/cli/generate_model_card.py +9 -2
  41. mteb/deprecated_evaluator.py +16 -12
  42. mteb/evaluate.py +20 -18
  43. mteb/filter_tasks.py +12 -7
  44. mteb/get_tasks.py +9 -4
  45. mteb/languages/language_scripts.py +8 -3
  46. mteb/leaderboard/app.py +7 -3
  47. mteb/leaderboard/table.py +7 -2
  48. mteb/load_results.py +9 -3
  49. mteb/models/abs_encoder.py +22 -12
  50. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  51. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  52. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  53. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  54. mteb/models/get_model_meta.py +11 -4
  55. mteb/models/instruct_wrapper.py +13 -5
  56. mteb/models/model_implementations/align_models.py +9 -4
  57. mteb/models/model_implementations/bedrock_models.py +16 -6
  58. mteb/models/model_implementations/blip2_models.py +9 -4
  59. mteb/models/model_implementations/blip_models.py +9 -4
  60. mteb/models/model_implementations/bm25.py +15 -10
  61. mteb/models/model_implementations/bmretriever_models.py +6 -2
  62. mteb/models/model_implementations/cde_models.py +9 -5
  63. mteb/models/model_implementations/clip_models.py +9 -4
  64. mteb/models/model_implementations/cohere_models.py +10 -4
  65. mteb/models/model_implementations/cohere_v.py +9 -4
  66. mteb/models/model_implementations/colpali_models.py +4 -3
  67. mteb/models/model_implementations/colqwen_models.py +10 -31
  68. mteb/models/model_implementations/colsmol_models.py +1 -1
  69. mteb/models/model_implementations/conan_models.py +10 -4
  70. mteb/models/model_implementations/dino_models.py +9 -4
  71. mteb/models/model_implementations/e5_v.py +9 -4
  72. mteb/models/model_implementations/eagerworks_models.py +10 -4
  73. mteb/models/model_implementations/evaclip_models.py +9 -4
  74. mteb/models/model_implementations/gme_v_models.py +5 -3
  75. mteb/models/model_implementations/google_models.py +10 -4
  76. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  77. mteb/models/model_implementations/hinvec_models.py +5 -1
  78. mteb/models/model_implementations/jasper_models.py +12 -5
  79. mteb/models/model_implementations/jina_clip.py +9 -4
  80. mteb/models/model_implementations/jina_models.py +10 -5
  81. mteb/models/model_implementations/kalm_models.py +18 -12
  82. mteb/models/model_implementations/linq_models.py +6 -1
  83. mteb/models/model_implementations/listconranker.py +9 -4
  84. mteb/models/model_implementations/llm2clip_models.py +9 -4
  85. mteb/models/model_implementations/llm2vec_models.py +12 -6
  86. mteb/models/model_implementations/mcinext_models.py +5 -2
  87. mteb/models/model_implementations/moco_models.py +9 -4
  88. mteb/models/model_implementations/mod_models.py +1 -1
  89. mteb/models/model_implementations/model2vec_models.py +10 -4
  90. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  91. mteb/models/model_implementations/nomic_models.py +10 -4
  92. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  93. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  94. mteb/models/model_implementations/nvidia_models.py +12 -4
  95. mteb/models/model_implementations/octen_models.py +1 -1
  96. mteb/models/model_implementations/openai_models.py +9 -4
  97. mteb/models/model_implementations/openclip_models.py +9 -4
  98. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  99. mteb/models/model_implementations/ops_moa_models.py +7 -2
  100. mteb/models/model_implementations/promptriever_models.py +12 -6
  101. mteb/models/model_implementations/pylate_models.py +19 -13
  102. mteb/models/model_implementations/qwen3_models.py +8 -1
  103. mteb/models/model_implementations/random_baseline.py +4 -3
  104. mteb/models/model_implementations/repllama_models.py +13 -6
  105. mteb/models/model_implementations/rerankers_custom.py +10 -4
  106. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  107. mteb/models/model_implementations/salesforce_models.py +7 -1
  108. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  109. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  110. mteb/models/model_implementations/seed_models.py +1 -1
  111. mteb/models/model_implementations/siglip_models.py +9 -4
  112. mteb/models/model_implementations/slm_models.py +7 -4
  113. mteb/models/model_implementations/uae_models.py +9 -4
  114. mteb/models/model_implementations/vdr_models.py +7 -1
  115. mteb/models/model_implementations/vista_models.py +9 -4
  116. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  117. mteb/models/model_implementations/voyage_models.py +10 -4
  118. mteb/models/model_implementations/voyage_v.py +10 -6
  119. mteb/models/model_implementations/yuan_models_en.py +1 -1
  120. mteb/models/model_meta.py +12 -7
  121. mteb/models/models_protocols.py +19 -18
  122. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  123. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  124. mteb/models/search_wrappers.py +19 -12
  125. mteb/models/sentence_transformer_wrapper.py +4 -3
  126. mteb/models/vllm_wrapper.py +8 -6
  127. mteb/results/benchmark_results.py +22 -17
  128. mteb/results/model_result.py +21 -15
  129. mteb/results/task_result.py +15 -9
  130. mteb/similarity_functions.py +8 -2
  131. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  132. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  133. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  134. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  135. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  136. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  137. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  138. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  139. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  140. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  141. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  142. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  143. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  144. mteb/types/_encoder_io.py +1 -1
  145. mteb/types/statistics.py +9 -2
  146. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  147. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
  148. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  149. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  150. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  151. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
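
Nearly every hunk below applies the same refactor: add from __future__ import annotations and move imports that are only needed for type annotations (DataLoader, TaskMetadata, Array, BatchedInput, and similar) under an if TYPE_CHECKING: guard, so they are no longer executed when the module is imported at runtime. A minimal sketch of the pattern, assuming torch and mteb are only needed by the type checker; the encode function is illustrative and not taken from the package:

from __future__ import annotations  # annotations become strings, evaluated only on demand

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Executed by static type checkers only; skipped at runtime, so importing
    # this module does not pull in torch or the mteb type aliases.
    from torch.utils.data import DataLoader

    from mteb.types import Array, BatchedInput, PromptType


def encode(
    inputs: DataLoader[BatchedInput],
    prompt_type: PromptType | None = None,
) -> Array:
    """Illustrative signature only; the annotations refer to the guarded imports."""
    ...

Because the guarded names exist only for the type checker, strings passed to cast() have to cover the whole expression, which is why the model_meta.py hunks below change cast(Sequence["AbsTask"], tasks) to cast("Sequence[AbsTask]", tasks).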
@@ -1,15 +1,21 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.models.models_protocols import EncoderProtocol
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.models.models_protocols import EncoderProtocol
+     from mteb.types import Array, BatchedInput, PromptType

  from .repllama_models import RepLLaMAModel, model_prompts

@@ -1,30 +1,36 @@
+ from __future__ import annotations
+
  import heapq
  import logging
  import shutil
  import tempfile
  from pathlib import Path
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

  from mteb._create_dataloaders import (
      create_dataloader,
  )
  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import (
-     Array,
-     BatchedInput,
-     CorpusDatasetType,
-     EncodeKwargs,
-     PromptType,
-     QueryDatasetType,
-     RetrievalOutputType,
-     TopRankedDocumentsType,
- )
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import (
+         Array,
+         BatchedInput,
+         CorpusDatasetType,
+         EncodeKwargs,
+         QueryDatasetType,
+         RetrievalOutputType,
+         TopRankedDocumentsType,
+     )
+

  logger = logging.getLogger(__name__)

@@ -1,6 +1,13 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import EncoderProtocol, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from mteb.models.models_protocols import EncoderProtocol


  def instruction_template(
@@ -5,18 +5,19 @@ from typing import TYPE_CHECKING, Any, Literal

  import numpy as np
  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
  from mteb.similarity_functions import (
      select_pairwise_similarity,
      select_similarity,
  )
- from mteb.types._encoder_io import Array, BatchedInput, PromptType

  if TYPE_CHECKING:
      from PIL import Image
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types._encoder_io import Array, BatchedInput, PromptType


  def _string_to_vector(text: str | None, size: int) -> np.ndarray:
@@ -1,22 +1,29 @@
+ from __future__ import annotations
+
  import logging
- from collections.abc import Callable
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import numpy as np
  import torch
  import torch.nn.functional as F
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import (
      ModelMeta,
      ScoringFunction,
  )
- from mteb.models.models_protocols import EncoderProtocol
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.models.models_protocols import EncoderProtocol
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -1,16 +1,22 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType

  from .bge_models import bge_m3_training_data

+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType
+
+
  logger = logging.getLogger(__name__)


@@ -1,15 +1,21 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType

  from .rerankers_custom import RerankerWrapper

+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType
+
+
  logger = logging.getLogger(__name__)


@@ -1,12 +1,18 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
  from mteb.models.instruct_wrapper import (
      InstructSentenceTransformerModel,
      instruct_wrapper,
  )
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import PromptType

  from .e5_instruct import E5_MISTRAL_TRAINING_DATA

+ if TYPE_CHECKING:
+     from mteb.types import PromptType
+

  def instruction_template(
      instruction: str, prompt_type: PromptType | None = None
@@ -13,16 +13,18 @@ import torch
  from torch.utils.data import DataLoader

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_implementations.bge_models import bge_chinese_training_data
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType

  if TYPE_CHECKING:
      from PIL import Image

+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput
+

  logger = logging.getLogger(__name__)

@@ -15,15 +15,18 @@ from torch.utils.data import DataLoader
  from tqdm import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_implementations.bge_models import bge_chinese_training_data
  from mteb.models.model_implementations.nvidia_models import nvidia_training_datasets
  from mteb.models.model_meta import ModelMeta
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType

  if TYPE_CHECKING:
      from PIL import Image
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput


  logger = logging.getLogger(__name__)
@@ -9,7 +9,7 @@ from tqdm.auto import tqdm
  from mteb._requires_package import requires_package
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import PromptType
+ from mteb.types import PromptType

  from .bge_models import bge_chinese_training_data
  from .nvidia_models import nvidia_training_datasets
@@ -1,13 +1,18 @@
- from typing import Any
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  SIGLIP_CITATION = """@misc{zhai2023sigmoid,
    title={Sigmoid Loss for Language Image Pre-Training},
@@ -13,24 +13,27 @@ Based on:
  from __future__ import annotations

  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import (
      requires_image_dependencies,
      requires_package,
  )
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_implementations.colpali_models import (
      COLPALI_CITATION,
      COLPALI_TRAINING_DATA,
  )
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,13 +1,18 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader

- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.model_meta import ModelMeta, ScoringFunction
  from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,6 +1,12 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from mteb.types import PromptType


  def instruction_template(
@@ -1,14 +1,19 @@
- from typing import Any, Literal
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any, Literal

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  VISTA_CITATION = """@article{zhou2024vista,
    title={VISTA: Visualized Text Embedding For Universal Multi-Modal Retrieval},
@@ -1,8 +1,9 @@
+ from __future__ import annotations
+
  import logging
- from typing import Any
+ from typing import TYPE_CHECKING, Any

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import (
@@ -10,10 +11,14 @@ from mteb._requires_package import (
      requires_package,
      suggest_package,
  )
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput, PromptType

  logger = logging.getLogger(__name__)

@@ -1,16 +1,22 @@
+ from __future__ import annotations
+
  import time
  from functools import wraps
- from typing import Any, Literal
+ from typing import TYPE_CHECKING, Any, Literal

  import numpy as np
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType
+
+ if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  VOYAGE_TRAINING_DATA = set(
      # Self-reported (message from VoyageAI member)
@@ -4,17 +4,19 @@ import logging
  from typing import TYPE_CHECKING, Any, Literal

  import torch
- from torch.utils.data import DataLoader
  from tqdm.auto import tqdm

  from mteb._requires_package import requires_image_dependencies, requires_package
- from mteb.abstasks.task_metadata import TaskMetadata
  from mteb.models.abs_encoder import AbsEncoder
  from mteb.models.model_meta import ModelMeta, ScoringFunction
- from mteb.types import Array, BatchedInput, PromptType
+ from mteb.types import PromptType

  if TYPE_CHECKING:
      from PIL import Image
+     from torch.utils.data import DataLoader
+
+     from mteb.abstasks.task_metadata import TaskMetadata
+     from mteb.types import Array, BatchedInput

  logger = logging.getLogger(__name__)

@@ -27,6 +29,8 @@ def _downsample_image(
      Returns:
          The downsampled image.
      """
+     from PIL.Image import Resampling
+
      width, height = image.size
      pixels = width * height

@@ -42,15 +46,15 @@ def _downsample_image(
          logger.info(
              f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
          )
-         return image.resize(new_size, Image.LANCZOS)
+         return image.resize(new_size, Resampling.LANCZOS)
      if width > height:
          if width > 10000:
              logger.error("Processing extremely wide images.")
-             return image.resize((10000, height), Image.LANCZOS)
+             return image.resize((10000, height), Resampling.LANCZOS)
      else:
          if height > 10000:
              logger.error("Processing extremely high images.")
-             return image.resize((width, 10000), Image.LANCZOS)
+             return image.resize((width, 10000), Resampling.LANCZOS)
      return image


@@ -1,6 +1,6 @@
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
  from mteb.models.model_meta import ModelMeta
- from mteb.models.models_protocols import PromptType
+ from mteb.types import PromptType


  def instruction_template(
mteb/models/model_meta.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
  import json
  import logging
  import warnings
- from collections.abc import Callable, Sequence
+ from collections.abc import Callable
  from dataclasses import field
  from enum import Enum
  from functools import partial
@@ -11,9 +11,7 @@ from pathlib import Path
  from typing import TYPE_CHECKING, Any, Literal, cast

  from huggingface_hub import (
-     GitCommitInfo,
      ModelCard,
-     ModelCardData,
      get_safetensors_metadata,
      hf_hub_download,
      list_repo_commits,
@@ -30,17 +28,24 @@ from huggingface_hub.errors import (
  )
  from pydantic import BaseModel, ConfigDict, field_validator, model_validator
  from transformers import AutoConfig
- from typing_extensions import Self

  from mteb._helpful_enum import HelpfulStrEnum
  from mteb.languages import check_language_code
- from mteb.models.models_protocols import EncoderProtocol, MTEBModels
+ from mteb.models.models_protocols import MTEBModels
  from mteb.types import ISOLanguageScript, Licenses, Modalities, StrDate, StrURL

  if TYPE_CHECKING:
+     from collections.abc import Sequence
+
+     from huggingface_hub import (
+         GitCommitInfo,
+         ModelCardData,
+     )
      from sentence_transformers import CrossEncoder, SentenceTransformer
+     from typing_extensions import Self

      from mteb.abstasks import AbsTask
+     from mteb.models.models_protocols import EncoderProtocol


  logger = logging.getLogger(__name__)
@@ -479,7 +484,7 @@ class ModelMeta(BaseModel):
          if isinstance(tasks[0], str):
              benchmark_datasets = set(tasks)
          else:
-             tasks = cast(Sequence["AbsTask"], tasks)
+             tasks = cast("Sequence[AbsTask]", tasks)
              benchmark_datasets = set()
              for task in tasks:
                  benchmark_datasets.add(task.metadata.name)
@@ -534,7 +539,7 @@ class ModelMeta(BaseModel):
          if isinstance(tasks[0], str):
              benchmark_datasets = set(tasks)
          else:
-             tasks = cast(Sequence["AbsTask"], tasks)
+             tasks = cast("Sequence[AbsTask]", tasks)
              benchmark_datasets = {task.metadata.name for task in tasks}
          overlap = training_datasets & benchmark_datasets
          perc_overlap = 100 * (len(overlap) / len(benchmark_datasets))
@@ -1,22 +1,23 @@
- from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+ from __future__ import annotations

- from torch.utils.data import DataLoader
- from typing_extensions import Unpack
-
- from mteb.abstasks.task_metadata import TaskMetadata
- from mteb.types import (
-     Array,
-     BatchedInput,
-     CorpusDatasetType,
-     EncodeKwargs,
-     PromptType,
-     QueryDatasetType,
-     RetrievalOutputType,
-     TopRankedDocumentsType,
- )
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

  if TYPE_CHECKING:
+     from torch.utils.data import DataLoader
+     from typing_extensions import Unpack
+
+     from mteb.abstasks.task_metadata import TaskMetadata
      from mteb.models.model_meta import ModelMeta
+     from mteb.types import (
+         Array,
+         BatchedInput,
+         CorpusDatasetType,
+         EncodeKwargs,
+         PromptType,
+         QueryDatasetType,
+         RetrievalOutputType,
+         TopRankedDocumentsType,
+     )


  @runtime_checkable
@@ -72,7 +73,7 @@ class SearchProtocol(Protocol):
          ...

      @property
-     def mteb_model_meta(self) -> "ModelMeta":
+     def mteb_model_meta(self) -> ModelMeta:
          """Metadata of the model"""
          ...

@@ -177,7 +178,7 @@ class EncoderProtocol(Protocol):
          ...

      @property
-     def mteb_model_meta(self) -> "ModelMeta":
+     def mteb_model_meta(self) -> ModelMeta:
          """Metadata of the model"""
          ...

@@ -236,7 +237,7 @@ class CrossEncoderProtocol(Protocol):
          ...

      @property
-     def mteb_model_meta(self) -> "ModelMeta":
+     def mteb_model_meta(self) -> ModelMeta:
          """Metadata of the model"""
          ...

@@ -1,7 +1,11 @@
- from collections.abc import Callable
- from typing import Protocol
+ from __future__ import annotations

- from mteb.types import Array, TopRankedDocumentsType
+ from typing import TYPE_CHECKING, Protocol
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+     from mteb.types import Array, TopRankedDocumentsType


  class IndexEncoderSearchProtocol(Protocol):