mteb-2.7.2-py3-none-any.whl → mteb-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +27 -16
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  37. mteb/cache.py +10 -5
  38. mteb/cli/_display_tasks.py +9 -3
  39. mteb/cli/build_cli.py +5 -2
  40. mteb/cli/generate_model_card.py +9 -2
  41. mteb/deprecated_evaluator.py +16 -12
  42. mteb/evaluate.py +20 -18
  43. mteb/filter_tasks.py +12 -7
  44. mteb/get_tasks.py +9 -4
  45. mteb/languages/language_scripts.py +8 -3
  46. mteb/leaderboard/app.py +7 -3
  47. mteb/leaderboard/table.py +7 -2
  48. mteb/load_results.py +9 -3
  49. mteb/models/abs_encoder.py +22 -12
  50. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  51. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  52. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  53. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  54. mteb/models/get_model_meta.py +11 -4
  55. mteb/models/instruct_wrapper.py +13 -5
  56. mteb/models/model_implementations/align_models.py +9 -4
  57. mteb/models/model_implementations/bedrock_models.py +16 -6
  58. mteb/models/model_implementations/blip2_models.py +9 -4
  59. mteb/models/model_implementations/blip_models.py +9 -4
  60. mteb/models/model_implementations/bm25.py +15 -10
  61. mteb/models/model_implementations/bmretriever_models.py +6 -2
  62. mteb/models/model_implementations/cde_models.py +9 -5
  63. mteb/models/model_implementations/clip_models.py +9 -4
  64. mteb/models/model_implementations/cohere_models.py +10 -4
  65. mteb/models/model_implementations/cohere_v.py +9 -4
  66. mteb/models/model_implementations/colpali_models.py +4 -3
  67. mteb/models/model_implementations/colqwen_models.py +10 -31
  68. mteb/models/model_implementations/colsmol_models.py +1 -1
  69. mteb/models/model_implementations/conan_models.py +10 -4
  70. mteb/models/model_implementations/dino_models.py +9 -4
  71. mteb/models/model_implementations/e5_v.py +9 -4
  72. mteb/models/model_implementations/eagerworks_models.py +10 -4
  73. mteb/models/model_implementations/evaclip_models.py +9 -4
  74. mteb/models/model_implementations/gme_v_models.py +5 -3
  75. mteb/models/model_implementations/google_models.py +10 -4
  76. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  77. mteb/models/model_implementations/hinvec_models.py +5 -1
  78. mteb/models/model_implementations/jasper_models.py +12 -5
  79. mteb/models/model_implementations/jina_clip.py +9 -4
  80. mteb/models/model_implementations/jina_models.py +10 -5
  81. mteb/models/model_implementations/kalm_models.py +18 -12
  82. mteb/models/model_implementations/linq_models.py +6 -1
  83. mteb/models/model_implementations/listconranker.py +9 -4
  84. mteb/models/model_implementations/llm2clip_models.py +9 -4
  85. mteb/models/model_implementations/llm2vec_models.py +12 -6
  86. mteb/models/model_implementations/mcinext_models.py +5 -2
  87. mteb/models/model_implementations/moco_models.py +9 -4
  88. mteb/models/model_implementations/mod_models.py +1 -1
  89. mteb/models/model_implementations/model2vec_models.py +10 -4
  90. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  91. mteb/models/model_implementations/nomic_models.py +10 -4
  92. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  93. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  94. mteb/models/model_implementations/nvidia_models.py +12 -4
  95. mteb/models/model_implementations/octen_models.py +1 -1
  96. mteb/models/model_implementations/openai_models.py +9 -4
  97. mteb/models/model_implementations/openclip_models.py +9 -4
  98. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  99. mteb/models/model_implementations/ops_moa_models.py +7 -2
  100. mteb/models/model_implementations/promptriever_models.py +12 -6
  101. mteb/models/model_implementations/pylate_models.py +19 -13
  102. mteb/models/model_implementations/qwen3_models.py +8 -1
  103. mteb/models/model_implementations/random_baseline.py +4 -3
  104. mteb/models/model_implementations/repllama_models.py +13 -6
  105. mteb/models/model_implementations/rerankers_custom.py +10 -4
  106. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  107. mteb/models/model_implementations/salesforce_models.py +7 -1
  108. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  109. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  110. mteb/models/model_implementations/seed_models.py +1 -1
  111. mteb/models/model_implementations/siglip_models.py +9 -4
  112. mteb/models/model_implementations/slm_models.py +7 -4
  113. mteb/models/model_implementations/uae_models.py +9 -4
  114. mteb/models/model_implementations/vdr_models.py +7 -1
  115. mteb/models/model_implementations/vista_models.py +9 -4
  116. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  117. mteb/models/model_implementations/voyage_models.py +10 -4
  118. mteb/models/model_implementations/voyage_v.py +10 -6
  119. mteb/models/model_implementations/yuan_models_en.py +1 -1
  120. mteb/models/model_meta.py +12 -7
  121. mteb/models/models_protocols.py +19 -18
  122. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  123. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  124. mteb/models/search_wrappers.py +19 -12
  125. mteb/models/sentence_transformer_wrapper.py +4 -3
  126. mteb/models/vllm_wrapper.py +8 -6
  127. mteb/results/benchmark_results.py +22 -17
  128. mteb/results/model_result.py +21 -15
  129. mteb/results/task_result.py +15 -9
  130. mteb/similarity_functions.py +8 -2
  131. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  132. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  133. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  134. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  135. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  136. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  137. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  138. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  139. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  140. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  141. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  142. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  143. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  144. mteb/types/_encoder_io.py +1 -1
  145. mteb/types/statistics.py +9 -2
  146. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  147. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/RECORD +151 -151
  148. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  149. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  150. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  151. {mteb-2.7.2.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,28 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
- from collections.abc import Callable
4
- from typing import Any, cast
5
+ from typing import TYPE_CHECKING, Any, cast
5
6
 
6
7
  import torch
7
8
  from datasets import Dataset, Image
8
9
  from torch.utils.data import DataLoader, default_collate
9
10
 
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
11
  from mteb.types import (
12
- BatchedInput,
13
- Conversation,
14
12
  ConversationTurn,
15
13
  PromptType,
16
- QueryDatasetType,
17
14
  )
18
- from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Callable
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.types import (
21
+ BatchedInput,
22
+ Conversation,
23
+ QueryDatasetType,
24
+ )
25
+ from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
19
26
 
20
27
  logger = logging.getLogger(__name__)
21
28
 
@@ -128,7 +135,7 @@ def _convert_conv_history_to_query(
128
135
  conversation = row["text"]
129
136
  # if it's a list of strings, just join them
130
137
  if isinstance(conversation, list) and isinstance(conversation[0], str):
131
- conversation_ = cast(list[str], conversation)
138
+ conversation_ = cast("list[str]", conversation)
132
139
  conv_str = "; ".join(conversation_)
133
140
  current_conversation = [
134
141
  ConversationTurn(role="user", content=message) for message in conversation_
@@ -173,7 +180,7 @@ def _convert_conv_history_to_query(
173
180
 
174
181
  row["text"] = conv_str
175
182
  row["conversation"] = current_conversation
176
- return cast(dict[str, str | list[ConversationTurn]], row)
183
+ return cast("dict[str, str | list[ConversationTurn]]", row)
177
184
 
178
185
 
179
186
  def _create_dataloader_for_queries_conversation(
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import TypedDict
4
+ from typing import TYPE_CHECKING, TypedDict
3
5
 
4
- from datasets import Dataset
5
6
  from sklearn.metrics.pairwise import (
6
7
  paired_cosine_distances,
7
8
  paired_euclidean_distances,
@@ -9,13 +10,17 @@ from sklearn.metrics.pairwise import (
9
10
  )
10
11
 
11
12
  from mteb._create_dataloaders import create_dataloader
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
- from mteb.models import EncoderProtocol
14
13
  from mteb.similarity_functions import compute_pairwise_similarity
15
- from mteb.types import EncodeKwargs, PromptType
16
14
 
17
15
  from .evaluator import Evaluator
18
16
 
17
+ if TYPE_CHECKING:
18
+ from datasets import Dataset
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.models import EncoderProtocol
22
+ from mteb.types import EncodeKwargs, PromptType
23
+
19
24
  logger = logging.getLogger(__name__)
20
25
 
21
26
 
@@ -1,15 +1,21 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
- from datasets import Dataset
4
6
  from sklearn import cluster
5
7
 
6
8
  from mteb._create_dataloaders import create_dataloader
7
- from mteb.abstasks.task_metadata import TaskMetadata
8
- from mteb.models import EncoderProtocol
9
- from mteb.types import EncodeKwargs
10
9
 
11
10
  from .evaluator import Evaluator
12
11
 
12
+ if TYPE_CHECKING:
13
+ from datasets import Dataset
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.models import EncoderProtocol
17
+ from mteb.types import EncodeKwargs
18
+
13
19
  logger = logging.getLogger(__name__)
14
20
 
15
21
 
@@ -1,10 +1,15 @@
1
+ from __future__ import annotations
2
+
1
3
  from abc import ABC, abstractmethod
2
- from collections.abc import Iterable, Mapping
3
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
4
5
 
5
6
  from mteb.abstasks.abstask import _set_seed
6
- from mteb.models import EncoderProtocol
7
- from mteb.types import EncodeKwargs
7
+
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Iterable, Mapping
10
+
11
+ from mteb.models import EncoderProtocol
12
+ from mteb.types import EncodeKwargs
8
13
 
9
14
 
10
15
  class Evaluator(ABC):
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from collections.abc import Sequence
5
4
  from typing import TYPE_CHECKING, Any
6
5
 
7
6
  import torch
@@ -14,13 +13,16 @@ from mteb._create_dataloaders import (
14
13
  )
15
14
  from mteb._evaluators.evaluator import Evaluator
16
15
  from mteb._requires_package import requires_image_dependencies
17
- from mteb.abstasks.task_metadata import TaskMetadata
18
- from mteb.models.models_protocols import EncoderProtocol
19
- from mteb.types import EncodeKwargs
20
16
 
21
17
  if TYPE_CHECKING:
18
+ from collections.abc import Sequence
19
+
22
20
  from PIL.Image import Image
23
21
 
22
+ from mteb.abstasks.task_metadata import TaskMetadata
23
+ from mteb.models.models_protocols import EncoderProtocol
24
+ from mteb.types import EncodeKwargs
25
+
24
26
 
25
27
  logger = logging.getLogger(__name__)
26
28
 
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any, TypedDict
4
+ from typing import TYPE_CHECKING, Any, TypedDict
3
5
 
4
6
  import numpy as np
5
- from datasets import Dataset
6
7
  from sklearn.metrics.pairwise import (
7
8
  paired_cosine_distances,
8
9
  paired_euclidean_distances,
@@ -11,10 +12,14 @@ from sklearn.metrics.pairwise import (
11
12
 
12
13
  from mteb._create_dataloaders import _create_dataloader_from_texts, create_dataloader
13
14
  from mteb._evaluators.evaluator import Evaluator
14
- from mteb.abstasks.task_metadata import TaskMetadata
15
- from mteb.models import EncoderProtocol
16
15
  from mteb.similarity_functions import compute_pairwise_similarity
17
- from mteb.types import EncodeKwargs, PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from datasets import Dataset
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.models import EncoderProtocol
22
+ from mteb.types import EncodeKwargs, PromptType
18
23
 
19
24
  logger = logging.getLogger(__name__)
20
25
 
@@ -1,23 +1,29 @@
1
- import logging
2
- from collections.abc import Sequence
1
+ from __future__ import annotations
3
2
 
4
- from mteb.abstasks.task_metadata import TaskMetadata
5
- from mteb.models import SearchProtocol
6
- from mteb.types import (
7
- CorpusDatasetType,
8
- EncodeKwargs,
9
- QueryDatasetType,
10
- RelevantDocumentsType,
11
- RetrievalEvaluationResult,
12
- RetrievalOutputType,
13
- TopRankedDocumentsType,
14
- )
3
+ import logging
4
+ from typing import TYPE_CHECKING
15
5
 
16
6
  from .evaluator import Evaluator
17
7
  from .retrieval_metrics import (
18
8
  calculate_retrieval_scores,
19
9
  )
20
10
 
11
+ if TYPE_CHECKING:
12
+ from collections.abc import Sequence
13
+
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.models import SearchProtocol
16
+ from mteb.types import (
17
+ CorpusDatasetType,
18
+ EncodeKwargs,
19
+ QueryDatasetType,
20
+ RelevantDocumentsType,
21
+ RetrievalEvaluationResult,
22
+ RetrievalOutputType,
23
+ TopRankedDocumentsType,
24
+ )
25
+
26
+
21
27
  logger = logging.getLogger(__name__)
22
28
 
23
29
 
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from collections.abc import Mapping
4
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
5
6
 
6
7
  import numpy as np
7
8
  import pandas as pd
@@ -9,7 +10,12 @@ import pytrec_eval
9
10
  from packaging.version import Version
10
11
  from sklearn.metrics import auc
11
12
 
12
- from mteb.types import RelevantDocumentsType, RetrievalEvaluationResult
13
+ from mteb.types import RetrievalEvaluationResult
14
+
15
+ if TYPE_CHECKING:
16
+ from collections.abc import Mapping
17
+
18
+ from mteb.types import RelevantDocumentsType
13
19
 
14
20
  logger = logging.getLogger(__name__)
15
21
 
@@ -1,18 +1,22 @@
1
- import logging
2
- from typing import Any, Protocol, cast
1
+ from __future__ import annotations
3
2
 
4
- import numpy as np
5
- from datasets import Dataset
6
- from torch.utils.data import DataLoader
7
- from typing_extensions import Self
3
+ import logging
4
+ from typing import TYPE_CHECKING, Any, Protocol, cast
8
5
 
9
6
  from mteb._create_dataloaders import create_dataloader
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
- from mteb.models import EncoderProtocol
12
- from mteb.types import Array, BatchedInput, EncodeKwargs
13
7
 
14
8
  from .evaluator import Evaluator
15
9
 
10
+ if TYPE_CHECKING:
11
+ import numpy as np
12
+ from datasets import Dataset
13
+ from torch.utils.data import DataLoader
14
+ from typing_extensions import Self
15
+
16
+ from mteb.abstasks.task_metadata import TaskMetadata
17
+ from mteb.models import EncoderProtocol
18
+ from mteb.types import Array, BatchedInput, EncodeKwargs
19
+
16
20
  logger = logging.getLogger(__name__)
17
21
 
18
22
 
@@ -104,7 +108,7 @@ class SklearnEvaluator(Evaluator):
104
108
  hf_subset=self.hf_subset,
105
109
  **encode_kwargs,
106
110
  )
107
- test_cache = cast(Array, test_cache)
111
+ test_cache = cast("Array", test_cache)
108
112
 
109
113
  logger.info("Running - Fitting classifier...")
110
114
  y_train = self.train_dataset[self.label_column_name]
@@ -1,4 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  import torch
4
7
  from datasets import Dataset
@@ -6,9 +9,11 @@ from tqdm.auto import tqdm
6
9
 
7
10
  from mteb._create_dataloaders import _create_dataloader_from_texts
8
11
  from mteb._evaluators.evaluator import Evaluator
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
- from mteb.models import EncoderProtocol
11
- from mteb.types import Array, EncodeKwargs
12
+
13
+ if TYPE_CHECKING:
14
+ from mteb.abstasks.task_metadata import TaskMetadata
15
+ from mteb.models import EncoderProtocol
16
+ from mteb.types import Array, EncodeKwargs
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
@@ -1,6 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import sys
3
- from typing import TypedDict
5
+ from typing import TYPE_CHECKING, TypedDict
4
6
 
5
7
  import numpy as np
6
8
  import torch
@@ -9,10 +11,12 @@ from tqdm.auto import tqdm
9
11
 
10
12
  from mteb._create_dataloaders import _create_dataloader_from_texts
11
13
  from mteb._evaluators.evaluator import Evaluator
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
- from mteb.models import EncoderProtocol
14
14
  from mteb.similarity_functions import cos_sim, dot_score
15
- from mteb.types import EncodeKwargs
15
+
16
+ if TYPE_CHECKING:
17
+ from mteb.abstasks.task_metadata import TaskMetadata
18
+ from mteb.models import EncoderProtocol
19
+ from mteb.types import EncodeKwargs
16
20
 
17
21
  # if later than python 3.13 use typing module
18
22
  if sys.version_info >= (3, 13):
@@ -1,4 +1,7 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from datasets import Dataset
4
7
 
@@ -6,13 +9,17 @@ from mteb._create_dataloaders import (
6
9
  _create_dataloader_from_texts,
7
10
  create_dataloader,
8
11
  )
9
- from mteb.abstasks.task_metadata import TaskMetadata
10
- from mteb.models import EncoderProtocol
11
12
  from mteb.similarity_functions import similarity
12
- from mteb.types import Array, EncodeKwargs
13
13
 
14
14
  from .evaluator import Evaluator
15
15
 
16
+ if TYPE_CHECKING:
17
+ from datasets import Dataset
18
+
19
+ from mteb.abstasks.task_metadata import TaskMetadata
20
+ from mteb.models import EncoderProtocol
21
+ from mteb.types import Array, EncodeKwargs
22
+
16
23
  logger = logging.getLogger(__name__)
17
24
 
18
25
 
mteb/_helpful_enum.py CHANGED
@@ -1,6 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  from enum import Enum
4
+ from typing import TYPE_CHECKING
2
5
 
3
- from typing_extensions import Self
6
+ if TYPE_CHECKING:
7
+ from typing_extensions import Self
4
8
 
5
9
 
6
10
  class HelpfulStrEnum(str, Enum):
@@ -1,12 +1,18 @@
1
1
  """Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import logging
6
+ from typing import TYPE_CHECKING
4
7
 
5
8
  import datasets
6
9
  import pandas as pd
7
- from datasets import Dataset, DatasetDict
10
+ from datasets import DatasetDict
11
+
12
+ if TYPE_CHECKING:
13
+ from datasets import Dataset
8
14
 
9
- from mteb import TaskMetadata
15
+ from mteb import TaskMetadata
10
16
 
11
17
  logger = logging.getLogger(__name__)
12
18
 
@@ -1,9 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
4
+ from typing import TYPE_CHECKING
2
5
 
3
6
  from datasets import DatasetDict
4
7
 
5
- from mteb import TaskMetadata
6
- from mteb.abstasks import AbsTaskClassification
7
8
  from mteb.abstasks._data_filter.filters import (
8
9
  deduplicate,
9
10
  filter_empty,
@@ -13,6 +14,10 @@ from mteb.abstasks._data_filter.filters import (
13
14
  split_train_test,
14
15
  )
15
16
 
17
+ if TYPE_CHECKING:
18
+ from mteb import TaskMetadata
19
+ from mteb.abstasks import AbsTaskClassification
20
+
16
21
  logger = logging.getLogger(__name__)
17
22
 
18
23
 
@@ -2,10 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  import hashlib
4
4
  from collections import Counter
5
- from collections.abc import Mapping
6
5
  from typing import TYPE_CHECKING, cast
7
6
 
8
- from mteb.types import TopRankedDocumentsType
9
7
  from mteb.types.statistics import (
10
8
  ImageStatistics,
11
9
  LabelStatistics,
@@ -16,8 +14,12 @@ from mteb.types.statistics import (
16
14
  )
17
15
 
18
16
  if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
19
19
  from PIL import Image
20
20
 
21
+ from mteb.types import TopRankedDocumentsType
22
+
21
23
 
22
24
  def calculate_text_statistics(texts: list[str]) -> TextStatistics:
23
25
  """Calculate descriptive statistics for a list of texts.
@@ -87,13 +89,13 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
87
89
 
88
90
  if not isinstance(labels[0], list):
89
91
  # single label classification
90
- single_label = cast(list[int], labels)
92
+ single_label = cast("list[int]", labels)
91
93
  label_len = [1] * len(single_label)
92
94
  total_label_len = len(single_label)
93
95
  total_labels.extend(single_label)
94
96
  elif isinstance(labels[0], list):
95
97
  # multilabel classification
96
- multilabel_labels = cast(list[list[int]], labels)
98
+ multilabel_labels = cast("list[list[int]]", labels)
97
99
  label_len = [len(l) for l in multilabel_labels]
98
100
  total_label_len = sum(label_len)
99
101
  for l in multilabel_labels:
mteb/abstasks/abstask.py CHANGED
@@ -1,30 +1,38 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import warnings
4
6
  from abc import ABC, abstractmethod
5
- from collections.abc import Mapping, Sequence
7
+ from collections.abc import Sequence
6
8
  from copy import copy
7
9
  from pathlib import Path
8
- from typing import Any, Literal, cast
10
+ from typing import TYPE_CHECKING, Any, Literal, cast
9
11
 
10
12
  import numpy as np
11
13
  from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
12
14
  from sklearn.preprocessing import MultiLabelBinarizer
13
15
  from tqdm.auto import tqdm
14
- from typing_extensions import Self
15
16
 
16
17
  from mteb._set_seed import _set_seed
17
- from mteb.abstasks.task_metadata import TaskMetadata
18
18
  from mteb.languages import LanguageScripts
19
19
  from mteb.models import (
20
20
  CrossEncoderProtocol,
21
21
  EncoderProtocol,
22
- MTEBModels,
23
22
  SearchProtocol,
24
23
  )
25
- from mteb.types import HFSubset, Modalities, ScoresDict
26
- from mteb.types._encoder_io import EncodeKwargs
27
- from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
24
+
25
+ if TYPE_CHECKING:
26
+ from collections.abc import Mapping
27
+
28
+ from typing_extensions import Self
29
+
30
+ from mteb.abstasks.task_metadata import TaskMetadata
31
+ from mteb.models import (
32
+ MTEBModels,
33
+ )
34
+ from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
35
+ from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
28
36
 
29
37
  logger = logging.getLogger(__name__)
30
38
 
@@ -163,7 +171,7 @@ class AbsTask(ABC):
163
171
  if not self.data_loaded:
164
172
  self.load_data()
165
173
 
166
- self.dataset = cast(dict[HFSubset, DatasetDict], self.dataset)
174
+ self.dataset = cast("dict[HFSubset, DatasetDict]", self.dataset)
167
175
 
168
176
  scores = {}
169
177
  if self.hf_subsets is None:
@@ -1,28 +1,39 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
5
+ from typing import TYPE_CHECKING
3
6
 
4
7
  from pydantic import ConfigDict, Field, model_validator
5
- from typing_extensions import Self
6
8
 
7
9
  from mteb.types import (
8
- ISOLanguageScript,
9
10
  Languages,
10
- Licenses,
11
- Modalities,
12
- StrDate,
13
11
  )
14
12
 
15
13
  from .abstask import AbsTask
16
14
  from .task_metadata import (
17
- AnnotatorType,
18
15
  MetadataDatasetDict,
19
- SampleCreationMethod,
20
- TaskDomain,
21
16
  TaskMetadata,
22
- TaskSubtype,
23
17
  TaskType,
24
18
  )
25
19
 
20
+ if TYPE_CHECKING:
21
+ from typing_extensions import Self
22
+
23
+ from mteb.types import (
24
+ ISOLanguageScript,
25
+ Licenses,
26
+ Modalities,
27
+ StrDate,
28
+ )
29
+
30
+ from .task_metadata import (
31
+ AnnotatorType,
32
+ SampleCreationMethod,
33
+ TaskDomain,
34
+ TaskSubtype,
35
+ )
36
+
26
37
  logger = logging.getLogger(__name__)
27
38
 
28
39
 
@@ -1,19 +1,26 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
- from collections.abc import Mapping
4
- from pathlib import Path
5
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
6
6
 
7
7
  import numpy as np
8
- from datasets import Dataset, DatasetDict
9
8
 
10
- from mteb.models.models_protocols import MTEBModels
11
9
  from mteb.results.task_result import TaskResult
12
- from mteb.types import EncodeKwargs, HFSubset, ScoresDict
13
- from mteb.types.statistics import DescriptiveStatistics
14
10
 
15
11
  from .abstask import AbsTask
16
- from .aggregate_task_metadata import AggregateTaskMetadata
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Mapping
15
+ from pathlib import Path
16
+
17
+ from datasets import Dataset, DatasetDict
18
+
19
+ from mteb.models.models_protocols import MTEBModels
20
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
21
+ from mteb.types.statistics import DescriptiveStatistics
22
+
23
+ from .aggregate_task_metadata import AggregateTaskMetadata
17
24
 
18
25
  logger = logging.getLogger(__name__)
19
26
 
@@ -1,7 +1,8 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from collections import defaultdict
3
- from pathlib import Path
4
- from typing import Any, TypedDict
5
+ from typing import TYPE_CHECKING, Any, TypedDict
5
6
 
6
7
  import numpy as np
7
8
  from datasets import Dataset, DatasetDict
@@ -16,12 +17,8 @@ from sklearn.metrics import (
16
17
 
17
18
  from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
18
19
  from mteb.models import EncoderProtocol, MTEBModels
19
- from mteb.types import EncodeKwargs, HFSubset, ScoresDict
20
20
  from mteb.types.statistics import (
21
- ImageStatistics,
22
- LabelStatistics,
23
21
  SplitDescriptiveStatistics,
24
- TextStatistics,
25
22
  )
26
23
 
27
24
  from ._statistics_calculation import (
@@ -31,6 +28,18 @@ from ._statistics_calculation import (
31
28
  )
32
29
  from .abstask import AbsTask
33
30
 
31
+ if TYPE_CHECKING:
32
+ from pathlib import Path
33
+
34
+ from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
35
+ from mteb.models import MTEBModels
36
+ from mteb.types import EncodeKwargs, HFSubset, ScoresDict
37
+ from mteb.types.statistics import (
38
+ ImageStatistics,
39
+ LabelStatistics,
40
+ TextStatistics,
41
+ )
42
+
34
43
  logger = logging.getLogger(__name__)
35
44
 
36
45