mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +16 -9
  3. mteb/_evaluators/any_sts_evaluator.py +10 -5
  4. mteb/_evaluators/clustering_evaluator.py +10 -4
  5. mteb/_evaluators/evaluator.py +9 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  8. mteb/_evaluators/retrieval_evaluator.py +19 -13
  9. mteb/_evaluators/retrieval_metrics.py +9 -3
  10. mteb/_evaluators/sklearn_evaluator.py +14 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  12. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +8 -2
  16. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  17. mteb/abstasks/_statistics_calculation.py +6 -4
  18. mteb/abstasks/abstask.py +17 -9
  19. mteb/abstasks/aggregate_task_metadata.py +20 -9
  20. mteb/abstasks/aggregated_task.py +15 -8
  21. mteb/abstasks/classification.py +15 -6
  22. mteb/abstasks/clustering.py +17 -8
  23. mteb/abstasks/clustering_legacy.py +14 -6
  24. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  25. mteb/abstasks/multilabel_classification.py +11 -5
  26. mteb/abstasks/pair_classification.py +19 -9
  27. mteb/abstasks/regression.py +14 -6
  28. mteb/abstasks/retrieval.py +27 -16
  29. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  30. mteb/abstasks/sts.py +19 -10
  31. mteb/abstasks/task_metadata.py +17 -8
  32. mteb/abstasks/text/bitext_mining.py +14 -7
  33. mteb/abstasks/text/summarization.py +17 -7
  34. mteb/abstasks/zeroshot_classification.py +15 -7
  35. mteb/benchmarks/_create_table.py +13 -3
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  38. mteb/cache.py +20 -14
  39. mteb/cli/_display_tasks.py +9 -3
  40. mteb/cli/build_cli.py +5 -2
  41. mteb/cli/generate_model_card.py +9 -2
  42. mteb/deprecated_evaluator.py +16 -12
  43. mteb/evaluate.py +20 -18
  44. mteb/filter_tasks.py +12 -7
  45. mteb/get_tasks.py +9 -4
  46. mteb/languages/language_scripts.py +8 -3
  47. mteb/leaderboard/app.py +7 -3
  48. mteb/leaderboard/table.py +7 -2
  49. mteb/load_results.py +9 -3
  50. mteb/models/abs_encoder.py +22 -12
  51. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  52. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  53. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  54. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  55. mteb/models/get_model_meta.py +11 -4
  56. mteb/models/instruct_wrapper.py +13 -5
  57. mteb/models/model_implementations/align_models.py +9 -4
  58. mteb/models/model_implementations/bedrock_models.py +16 -6
  59. mteb/models/model_implementations/blip2_models.py +9 -4
  60. mteb/models/model_implementations/blip_models.py +9 -4
  61. mteb/models/model_implementations/bm25.py +15 -10
  62. mteb/models/model_implementations/bmretriever_models.py +6 -2
  63. mteb/models/model_implementations/cde_models.py +9 -5
  64. mteb/models/model_implementations/clip_models.py +9 -4
  65. mteb/models/model_implementations/cohere_models.py +10 -4
  66. mteb/models/model_implementations/cohere_v.py +9 -4
  67. mteb/models/model_implementations/colpali_models.py +4 -3
  68. mteb/models/model_implementations/colqwen_models.py +10 -31
  69. mteb/models/model_implementations/colsmol_models.py +1 -1
  70. mteb/models/model_implementations/conan_models.py +10 -4
  71. mteb/models/model_implementations/dino_models.py +9 -4
  72. mteb/models/model_implementations/e5_v.py +9 -4
  73. mteb/models/model_implementations/eagerworks_models.py +10 -4
  74. mteb/models/model_implementations/evaclip_models.py +9 -4
  75. mteb/models/model_implementations/gme_v_models.py +5 -3
  76. mteb/models/model_implementations/google_models.py +10 -4
  77. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  78. mteb/models/model_implementations/hinvec_models.py +5 -1
  79. mteb/models/model_implementations/jasper_models.py +12 -5
  80. mteb/models/model_implementations/jina_clip.py +9 -4
  81. mteb/models/model_implementations/jina_models.py +10 -5
  82. mteb/models/model_implementations/kalm_models.py +18 -12
  83. mteb/models/model_implementations/linq_models.py +6 -1
  84. mteb/models/model_implementations/listconranker.py +9 -4
  85. mteb/models/model_implementations/llm2clip_models.py +9 -4
  86. mteb/models/model_implementations/llm2vec_models.py +12 -6
  87. mteb/models/model_implementations/mcinext_models.py +5 -2
  88. mteb/models/model_implementations/mdbr_models.py +3 -1
  89. mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
  90. mteb/models/model_implementations/moco_models.py +9 -4
  91. mteb/models/model_implementations/mod_models.py +1 -1
  92. mteb/models/model_implementations/model2vec_models.py +10 -4
  93. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  94. mteb/models/model_implementations/nomic_models.py +10 -4
  95. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  96. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  97. mteb/models/model_implementations/nvidia_models.py +12 -4
  98. mteb/models/model_implementations/octen_models.py +1 -1
  99. mteb/models/model_implementations/openai_models.py +9 -4
  100. mteb/models/model_implementations/openclip_models.py +9 -4
  101. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  102. mteb/models/model_implementations/ops_moa_models.py +7 -2
  103. mteb/models/model_implementations/pixie_models.py +56 -0
  104. mteb/models/model_implementations/promptriever_models.py +12 -6
  105. mteb/models/model_implementations/pylate_models.py +19 -13
  106. mteb/models/model_implementations/qwen3_models.py +8 -1
  107. mteb/models/model_implementations/random_baseline.py +4 -3
  108. mteb/models/model_implementations/repllama_models.py +13 -6
  109. mteb/models/model_implementations/rerankers_custom.py +10 -4
  110. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  111. mteb/models/model_implementations/salesforce_models.py +7 -1
  112. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  113. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  114. mteb/models/model_implementations/seed_models.py +1 -1
  115. mteb/models/model_implementations/siglip_models.py +9 -4
  116. mteb/models/model_implementations/slm_models.py +7 -4
  117. mteb/models/model_implementations/uae_models.py +9 -4
  118. mteb/models/model_implementations/vdr_models.py +7 -1
  119. mteb/models/model_implementations/vista_models.py +9 -4
  120. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  121. mteb/models/model_implementations/voyage_models.py +10 -4
  122. mteb/models/model_implementations/voyage_v.py +10 -6
  123. mteb/models/model_implementations/yuan_models_en.py +1 -1
  124. mteb/models/model_meta.py +12 -7
  125. mteb/models/models_protocols.py +19 -18
  126. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  127. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  128. mteb/models/search_wrappers.py +19 -12
  129. mteb/models/sentence_transformer_wrapper.py +4 -3
  130. mteb/models/vllm_wrapper.py +8 -6
  131. mteb/results/benchmark_results.py +22 -17
  132. mteb/results/model_result.py +21 -15
  133. mteb/results/task_result.py +41 -10
  134. mteb/similarity_functions.py +8 -2
  135. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  136. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  137. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  138. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  139. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  140. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  141. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  142. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  143. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  144. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  145. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  146. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  147. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  148. mteb/types/_encoder_io.py +1 -1
  149. mteb/types/statistics.py +9 -2
  150. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  151. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
  152. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  153. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  154. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  155. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,19 @@
1
- from typing import Any, Literal
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any, Literal
2
4
 
3
5
  import torch
4
- from torch.utils.data import DataLoader
5
6
  from tqdm.auto import tqdm
6
7
 
7
8
  from mteb._requires_package import requires_image_dependencies
8
- from mteb.abstasks.task_metadata import TaskMetadata
9
9
  from mteb.models.abs_encoder import AbsEncoder
10
10
  from mteb.models.model_meta import ModelMeta, ScoringFunction
11
- from mteb.types import Array, BatchedInput, PromptType
11
+
12
+ if TYPE_CHECKING:
13
+ from torch.utils.data import DataLoader
14
+
15
+ from mteb.abstasks.task_metadata import TaskMetadata
16
+ from mteb.types import Array, BatchedInput, PromptType
12
17
 
13
18
  VISTA_CITATION = """@article{zhou2024vista,
14
19
  title={VISTA: Visualized Text Embedding For Universal Multi-Modal Retrieval},
@@ -1,8 +1,9 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
- from typing import Any
4
+ from typing import TYPE_CHECKING, Any
3
5
 
4
6
  import torch
5
- from torch.utils.data import DataLoader
6
7
  from tqdm.auto import tqdm
7
8
 
8
9
  from mteb._requires_package import (
@@ -10,10 +11,14 @@ from mteb._requires_package import (
10
11
  requires_package,
11
12
  suggest_package,
12
13
  )
13
- from mteb.abstasks.task_metadata import TaskMetadata
14
14
  from mteb.models.abs_encoder import AbsEncoder
15
15
  from mteb.models.model_meta import ModelMeta, ScoringFunction
16
- from mteb.types import Array, BatchedInput, PromptType
16
+
17
+ if TYPE_CHECKING:
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import Array, BatchedInput, PromptType
17
22
 
18
23
  logger = logging.getLogger(__name__)
19
24
 
@@ -1,16 +1,22 @@
1
+ from __future__ import annotations
2
+
1
3
  import time
2
4
  from functools import wraps
3
- from typing import Any, Literal
5
+ from typing import TYPE_CHECKING, Any, Literal
4
6
 
5
7
  import numpy as np
6
- from torch.utils.data import DataLoader
7
8
  from tqdm.auto import tqdm
8
9
 
9
10
  from mteb._requires_package import requires_package
10
- from mteb.abstasks.task_metadata import TaskMetadata
11
11
  from mteb.models.abs_encoder import AbsEncoder
12
12
  from mteb.models.model_meta import ModelMeta, ScoringFunction
13
- from mteb.types import Array, BatchedInput, PromptType
13
+ from mteb.types import PromptType
14
+
15
+ if TYPE_CHECKING:
16
+ from torch.utils.data import DataLoader
17
+
18
+ from mteb.abstasks.task_metadata import TaskMetadata
19
+ from mteb.types import Array, BatchedInput
14
20
 
15
21
  VOYAGE_TRAINING_DATA = set(
16
22
  # Self-reported (message from VoyageAI member)
@@ -4,17 +4,19 @@ import logging
4
4
  from typing import TYPE_CHECKING, Any, Literal
5
5
 
6
6
  import torch
7
- from torch.utils.data import DataLoader
8
7
  from tqdm.auto import tqdm
9
8
 
10
9
  from mteb._requires_package import requires_image_dependencies, requires_package
11
- from mteb.abstasks.task_metadata import TaskMetadata
12
10
  from mteb.models.abs_encoder import AbsEncoder
13
11
  from mteb.models.model_meta import ModelMeta, ScoringFunction
14
- from mteb.types import Array, BatchedInput, PromptType
12
+ from mteb.types import PromptType
15
13
 
16
14
  if TYPE_CHECKING:
17
15
  from PIL import Image
16
+ from torch.utils.data import DataLoader
17
+
18
+ from mteb.abstasks.task_metadata import TaskMetadata
19
+ from mteb.types import Array, BatchedInput
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
@@ -27,6 +29,8 @@ def _downsample_image(
27
29
  Returns:
28
30
  The downsampled image.
29
31
  """
32
+ from PIL.Image import Resampling
33
+
30
34
  width, height = image.size
31
35
  pixels = width * height
32
36
 
@@ -42,15 +46,15 @@ def _downsample_image(
42
46
  logger.info(
43
47
  f"Downsampling image from {width}x{height} to {new_width}x{new_height}"
44
48
  )
45
- return image.resize(new_size, Image.LANCZOS)
49
+ return image.resize(new_size, Resampling.LANCZOS)
46
50
  if width > height:
47
51
  if width > 10000:
48
52
  logger.error("Processing extremely wide images.")
49
- return image.resize((10000, height), Image.LANCZOS)
53
+ return image.resize((10000, height), Resampling.LANCZOS)
50
54
  else:
51
55
  if height > 10000:
52
56
  logger.error("Processing extremely high images.")
53
- return image.resize((width, 10000), Image.LANCZOS)
57
+ return image.resize((width, 10000), Resampling.LANCZOS)
54
58
  return image
55
59
 
56
60
 
@@ -1,6 +1,6 @@
1
1
  from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
2
  from mteb.models.model_meta import ModelMeta
3
- from mteb.models.models_protocols import PromptType
3
+ from mteb.types import PromptType
4
4
 
5
5
 
6
6
  def instruction_template(
mteb/models/model_meta.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  import json
4
4
  import logging
5
5
  import warnings
6
- from collections.abc import Callable, Sequence
6
+ from collections.abc import Callable
7
7
  from dataclasses import field
8
8
  from enum import Enum
9
9
  from functools import partial
@@ -11,9 +11,7 @@ from pathlib import Path
11
11
  from typing import TYPE_CHECKING, Any, Literal, cast
12
12
 
13
13
  from huggingface_hub import (
14
- GitCommitInfo,
15
14
  ModelCard,
16
- ModelCardData,
17
15
  get_safetensors_metadata,
18
16
  hf_hub_download,
19
17
  list_repo_commits,
@@ -30,17 +28,24 @@ from huggingface_hub.errors import (
30
28
  )
31
29
  from pydantic import BaseModel, ConfigDict, field_validator, model_validator
32
30
  from transformers import AutoConfig
33
- from typing_extensions import Self
34
31
 
35
32
  from mteb._helpful_enum import HelpfulStrEnum
36
33
  from mteb.languages import check_language_code
37
- from mteb.models.models_protocols import EncoderProtocol, MTEBModels
34
+ from mteb.models.models_protocols import MTEBModels
38
35
  from mteb.types import ISOLanguageScript, Licenses, Modalities, StrDate, StrURL
39
36
 
40
37
  if TYPE_CHECKING:
38
+ from collections.abc import Sequence
39
+
40
+ from huggingface_hub import (
41
+ GitCommitInfo,
42
+ ModelCardData,
43
+ )
41
44
  from sentence_transformers import CrossEncoder, SentenceTransformer
45
+ from typing_extensions import Self
42
46
 
43
47
  from mteb.abstasks import AbsTask
48
+ from mteb.models.models_protocols import EncoderProtocol
44
49
 
45
50
 
46
51
  logger = logging.getLogger(__name__)
@@ -479,7 +484,7 @@ class ModelMeta(BaseModel):
479
484
  if isinstance(tasks[0], str):
480
485
  benchmark_datasets = set(tasks)
481
486
  else:
482
- tasks = cast(Sequence["AbsTask"], tasks)
487
+ tasks = cast("Sequence[AbsTask]", tasks)
483
488
  benchmark_datasets = set()
484
489
  for task in tasks:
485
490
  benchmark_datasets.add(task.metadata.name)
@@ -534,7 +539,7 @@ class ModelMeta(BaseModel):
534
539
  if isinstance(tasks[0], str):
535
540
  benchmark_datasets = set(tasks)
536
541
  else:
537
- tasks = cast(Sequence["AbsTask"], tasks)
542
+ tasks = cast("Sequence[AbsTask]", tasks)
538
543
  benchmark_datasets = {task.metadata.name for task in tasks}
539
544
  overlap = training_datasets & benchmark_datasets
540
545
  perc_overlap = 100 * (len(overlap) / len(benchmark_datasets))
@@ -1,22 +1,23 @@
1
- from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
1
+ from __future__ import annotations
2
2
 
3
- from torch.utils.data import DataLoader
4
- from typing_extensions import Unpack
5
-
6
- from mteb.abstasks.task_metadata import TaskMetadata
7
- from mteb.types import (
8
- Array,
9
- BatchedInput,
10
- CorpusDatasetType,
11
- EncodeKwargs,
12
- PromptType,
13
- QueryDatasetType,
14
- RetrievalOutputType,
15
- TopRankedDocumentsType,
16
- )
3
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
17
4
 
18
5
  if TYPE_CHECKING:
6
+ from torch.utils.data import DataLoader
7
+ from typing_extensions import Unpack
8
+
9
+ from mteb.abstasks.task_metadata import TaskMetadata
19
10
  from mteb.models.model_meta import ModelMeta
11
+ from mteb.types import (
12
+ Array,
13
+ BatchedInput,
14
+ CorpusDatasetType,
15
+ EncodeKwargs,
16
+ PromptType,
17
+ QueryDatasetType,
18
+ RetrievalOutputType,
19
+ TopRankedDocumentsType,
20
+ )
20
21
 
21
22
 
22
23
  @runtime_checkable
@@ -72,7 +73,7 @@ class SearchProtocol(Protocol):
72
73
  ...
73
74
 
74
75
  @property
75
- def mteb_model_meta(self) -> "ModelMeta":
76
+ def mteb_model_meta(self) -> ModelMeta:
76
77
  """Metadata of the model"""
77
78
  ...
78
79
 
@@ -177,7 +178,7 @@ class EncoderProtocol(Protocol):
177
178
  ...
178
179
 
179
180
  @property
180
- def mteb_model_meta(self) -> "ModelMeta":
181
+ def mteb_model_meta(self) -> ModelMeta:
181
182
  """Metadata of the model"""
182
183
  ...
183
184
 
@@ -236,7 +237,7 @@ class CrossEncoderProtocol(Protocol):
236
237
  ...
237
238
 
238
239
  @property
239
- def mteb_model_meta(self) -> "ModelMeta":
240
+ def mteb_model_meta(self) -> ModelMeta:
240
241
  """Metadata of the model"""
241
242
  ...
242
243
 
@@ -1,7 +1,11 @@
1
- from collections.abc import Callable
2
- from typing import Protocol
1
+ from __future__ import annotations
3
2
 
4
- from mteb.types import Array, TopRankedDocumentsType
3
+ from typing import TYPE_CHECKING, Protocol
4
+
5
+ if TYPE_CHECKING:
6
+ from collections.abc import Callable
7
+
8
+ from mteb.types import Array, TopRankedDocumentsType
5
9
 
6
10
 
7
11
  class IndexEncoderSearchProtocol(Protocol):
@@ -1,14 +1,23 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  import warnings
3
- from collections.abc import Callable
5
+ from typing import TYPE_CHECKING
4
6
 
5
7
  import numpy as np
6
8
  import torch
7
9
 
8
10
  from mteb._requires_package import requires_package
9
11
  from mteb.models.model_meta import ScoringFunction
10
- from mteb.models.models_protocols import EncoderProtocol
11
- from mteb.types import Array, TopRankedDocumentsType
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Callable
15
+
16
+ import faiss
17
+
18
+ from mteb.models.models_protocols import EncoderProtocol
19
+ from mteb.types import Array, TopRankedDocumentsType
20
+
12
21
 
13
22
  logger = logging.getLogger(__name__)
14
23
 
@@ -33,7 +42,6 @@ class FaissSearchIndex:
33
42
  install_instruction="pip install mteb[faiss-cpu]",
34
43
  )
35
44
 
36
- import faiss
37
45
  from faiss import IndexFlatIP, IndexFlatL2
38
46
 
39
47
  # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
@@ -1,28 +1,35 @@
1
+ from __future__ import annotations
2
+
1
3
  import heapq
2
4
  import logging
3
- from typing import Any
5
+ from typing import TYPE_CHECKING, Any
4
6
 
5
7
  import torch
6
8
  from datasets import Dataset
7
- from torch.utils.data import DataLoader
8
9
 
9
10
  from mteb._create_dataloaders import (
10
11
  create_dataloader,
11
12
  )
12
- from mteb.abstasks.task_metadata import TaskMetadata
13
13
  from mteb.types import (
14
- Array,
15
- BatchedInput,
16
- CorpusDatasetType,
17
- EncodeKwargs,
18
14
  PromptType,
19
- QueryDatasetType,
20
- RetrievalOutputType,
21
- TopRankedDocumentsType,
22
15
  )
23
16
 
24
- from .models_protocols import CrossEncoderProtocol, EncoderProtocol
25
- from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
17
+ if TYPE_CHECKING:
18
+ from torch.utils.data import DataLoader
19
+
20
+ from mteb.abstasks.task_metadata import TaskMetadata
21
+ from mteb.types import (
22
+ Array,
23
+ BatchedInput,
24
+ CorpusDatasetType,
25
+ EncodeKwargs,
26
+ QueryDatasetType,
27
+ RetrievalOutputType,
28
+ TopRankedDocumentsType,
29
+ )
30
+
31
+ from .models_protocols import CrossEncoderProtocol, EncoderProtocol
32
+ from .search_encoder_index.search_backend_protocol import IndexEncoderSearchProtocol
26
33
 
27
34
  logger = logging.getLogger(__name__)
28
35
 
@@ -7,19 +7,20 @@ from typing import TYPE_CHECKING, Any
7
7
  import numpy as np
8
8
  import torch
9
9
  from packaging.version import Version
10
- from torch.utils.data import DataLoader
11
- from typing_extensions import Unpack
12
10
 
13
11
  from mteb._log_once import LogOnce
14
12
  from mteb.models import ModelMeta
15
- from mteb.types import Array, BatchedInput, EncodeKwargs, PromptType
13
+ from mteb.types import PromptType
16
14
 
17
15
  from .abs_encoder import AbsEncoder
18
16
 
19
17
  if TYPE_CHECKING:
20
18
  from sentence_transformers import CrossEncoder, SentenceTransformer
19
+ from torch.utils.data import DataLoader
20
+ from typing_extensions import Unpack
21
21
 
22
22
  from mteb.abstasks.task_metadata import TaskMetadata
23
+ from mteb.types import Array, BatchedInput, EncodeKwargs
23
24
 
24
25
  logger = logging.getLogger(__name__)
25
26
 
@@ -4,23 +4,25 @@ import atexit
4
4
  import gc
5
5
  import logging
6
6
  import os
7
- from collections.abc import Callable
8
7
  from typing import TYPE_CHECKING, Any, Literal
9
8
 
10
9
  import numpy as np
11
10
  import torch
12
- from torch.utils.data import DataLoader
13
11
 
14
12
  from mteb._requires_package import requires_package
15
- from mteb.abstasks.task_metadata import TaskMetadata
16
13
  from mteb.models import ModelMeta
17
14
  from mteb.models.abs_encoder import AbsEncoder
18
- from mteb.types import Array, BatchedInput, PromptType
15
+ from mteb.types import PromptType
19
16
 
20
17
  if TYPE_CHECKING:
18
+ from collections.abc import Callable
19
+
20
+ from torch.utils.data import DataLoader
21
21
  from vllm.config import PoolerConfig # type: ignore[import-not-found]
22
- else:
23
- PoolerConfig = dict[str, Any]
22
+
23
+ from mteb.abstasks.task_metadata import TaskMetadata
24
+ from mteb.types import Array, BatchedInput
25
+
24
26
 
25
27
  logger = logging.getLogger(__name__)
26
28
 
@@ -4,34 +4,39 @@ import functools
4
4
  import json
5
5
  import logging
6
6
  import warnings
7
- from collections.abc import Callable, Iterable, Iterator
8
7
  from pathlib import Path
9
- from typing import Any, Literal, cast
8
+ from typing import TYPE_CHECKING, Any, Literal, cast
10
9
 
11
10
  import pandas as pd
12
11
  from packaging.version import InvalidVersion, Version
13
12
  from pydantic import BaseModel, ConfigDict
14
- from typing_extensions import Self
15
13
 
16
- from mteb.abstasks.abstask import AbsTask
17
- from mteb.abstasks.task_metadata import (
18
- TaskDomain,
19
- TaskType,
20
- )
21
14
  from mteb.benchmarks.benchmark import Benchmark
22
15
  from mteb.models import ModelMeta
23
16
  from mteb.models.get_model_meta import get_model_metas
24
- from mteb.types import (
25
- ISOLanguage,
26
- ISOLanguageScript,
27
- Modalities,
28
- Score,
29
- ScoresDict,
30
- SplitName,
31
- )
32
17
 
33
18
  from .model_result import ModelResult, _aggregate_and_pivot
34
19
 
20
+ if TYPE_CHECKING:
21
+ from collections.abc import Callable, Iterable, Iterator
22
+
23
+ from typing_extensions import Self
24
+
25
+ from mteb.abstasks.abstask import AbsTask
26
+ from mteb.abstasks.task_metadata import (
27
+ TaskDomain,
28
+ TaskType,
29
+ )
30
+ from mteb.types import (
31
+ ISOLanguage,
32
+ ISOLanguageScript,
33
+ Modalities,
34
+ Score,
35
+ ScoresDict,
36
+ SplitName,
37
+ )
38
+
39
+
35
40
  logger = logging.getLogger(__name__)
36
41
 
37
42
 
@@ -144,7 +149,7 @@ class BenchmarkResults(BaseModel):
144
149
  raise ValueError("name in ModelMeta is None. It must be a string.")
145
150
  name_rev[name.name] = name.revision
146
151
  else:
147
- name_ = cast(str, name)
152
+ name_ = cast("str", name)
148
153
  name_rev[name_] = revision
149
154
 
150
155
  for model_res in self.model_results:
@@ -2,30 +2,36 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  import warnings
5
- from collections.abc import Callable, Iterable
6
- from typing import Any, Literal, cast
5
+ from typing import TYPE_CHECKING, Any, Literal, cast
7
6
 
8
7
  import numpy as np
9
8
  import pandas as pd
10
9
  from pydantic import BaseModel, ConfigDict, Field
11
10
  from typing_extensions import overload
12
11
 
13
- from mteb.abstasks.abstask import AbsTask
14
- from mteb.abstasks.task_metadata import (
15
- TaskDomain,
16
- TaskType,
17
- )
18
12
  from mteb.types import (
19
- ISOLanguage,
20
- ISOLanguageScript,
21
13
  Modalities,
22
- Score,
23
- ScoresDict,
24
- SplitName,
25
14
  )
26
15
 
27
16
  from .task_result import TaskError, TaskResult
28
17
 
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Callable, Iterable
20
+
21
+ from mteb.abstasks.abstask import AbsTask
22
+ from mteb.abstasks.task_metadata import (
23
+ TaskDomain,
24
+ TaskType,
25
+ )
26
+ from mteb.types import (
27
+ ISOLanguage,
28
+ ISOLanguageScript,
29
+ Score,
30
+ ScoresDict,
31
+ SplitName,
32
+ )
33
+
34
+
29
35
  logger = logging.getLogger(__name__)
30
36
 
31
37
 
@@ -83,7 +89,7 @@ class ModelResult(BaseModel):
83
89
  model_revision: str | None
84
90
  task_results: list[TaskResult]
85
91
  default_modalities: list[Modalities] = Field(
86
- default_factory=lambda: [cast(Modalities, "text")], alias="modalities"
92
+ default_factory=lambda: [cast("Modalities", "text")], alias="modalities"
87
93
  )
88
94
  model_config = (
89
95
  ConfigDict( # to free up the name model_* which is otherwise protected
@@ -202,8 +208,8 @@ class ModelResult(BaseModel):
202
208
  aggregation = aggregation if aggregation is not None else np.mean
203
209
  else:
204
210
  use_fast = True
205
- aggregation = cast(Callable[[list[Score]], Any], aggregation)
206
- getter = cast(Callable[[ScoresDict], Score], getter)
211
+ aggregation = cast("Callable[[list[Score]], Any]", aggregation)
212
+ getter = cast("Callable[[ScoresDict], Score]", getter)
207
213
 
208
214
  if format == "wide":
209
215
  scores = {}
@@ -4,34 +4,40 @@ import json
4
4
  import logging
5
5
  import warnings
6
6
  from collections import defaultdict
7
- from collections.abc import Callable, Iterable, Mapping
8
7
  from functools import cached_property
9
8
  from importlib.metadata import version
10
- from pathlib import Path
11
- from typing import Any
9
+ from typing import TYPE_CHECKING, Any
12
10
 
13
11
  import numpy as np
14
12
  from huggingface_hub import EvalResult
15
13
  from packaging.version import Version
16
14
  from pydantic import BaseModel, field_validator
17
- from typing_extensions import Self
18
15
 
19
16
  from mteb import TaskMetadata
20
17
  from mteb._helpful_enum import HelpfulStrEnum
21
18
  from mteb.abstasks import AbsTaskClassification
22
19
  from mteb.abstasks.abstask import AbsTask
23
- from mteb.abstasks.task_metadata import TaskDomain
24
20
  from mteb.languages import LanguageScripts
25
21
  from mteb.models.model_meta import ScoringFunction
26
22
  from mteb.types import (
27
- HFSubset,
28
- ISOLanguage,
29
- ISOLanguageScript,
30
- Score,
31
23
  ScoresDict,
32
24
  SplitName,
33
25
  )
34
26
 
27
+ if TYPE_CHECKING:
28
+ from collections.abc import Callable, Iterable, Mapping
29
+ from pathlib import Path
30
+
31
+ from typing_extensions import Self
32
+
33
+ from mteb.abstasks.task_metadata import TaskDomain
34
+ from mteb.types import (
35
+ HFSubset,
36
+ ISOLanguage,
37
+ ISOLanguageScript,
38
+ Score,
39
+ )
40
+
35
41
  logger = logging.getLogger(__name__)
36
42
 
37
43
 
@@ -610,7 +616,10 @@ class TaskResult(BaseModel):
610
616
  new_res = {**self.to_dict(), "scores": new_scores}
611
617
  return TaskResult.from_validated(**new_res)
612
618
 
613
- def validate_and_filter_scores(self, task: AbsTask | None = None) -> TaskResult:
619
+ def validate_and_filter_scores(
620
+ self,
621
+ task: AbsTask | None = None,
622
+ ) -> TaskResult:
614
623
  """Validate and filter the scores against the task metadata.
615
624
 
616
625
  This ensures that the scores are correct for the given task, by removing any splits besides those specified in the task metadata.
@@ -658,11 +667,33 @@ class TaskResult(BaseModel):
658
667
  msg = f"{task.metadata.name}: Missing subsets {missing_subsets_str} for split {split}"
659
668
  logger.warning(msg)
660
669
  warnings.warn(msg)
670
+ for missing_subset in missing_subsets:
671
+ new_scores[split].append(
672
+ {
673
+ "hf_subset": missing_subset,
674
+ "main_score": np.nan,
675
+ "languages": task.metadata.hf_subsets_to_langscripts.get(
676
+ missing_subset, []
677
+ ),
678
+ }
679
+ )
661
680
  seen_splits.add(split)
662
681
  if seen_splits != set(splits):
663
682
  msg = f"{task.metadata.name}: Missing splits {set(splits) - seen_splits}"
664
683
  logger.warning(msg)
665
684
  warnings.warn(msg)
685
+ for missing_split in set(splits) - seen_splits:
686
+ new_scores[missing_split] = []
687
+ for missing_subset in hf_subsets:
688
+ new_scores[missing_split].append(
689
+ {
690
+ "hf_subset": missing_subset,
691
+ "main_score": np.nan,
692
+ "languages": task.metadata.hf_subsets_to_langscripts.get(
693
+ missing_subset, []
694
+ ),
695
+ }
696
+ )
666
697
  data = self.model_dump()
667
698
  data["scores"] = new_scores
668
699
  return type(self).model_construct(**data)
@@ -1,8 +1,14 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
1
5
  import torch
2
6
 
3
- from mteb.models import EncoderProtocol
4
7
  from mteb.models.model_meta import ScoringFunction
5
- from mteb.types import Array
8
+
9
+ if TYPE_CHECKING:
10
+ from mteb.models import EncoderProtocol
11
+ from mteb.types import Array
6
12
 
7
13
 
8
14
  def _use_torch_compile():
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidRetrieval,
5
5
  CQADupstackEnglishRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressRetrieval,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrieval(),
20
20
  CQADupstackEnglishRetrieval(),
21
21
  CQADupstackGamingRetrieval(),