mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. mteb/_create_dataloaders.py +16 -9
  2. mteb/_evaluators/any_sts_evaluator.py +10 -5
  3. mteb/_evaluators/clustering_evaluator.py +10 -4
  4. mteb/_evaluators/evaluator.py +9 -4
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  6. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  7. mteb/_evaluators/retrieval_evaluator.py +19 -13
  8. mteb/_evaluators/retrieval_metrics.py +9 -3
  9. mteb/_evaluators/sklearn_evaluator.py +14 -10
  10. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  11. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  12. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  13. mteb/_helpful_enum.py +5 -1
  14. mteb/abstasks/_data_filter/filters.py +8 -2
  15. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  16. mteb/abstasks/_statistics_calculation.py +6 -4
  17. mteb/abstasks/abstask.py +17 -9
  18. mteb/abstasks/aggregate_task_metadata.py +20 -9
  19. mteb/abstasks/aggregated_task.py +15 -8
  20. mteb/abstasks/classification.py +15 -6
  21. mteb/abstasks/clustering.py +17 -8
  22. mteb/abstasks/clustering_legacy.py +14 -6
  23. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  24. mteb/abstasks/multilabel_classification.py +11 -5
  25. mteb/abstasks/pair_classification.py +19 -9
  26. mteb/abstasks/regression.py +14 -6
  27. mteb/abstasks/retrieval.py +28 -17
  28. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  29. mteb/abstasks/sts.py +19 -10
  30. mteb/abstasks/task_metadata.py +17 -8
  31. mteb/abstasks/text/bitext_mining.py +14 -7
  32. mteb/abstasks/text/summarization.py +17 -7
  33. mteb/abstasks/zeroshot_classification.py +15 -7
  34. mteb/benchmarks/_create_table.py +13 -3
  35. mteb/benchmarks/benchmark.py +11 -1
  36. mteb/benchmarks/benchmarks/__init__.py +2 -0
  37. mteb/benchmarks/benchmarks/benchmarks.py +41 -2
  38. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  39. mteb/cache.py +10 -5
  40. mteb/cli/_display_tasks.py +9 -3
  41. mteb/cli/build_cli.py +5 -2
  42. mteb/cli/generate_model_card.py +9 -2
  43. mteb/deprecated_evaluator.py +16 -12
  44. mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
  45. mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
  46. mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
  47. mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
  48. mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
  49. mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
  50. mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
  51. mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
  52. mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
  53. mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
  54. mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
  55. mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
  56. mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
  57. mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
  58. mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
  59. mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
  60. mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
  61. mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
  62. mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
  63. mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
  64. mteb/evaluate.py +20 -18
  65. mteb/filter_tasks.py +12 -7
  66. mteb/get_tasks.py +9 -4
  67. mteb/languages/language_scripts.py +8 -3
  68. mteb/leaderboard/app.py +7 -3
  69. mteb/leaderboard/table.py +7 -2
  70. mteb/load_results.py +9 -3
  71. mteb/models/abs_encoder.py +22 -12
  72. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  73. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  74. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  75. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  76. mteb/models/get_model_meta.py +11 -4
  77. mteb/models/instruct_wrapper.py +13 -5
  78. mteb/models/model_implementations/align_models.py +10 -4
  79. mteb/models/model_implementations/amazon_models.py +1 -0
  80. mteb/models/model_implementations/andersborges.py +2 -0
  81. mteb/models/model_implementations/ara_models.py +1 -0
  82. mteb/models/model_implementations/arctic_models.py +8 -0
  83. mteb/models/model_implementations/b1ade_models.py +1 -0
  84. mteb/models/model_implementations/bedrock_models.py +20 -6
  85. mteb/models/model_implementations/bge_models.py +40 -1
  86. mteb/models/model_implementations/bica_model.py +1 -0
  87. mteb/models/model_implementations/blip2_models.py +11 -4
  88. mteb/models/model_implementations/blip_models.py +17 -4
  89. mteb/models/model_implementations/bm25.py +22 -14
  90. mteb/models/model_implementations/bmretriever_models.py +10 -2
  91. mteb/models/model_implementations/cadet_models.py +1 -0
  92. mteb/models/model_implementations/cde_models.py +11 -5
  93. mteb/models/model_implementations/clip_models.py +12 -4
  94. mteb/models/model_implementations/clips_models.py +3 -0
  95. mteb/models/model_implementations/codefuse_models.py +5 -0
  96. mteb/models/model_implementations/codesage_models.py +3 -0
  97. mteb/models/model_implementations/cohere_models.py +14 -4
  98. mteb/models/model_implementations/cohere_v.py +14 -4
  99. mteb/models/model_implementations/colpali_models.py +7 -3
  100. mteb/models/model_implementations/colqwen_models.py +17 -31
  101. mteb/models/model_implementations/colsmol_models.py +3 -1
  102. mteb/models/model_implementations/conan_models.py +11 -4
  103. mteb/models/model_implementations/dino_models.py +28 -4
  104. mteb/models/model_implementations/e5_instruct.py +4 -0
  105. mteb/models/model_implementations/e5_models.py +9 -0
  106. mteb/models/model_implementations/e5_v.py +10 -4
  107. mteb/models/model_implementations/eagerworks_models.py +11 -4
  108. mteb/models/model_implementations/emillykkejensen_models.py +3 -0
  109. mteb/models/model_implementations/en_code_retriever.py +1 -0
  110. mteb/models/model_implementations/euler_models.py +1 -0
  111. mteb/models/model_implementations/evaclip_models.py +13 -4
  112. mteb/models/model_implementations/fa_models.py +9 -0
  113. mteb/models/model_implementations/facebookai.py +2 -0
  114. mteb/models/model_implementations/geogpt_models.py +1 -0
  115. mteb/models/model_implementations/gme_v_models.py +7 -3
  116. mteb/models/model_implementations/google_models.py +15 -4
  117. mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
  118. mteb/models/model_implementations/gritlm_models.py +2 -0
  119. mteb/models/model_implementations/gte_models.py +9 -0
  120. mteb/models/model_implementations/hinvec_models.py +6 -1
  121. mteb/models/model_implementations/human.py +1 -0
  122. mteb/models/model_implementations/ibm_granite_models.py +6 -0
  123. mteb/models/model_implementations/inf_models.py +2 -0
  124. mteb/models/model_implementations/jasper_models.py +14 -5
  125. mteb/models/model_implementations/jina_clip.py +10 -4
  126. mteb/models/model_implementations/jina_models.py +17 -5
  127. mteb/models/model_implementations/kalm_models.py +24 -12
  128. mteb/models/model_implementations/kblab.py +1 -0
  129. mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
  130. mteb/models/model_implementations/kfst.py +1 -0
  131. mteb/models/model_implementations/kowshik24_models.py +1 -0
  132. mteb/models/model_implementations/lens_models.py +2 -0
  133. mteb/models/model_implementations/lgai_embedding_models.py +1 -0
  134. mteb/models/model_implementations/linq_models.py +7 -1
  135. mteb/models/model_implementations/listconranker.py +10 -4
  136. mteb/models/model_implementations/llm2clip_models.py +12 -4
  137. mteb/models/model_implementations/llm2vec_models.py +20 -6
  138. mteb/models/model_implementations/mcinext_models.py +8 -2
  139. mteb/models/model_implementations/mdbr_models.py +2 -0
  140. mteb/models/model_implementations/misc_models.py +63 -0
  141. mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
  142. mteb/models/model_implementations/mme5_models.py +2 -1
  143. mteb/models/model_implementations/moco_models.py +11 -4
  144. mteb/models/model_implementations/mod_models.py +2 -1
  145. mteb/models/model_implementations/model2vec_models.py +23 -4
  146. mteb/models/model_implementations/moka_models.py +3 -0
  147. mteb/models/model_implementations/nbailab.py +3 -0
  148. mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
  149. mteb/models/model_implementations/nomic_models.py +16 -4
  150. mteb/models/model_implementations/nomic_models_vision.py +5 -3
  151. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
  152. mteb/models/model_implementations/nvidia_models.py +15 -4
  153. mteb/models/model_implementations/octen_models.py +3 -1
  154. mteb/models/model_implementations/openai_models.py +14 -4
  155. mteb/models/model_implementations/openclip_models.py +17 -4
  156. mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
  157. mteb/models/model_implementations/ops_moa_models.py +9 -2
  158. mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
  159. mteb/models/model_implementations/pawan_models.py +1 -0
  160. mteb/models/model_implementations/piccolo_models.py +2 -0
  161. mteb/models/model_implementations/promptriever_models.py +16 -6
  162. mteb/models/model_implementations/pylate_models.py +22 -13
  163. mteb/models/model_implementations/qodo_models.py +2 -0
  164. mteb/models/model_implementations/qtack_models.py +1 -0
  165. mteb/models/model_implementations/qwen3_models.py +11 -1
  166. mteb/models/model_implementations/qzhou_models.py +2 -0
  167. mteb/models/model_implementations/random_baseline.py +4 -3
  168. mteb/models/model_implementations/rasgaard_models.py +1 -0
  169. mteb/models/model_implementations/reasonir_model.py +65 -0
  170. mteb/models/model_implementations/repllama_models.py +15 -6
  171. mteb/models/model_implementations/rerankers_custom.py +13 -4
  172. mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
  173. mteb/models/model_implementations/richinfoai_models.py +1 -0
  174. mteb/models/model_implementations/ru_sentence_models.py +20 -0
  175. mteb/models/model_implementations/ruri_models.py +10 -0
  176. mteb/models/model_implementations/salesforce_models.py +10 -1
  177. mteb/models/model_implementations/samilpwc_models.py +1 -0
  178. mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
  179. mteb/models/model_implementations/searchmap_models.py +1 -0
  180. mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
  181. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
  182. mteb/models/model_implementations/seed_models.py +2 -1
  183. mteb/models/model_implementations/sentence_transformers_models.py +18 -0
  184. mteb/models/model_implementations/shuu_model.py +1 -0
  185. mteb/models/model_implementations/siglip_models.py +19 -4
  186. mteb/models/model_implementations/slm_models.py +7 -4
  187. mteb/models/model_implementations/sonar_models.py +2 -1
  188. mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
  189. mteb/models/model_implementations/stella_models.py +6 -0
  190. mteb/models/model_implementations/tarka_models.py +2 -0
  191. mteb/models/model_implementations/text2vec_models.py +3 -0
  192. mteb/models/model_implementations/ua_sentence_models.py +1 -0
  193. mteb/models/model_implementations/uae_models.py +10 -4
  194. mteb/models/model_implementations/vdr_models.py +8 -1
  195. mteb/models/model_implementations/vi_vn_models.py +6 -0
  196. mteb/models/model_implementations/vista_models.py +11 -4
  197. mteb/models/model_implementations/vlm2vec_models.py +11 -4
  198. mteb/models/model_implementations/voyage_models.py +25 -4
  199. mteb/models/model_implementations/voyage_v.py +11 -6
  200. mteb/models/model_implementations/xyz_models.py +1 -0
  201. mteb/models/model_implementations/youtu_models.py +1 -0
  202. mteb/models/model_implementations/yuan_models.py +1 -0
  203. mteb/models/model_implementations/yuan_models_en.py +2 -1
  204. mteb/models/model_meta.py +47 -9
  205. mteb/models/models_protocols.py +19 -18
  206. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  207. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  208. mteb/models/search_wrappers.py +19 -12
  209. mteb/models/sentence_transformer_wrapper.py +4 -3
  210. mteb/models/vllm_wrapper.py +8 -6
  211. mteb/results/benchmark_results.py +22 -17
  212. mteb/results/model_result.py +21 -15
  213. mteb/results/task_result.py +15 -9
  214. mteb/similarity_functions.py +8 -2
  215. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  216. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  217. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  218. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  219. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  220. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  221. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  222. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  223. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  224. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  225. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  226. mteb/tasks/retrieval/eng/__init__.py +42 -0
  227. mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
  228. mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
  229. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  230. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  231. mteb/types/_encoder_io.py +1 -1
  232. mteb/types/statistics.py +9 -2
  233. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
  234. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
  235. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
  236. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
  237. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
  238. {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
@@ -7,19 +7,20 @@ from typing import TYPE_CHECKING, Any
 import numpy as np
 import torch
 from packaging.version import Version
-from torch.utils.data import DataLoader
-from typing_extensions import Unpack
 
 from mteb._log_once import LogOnce
 from mteb.models import ModelMeta
-from mteb.types import Array, BatchedInput, EncodeKwargs, PromptType
+from mteb.types import PromptType
 
 from .abs_encoder import AbsEncoder
 
 if TYPE_CHECKING:
     from sentence_transformers import CrossEncoder, SentenceTransformer
+    from torch.utils.data import DataLoader
+    from typing_extensions import Unpack
 
     from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, EncodeKwargs
 
 logger = logging.getLogger(__name__)
 
@@ -4,23 +4,25 @@ import atexit
 import gc
 import logging
 import os
-from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np
 import torch
-from torch.utils.data import DataLoader
 
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import ModelMeta
 from mteb.models.abs_encoder import AbsEncoder
-from mteb.types import Array, BatchedInput, PromptType
+from mteb.types import PromptType
 
 if TYPE_CHECKING:
+    from collections.abc import Callable
+
+    from torch.utils.data import DataLoader
     from vllm.config import PoolerConfig  # type: ignore[import-not-found]
-else:
-    PoolerConfig = dict[str, Any]
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
+
 
 logger = logging.getLogger(__name__)
 
@@ -4,34 +4,39 @@ import functools
 import json
 import logging
 import warnings
-from collections.abc import Callable, Iterable, Iterator
 from pathlib import Path
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 import pandas as pd
 from packaging.version import InvalidVersion, Version
 from pydantic import BaseModel, ConfigDict
-from typing_extensions import Self
 
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.task_metadata import (
-    TaskDomain,
-    TaskType,
-)
 from mteb.benchmarks.benchmark import Benchmark
 from mteb.models import ModelMeta
 from mteb.models.get_model_meta import get_model_metas
-from mteb.types import (
-    ISOLanguage,
-    ISOLanguageScript,
-    Modalities,
-    Score,
-    ScoresDict,
-    SplitName,
-)
 
 from .model_result import ModelResult, _aggregate_and_pivot
 
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable, Iterator
+
+    from typing_extensions import Self
+
+    from mteb.abstasks.abstask import AbsTask
+    from mteb.abstasks.task_metadata import (
+        TaskDomain,
+        TaskType,
+    )
+    from mteb.types import (
+        ISOLanguage,
+        ISOLanguageScript,
+        Modalities,
+        Score,
+        ScoresDict,
+        SplitName,
+    )
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -144,7 +149,7 @@ class BenchmarkResults(BaseModel):
                     raise ValueError("name in ModelMeta is None. It must be a string.")
                 name_rev[name.name] = name.revision
             else:
-                name_ = cast(str, name)
+                name_ = cast("str", name)
                 name_rev[name_] = revision
 
         for model_res in self.model_results:
@@ -2,30 +2,36 @@ from __future__ import annotations
 
 import logging
 import warnings
-from collections.abc import Callable, Iterable
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 import numpy as np
 import pandas as pd
 from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import overload
 
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.task_metadata import (
-    TaskDomain,
-    TaskType,
-)
 from mteb.types import (
-    ISOLanguage,
-    ISOLanguageScript,
     Modalities,
-    Score,
-    ScoresDict,
-    SplitName,
 )
 
 from .task_result import TaskError, TaskResult
 
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable
+
+    from mteb.abstasks.abstask import AbsTask
+    from mteb.abstasks.task_metadata import (
+        TaskDomain,
+        TaskType,
+    )
+    from mteb.types import (
+        ISOLanguage,
+        ISOLanguageScript,
+        Score,
+        ScoresDict,
+        SplitName,
+    )
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -83,7 +89,7 @@ class ModelResult(BaseModel):
     model_revision: str | None
     task_results: list[TaskResult]
     default_modalities: list[Modalities] = Field(
-        default_factory=lambda: [cast(Modalities, "text")], alias="modalities"
+        default_factory=lambda: [cast("Modalities", "text")], alias="modalities"
     )
     model_config = (
         ConfigDict(  # to free up the name model_* which is otherwise protected
@@ -202,8 +208,8 @@ class ModelResult(BaseModel):
             aggregation = aggregation if aggregation is not None else np.mean
         else:
             use_fast = True
-        aggregation = cast(Callable[[list[Score]], Any], aggregation)
-        getter = cast(Callable[[ScoresDict], Score], getter)
+        aggregation = cast("Callable[[list[Score]], Any]", aggregation)
+        getter = cast("Callable[[ScoresDict], Score]", getter)
 
         if format == "wide":
             scores = {}
@@ -4,34 +4,40 @@ import json
 import logging
 import warnings
 from collections import defaultdict
-from collections.abc import Callable, Iterable, Mapping
 from functools import cached_property
 from importlib.metadata import version
-from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from huggingface_hub import EvalResult
 from packaging.version import Version
 from pydantic import BaseModel, field_validator
-from typing_extensions import Self
 
 from mteb import TaskMetadata
 from mteb._helpful_enum import HelpfulStrEnum
 from mteb.abstasks import AbsTaskClassification
 from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.task_metadata import TaskDomain
 from mteb.languages import LanguageScripts
 from mteb.models.model_meta import ScoringFunction
 from mteb.types import (
-    HFSubset,
-    ISOLanguage,
-    ISOLanguageScript,
-    Score,
     ScoresDict,
     SplitName,
 )
 
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable, Mapping
+    from pathlib import Path
+
+    from typing_extensions import Self
+
+    from mteb.abstasks.task_metadata import TaskDomain
+    from mteb.types import (
+        HFSubset,
+        ISOLanguage,
+        ISOLanguageScript,
+        Score,
+    )
+
 logger = logging.getLogger(__name__)
 
 
@@ -1,8 +1,14 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import torch
 
-from mteb.models import EncoderProtocol
 from mteb.models.model_meta import ScoringFunction
-from mteb.types import Array
+
+if TYPE_CHECKING:
+    from mteb.models import EncoderProtocol
+    from mteb.types import Array
 
 
 def _use_torch_compile():
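Note: the hunks above all apply the same refactor. Imports needed only for type annotations are moved under `if TYPE_CHECKING:` (a constant that is `False` at runtime), and the first argument to `cast()` is quoted so the referenced name never has to exist when the module is executed, which defers these imports to type-checking time and avoids import cycles. A minimal, self-contained sketch of the pattern (illustrative names only, not mteb code):

    from __future__ import annotations

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        # Seen only by type checkers; never imported when the module runs.
        from collections.abc import Callable


    def apply_once(fn: Callable[[int], int] | None) -> int:
        # The quoted type keeps cast() valid even though Callable is not
        # bound at runtime; cast() simply returns its second argument.
        fn = cast("Callable[[int], int]", fn or (lambda x: x + 1))
        return fn(1)
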
@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval import (
     CQADupstackAndroidRetrieval,
     CQADupstackEnglishRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
     CQADupstackWordpressRetrieval,
 )
 
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidRetrieval(),
     CQADupstackEnglishRetrieval(),
     CQADupstackGamingRetrieval(),
@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
     STS17MultilingualVisualSTS,
 )
 
-task_list_sts17: list[AbsTask] = [
+task_list_sts17 = [
     STS17MultilingualVisualSTS().filter_languages(
         languages=["eng"], hf_subsets=["en-en"]
     )
@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
     STSBenchmarkMultilingualVisualSTS,
 )
 
-task_list_stsb: list[AbsTask] = [
+task_list_stsb = [
     STSBenchmarkMultilingualVisualSTS().filter_languages(
         languages=["eng"], hf_subsets=["en"]
     )
@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval import (
     CQADupstackAndroidRetrievalFa,
     CQADupstackEnglishRetrievalFa,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
     CQADupstackWordpressRetrievalFa,
 )
 
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidRetrievalFa(),
     CQADupstackEnglishRetrievalFa(),
     CQADupstackGamingRetrievalFa(),
@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.classification import (
     SynPerChatbotConvSAAnger,
     SynPerChatbotConvSAFear,
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
     SynPerChatbotConvSASurprise,
 )
 
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     SynPerChatbotConvSAAnger(),
     SynPerChatbotConvSASatisfaction(),
     SynPerChatbotConvSAFriendship(),
@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
     STS17MultilingualVisualSTS,
 )
 
-task_list_sts17_multi: list[AbsTask] = [
+task_list_sts17_multi = [
     STS17MultilingualVisualSTS().filter_languages(
         languages=["ara", "eng", "spa", "kor"],
         hf_subsets=[
@@ -1,10 +1,10 @@
-from mteb.abstasks.abstask import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
     STSBenchmarkMultilingualVisualSTS,
 )
 
-task_list_multi: list[AbsTask] = [
+task_list_multi = [
     STSBenchmarkMultilingualVisualSTS().filter_languages(
         languages=[
             "deu",
@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval import (
     CQADupstackAndroidNLRetrieval,
     CQADupstackEnglishNLRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
     CQADupstackWordpressNLRetrieval,
 )
 
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidNLRetrieval(),
     CQADupstackEnglishNLRetrieval(),
     CQADupstackGamingNLRetrieval(),
@@ -1,5 +1,5 @@
-from mteb.abstasks import AbsTask
-from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
+from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
     CQADupstackAndroidRetrievalPL,
     CQADupstackEnglishRetrievalPL,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
     CQADupstackWordpressRetrievalPL,
 )
 
-task_list_cqa: list[AbsTask] = [
+task_list_cqa = [
     CQADupstackAndroidRetrievalPL(),
     CQADupstackEnglishRetrievalPL(),
     CQADupstackGamingRetrievalPL(),
@@ -1,13 +1,18 @@
+from __future__ import annotations
+
 import random
-from collections.abc import Iterable
 from itertools import islice
-from typing import TypeVar
+from typing import TYPE_CHECKING, TypeVar
 
 import datasets
 
 from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
 from mteb.abstasks.task_metadata import TaskMetadata
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
 T = TypeVar("T")
 
 
@@ -1,13 +1,18 @@
+from __future__ import annotations
+
 import random
-from collections.abc import Iterable
 from itertools import islice
-from typing import TypeVar
+from typing import TYPE_CHECKING, TypeVar
 
 import datasets
 
 from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
 from mteb.abstasks.task_metadata import TaskMetadata
 
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+
 T = TypeVar("T")
 
 
@@ -14,6 +14,28 @@ from .birco_whats_that_book_reranking import BIRCOWhatsThatBookReranking
 from .blink_it2i_retrieval import BLINKIT2IRetrieval
 from .blink_it2t_retrieval import BLINKIT2TRetrieval
 from .bright_retrieval import BrightLongRetrieval, BrightRetrieval
+from .bright_v1_1_retrieval import (
+    BrightAopsRetrieval,
+    BrightBiologyLongRetrieval,
+    BrightBiologyRetrieval,
+    BrightEarthScienceLongRetrieval,
+    BrightEarthScienceRetrieval,
+    BrightEconomicsLongRetrieval,
+    BrightEconomicsRetrieval,
+    BrightLeetcodeRetrieval,
+    BrightPonyLongRetrieval,
+    BrightPonyRetrieval,
+    BrightPsychologyLongRetrieval,
+    BrightPsychologyRetrieval,
+    BrightRoboticsLongRetrieval,
+    BrightRoboticsRetrieval,
+    BrightStackoverflowLongRetrieval,
+    BrightStackoverflowRetrieval,
+    BrightSustainableLivingLongRetrieval,
+    BrightSustainableLivingRetrieval,
+    BrightTheoremQAQuestionsRetrieval,
+    BrightTheoremQATheoremsRetrieval,
+)
 from .built_bench_retrieval import BuiltBenchRetrieval
 from .chat_doctor_retrieval import ChatDoctorRetrieval
 from .chem_hotpot_qa_retrieval import ChemHotpotQARetrieval
@@ -236,8 +258,28 @@ __all__ = [
     "BarExamQARetrieval",
     "BillSumCARetrieval",
     "BillSumUSRetrieval",
+    "BrightAopsRetrieval",
+    "BrightBiologyLongRetrieval",
+    "BrightBiologyRetrieval",
+    "BrightEarthScienceLongRetrieval",
+    "BrightEarthScienceRetrieval",
+    "BrightEconomicsLongRetrieval",
+    "BrightEconomicsRetrieval",
+    "BrightLeetcodeRetrieval",
     "BrightLongRetrieval",
+    "BrightPonyLongRetrieval",
+    "BrightPonyRetrieval",
+    "BrightPsychologyLongRetrieval",
+    "BrightPsychologyRetrieval",
     "BrightRetrieval",
+    "BrightRoboticsLongRetrieval",
+    "BrightRoboticsRetrieval",
+    "BrightStackoverflowLongRetrieval",
+    "BrightStackoverflowRetrieval",
+    "BrightSustainableLivingLongRetrieval",
+    "BrightSustainableLivingRetrieval",
+    "BrightTheoremQAQuestionsRetrieval",
+    "BrightTheoremQATheoremsRetrieval",
     "BuiltBenchRetrieval",
     "CIRRIT2IRetrieval",
     "CQADupstackAndroidRetrieval",
@@ -1,3 +1,4 @@
+import warnings
 from collections import defaultdict
 
 import datasets
@@ -86,6 +87,12 @@ def load_data(self) -> None:
     if self.data_loaded:
         return
 
+    warnings.warn(
+        "This task contains wrong prompts in the metadata. "
+        "Please use BRIGHT(v1.1) benchmark instead.",
+        category=DeprecationWarning,
+    )
+
     self.corpus, self.queries, self.relevant_docs = self.load_bright_data(
         path=self.metadata.dataset["path"],
         domains=list(self.metadata.eval_langs.keys()),
@@ -104,7 +111,7 @@ class BrightRetrieval(AbsTaskRetrieval):
             "revision": "a75a0eb483f6a5233a6efc2d63d71540a4443dfb",
         },
         reference="https://huggingface.co/datasets/xlangai/BRIGHT",
-        description="Bright retrieval dataset.",
+        description="BRIGHT: A Realistic and Challenging Benchmark for Reasoning-Intensive Retrieval",
         type="Retrieval",
        category="t2t",
         eval_splits=["standard"],
@@ -129,6 +136,7 @@ class BrightRetrieval(AbsTaskRetrieval):
         year = {2024},
         }
         """,
+        superseded_by="BrightBiologyRetrieval",
     )
     load_bright_data = load_bright_data
     load_data = load_data
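With the old `BrightRetrieval` task now emitting a DeprecationWarning and marked `superseded_by="BrightBiologyRetrieval"`, evaluations would move to the per-domain v1.1 tasks exported above. A hedged usage sketch, assuming the standard `mteb.get_tasks` entry point; the benchmark name "BRIGHT(v1.1)" is taken from the warning text and its registered spelling is an assumption:

    import mteb

    # Select individual BRIGHT v1.1 tasks by the names added in this release.
    tasks = mteb.get_tasks(tasks=["BrightBiologyRetrieval", "BrightEconomicsRetrieval"])

    # Or, if the aggregated benchmark is registered under the name the warning uses:
    # benchmark = mteb.get_benchmark("BRIGHT(v1.1)")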