mteb 2.7.1__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (155)
  1. mteb/__init__.py +2 -0
  2. mteb/_create_dataloaders.py +16 -9
  3. mteb/_evaluators/any_sts_evaluator.py +10 -5
  4. mteb/_evaluators/clustering_evaluator.py +10 -4
  5. mteb/_evaluators/evaluator.py +9 -4
  6. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
  7. mteb/_evaluators/pair_classification_evaluator.py +10 -5
  8. mteb/_evaluators/retrieval_evaluator.py +19 -13
  9. mteb/_evaluators/retrieval_metrics.py +9 -3
  10. mteb/_evaluators/sklearn_evaluator.py +14 -10
  11. mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
  12. mteb/_evaluators/text/summarization_evaluator.py +8 -4
  13. mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
  14. mteb/_helpful_enum.py +5 -1
  15. mteb/abstasks/_data_filter/filters.py +8 -2
  16. mteb/abstasks/_data_filter/task_pipelines.py +7 -2
  17. mteb/abstasks/_statistics_calculation.py +6 -4
  18. mteb/abstasks/abstask.py +17 -9
  19. mteb/abstasks/aggregate_task_metadata.py +20 -9
  20. mteb/abstasks/aggregated_task.py +15 -8
  21. mteb/abstasks/classification.py +15 -6
  22. mteb/abstasks/clustering.py +17 -8
  23. mteb/abstasks/clustering_legacy.py +14 -6
  24. mteb/abstasks/image/image_text_pair_classification.py +17 -7
  25. mteb/abstasks/multilabel_classification.py +11 -5
  26. mteb/abstasks/pair_classification.py +19 -9
  27. mteb/abstasks/regression.py +14 -6
  28. mteb/abstasks/retrieval.py +27 -16
  29. mteb/abstasks/retrieval_dataset_loaders.py +11 -8
  30. mteb/abstasks/sts.py +19 -10
  31. mteb/abstasks/task_metadata.py +17 -8
  32. mteb/abstasks/text/bitext_mining.py +14 -7
  33. mteb/abstasks/text/summarization.py +17 -7
  34. mteb/abstasks/zeroshot_classification.py +15 -7
  35. mteb/benchmarks/_create_table.py +13 -3
  36. mteb/benchmarks/benchmark.py +11 -1
  37. mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
  38. mteb/cache.py +20 -14
  39. mteb/cli/_display_tasks.py +9 -3
  40. mteb/cli/build_cli.py +5 -2
  41. mteb/cli/generate_model_card.py +9 -2
  42. mteb/deprecated_evaluator.py +16 -12
  43. mteb/evaluate.py +20 -18
  44. mteb/filter_tasks.py +12 -7
  45. mteb/get_tasks.py +9 -4
  46. mteb/languages/language_scripts.py +8 -3
  47. mteb/leaderboard/app.py +7 -3
  48. mteb/leaderboard/table.py +7 -2
  49. mteb/load_results.py +9 -3
  50. mteb/models/abs_encoder.py +22 -12
  51. mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
  52. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
  53. mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
  54. mteb/models/cache_wrappers/cache_wrapper.py +14 -9
  55. mteb/models/get_model_meta.py +11 -4
  56. mteb/models/instruct_wrapper.py +13 -5
  57. mteb/models/model_implementations/align_models.py +9 -4
  58. mteb/models/model_implementations/bedrock_models.py +16 -6
  59. mteb/models/model_implementations/blip2_models.py +9 -4
  60. mteb/models/model_implementations/blip_models.py +9 -4
  61. mteb/models/model_implementations/bm25.py +15 -10
  62. mteb/models/model_implementations/bmretriever_models.py +6 -2
  63. mteb/models/model_implementations/cde_models.py +9 -5
  64. mteb/models/model_implementations/clip_models.py +9 -4
  65. mteb/models/model_implementations/cohere_models.py +10 -4
  66. mteb/models/model_implementations/cohere_v.py +9 -4
  67. mteb/models/model_implementations/colpali_models.py +4 -3
  68. mteb/models/model_implementations/colqwen_models.py +10 -31
  69. mteb/models/model_implementations/colsmol_models.py +1 -1
  70. mteb/models/model_implementations/conan_models.py +10 -4
  71. mteb/models/model_implementations/dino_models.py +9 -4
  72. mteb/models/model_implementations/e5_v.py +9 -4
  73. mteb/models/model_implementations/eagerworks_models.py +10 -4
  74. mteb/models/model_implementations/evaclip_models.py +9 -4
  75. mteb/models/model_implementations/gme_v_models.py +5 -3
  76. mteb/models/model_implementations/google_models.py +10 -4
  77. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -5
  78. mteb/models/model_implementations/hinvec_models.py +5 -1
  79. mteb/models/model_implementations/jasper_models.py +12 -5
  80. mteb/models/model_implementations/jina_clip.py +9 -4
  81. mteb/models/model_implementations/jina_models.py +10 -5
  82. mteb/models/model_implementations/kalm_models.py +18 -12
  83. mteb/models/model_implementations/linq_models.py +6 -1
  84. mteb/models/model_implementations/listconranker.py +9 -4
  85. mteb/models/model_implementations/llm2clip_models.py +9 -4
  86. mteb/models/model_implementations/llm2vec_models.py +12 -6
  87. mteb/models/model_implementations/mcinext_models.py +5 -2
  88. mteb/models/model_implementations/mdbr_models.py +3 -1
  89. mteb/models/model_implementations/{mxbai_models.py → mixedbread_ai_models.py} +91 -0
  90. mteb/models/model_implementations/moco_models.py +9 -4
  91. mteb/models/model_implementations/mod_models.py +1 -1
  92. mteb/models/model_implementations/model2vec_models.py +10 -4
  93. mteb/models/model_implementations/no_instruct_sentence_models.py +12 -5
  94. mteb/models/model_implementations/nomic_models.py +10 -4
  95. mteb/models/model_implementations/nomic_models_vision.py +4 -3
  96. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +7 -3
  97. mteb/models/model_implementations/nvidia_models.py +12 -4
  98. mteb/models/model_implementations/octen_models.py +1 -1
  99. mteb/models/model_implementations/openai_models.py +9 -4
  100. mteb/models/model_implementations/openclip_models.py +9 -4
  101. mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -4
  102. mteb/models/model_implementations/ops_moa_models.py +7 -2
  103. mteb/models/model_implementations/pixie_models.py +56 -0
  104. mteb/models/model_implementations/promptriever_models.py +12 -6
  105. mteb/models/model_implementations/pylate_models.py +19 -13
  106. mteb/models/model_implementations/qwen3_models.py +8 -1
  107. mteb/models/model_implementations/random_baseline.py +4 -3
  108. mteb/models/model_implementations/repllama_models.py +13 -6
  109. mteb/models/model_implementations/rerankers_custom.py +10 -4
  110. mteb/models/model_implementations/rerankers_monot5_based.py +10 -4
  111. mteb/models/model_implementations/salesforce_models.py +7 -1
  112. mteb/models/model_implementations/seed_1_6_embedding_models.py +4 -2
  113. mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +5 -2
  114. mteb/models/model_implementations/seed_models.py +1 -1
  115. mteb/models/model_implementations/siglip_models.py +9 -4
  116. mteb/models/model_implementations/slm_models.py +7 -4
  117. mteb/models/model_implementations/uae_models.py +9 -4
  118. mteb/models/model_implementations/vdr_models.py +7 -1
  119. mteb/models/model_implementations/vista_models.py +9 -4
  120. mteb/models/model_implementations/vlm2vec_models.py +9 -4
  121. mteb/models/model_implementations/voyage_models.py +10 -4
  122. mteb/models/model_implementations/voyage_v.py +10 -6
  123. mteb/models/model_implementations/yuan_models_en.py +1 -1
  124. mteb/models/model_meta.py +12 -7
  125. mteb/models/models_protocols.py +19 -18
  126. mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
  127. mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
  128. mteb/models/search_wrappers.py +19 -12
  129. mteb/models/sentence_transformer_wrapper.py +4 -3
  130. mteb/models/vllm_wrapper.py +8 -6
  131. mteb/results/benchmark_results.py +22 -17
  132. mteb/results/model_result.py +21 -15
  133. mteb/results/task_result.py +41 -10
  134. mteb/similarity_functions.py +8 -2
  135. mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
  136. mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
  137. mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
  138. mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
  139. mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
  140. mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
  141. mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
  142. mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
  143. mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
  144. mteb/tasks/clustering/nob/snl_clustering.py +7 -2
  145. mteb/tasks/clustering/nob/vg_clustering.py +7 -2
  146. mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
  147. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
  148. mteb/types/_encoder_io.py +1 -1
  149. mteb/types/statistics.py +9 -2
  150. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/METADATA +1 -1
  151. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/RECORD +155 -154
  152. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/WHEEL +0 -0
  153. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/entry_points.txt +0 -0
  154. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/licenses/LICENSE +0 -0
  155. {mteb-2.7.1.dist-info → mteb-2.7.3.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
4
4
  STS17MultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_sts17: list[AbsTask] = [
7
+ task_list_sts17 = [
8
8
  STS17MultilingualVisualSTS().filter_languages(
9
9
  languages=["eng"], hf_subsets=["en-en"]
10
10
  )
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
4
4
  STSBenchmarkMultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_stsb: list[AbsTask] = [
7
+ task_list_stsb = [
8
8
  STSBenchmarkMultilingualVisualSTS().filter_languages(
9
9
  languages=["eng"], hf_subsets=["en"]
10
10
  )
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidRetrievalFa,
5
5
  CQADupstackEnglishRetrievalFa,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressRetrievalFa,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrievalFa(),
20
20
  CQADupstackEnglishRetrievalFa(),
21
21
  CQADupstackGamingRetrievalFa(),
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.classification import (
4
4
  SynPerChatbotConvSAAnger,
5
5
  SynPerChatbotConvSAFear,
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
12
12
  SynPerChatbotConvSASurprise,
13
13
  )
14
14
 
15
- task_list_cqa: list[AbsTask] = [
15
+ task_list_cqa = [
16
16
  SynPerChatbotConvSAAnger(),
17
17
  SynPerChatbotConvSASatisfaction(),
18
18
  SynPerChatbotConvSAFriendship(),
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
4
4
  STS17MultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_sts17_multi: list[AbsTask] = [
7
+ task_list_sts17_multi = [
8
8
  STS17MultilingualVisualSTS().filter_languages(
9
9
  languages=["ara", "eng", "spa", "kor"],
10
10
  hf_subsets=[
@@ -1,10 +1,10 @@
1
- from mteb.abstasks.abstask import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
4
4
  STSBenchmarkMultilingualVisualSTS,
5
5
  )
6
6
 
7
- task_list_multi: list[AbsTask] = [
7
+ task_list_multi = [
8
8
  STSBenchmarkMultilingualVisualSTS().filter_languages(
9
9
  languages=[
10
10
  "deu",
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval import (
4
4
  CQADupstackAndroidNLRetrieval,
5
5
  CQADupstackEnglishNLRetrieval,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
15
15
  CQADupstackWordpressNLRetrieval,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidNLRetrieval(),
20
20
  CQADupstackEnglishNLRetrieval(),
21
21
  CQADupstackGamingNLRetrieval(),
@@ -1,5 +1,5 @@
1
- from mteb.abstasks import AbsTask
2
- from mteb.abstasks.aggregated_task import AbsTaskAggregate, AggregateTaskMetadata
1
+ from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
2
+ from mteb.abstasks.aggregated_task import AbsTaskAggregate
3
3
  from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
4
4
  CQADupstackAndroidRetrievalPL,
5
5
  CQADupstackEnglishRetrievalPL,
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
15
15
  CQADupstackWordpressRetrievalPL,
16
16
  )
17
17
 
18
- task_list_cqa: list[AbsTask] = [
18
+ task_list_cqa = [
19
19
  CQADupstackAndroidRetrievalPL(),
20
20
  CQADupstackEnglishRetrievalPL(),
21
21
  CQADupstackGamingRetrievalPL(),
@@ -1,13 +1,18 @@
1
+ from __future__ import annotations
2
+
1
3
  import random
2
- from collections.abc import Iterable
3
4
  from itertools import islice
4
- from typing import TypeVar
5
+ from typing import TYPE_CHECKING, TypeVar
5
6
 
6
7
  import datasets
7
8
 
8
9
  from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
9
10
  from mteb.abstasks.task_metadata import TaskMetadata
10
11
 
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterable
14
+
15
+
11
16
  T = TypeVar("T")
12
17
 
13
18
 
@@ -1,13 +1,18 @@
1
+ from __future__ import annotations
2
+
1
3
  import random
2
- from collections.abc import Iterable
3
4
  from itertools import islice
4
- from typing import TypeVar
5
+ from typing import TYPE_CHECKING, TypeVar
5
6
 
6
7
  import datasets
7
8
 
8
9
  from mteb.abstasks.clustering_legacy import AbsTaskClusteringLegacy
9
10
  from mteb.abstasks.task_metadata import TaskMetadata
10
11
 
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterable
14
+
15
+
11
16
  T = TypeVar("T")
12
17
 
13
18
 
@@ -1,8 +1,13 @@
1
- from collections.abc import Sequence
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
2
4
 
3
5
  from mteb.abstasks.retrieval import AbsTaskRetrieval
4
6
  from mteb.abstasks.task_metadata import TaskMetadata
5
7
 
8
+ if TYPE_CHECKING:
9
+ from collections.abc import Sequence
10
+
6
11
  _CITATION = """
7
12
  @misc{weller2025theoreticallimit,
8
13
  archiveprefix = {arXiv},
@@ -30,15 +30,15 @@ def load_ruscibench_data(
30
30
 
31
31
  for lang in langs:
32
32
  lang_corpus = cast(
33
- datasets.Dataset,
33
+ "datasets.Dataset",
34
34
  datasets.load_dataset(path, f"corpus-{lang}", revision=revision),
35
35
  )["corpus"]
36
36
  lang_queries = cast(
37
- datasets.Dataset,
37
+ "datasets.Dataset",
38
38
  datasets.load_dataset(path, f"queries-{lang}", revision=revision),
39
39
  )["queries"]
40
40
  lang_qrels = cast(
41
- datasets.Dataset,
41
+ "datasets.Dataset",
42
42
  datasets.load_dataset(path, f"{lang}", revision=revision),
43
43
  )["test"]
44
44
  corpus[lang] = {
mteb/types/_encoder_io.py CHANGED
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, TypedDict
7
7
  import numpy as np
8
8
  import torch
9
9
  from datasets import Dataset
10
- from typing_extensions import NotRequired
11
10
 
12
11
  if TYPE_CHECKING:
13
12
  from PIL import Image
13
+ from typing_extensions import NotRequired
14
14
 
15
15
 
16
16
  class EncodeKwargs(TypedDict):
mteb/types/statistics.py CHANGED
@@ -1,6 +1,13 @@
1
- from typing_extensions import NotRequired, TypedDict
1
+ from __future__ import annotations
2
2
 
3
- from mteb.types import HFSubset
3
+ from typing import TYPE_CHECKING
4
+
5
+ from typing_extensions import TypedDict
6
+
7
+ if TYPE_CHECKING:
8
+ from typing_extensions import NotRequired
9
+
10
+ from mteb.types import HFSubset
4
11
 
5
12
 
6
13
  class SplitDescriptiveStatistics(TypedDict):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.1
3
+ Version: 2.7.3
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>