mteb 2.5.2__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
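The manifest above shows, among other things, new model implementations (mixedbread_ai_models.py, pixie_models.py, slm_models.py), a new vllm_wrapper.py, and new Korean and Vietnamese retrieval tasks. A minimal smoke test after upgrading might look like the sketch below; it assumes the public 2.x entry points (mteb.get_tasks, mteb.get_model, mteb.evaluate) keep their signatures across these versions, and the task and model names are placeholders.

```python
# Hedged smoke test for the 2.5.2 -> 2.7.2 upgrade. Assumes the public 2.x
# entry points keep their signatures; the task/model names are placeholders.
import mteb

tasks = mteb.get_tasks(tasks=["STS12"])  # any small task from the registry
model = mteb.get_model("sentence-transformers/all-MiniLM-L6-v2")
results = mteb.evaluate(model, tasks=tasks)
print(results)
```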
mteb/abstasks/retrieval_dataset_loaders.py
CHANGED

@@ -136,7 +136,7 @@ class RetrievalDatasetLoader:
             "_id", "id"
         )
         logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
-        logger.info("Doc Example: %s", corpus_ds[0])
+        logger.debug("Doc Example: %s", corpus_ds[0])
         return corpus_ds

     def _load_queries(self) -> QueryDatasetType:
@@ -152,7 +152,7 @@ class RetrievalDatasetLoader:
         )

         logger.info("Loaded %d %s queries.", len(queries_ds), self.split.upper())
-        logger.info("Query Example: %s", queries_ds[0])
+        logger.debug("Query Example: %s", queries_ds[0])

         return queries_ds

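The only change in this loader is demoting the per-example log lines from info to debug. If you relied on seeing those examples, a standard logging override restores them; the module path below is assumed from the file listing above.

```python
# Restore the per-example messages that moved from INFO to DEBUG.
# The module path is assumed from the file listing, not verified here.
import logging

logging.basicConfig(level=logging.INFO)
logging.getLogger("mteb.abstasks.retrieval_dataset_loaders").setLevel(logging.DEBUG)
```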
mteb/abstasks/sts.py
CHANGED

@@ -7,8 +7,8 @@ from scipy.stats import pearsonr, spearmanr

 from mteb._evaluators import AnySTSEvaluator
 from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
-from mteb.models import EncoderProtocol
-from mteb.types import PromptType
+from mteb.models import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs, PromptType
 from mteb.types.statistics import (
     ImageStatistics,
     ScoreStatistics,
@@ -103,14 +103,17 @@ class AbsTaskSTS(AbsTask):

     def _evaluate_subset(
         self,
-        model: EncoderProtocol,
+        model: MTEBModels,
         data_split: Dataset,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> STSMetrics:
+        if not isinstance(model, EncoderProtocol):
+            raise TypeError("Expected model to be an instance of EncoderProtocol")
+
         normalized_scores = list(map(self._normalize, data_split["score"]))
         data_split = data_split.select_columns(list(self.column_names))

@@ -142,7 +145,7 @@ class AbsTaskSTS(AbsTask):
     ) -> STSMetrics:
         def compute_corr(x: list[float], y: list[float]) -> tuple[float, float]:
             """Return (pearson, spearman) correlations between x and y."""
-            return pearsonr(x, y)[0], spearmanr(x, y)[0]
+            return float(pearsonr(x, y)[0]), float(spearmanr(x, y)[0])

         cosine_pearson, cosine_spearman = compute_corr(
             normalized_scores, scores["cosine_scores"]
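The new isinstance guard works because EncoderProtocol is presumably a runtime-checkable typing.Protocol: isinstance then checks that the protocol's methods exist on the object, not that their signatures match. A self-contained sketch of the pattern, with stand-in classes rather than mteb's real ones:

```python
# Stand-in sketch of the isinstance-against-a-Protocol pattern used above;
# Encoder here is a toy protocol, not mteb's actual EncoderProtocol.
from typing import Protocol, runtime_checkable

@runtime_checkable
class Encoder(Protocol):
    def encode(self, sentences: list[str]) -> list[list[float]]: ...

class RerankerOnly:
    # No .encode method, so it does not satisfy the protocol.
    def predict(self, pairs: list[tuple[str, str]]) -> list[float]: ...

def evaluate_subset(model: object) -> None:
    if not isinstance(model, Encoder):
        raise TypeError("Expected model to be an instance of EncoderProtocol")

evaluate_subset(RerankerOnly())  # raises TypeError
```

Note that a runtime protocol check only verifies method names are present; argument and return types are not validated.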
mteb/abstasks/task_metadata.py
CHANGED

@@ -2,9 +2,10 @@ import json
 import logging
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any, Literal
+from typing import Any, Literal, cast

 from huggingface_hub import (
+    CardData,
     DatasetCard,
     DatasetCardData,
     constants,
@@ -150,7 +151,7 @@ _TASK_TYPE = (
     "InstructionReranking",
 ) + MIEB_TASK_TYPE

-TaskType = Literal[_TASK_TYPE]
+TaskType = Literal[_TASK_TYPE]  # type: ignore[valid-type]
 """The type of the task. E.g. includes "Classification", "Retrieval" and "Clustering"."""


@@ -192,8 +193,10 @@ AnnotatorType = Literal[
 """The type of the annotators. Is often important for understanding the quality of a dataset."""


-PromptDict = TypedDict(
-    "PromptDict",
+PromptDict = TypedDict(  # type: ignore[misc]
+    "PromptDict",
+    {prompt_type.value: str for prompt_type in PromptType},
+    total=False,
 )
 """A dictionary containing the prompt used for the task.

@@ -365,7 +368,7 @@ class TaskMetadata(BaseModel):
         """Return a dictionary mapping huggingface subsets to languages."""
         if isinstance(self.eval_langs, dict):
             return self.eval_langs
-        return {"default": self.eval_langs}
+        return {"default": cast(list[str], self.eval_langs)}

     @property
     def intext_citation(self, include_cite: bool = True) -> str:
@@ -376,9 +379,8 @@ class TaskMetadata(BaseModel):
         if include_cite and cite:
             # check for whitespace in the citation
             if " " in cite:
-                logger.warning(
-                    "Citation contains whitespace. Please ensure that the citation is correctly formatted."
-                )
+                msg = "Citation contains whitespace. Please ensure that the citation is correctly formatted."
+                logger.warning(msg)
             return f"\\cite{{{cite}}}"
         return cite

@@ -414,7 +416,7 @@ class TaskMetadata(BaseModel):
         for subset, subset_value in stats.items():
             if subset == "hf_subset_descriptive_stats":
                 continue
-            n_samples[subset] = subset_value["num_samples"]
+            n_samples[subset] = subset_value["num_samples"]
         return n_samples

     @property
@@ -447,7 +449,7 @@ class TaskMetadata(BaseModel):
         Raises:
             ValueError: If the prompt type is not recognized.
         """
-        if prompt_type is None:
+        if prompt_type is None or self.category is None:
             return self.modalities
         query_modalities, doc_modalities = self.category.split("2")
         category_to_modality: dict[str, Modalities] = {
@@ -467,7 +469,7 @@ class TaskMetadata(BaseModel):

     def _create_dataset_card_data(
         self,
-        existing_dataset_card_data: DatasetCardData | None = None,
+        existing_dataset_card_data: CardData | None = None,
     ) -> tuple[DatasetCardData, dict[str, Any]]:
         """Create a DatasetCardData object from the task metadata.

@@ -483,7 +485,6 @@ class TaskMetadata(BaseModel):
         dataset_type = [
             *self._hf_task_type(),
             *self._hf_task_category(),
-            *self._hf_subtypes(),
         ]
         languages = self._hf_languages()

@@ -502,12 +503,13 @@ class TaskMetadata(BaseModel):

         tags = ["mteb"] + self.modalities

-        descriptive_stats = self.descriptive_stats
-        if descriptive_stats is not None:
-            for split, split_stat in descriptive_stats.items():
+        descriptive_stats = ""
+        if self.descriptive_stats is not None:
+            descriptive_stats_ = self.descriptive_stats
+            for split, split_stat in descriptive_stats_.items():
                 if len(split_stat.get("hf_subset_descriptive_stats", {})) > 10:
                     split_stat.pop("hf_subset_descriptive_stats", {})
-            descriptive_stats = json.dumps(
+            descriptive_stats = json.dumps(descriptive_stats_, indent=4)

         dataset_card_data_params = existing_dataset_card_data.to_dict()
         # override the existing values
@@ -584,10 +586,8 @@ class TaskMetadata(BaseModel):

     def _hf_subtypes(self) -> list[str]:
         # to get full list of available task_ids execute
-        #
-        #
-        # "repoType": "dataset"
-        # })
+        # https://huggingface.co/api/datasets-tags-by-type?type=task_ids
+        # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
        mteb_to_hf_subtype = {
             "Article retrieval": ["document-retrieval"],
             "Conversational retrieval": ["conversational", "utterance-retrieval"],
@@ -609,7 +609,7 @@ class TaskMetadata(BaseModel):
                 "hate-speech-detection",
             ],
             "Thematic clustering": [],
-            "Scientific Reranking": [],
+            "Scientific Reranking": ["text-scoring"],
             "Claim verification": ["fact-checking", "fact-checking-retrieval"],
             "Topic classification": ["topic-classification"],
             "Code retrieval": [],
@@ -617,21 +617,21 @@ class TaskMetadata(BaseModel):
             "Cross-Lingual Semantic Discrimination": [],
             "Textual Entailment": ["natural-language-inference"],
             "Counterfactual Detection": [],
-            "Emotion classification": [],
+            "Emotion classification": ["sentiment-classification"],
             "Reasoning as Retrieval": [],
             "Rendered Texts Understanding": [],
             "Image Text Retrieval": [],
             "Object recognition": [],
             "Scene recognition": [],
             "Caption Pairing": ["image-captioning"],
-            "Emotion recognition": [],
+            "Emotion recognition": ["sentiment-scoring"],
             "Textures recognition": [],
             "Activity recognition": [],
             "Tumor detection": [],
             "Duplicate Detection": [],
             "Rendered semantic textual similarity": [
                 "semantic-similarity-scoring",
-                "
+                "semantic-similarity-classification",
             ],
             "Intent classification": [
                 "intent-classification",
@@ -645,10 +645,8 @@ class TaskMetadata(BaseModel):

     def _hf_task_type(self) -> list[str]:
         # to get full list of task_types execute:
-        #
-        #
-        # }).json()
-        # or look at https://huggingface.co/tasks
+        # https://huggingface.co/api/datasets-tags-by-type?type=task_categories
+        # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
         mteb_task_type_to_datasets = {
             # Text
             "BitextMining": ["translation"],
@@ -667,7 +665,7 @@ class TaskMetadata(BaseModel):
             "Any2AnyRetrieval": ["visual-document-retrieval"],
             "Any2AnyMultilingualRetrieval": ["visual-document-retrieval"],
             "VisionCentricQA": ["visual-question-answering"],
-            "ImageClustering": ["image-
+            "ImageClustering": ["image-feature-extraction"],
             "ImageClassification": ["image-classification"],
             "ImageMultilabelClassification": ["image-classification"],
             "DocumentUnderstanding": ["visual-document-retrieval"],
@@ -695,11 +693,11 @@ class TaskMetadata(BaseModel):

     def _hf_languages(self) -> list[str]:
         languages: list[str] = []
-        if self.is_multilingual:
-            for val in self.eval_langs.values():
+        if self.is_multilingual and isinstance(self.eval_langs, dict):
+            for val in self.eval_langs.values():
                 languages.extend(val)
         else:
-            languages = self.eval_langs
+            languages = cast(list[str], self.eval_langs)
         # value "python" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters),
         # or a special value like "code", "multilingual".
         readme_langs = []
@@ -711,7 +709,7 @@ class TaskMetadata(BaseModel):
             readme_langs.append(lang_name)
         return sorted(set(readme_langs))

-    def _hf_license(self) -> str:
+    def _hf_license(self) -> str | None:
         dataset_license = self.license
         if dataset_license:
             license_mapping = {
mteb/abstasks/text/bitext_mining.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
 from collections import defaultdict
 from pathlib import Path
-from typing import Any, ClassVar, TypedDict
+from typing import Any, ClassVar, TypedDict, cast

 from datasets import Dataset, DatasetDict
 from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -10,7 +10,7 @@ from mteb._evaluators import BitextMiningEvaluator
 from mteb.abstasks._statistics_calculation import calculate_text_statistics
 from mteb.abstasks.abstask import AbsTask
 from mteb.models import EncoderProtocol, MTEBModels
-from mteb.types import HFSubset, ScoresDict
+from mteb.types import EncodeKwargs, HFSubset, ScoresDict
 from mteb.types.statistics import SplitDescriptiveStatistics, TextStatistics

 logger = logging.getLogger(__name__)
@@ -73,11 +73,14 @@ class AbsTaskBitextMining(AbsTask):
         split: str = "test",
         subsets_to_run: list[HFSubset] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs: Any,
     ) -> dict[HFSubset, ScoresDict]:
         """Added load for "parallel" datasets"""
+        if not isinstance(model, EncoderProtocol):
+            raise TypeError("Expected model to be an instance of EncoderProtocol")
+
         if not self.data_loaded:
             self.load_data()

@@ -87,11 +90,16 @@ class AbsTaskBitextMining(AbsTask):
         if subsets_to_run is not None:
             hf_subsets = [s for s in hf_subsets if s in subsets_to_run]

-        scores = {}
+        encoder_model = cast(EncoderProtocol, model)
+
+        if self.dataset is None:
+            raise ValueError("Dataset is not loaded.")
+
+        scores: dict[str, BitextMiningMetrics] = {}
         if self.parallel_subsets:
-            scores = self._evaluate_subset(
-                model,
-                self.dataset[split],
+            scores = self._evaluate_subset(  # type: ignore[assignment]
+                encoder_model,
+                self.dataset[split],
                 parallel=True,
                 hf_split=split,
                 hf_subset="parallel",
@@ -109,8 +117,8 @@ class AbsTaskBitextMining(AbsTask):
                     data_split = self.dataset[split]
                 else:
                     data_split = self.dataset[hf_subset][split]
-                scores[hf_subset] = self._evaluate_subset(
-                    model,
+                scores[hf_subset] = self._evaluate_subset(  # type: ignore[assignment]
+                    encoder_model,
                     data_split,
                     hf_split=split,
                     hf_subset=hf_subset,
@@ -119,32 +127,32 @@ class AbsTaskBitextMining(AbsTask):
                     **kwargs,
                 )

-        return scores
+        return cast(dict[HFSubset, ScoresDict], scores)

     def _get_pairs(self, parallel: bool) -> list[tuple[str, str]]:
         pairs = self._DEFAULT_PAIR
         if parallel:
-            pairs = [langpair.split("-") for langpair in self.hf_subsets]
+            pairs = [langpair.split("-") for langpair in self.hf_subsets]  # type: ignore[misc]
         return pairs

-    def _evaluate_subset(
+    def _evaluate_subset(  # type: ignore[override]
         self,
         model: EncoderProtocol,
         data_split: Dataset,
         *,
         hf_split: str,
         hf_subset: str,
-
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
+        parallel: bool = False,
         **kwargs,
-    ) -> BitextMiningMetrics:
+    ) -> BitextMiningMetrics | dict[str, BitextMiningMetrics]:
         pairs = self._get_pairs(parallel)

         evaluator = BitextMiningEvaluator(
             data_split,
             task_metadata=self.metadata,
-            pair_columns=pairs,
+            pair_columns=pairs,
             hf_split=hf_split,
             hf_subset=hf_subset,
             **kwargs,
@@ -168,16 +176,16 @@ class AbsTaskBitextMining(AbsTask):
         )

         if parallel:
-            scores = {}
+            parallel_metrics = {}
             for keys, nearest_neighbors in neighbours.items():
-                scores[keys] = self._compute_metrics(nearest_neighbors, gold)
+                parallel_metrics[keys] = self._compute_metrics(nearest_neighbors, gold)

-            for v in scores.values():
+            for v in parallel_metrics.values():
                 self._add_main_score(v)
-            return scores
-
-
-
+            return parallel_metrics
+        def_pair_str = "-".join(self._DEFAULT_PAIR[0])
+        metrics = self._compute_metrics(neighbours[def_pair_str], gold)
+        self._add_main_score(metrics)
         return metrics

     def _compute_metrics(
@@ -250,8 +258,11 @@ class AbsTaskBitextMining(AbsTask):
         )

     def _push_dataset_to_hub(self, repo_name: str) -> None:
+        if self.dataset is None:
+            raise ValueError("Dataset is not loaded.")
+
         if self.metadata.is_multilingual:
-            dataset = defaultdict(dict)
+            dataset: dict[str, dict[str, list[str]]] = defaultdict(dict)
             for config in self.metadata.eval_langs:
                 logger.info(f"Converting {config} of {self.metadata.name}")

@@ -266,10 +277,10 @@ class AbsTaskBitextMining(AbsTask):
                 for split in self.dataset[config]:
                     dataset[split][lang_1] = self.dataset[config][split][sent_1]
                     dataset[split][lang_2] = self.dataset[config][split][sent_2]
-
-
-
-
+            dataset_dict = DatasetDict(
+                {split: Dataset.from_dict(dataset[split]) for split in dataset}
+            )
+            dataset_dict.push_to_hub(repo_name)
         else:
             sentences = {}
             for split in self.dataset:
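The bitext changes lean heavily on typing.cast. It is worth remembering that cast performs no conversion or validation at runtime, which is exactly why the isinstance guard above it is still needed. A short illustration:

```python
# cast() only re-types the value for the type checker; the object is untouched.
from typing import cast

value: object = ["en", "de"]
langs = cast(list[str], value)  # no copy, no runtime check
assert langs is value
```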
mteb/abstasks/text/reranking.py
CHANGED

@@ -16,7 +16,7 @@ else:

 logger = logging.getLogger(__name__)

-OLD_FORMAT_RERANKING_TASKS = []
+OLD_FORMAT_RERANKING_TASKS: list[str] = []


 @deprecated(
@@ -100,12 +100,14 @@ class AbsTaskReranking(AbsTaskRetrieval):
         if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
             return

-
+        logger.info(
             f"Transforming old format to standard format for {self.metadata.name}"
         )

         given_dataset = copy(given_dataset)
-        self.dataset = defaultdict(lambda: defaultdict(dict))
+        self.dataset: dict[str, dict[str, RetrievalSplitData]] = defaultdict(
+            lambda: defaultdict(dict)  # type: ignore[arg-type]
+        )

         hf_subsets = self.hf_subsets

@@ -115,19 +117,19 @@ class AbsTaskReranking(AbsTaskRetrieval):
             if hf_subset in cur_dataset:
                 cur_dataset = cur_dataset[hf_subset]
             elif "name" in self.metadata.dataset:
-                cur_dataset = datasets.load_dataset(**self.metadata.dataset)
+                cur_dataset = datasets.load_dataset(**self.metadata.dataset)
                 assert hf_subset == "default", (
                     f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
                 )
             else:
                 cur_dataset = datasets.load_dataset(
                     **self.metadata.dataset, name=hf_subset
-                )
+                )

             for split in cur_dataset:
                 corpus = []
                 queries = []
-                relevant_docs = defaultdict(dict)
+                relevant_docs: dict[str, dict[str, int]] = defaultdict(dict)
                 top_ranked = defaultdict(list)

                 # Create an enumerated dataset to pass indices
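The annotated defaultdict(lambda: defaultdict(dict)) above builds a two-level mapping whose intermediate dicts materialize on first access. A standalone sketch of the pattern:

```python
# Standalone sketch of the nested-defaultdict pattern annotated above.
from collections import defaultdict

dataset: dict[str, dict[str, dict]] = defaultdict(lambda: defaultdict(dict))
dataset["default"]["test"]["queries"] = {}  # neither level raises KeyError
print(list(dataset["default"].keys()))      # ['test']
```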
mteb/abstasks/text/summarization.py
CHANGED

@@ -1,6 +1,5 @@
 import logging
 from pathlib import Path
-from typing import Any

 import numpy as np
 from datasets import Dataset
@@ -12,7 +11,8 @@ from mteb.abstasks._statistics_calculation import (
     calculate_text_statistics,
 )
 from mteb.abstasks.abstask import AbsTask
-from mteb.models import EncoderProtocol
+from mteb.models import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs
 from mteb.types.statistics import (
     ScoreStatistics,
     SplitDescriptiveStatistics,
@@ -77,17 +77,22 @@ class AbsTaskSummarization(AbsTask):

     def _evaluate_subset(
         self,
-        model: EncoderProtocol,
+        model: MTEBModels,
         data_split: Dataset,
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs,
     ) -> SummarizationMetrics:
+        if not isinstance(model, EncoderProtocol):
+            raise TypeError("Expected model to be an instance of EncoderProtocol")
+
         normalized_scores = [
-            (np.array(x) - self.min_score) / (self.max_score - self.min_score)
+            (
+                (np.array(x) - self.min_score) / (self.max_score - self.min_score)
+            ).tolist()
             for x in data_split[self.relevancy_column_name]
         ]
         evaluator = self.evaluator(
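The summarization fix wraps the min-max normalization in .tolist() so the scores come back as plain Python floats rather than numpy arrays. The formula in isolation, with illustrative min/max values:

```python
# Min-max normalization as introduced above; .tolist() converts numpy values
# back into plain floats. min_score/max_score here are illustrative.
import numpy as np

min_score, max_score = 1, 5
x = [1, 3, 5]
normalized = ((np.array(x) - min_score) / (max_score - min_score)).tolist()
print(normalized)  # [0.0, 0.5, 1.0]
```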
mteb/abstasks/zeroshot_classification.py
CHANGED

@@ -1,13 +1,14 @@
 import logging
 from pathlib import Path
-from typing import Any, TypedDict
+from typing import TypedDict

 import torch
 from datasets import Dataset
 from sklearn import metrics

 from mteb._evaluators import ZeroShotClassificationEvaluator
-from mteb.models import EncoderProtocol
+from mteb.models import EncoderProtocol, MTEBModels
+from mteb.types import EncodeKwargs
 from mteb.types.statistics import (
     ImageStatistics,
     LabelStatistics,
@@ -111,15 +112,18 @@ class AbsTaskZeroShotClassification(AbsTask):

     def _evaluate_subset(
         self,
-        model: EncoderProtocol,
+        model: MTEBModels,
         data_split: Dataset,
         *,
         hf_split: str,
         hf_subset: str,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
         **kwargs,
     ) -> ZeroShotClassificationMetrics:
+        if not isinstance(model, EncoderProtocol):
+            raise TypeError("Expected model to be an instance of EncoderProtocol")
+
         candidate_labels = self.get_candidate_labels()
         data_split = data_split.select_columns(
             [self.input_column_name, self.label_column_name]
mteb/benchmarks/benchmark.py
CHANGED

@@ -1,6 +1,6 @@
 from __future__ import annotations

-from collections.abc import
+from collections.abc import Iterator, Sequence
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Literal

@@ -19,6 +19,7 @@ class Benchmark:

     Args:
         name: The name of the benchmark
+        aliases: Alternative names for the benchmark
         tasks: The tasks within the benchmark.
         description: A description of the benchmark, should include its intended goal and potentially a description of its construction
         reference: A link reference, to a source containing additional information typically to a paper, leaderboard or github.
@@ -38,6 +39,7 @@ class Benchmark:

     name: str
     tasks: Sequence[AbsTask]
+    aliases: Sequence[str] = field(default_factory=tuple)
     description: str | None = None
     reference: StrURL | None = None
     citation: str | None = None
@@ -47,7 +49,7 @@ class Benchmark:
     display_name: str | None = None
     language_view: list[str] | Literal["all"] = field(default_factory=list)

-    def __iter__(self) ->
+    def __iter__(self) -> Iterator[AbsTask]:
         return iter(self.tasks)

     def __len__(self) -> int:
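The new aliases field follows the dataclass convention already used for language_view: sequence-valued defaults go through field(default_factory=...) so instances never share mutable state. A toy version of the dataclass, not mteb's full Benchmark:

```python
# Toy Benchmark showing the new aliases field; an illustration only.
from collections.abc import Iterator, Sequence
from dataclasses import dataclass, field

@dataclass
class Benchmark:
    name: str
    tasks: Sequence[str]
    aliases: Sequence[str] = field(default_factory=tuple)  # safe empty default

    def __iter__(self) -> Iterator[str]:
        return iter(self.tasks)

    def __len__(self) -> int:
        return len(self.tasks)

b = Benchmark("MTEB(custom)", ["STS12"], aliases=("my-benchmark",))
print(len(b), b.aliases)  # 1 ('my-benchmark',)
```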
mteb/benchmarks/benchmarks/__init__.py
CHANGED

@@ -6,6 +6,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     BUILT_MTEB,
     C_MTEB,
     CHEMTEB,
+    CHEMTEB_V1_1,
     CODE_RAG,
     ENCODECHKA,
     FA_MTEB,
@@ -14,6 +15,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     JINA_VDR,
     JMTEB_LITE_V1,
     JMTEB_V2,
+    KOVIDORE_V2,
     LONG_EMBED,
     MIEB_ENG,
     MIEB_IMG,
@@ -69,6 +71,7 @@ __all__ = [
     "BRIGHT_LONG",
     "BUILT_MTEB",
     "CHEMTEB",
+    "CHEMTEB_V1_1",
     "CODE_RAG",
     "C_MTEB",
     "ENCODECHKA",
@@ -79,6 +82,7 @@ __all__ = [
     "JINA_VDR",
     "JMTEB_LITE_V1",
     "JMTEB_V2",
+    "KOVIDORE_V2",
     "LONG_EMBED",
     "MIEB_ENG",
     "MIEB_IMG",