mteb-2.5.2-py3-none-any.whl → mteb-2.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
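To reproduce a comparison like this locally, here is a minimal sketch (illustrative only; it assumes both wheels have been downloaded next to the script under their standard filenames):

import difflib
import zipfile

# A wheel is a zip archive: open both versions and diff one module.
old = zipfile.ZipFile("mteb-2.5.2-py3-none-any.whl")
new = zipfile.ZipFile("mteb-2.7.2-py3-none-any.whl")
name = "mteb/deprecated_evaluator.py"
a = old.read(name).decode("utf-8").splitlines(keepends=True)
b = new.read(name).decode("utf-8").splitlines(keepends=True)
print("".join(difflib.unified_diff(a, b, f"2.5.2/{name}", f"2.7.2/{name}")))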
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +17 -18
- mteb/_evaluators/any_sts_evaluator.py +3 -3
- mteb/_evaluators/clustering_evaluator.py +2 -2
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +10 -8
- mteb/_evaluators/pair_classification_evaluator.py +5 -3
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +11 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +27 -18
- mteb/_evaluators/text/summarization_evaluator.py +23 -18
- mteb/_evaluators/zeroshot_classification_evaluator.py +5 -3
- mteb/abstasks/_data_filter/filters.py +1 -1
- mteb/abstasks/_data_filter/task_pipelines.py +3 -0
- mteb/abstasks/_statistics_calculation.py +18 -10
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -10
- mteb/abstasks/clustering.py +19 -15
- mteb/abstasks/clustering_legacy.py +10 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +23 -19
- mteb/abstasks/pair_classification.py +20 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +8 -5
- mteb/abstasks/task_metadata.py +31 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/benchmark.py +4 -2
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +112 -11
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +182 -29
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +63 -49
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +44 -33
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +162 -34
- mteb/load_results.py +12 -12
- mteb/models/abs_encoder.py +10 -6
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +5 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +2 -2
- mteb/models/get_model_meta.py +21 -3
- mteb/models/instruct_wrapper.py +28 -8
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +4 -4
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +16 -16
- mteb/models/model_implementations/bm25.py +4 -4
- mteb/models/model_implementations/bmretriever_models.py +6 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +11 -4
- mteb/models/model_implementations/clip_models.py +6 -6
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +5 -5
- mteb/models/model_implementations/cohere_v.py +2 -2
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +42 -42
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +6 -6
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +6 -6
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +6 -5
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +48 -10
- mteb/models/model_implementations/jina_models.py +18 -11
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +4 -4
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +6 -6
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mcinext_models.py +4 -1
- mteb/models/model_implementations/mdbr_models.py +17 -3
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +4 -4
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +14 -14
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +2 -2
- mteb/models/model_implementations/nomic_models.py +30 -15
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +15 -9
- mteb/models/model_implementations/nvidia_models.py +151 -19
- mteb/models/model_implementations/octen_models.py +61 -2
- mteb/models/model_implementations/openclip_models.py +13 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +10 -9
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/random_baseline.py +3 -3
- mteb/models/model_implementations/rasgaard_models.py +2 -2
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +3 -3
- mteb/models/model_implementations/rerankers_custom.py +12 -6
- mteb/models/model_implementations/rerankers_monot5_based.py +17 -17
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +113 -146
- mteb/models/model_implementations/sentence_transformers_models.py +124 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +20 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +3 -3
- mteb/models/model_implementations/voyage_models.py +84 -0
- mteb/models/model_implementations/voyage_v.py +9 -7
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +80 -31
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +9 -6
- mteb/models/search_wrappers.py +33 -18
- mteb/models/sentence_transformer_wrapper.py +50 -25
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +29 -21
- mteb/results/model_result.py +52 -22
- mteb/results/task_result.py +80 -58
- mteb/similarity_functions.py +11 -7
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +12 -0
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/RECORD +240 -219
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/deprecated_evaluator.py
CHANGED
@@ -5,29 +5,30 @@ import logging
 import os
 import sys
 import traceback
-
+import warnings
+from collections.abc import Iterable, Sequence
 from copy import deepcopy
 from datetime import datetime
 from itertools import chain
 from pathlib import Path
 from time import time
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast

 import datasets

 import mteb
 from mteb.abstasks import AbsTask
+from mteb.abstasks.aggregated_task import AbsTaskAggregate
 from mteb.abstasks.task_metadata import TaskCategory, TaskType
 from mteb.benchmarks import Benchmark
 from mteb.models import (
     CrossEncoderWrapper,
-    EncoderProtocol,
     ModelMeta,
     MTEBModels,
     SentenceTransformerEncoderWrapper,
 )
 from mteb.results import TaskResult
-from mteb.types import ScoresDict
+from mteb.types import EncodeKwargs, ScoresDict

 if sys.version_info >= (3, 13):
     from warnings import deprecated
@@ -52,7 +53,7 @@ class MTEB:
     )
     def __init__(
         self,
-        tasks: Iterable[AbsTask | Benchmark],
+        tasks: Iterable[AbsTask] | Iterable[Benchmark],
         *,
         err_logs_path: str = "error_logs.txt",
     ) -> None:
@@ -63,15 +64,14 @@ class MTEB:
             `mteb.get_tasks(["task1","task2"]) or `mteb.get_benchmark("MTEB(eng, classic)").
             err_logs_path: Path to save error logs.
         """
-
-
-        self.tasks = list(tasks)
-        if len(self.tasks) > 0 and isinstance(self.tasks[0], Benchmark):
+        if isinstance(next(iter(tasks)), Benchmark):
             self.benchmarks = tasks
-            self.tasks = list(chain.from_iterable(self.tasks))
+            self.tasks = list(chain.from_iterable(cast(Iterable[Benchmark], tasks)))
+        elif isinstance(next(iter(tasks)), AbsTask):
+            self.tasks = list(cast(Iterable[AbsTask], tasks))

         self.err_logs_path = Path(err_logs_path)
-        self.last_evaluated_splits = {}
+        self._last_evaluated_splits: dict[str, list[str]] = {}

         @property
         def available_tasks(self) -> list[str]:
@@ -84,7 +84,7 @@ class MTEB:
         return sorted({x.metadata.type for x in self.tasks})

     @property
-    def available_task_categories(self) -> set[TaskCategory]:
+    def available_task_categories(self) -> set[TaskCategory | None]:
         """Set of available task categories."""
         return {x.metadata.category for x in self.tasks}

@@ -174,7 +174,7 @@ class MTEB:
         split: str,
         subsets_to_run: list[str] | None = None,
         *,
-        encode_kwargs: dict[str, Any],
+        encode_kwargs: EncodeKwargs,
         **kwargs: Any,
     ):
         tick = time()
@@ -231,13 +231,14 @@ class MTEB:
         merged_kg_co2_emissions = None
         if existing_kg_co2_emissions and new_kg_co2_emissions:
             merged_kg_co2_emissions = existing_kg_co2_emissions + new_kg_co2_emissions
+        existing_evaluation_time = existing_results.evaluation_time or 0
+        new_evaluation_time = new_results.evaluation_time or 0
         merged_results = TaskResult(
             dataset_revision=new_results.dataset_revision,
             task_name=new_results.task_name,
             mteb_version=new_results.mteb_version,
             scores=merged_scores,
-            evaluation_time=existing_results.evaluation_time
-            + new_results.evaluation_time,
+            evaluation_time=existing_evaluation_time + new_evaluation_time,
             kg_co2_emissions=merged_kg_co2_emissions,
         )
@@ -262,7 +263,7 @@ class MTEB:
         overwrite_results: bool = False,
         raise_error: bool = True,
         co2_tracker: bool = False,
-        encode_kwargs: dict[str, Any] | None = None,
+        encode_kwargs: EncodeKwargs | None = None,
         **kwargs,
     ) -> list[TaskResult]:
         """Run the evaluation pipeline on the selected tasks.
@@ -306,13 +307,16 @@ class MTEB:
         elif verbosity == 3:
             datasets.logging.set_verbosity(logging.DEBUG)

-        meta = self.create_model_meta(model)
-        output_path = self._create_output_folder(meta, output_folder)
-
+        mteb_model: MTEBModels
         if isinstance(model, SentenceTransformer):
-            model = SentenceTransformerEncoderWrapper(model)
+            mteb_model = SentenceTransformerEncoderWrapper(model)
         elif isinstance(model, CrossEncoder):
-            model = CrossEncoderWrapper(model)
+            mteb_model = CrossEncoderWrapper(model)
+        else:
+            mteb_model = cast(MTEBModels, model)
+
+        meta = self.create_model_meta(mteb_model)
+        output_path = self._create_output_folder(meta, output_folder)

         # Disable co2_tracker for API models
         if "API" in meta.framework:
@@ -333,7 +337,7 @@ class MTEB:
         ) # save them in case we re-use the object (e.g. for reranking)

         # To evaluate missing splits, we keep track of the task name and the corresponding splits.
-        self.last_evaluated_splits = {}
+        self._last_evaluated_splits = {}

         while len(self.tasks) > 0:
             task = self.tasks[0]
@@ -342,9 +346,10 @@ class MTEB:
             )

             if task.is_aggregate:
-                self_ = MTEB(tasks=task.metadata.tasks)
-                aggregated_task_results = self_.run(
-                    model,
+                aggregated_task = cast(AbsTaskAggregate, task)
+                self_ = MTEB(tasks=aggregated_task.metadata.tasks)
+                aggregated_task_results = self_.run(
+                    mteb_model,
                     verbosity=verbosity - 1,
                     output_folder=output_folder,
                     eval_splits=eval_splits,
@@ -355,12 +360,15 @@ class MTEB:
                     encode_kwargs=encode_kwargs,
                     **kwargs,
                 )
-                new_results = task.combine_task_results(aggregated_task_results)
+                new_results = aggregated_task.combine_task_results(
+                    aggregated_task_results
+                )
                 evaluation_results.append(new_results)

                 if output_path:
-                    new_results.to_disk(output_path / f"{task.metadata.name}.json")
-
+                    new_results.to_disk(
+                        output_path / f"{aggregated_task.metadata.name}.json"
+                    )
                 del self.tasks[0]
                 continue
@@ -382,7 +390,7 @@ class MTEB:
             task_subsets = task.hf_subsets

             existing_results = None
-            save_path = None
+            save_path: Path | None = None
             final_splits_to_run = task_eval_splits
             missing_evaluations = self._get_missing_evaluations(
                 existing_results,
@@ -432,7 +440,7 @@ class MTEB:
                 logger.info(
                     f"No splits to evaluate for {task.metadata.name}. Skipping evaluation."
                 )
-                self.last_evaluated_splits[task.metadata.name] = []
+                self._last_evaluated_splits[task.metadata.name] = []
                 del self.tasks[0]
                 continue

@@ -440,11 +448,11 @@ class MTEB:
             task.check_if_dataset_is_superseded()
             task.load_data()

-            task_results = {}
+            task_results: dict[str, dict[str, dict[str, Any]]] = {}
            evaluation_time = 0
            kg_co2_emissions: int | None = 0 if co2_tracker else None

-            self.last_evaluated_splits[task.metadata.name] = []
+            self._last_evaluated_splits[task.metadata.name] = []

             for split in final_splits_to_run:
                 info = missing_evaluations[split]
@@ -465,14 +473,16 @@ class MTEB:

                 if co2_tracker:
                     try:
-                        from codecarbon import EmissionsTracker
+                        from codecarbon import (  # type: ignore[import-not-found,import-untyped]
+                            EmissionsTracker,
+                        )
                     except ImportError:
                         raise ImportError(
                             "codecarbon is not installed. Please install it using `pip install 'mteb[codecarbon]'` to track CO₂ emissions."
                         )
-                    logger.warning(
-                        "Evaluating multiple MTEB runs simultaneously will produce incorrect CO₂ results"
-                    )
+                    msg = "Evaluating multiple MTEB runs simultaneously will produce incorrect CO₂ results"
+                    logger.warning(msg)
+                    warnings.warn(msg)
                     with EmissionsTracker(
                         save_to_file=False,
                         save_to_api=False,
@@ -481,7 +491,7 @@ class MTEB:
                     ) as tracker:
                         results, tick, tock = self._run_eval(
                             task,
-                            model,
+                            mteb_model,
                             split,
                             encode_kwargs=encode_kwargs,
                             subsets_to_run=subsets_to_run,
@@ -494,7 +504,7 @@ class MTEB:
                 else:
                     results, tick, tock = self._run_eval(
                         task,
-                        model,
+                        mteb_model,
                         split,
                         subsets_to_run=subsets_to_run,
                         encode_kwargs=encode_kwargs,
@@ -510,25 +520,25 @@ class MTEB:
                 if verbosity >= 1:
                     logger.info(f"Scores: {task_results[split]}")

-                self.last_evaluated_splits[task.metadata.name].append(split)
+                self._last_evaluated_splits[task.metadata.name].append(split)

             # Create new TaskResult
             new_results = TaskResult.from_task_results(
                 task,
-                task_results,
+                task_results,  # type: ignore[arg-type]
                 evaluation_time=evaluation_time,
                 kg_co2_emissions=kg_co2_emissions,
             )

             # Merge with existing if needed
-            if output_path and save_path.exists():
+            if output_path and save_path and save_path.exists():
                 existing_results = TaskResult.from_disk(save_path)
                 if existing_results:
                     merged_results = self._merge_results(existing_results, new_results)
             else:
                 merged_results = new_results

-            if output_path:
+            if output_path and save_path:
                 merged_results.to_disk(save_path)

             evaluation_results.append(merged_results)
@@ -555,7 +565,7 @@ class MTEB:
     def create_model_meta(model: MTEBModels) -> ModelMeta:
         """Create a ModelMeta object for the given model."""
         if hasattr(model, "mteb_model_meta") and model.mteb_model_meta is not None:
-            meta = model.mteb_model_meta
+            meta = model.mteb_model_meta
         else:
             meta = MTEB._get_model_meta(model)

@@ -581,7 +591,11 @@ class MTEB:
         if output_folder is None:
             return None

-        model_revision: str = model_meta.revision or "no_revision_available"
+        model_revision: str = (
+            model_meta.revision
+            if model_meta.revision is not None
+            else "no_revision_available"
+        )
         model_path_name = model_meta.model_name_as_path()

         output_path = Path(output_folder) / model_path_name / model_revision
@@ -603,15 +617,15 @@ class MTEB:
         Tasks with empty lists indicate that results already existed and no splits were evaluated.
         """
         return deepcopy(
-            {task: list(splits) for task, splits in self.last_evaluated_splits.items()}
+            {task: list(splits) for task, splits in self._last_evaluated_splits.items()}
         )

     @staticmethod
     def _get_missing_evaluations(
         existing_results: TaskResult | None,
-        task_eval_splits: list[str],
-        task_eval_langs: list[str],
-        eval_subsets: list[str] | None,
+        task_eval_splits: Sequence[str],
+        task_eval_langs: Sequence[str],
+        eval_subsets: Sequence[str] | None,
     ) -> dict[str, dict[str, Any]]:
         """Return a dictionary for each split, indicating if the whole split is missing and which subsets are missing."""
         missing_evaluations = {
@@ -660,7 +674,7 @@ class MTEB:
         return missing_evaluations

     @staticmethod
-    def _get_model_meta(model: EncoderProtocol) -> ModelMeta:
+    def _get_model_meta(model: MTEBModels) -> ModelMeta:
         from sentence_transformers import CrossEncoder, SentenceTransformer

         if isinstance(model, CrossEncoder):
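The net effect of the changes above is stricter typing in the deprecated MTEB class: a raw SentenceTransformer or CrossEncoder is normalized into an MTEBModels-compatible wrapper before model metadata is created, encode_kwargs is now typed as EncodeKwargs, and aggregate tasks are cast to AbsTaskAggregate before combine_task_results is called. A minimal usage sketch under those changes (assuming the deprecated MTEB class is still re-exported at the package root; the task name, model name, and batch size are illustrative):

import mteb
from sentence_transformers import SentenceTransformer

tasks = mteb.get_tasks(tasks=["STS12"])  # Iterable[AbsTask]
evaluation = mteb.MTEB(tasks=tasks)      # deprecated entry point

# run() now wraps the raw SentenceTransformer in
# SentenceTransformerEncoderWrapper before building ModelMeta.
model = SentenceTransformer("all-MiniLM-L6-v2")
results = evaluation.run(
    model,
    output_folder="results",
    encode_kwargs={"batch_size": 32},  # typed as EncodeKwargs | None
)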
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "test": {
+    "num_samples": 1299,
+    "number_of_characters": 9254,
+    "documents_text_statistics": null,
+    "documents_image_statistics": {
+      "min_image_width": 2245,
+      "average_image_width": 2370.324347826087,
+      "max_image_width": 3508,
+      "min_image_height": 2481,
+      "average_image_height": 3289.8060869565215,
+      "max_image_height": 3580,
+      "unique_images": 1132
+    },
+    "queries_text_statistics": {
+      "total_text_length": 9254,
+      "min_text_length": 15,
+      "average_text_length": 62.10738255033557,
+      "max_text_length": 108,
+      "unique_texts": 149
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 409,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 2.7449664429530203,
+      "max_relevant_docs_per_query": 7,
+      "unique_relevant_docs": 316
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "test": {
+    "num_samples": 1640,
+    "number_of_characters": 8331,
+    "documents_text_statistics": null,
+    "documents_image_statistics": {
+      "min_image_width": 2313,
+      "average_image_width": 2347.5321597833445,
+      "max_image_width": 2481,
+      "min_image_height": 3138,
+      "average_image_height": 3214.301963439404,
+      "max_image_height": 3508,
+      "unique_images": 1442
+    },
+    "queries_text_statistics": {
+      "total_text_length": 8331,
+      "min_text_length": 23,
+      "average_text_length": 51.11042944785276,
+      "max_text_length": 110,
+      "unique_texts": 163
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 413,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 2.5337423312883436,
+      "max_relevant_docs_per_query": 6,
+      "unique_relevant_docs": 349
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "test": {
+    "num_samples": 2166,
+    "number_of_characters": 9764,
+    "documents_text_statistics": null,
+    "documents_image_statistics": {
+      "min_image_width": 2221,
+      "average_image_width": 2339.4957350727545,
+      "max_image_width": 2480,
+      "min_image_height": 3036,
+      "average_image_height": 3242.8138484696437,
+      "max_image_height": 3508,
+      "unique_images": 1974
+    },
+    "queries_text_statistics": {
+      "total_text_length": 9764,
+      "min_text_length": 22,
+      "average_text_length": 56.4393063583815,
+      "max_text_length": 103,
+      "unique_texts": 173
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 525,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 3.0346820809248554,
+      "max_relevant_docs_per_query": 7,
+      "unique_relevant_docs": 442
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json
ADDED
@@ -0,0 +1,32 @@
+{
+  "test": {
+    "num_samples": 2330,
+    "number_of_characters": 13131,
+    "documents_text_statistics": null,
+    "documents_image_statistics": {
+      "min_image_width": 1949,
+      "average_image_width": 2430.1152204836417,
+      "max_image_width": 3505,
+      "min_image_height": 2480,
+      "average_image_height": 3350.3921289710765,
+      "max_image_height": 3626,
+      "unique_images": 2096
+    },
+    "queries_text_statistics": {
+      "total_text_length": 13131,
+      "min_text_length": 21,
+      "average_text_length": 59.41628959276018,
+      "max_text_length": 112,
+      "unique_texts": 221
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 726,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 3.2850678733031673,
+      "max_relevant_docs_per_query": 7,
+      "unique_relevant_docs": 575
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 74457,
+    "number_of_characters": 76109543,
+    "documents_text_statistics": {
+      "total_text_length": 75549698,
+      "min_text_length": 121,
+      "average_text_length": 1087.7189916063176,
+      "max_text_length": 25438,
+      "unique_texts": 69150
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 559845,
+      "min_text_length": 57,
+      "average_text_length": 111.969,
+      "max_text_length": 224,
+      "unique_texts": 5000
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 5000,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 5000
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json
ADDED
@@ -0,0 +1,116 @@
+{
+  "test": {
+    "num_samples": 30300,
+    "number_of_characters": 17320243,
+    "documents_text_statistics": {
+      "total_text_length": 17276572,
+      "min_text_length": 316,
+      "average_text_length": 575.8857333333333,
+      "max_text_length": 1008,
+      "unique_texts": 28361
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 43671,
+      "min_text_length": 67,
+      "average_text_length": 145.57,
+      "max_text_length": 345,
+      "unique_texts": 300
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 300,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 300
+    },
+    "top_ranked_statistics": null,
+    "hf_subset_descriptive_stats": {
+      "en": {
+        "num_samples": 10100,
+        "number_of_characters": 5517678,
+        "documents_text_statistics": {
+          "total_text_length": 5503635,
+          "min_text_length": 316,
+          "average_text_length": 550.3635,
+          "max_text_length": 726,
+          "unique_texts": 9422
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+          "total_text_length": 14043,
+          "min_text_length": 68,
+          "average_text_length": 140.43,
+          "max_text_length": 305,
+          "unique_texts": 100
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+          "num_relevant_docs": 100,
+          "min_relevant_docs_per_query": 1,
+          "average_relevant_docs_per_query": 1.0,
+          "max_relevant_docs_per_query": 1,
+          "unique_relevant_docs": 100
+        },
+        "top_ranked_statistics": null
+      },
+      "fi": {
+        "num_samples": 10100,
+        "number_of_characters": 5953462,
+        "documents_text_statistics": {
+          "total_text_length": 5938809,
+          "min_text_length": 326,
+          "average_text_length": 593.8809,
+          "max_text_length": 1008,
+          "unique_texts": 9422
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+          "total_text_length": 14653,
+          "min_text_length": 67,
+          "average_text_length": 146.53,
+          "max_text_length": 345,
+          "unique_texts": 100
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+          "num_relevant_docs": 100,
+          "min_relevant_docs_per_query": 1,
+          "average_relevant_docs_per_query": 1.0,
+          "max_relevant_docs_per_query": 1,
+          "unique_relevant_docs": 100
+        },
+        "top_ranked_statistics": null
+      },
+      "pt": {
+        "num_samples": 10100,
+        "number_of_characters": 5849103,
+        "documents_text_statistics": {
+          "total_text_length": 5834128,
+          "min_text_length": 325,
+          "average_text_length": 583.4128,
+          "max_text_length": 774,
+          "unique_texts": 9517
+        },
+        "documents_image_statistics": null,
+        "queries_text_statistics": {
+          "total_text_length": 14975,
+          "min_text_length": 69,
+          "average_text_length": 149.75,
+          "max_text_length": 320,
+          "unique_texts": 100
+        },
+        "queries_image_statistics": null,
+        "relevant_docs_statistics": {
+          "num_relevant_docs": 100,
+          "min_relevant_docs_per_query": 1,
+          "average_relevant_docs_per_query": 1.0,
+          "max_relevant_docs_per_query": 1,
+          "unique_relevant_docs": 100
+        },
+        "top_ranked_statistics": null
+      }
+    }
+  }
+}
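As a quick consistency check on EuroPIRQRetrieval.json above, the top-level "test" figures are the sums of the per-subset (en/fi/pt) figures; a small illustrative snippet, not part of the package:

number_of_characters = {"en": 5517678, "fi": 5953462, "pt": 5849103}
assert sum(number_of_characters.values()) == 17320243  # top-level total
assert 3 * 10100 == 30300                              # top-level num_samples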
mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 102198,
+    "number_of_characters": 47870352,
+    "documents_text_statistics": {
+      "total_text_length": 47719757,
+      "min_text_length": 9,
+      "average_text_length": 472.01951591046225,
+      "max_text_length": 8686,
+      "unique_texts": 101097
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 150595,
+      "min_text_length": 30,
+      "average_text_length": 136.78019981834694,
+      "max_text_length": 404,
+      "unique_texts": 1099
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 3401,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 3.089009990917348,
+      "max_relevant_docs_per_query": 5,
+      "unique_relevant_docs": 1123
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 132137,
+    "number_of_characters": 43323279,
+    "documents_text_statistics": {
+      "total_text_length": 43311486,
+      "min_text_length": 11,
+      "average_text_length": 328.5778249819823,
+      "max_text_length": 8576,
+      "unique_texts": 131814
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 11793,
+      "min_text_length": 6,
+      "average_text_length": 36.62422360248447,
+      "max_text_length": 100,
+      "unique_texts": 321
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 11620,
+      "min_relevant_docs_per_query": 31,
+      "average_relevant_docs_per_query": 36.08695652173913,
+      "max_relevant_docs_per_query": 1288,
+      "unique_relevant_docs": 32537
+    },
+    "top_ranked_statistics": null
+  }
+}
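These descriptive-stats files ship inside the wheel, so they can be read straight from an installed package. A stdlib-only sketch (the relative path is taken from the file list above; the importlib.resources usage is an assumption about the install layout):

import json
from importlib import resources

# Resolve the stats file relative to the installed mteb package.
path = resources.files("mteb").joinpath(
    "descriptive_stats/Retrieval/ChemRxivRetrieval.json"
)
stats = json.loads(path.read_text(encoding="utf-8"))
print(stats["test"]["queries_text_statistics"]["average_text_length"])  # 111.969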