mteb 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -9
- mteb/_evaluators/any_sts_evaluator.py +10 -5
- mteb/_evaluators/clustering_evaluator.py +10 -4
- mteb/_evaluators/evaluator.py +9 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +6 -4
- mteb/_evaluators/pair_classification_evaluator.py +10 -5
- mteb/_evaluators/retrieval_evaluator.py +19 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +14 -10
- mteb/_evaluators/text/bitext_mining_evaluator.py +8 -3
- mteb/_evaluators/text/summarization_evaluator.py +8 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +10 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +17 -9
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +15 -6
- mteb/abstasks/clustering.py +17 -8
- mteb/abstasks/clustering_legacy.py +14 -6
- mteb/abstasks/image/image_text_pair_classification.py +17 -7
- mteb/abstasks/multilabel_classification.py +11 -5
- mteb/abstasks/pair_classification.py +19 -9
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +28 -17
- mteb/abstasks/retrieval_dataset_loaders.py +11 -8
- mteb/abstasks/sts.py +19 -10
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +14 -7
- mteb/abstasks/text/summarization.py +17 -7
- mteb/abstasks/zeroshot_classification.py +15 -7
- mteb/benchmarks/_create_table.py +13 -3
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +20 -18
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +7 -3
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +11 -4
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +22 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +16 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +22 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +25 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +19 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +19 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +15 -9
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/clustering/nob/snl_clustering.py +7 -2
- mteb/tasks/clustering/nob/vg_clustering.py +7 -2
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +9 -1
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +3 -3
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/RECORD +238 -217
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/WHEEL +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.4.dist-info}/top_level.txt +0 -0
mteb/models/model_implementations/blip2_models.py

@@ -1,14 +1,19 @@
-from
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 BLIP2_CITATION = """@inproceedings{li2023blip2,
   title={{BLIP-2:} Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models},
@@ -172,6 +177,7 @@ blip2_opt_2_7b = ModelMeta(
     release_date="2024-03-22",
     modalities=["image", "text"],
     n_parameters=3_740_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=14285,
     max_tokens=None,
     embed_dim=768,
@@ -196,6 +202,7 @@ blip2_opt_6_7b_coco = ModelMeta(
     release_date="2024-03-31",
     modalities=["image", "text"],
     n_parameters=7_750_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=29577,
     max_tokens=None,
     embed_dim=768,
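The import hunk above is repeated, with minor variations, across most of the model files in this release: imports needed only for type annotations (DataLoader, TaskMetadata, the mteb.types aliases) move under an `if TYPE_CHECKING:` guard, and `from __future__ import annotations` makes annotations lazy so the guarded names are never looked up at runtime. A minimal sketch of the pattern, with a hypothetical `encode` function standing in for the real wrapper methods:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only evaluated by static type checkers; never imported at runtime.
        from torch.utils.data import DataLoader

    def encode(batches: DataLoader) -> list[list[float]]:
        # With lazy annotations, "DataLoader" above is just a string at runtime,
        # so the guarded import is enough for type checking while keeping
        # module import time (and hard runtime dependencies) down.
        return [[0.0] for _ in batches]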
mteb/models/model_implementations/blip_models.py

@@ -1,14 +1,19 @@
-from
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
 from torch.nn.functional import normalize
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 BLIP_CITATION = """@misc{https://doi.org/10.48550/arxiv.2201.12086,
   doi = {10.48550/ARXIV.2201.12086},
@@ -136,6 +141,7 @@ blip_image_captioning_large = ModelMeta(
     release_date="2023-12-07",
     modalities=["image", "text"],
     n_parameters=470_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=1792,
     max_tokens=512,
     embed_dim=768,
@@ -164,6 +170,7 @@ blip_image_captioning_base = ModelMeta(
     release_date="2023-08-01",
     modalities=["image", "text"],
     n_parameters=247_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=942,
     max_tokens=512,
     embed_dim=768,
@@ -193,6 +200,7 @@ blip_vqa_base = ModelMeta(
     release_date="2023-12-07",
     modalities=["image", "text"],
     n_parameters=247_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=1467,
     max_tokens=512,
     embed_dim=768,
@@ -220,6 +228,7 @@ blip_vqa_capfilt_large = ModelMeta(
     release_date="2023-01-22",
     modalities=["image", "text"],
     n_parameters=247_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=942,
     max_tokens=512,
     embed_dim=768,
@@ -247,6 +256,7 @@ blip_itm_base_coco = ModelMeta(
     release_date="2023-08-01",
     modalities=["image", "text"],
     n_parameters=247_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=942,
     max_tokens=512,
     embed_dim=768,
@@ -274,6 +284,7 @@ blip_itm_large_coco = ModelMeta(
     release_date="2023-08-01",
     modalities=["image", "text"],
     n_parameters=470_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=1793,
     max_tokens=512,
     embed_dim=768,
@@ -302,6 +313,7 @@ blip_itm_base_flickr = ModelMeta(
     release_date="2023-08-01",
     modalities=["image", "text"],
     n_parameters=247_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=942,
     max_tokens=512,
     embed_dim=768,
@@ -330,6 +342,7 @@ blip_itm_large_flickr = ModelMeta(
     release_date="2023-08-01",
     modalities=["image", "text"],
     n_parameters=470_000_000,
+    n_embedding_parameters=23_442_432,
     memory_usage_mb=1793,
     max_tokens=512,
     embed_dim=768,
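The other change repeated throughout the release is the new `n_embedding_parameters` field on `ModelMeta`, which, judging by the values, records the size of the token-embedding table (and stays None where it was not back-filled); the 23_442_432 used for every BLIP entry above matches a 30,524-token vocabulary times a 768-dimensional hidden size. A rough sketch of how one might reproduce such a number from a Hugging Face checkpoint, as an illustration rather than the method the maintainers necessarily used:

    from transformers import AutoModel

    def count_embedding_parameters(model_name: str) -> int:
        """Rough count of the parameters in the input (token) embedding table."""
        model = AutoModel.from_pretrained(model_name)
        embeddings = model.get_input_embeddings()  # an nn.Embedding for most checkpoints
        return embeddings.weight.numel()           # vocab_size * hidden_size

    # e.g. count_embedding_parameters("Salesforce/blip-itm-base-coco")
    # should land near the 23_442_432 (30_524 * 768) used for the BLIP entries above.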
mteb/models/model_implementations/bm25.py

@@ -1,18 +1,22 @@
+from __future__ import annotations
+
 import logging
+from typing import TYPE_CHECKING

 from mteb._create_dataloaders import _create_text_queries_dataloader
 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta
-
-
-
-
-
-
-
-
-
+
+if TYPE_CHECKING:
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.models.models_protocols import SearchProtocol
+    from mteb.types import (
+        CorpusDatasetType,
+        EncodeKwargs,
+        QueryDatasetType,
+        RetrievalOutputType,
+        TopRankedDocumentsType,
+    )

 logger = logging.getLogger(__name__)

@@ -75,7 +79,6 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
         hf_subset: str,
         top_k: int,
         encode_kwargs: EncodeKwargs,
-        instructions: InstructionDatasetType | None = None,
         top_ranked: TopRankedDocumentsType | None = None,
     ) -> RetrievalOutputType:
         logger.info("Encoding Queries...")
@@ -98,13 +101,17 @@ def bm25_loader(model_name, **kwargs) -> SearchProtocol:
             query_results = queries_results[qi]
             scores = queries_scores[qi]
             doc_id_to_score = {}
+            query_documents = (
+                top_ranked[qid] if top_ranked and qid in top_ranked else None
+            )

             # Iterate over results
-            for
-                doc_idx = query_results[ri]
-                score = scores[ri]
+            for doc_idx, score in zip(query_results, scores):
                 doc_id = self.corpus_idx_to_id[doc_idx]

+                # handle reranking with a filtered set of documents
+                if query_documents is not None and doc_id not in query_documents:
+                    continue
                 doc_id_to_score[doc_id] = float(score)

             results[qid] = doc_id_to_score
@@ -127,6 +134,7 @@ bm25_s = ModelMeta(
     revision="0_1_10",
     release_date="2024-07-10",  # release of version 0.1.10
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     embed_dim=None,
     license=None,
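The bm25s change above does two things: the result loop now zips document indices with their scores directly, and when `top_ranked` is supplied (the reranking case) any hit outside the query's candidate set is skipped. A self-contained sketch of that filtering step, with made-up document ids and scores:

    def filter_hits(
        hits: list[tuple[str, float]],
        candidates: set[str] | None,
    ) -> dict[str, float]:
        """Keep a hit only if it is in the query's candidate set (when one is given)."""
        results: dict[str, float] = {}
        for doc_id, score in hits:
            if candidates is not None and doc_id not in candidates:
                continue  # reranking: ignore documents outside the provided set
            results[doc_id] = float(score)
        return results

    # Full retrieval: no candidate set, everything is kept.
    print(filter_hits([("d1", 3.2), ("d2", 1.1)], None))
    # Reranking: only documents listed in top_ranked for this query survive.
    print(filter_hits([("d1", 3.2), ("d2", 1.1)], {"d2"}))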
mteb/models/model_implementations/bmretriever_models.py

@@ -1,5 +1,6 @@
-from
-
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
 from sentence_transformers import SentenceTransformer
@@ -9,6 +10,9 @@ from mteb.models import ModelMeta
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
 from mteb.types import PromptType

+if TYPE_CHECKING:
+    from collections.abc import Callable
+

 def instruction_template(
     instruction: str, prompt_type: PromptType | None = None
@@ -99,6 +103,7 @@ BMRetriever_410M = ModelMeta(
     release_date="2024-04-29",
     embed_dim=1024,
     n_parameters=353_822_720,
+    n_embedding_parameters=51_511_296,
     memory_usage_mb=1349,
     max_tokens=2048,
     license="mit",
@@ -129,6 +134,7 @@ BMRetriever_1B = ModelMeta(
     release_date="2024-04-29",
     embed_dim=2048,
     n_parameters=908_759_040,
+    n_embedding_parameters=103_022_592,
     memory_usage_mb=3466,
     max_tokens=2048,
     license="mit",
@@ -159,6 +165,7 @@ BMRetriever_2B = ModelMeta(
     release_date="2024-04-29",
     embed_dim=2048,
     n_parameters=2_506_172_416,
+    n_embedding_parameters=524_288_000,
     memory_usage_mb=9560,
     max_tokens=8192,
     license="mit",
@@ -189,6 +196,7 @@ BMRetriever_7B = ModelMeta(
     release_date="2024-04-29",
     embed_dim=4096,
     n_parameters=7_110_660_096,
+    n_embedding_parameters=131_072_000,
     memory_usage_mb=27124,
     max_tokens=32768,
     license="mit",
mteb/models/model_implementations/cde_models.py

@@ -1,27 +1,31 @@
+from __future__ import annotations
+
 import logging
-from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any

 import numpy as np
 import torch
-from torch.utils.data import DataLoader

 import mteb
 from mteb._create_dataloaders import _corpus_to_dict
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.models.models_protocols import PromptType
 from mteb.models.sentence_transformer_wrapper import SentenceTransformerEncoderWrapper
-from mteb.types import
+from mteb.types import PromptType

 from .bge_models import bge_full_data

 if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from torch.utils.data import DataLoader
+
     from mteb.abstasks import (
         AbsTaskClassification,
         AbsTaskRetrieval,
         AbsTaskSummarization,
     )
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput
 logger = logging.getLogger(__name__)

 CDE_CITATION = """@misc{morris2024contextualdocumentembeddings,
@@ -222,6 +226,7 @@ cde_small_v1 = ModelMeta(
     revision="e151df18af0d7f1d1c37b074fee58406ececf19f",
     release_date="2024-09-24",
     n_parameters=int(281 * 1e6),
+    n_embedding_parameters=None,
     memory_usage_mb=1072,  # Though the second-stage model is only 140M
     max_tokens=512,
     embed_dim=768,
@@ -251,6 +256,7 @@ cde_small_v2 = ModelMeta(
     revision="4e1d021a6c3fd7ce8aa0a7204057eee5ae61d390",
     release_date="2025-01-13",
     n_parameters=int(306 * 1e6),
+    n_embedding_parameters=None,
     memory_usage_mb=1166,  # Though the second-stage model is only 140M
     max_tokens=512,
     embed_dim=768,
mteb/models/model_implementations/clip_models.py

@@ -1,13 +1,18 @@
-from
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType


 class CLIPModel(AbsEncoder):
@@ -123,6 +128,7 @@ clip_vit_large_patch14 = ModelMeta(
     release_date="2021-02-26",
     modalities=["image", "text"],
     n_parameters=428_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=1631,
     max_tokens=77,
     embed_dim=768,
@@ -147,6 +153,7 @@ clip_vit_base_patch32 = ModelMeta(
     release_date="2021-02-26",
     modalities=["image", "text"],
     n_parameters=151_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=576,
     max_tokens=77,
     embed_dim=512,
@@ -171,6 +178,7 @@ clip_vit_base_patch16 = ModelMeta(
     release_date="2021-02-26",
     modalities=["image", "text"],
     n_parameters=151_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=576,
     max_tokens=77,
     embed_dim=512,
@@ -30,6 +30,7 @@ e5_nl_small = ModelMeta(
     revision="0243664a6c5e12eef854b091eb283e51833c3e9f",
     release_date="2025-09-23",
     n_parameters=40_800_000,
+    n_embedding_parameters=19_200_768,
     memory_usage_mb=78,
     embed_dim=384,
     license="mit",
@@ -57,6 +58,7 @@ e5_nl_base = ModelMeta(
     revision="6bd5722f236da48b4b8bcb28cc1fc478f7089956",
     release_date="2025-09-23",
     n_parameters=124_400_000,
+    n_embedding_parameters=38_401_536,
     memory_usage_mb=237,
     embed_dim=768,
     license="mit",
@@ -84,6 +86,7 @@ e5_nl_large = ModelMeta(
     revision="683333f86ed9eb3699b5567f0fdabeb958d412b0",
     release_date="2025-09-23",
     n_parameters=355_000_000,
+    n_embedding_parameters=51_202_048,
     memory_usage_mb=1355,
     embed_dim=1024,
     license="mit",
@@ -236,6 +236,7 @@ F2LLM_0B6 = ModelMeta(
     revision="36416618b83d4bd84a8ca30c2ee01ed518f9f2e7",
     release_date="2025-09-18",
     n_parameters=595_776_512,
+    n_embedding_parameters=None,
     memory_usage_mb=1137,
     embed_dim=1024,
     license="apache-2.0",
@@ -266,6 +267,7 @@ F2LLM_1B7 = ModelMeta(
     revision="fdce0e09655f42cea26f7f66f5a70cd4507ea45c",
     release_date="2025-09-18",
     n_parameters=1_720_574_976,
+    n_embedding_parameters=None,
     memory_usage_mb=3282,
     embed_dim=2560,
     license="apache-2.0",
@@ -296,6 +298,7 @@ F2LLM_4B = ModelMeta(
     revision="9fe95901ed2b6b59dd7673d6e93c9d76766a1e25",
     release_date="2025-09-18",
     n_parameters=4_021_774_336,
+    n_embedding_parameters=None,
     memory_usage_mb=7672,
     embed_dim=2560,
     license="apache-2.0",
@@ -318,6 +321,7 @@ C2LLM_0B5 = ModelMeta(
     release_date="2025-12-22",
     languages=c2llm_languages,
     n_parameters=497252096,
+    n_embedding_parameters=None,
     memory_usage_mb=948.0,
     max_tokens=32768,
     embed_dim=896,
@@ -346,6 +350,7 @@ C2LLM_7B = ModelMeta(
     release_date="2025-12-22",
     languages=c2llm_languages,
     n_parameters=7667028992,
+    n_embedding_parameters=None,
     memory_usage_mb=14624.0,
     max_tokens=32768,
     embed_dim=3584,
mteb/models/model_implementations/codesage_models.py

@@ -28,6 +28,7 @@ codesage_large = ModelMeta(
     release_date="2024-02-03",
     modalities=["text"],
     n_parameters=1_300_000_000,
+    n_embedding_parameters=100_667_392,
     memory_usage_mb=4959,
     max_tokens=2048,
     embed_dim=2048,
@@ -55,6 +56,7 @@ codesage_base = ModelMeta(
     release_date="2024-02-03",
     modalities=["text"],
     n_parameters=356_000_000,
+    n_embedding_parameters=50_333_696,
     memory_usage_mb=1358,
     max_tokens=2048,
     embed_dim=1024,
@@ -82,6 +84,7 @@ codesage_small = ModelMeta(
     release_date="2024-02-03",
     modalities=["text"],
     n_parameters=130_000_000,
+    n_embedding_parameters=50_333_696,
     memory_usage_mb=496,
     max_tokens=2048,
     embed_dim=1024,
mteb/models/model_implementations/cohere_models.py

@@ -1,18 +1,24 @@
+from __future__ import annotations
+
 import logging
 import time
 from functools import wraps
-from typing import Any, Literal, get_args
+from typing import TYPE_CHECKING, Any, Literal, get_args

 import numpy as np
 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import
+from mteb.types import PromptType
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput

 logger = logging.getLogger(__name__)

@@ -386,6 +392,7 @@ cohere_mult_3 = ModelMeta(
     revision="1",
     release_date="2023-11-02",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=None,
     embed_dim=512,
@@ -412,6 +419,7 @@ cohere_eng_3 = ModelMeta(
     revision="1",
     release_date="2023-11-02",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=512,
     embed_dim=1024,
@@ -437,6 +445,7 @@ cohere_mult_light_3 = ModelMeta(
     reference="https://cohere.com/blog/introducing-embed-v3",
     release_date="2023-11-02",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=512,
     embed_dim=384,
@@ -462,6 +471,7 @@ cohere_eng_light_3 = ModelMeta(
     revision="1",
     release_date="2023-11-02",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=512,
     embed_dim=384,
mteb/models/model_implementations/cohere_v.py

@@ -1,15 +1,15 @@
+from __future__ import annotations
+
 import base64
 import io
 import os
 import time
-from typing import Any, Literal, get_args
+from typing import TYPE_CHECKING, Any, Literal, get_args

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import requires_image_dependencies, requires_package
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models import ModelMeta
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_implementations.cohere_models import (
@@ -18,7 +18,12 @@ from mteb.models.model_implementations.cohere_models import (
     retry_with_rate_limit,
 )
 from mteb.models.model_meta import ScoringFunction
-
+
+if TYPE_CHECKING:
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType


 def _post_process_embeddings(
@@ -386,6 +391,7 @@ cohere_mult_3 = ModelMeta(
     revision="1",
     release_date="2024-10-24",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=None,
     embed_dim=1024,
@@ -410,6 +416,7 @@ cohere_eng_3 = ModelMeta(
     revision="1",
     release_date="2024-10-24",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=None,
     embed_dim=1024,
@@ -434,6 +441,7 @@ cohere_embed_v4_multimodal = ModelMeta(
     revision="1",
     release_date="2024-12-01",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=128000,
     embed_dim=1536,
@@ -458,6 +466,7 @@ cohere_embed_v4_multimodal_binary = ModelMeta(
     revision="1",
     release_date="2024-12-01",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=128000,
     embed_dim=1536,
@@ -483,6 +492,7 @@ cohere_embed_v4_multimodal_int8 = ModelMeta(
     revision="1",
     release_date="2024-12-01",
     n_parameters=None,
+    n_embedding_parameters=None,
     memory_usage_mb=None,
     max_tokens=128000,
     embed_dim=1536,
mteb/models/model_implementations/colpali_models.py

@@ -4,20 +4,21 @@ import logging
 from typing import TYPE_CHECKING, Any

 import torch
-from torch.utils.data import DataLoader
 from tqdm.auto import tqdm

 from mteb._requires_package import (
     requires_image_dependencies,
     requires_package,
 )
-from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
-from mteb.types import Array, BatchedInput, PromptType

 if TYPE_CHECKING:
     from PIL import Image
+    from torch.utils.data import DataLoader
+
+    from mteb.abstasks.task_metadata import TaskMetadata
+    from mteb.types import Array, BatchedInput, PromptType

 logger = logging.getLogger(__name__)

@@ -219,6 +220,7 @@ colpali_v1_1 = ModelMeta(
     release_date="2024-08-21",
     modalities=["image", "text"],
     n_parameters=2_920_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4700,
     max_tokens=16384,
     embed_dim=128,
@@ -246,6 +248,7 @@ colpali_v1_2 = ModelMeta(
     release_date="2024-08-26",
     modalities=["image", "text"],
     n_parameters=2_920_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4700,
     max_tokens=16384,
     embed_dim=128,
@@ -273,6 +276,7 @@ colpali_v1_3 = ModelMeta(
     release_date="2024-11-01",
     modalities=["image", "text"],
     n_parameters=2_920_000_000,
+    n_embedding_parameters=None,
     memory_usage_mb=4700,
     max_tokens=16384,
     embed_dim=128,
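After upgrading, the new field is exposed on every registered model's metadata. A small usage sketch, assuming the public `mteb.get_model_meta` helper and that the CLIP entry shown above is registered under its Hugging Face id; values that were not back-filled in this release simply come back as None:

    import mteb

    # Look up the registered metadata for a model touched by this release.
    meta = mteb.get_model_meta("openai/clip-vit-base-patch32")
    print(meta.n_parameters)            # 151_000_000 per the hunk above
    print(meta.n_embedding_parameters)  # None: not back-filled for the CLIP entries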