PyPI - mteb - Versions diffs - 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl - Mend

mteb 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +7 -2
mteb/abstasks/_statistics_calculation.py +6 -2
mteb/abstasks/classification.py +0 -2
mteb/benchmarks/benchmarks/__init__.py +2 -0
mteb/benchmarks/benchmarks/benchmarks.py +57 -0
mteb/deprecated_evaluator.py +8 -13
mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
mteb/evaluate.py +2 -33
mteb/leaderboard/figures.py +1 -1
mteb/leaderboard/table.py +1 -11
mteb/models/abs_encoder.py +21 -17
mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
mteb/models/get_model_meta.py +3 -123
mteb/models/instruct_wrapper.py +2 -1
mteb/models/model_implementations/bica_model.py +34 -0
mteb/models/model_implementations/colpali_models.py +7 -2
mteb/models/model_implementations/colqwen_models.py +1 -1
mteb/models/model_implementations/gme_v_models.py +9 -5
mteb/models/model_implementations/google_models.py +10 -0
mteb/models/model_implementations/granite_vision_embedding_models.py +6 -2
mteb/models/model_implementations/jasper_models.py +2 -2
mteb/models/model_implementations/jina_models.py +1 -1
mteb/models/model_implementations/mod_models.py +204 -0
mteb/models/model_implementations/nomic_models.py +142 -4
mteb/models/model_implementations/nomic_models_vision.py +6 -2
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +6 -2
mteb/models/model_implementations/pylate_models.py +1 -4
mteb/models/model_implementations/random_baseline.py +6 -2
mteb/models/model_implementations/seed_1_6_embedding_models.py +7 -2
mteb/models/model_implementations/voyage_v.py +6 -2
mteb/models/model_meta.py +396 -19
mteb/models/sentence_transformer_wrapper.py +2 -7
mteb/tasks/reranking/jpn/__init__.py +9 -1
mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
mteb/tasks/retrieval/jpn/__init__.py +8 -0
mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
mteb/types/_encoder_io.py +7 -2
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/METADATA +2 -1
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/RECORD +53 -39
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/WHEEL +0 -0
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/entry_points.txt +0 -0
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/licenses/LICENSE +0 -0
{mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/top_level.txt +0 -0

mteb/models/get_model_meta.py CHANGED Viewed

@@ -1,26 +1,15 @@
-from __future__ import annotations
 import difflib
 import logging
-import warnings
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, Any
-from huggingface_hub import ModelCard
-from huggingface_hub.errors import RepositoryNotFoundError
+from typing import Any
 from mteb.abstasks import AbsTask
 from mteb.models import (
-    CrossEncoderWrapper,
     ModelMeta,
     MTEBModels,
-    sentence_transformers_loader,
 )
 from mteb.models.model_implementations import MODEL_REGISTRY
-if TYPE_CHECKING:
-    from sentence_transformers import CrossEncoder, SentenceTransformer
 logger = logging.getLogger(__name__)
@@ -101,24 +90,9 @@ def get_model(
     Returns:
         A model object
     """
-    from sentence_transformers import CrossEncoder, SentenceTransformer
     meta = get_model_meta(model_name, revision)
     model = meta.load_model(**kwargs)
-    # If revision not available in the modelmeta, try to extract it from sentence-transformers
-    if hasattr(model, "model") and isinstance(model.model, SentenceTransformer):  # type: ignore
-        _meta = _model_meta_from_sentence_transformers(model.model)  # type: ignore
-        if meta.revision is None:
-            meta.revision = _meta.revision if _meta.revision else meta.revision
-        if not meta.similarity_fn_name:
-            meta.similarity_fn_name = _meta.similarity_fn_name
-    elif isinstance(model, CrossEncoder):
-        _meta = _model_meta_from_cross_encoder(model.model)
-        if meta.revision is None:
-            meta.revision = _meta.revision if _meta.revision else meta.revision
     model.mteb_model_meta = meta  # type: ignore
     return model
@@ -148,12 +122,8 @@ def get_model_meta(
         logger.info(
             "Model not found in model registry. Attempting to extract metadata by loading the model ({model_name}) using HuggingFace."
         )
-        try:
-            meta = _model_meta_from_hf_hub(model_name)
-            meta.revision = revision
-            return meta
-        except RepositoryNotFoundError:
-            pass
+        meta = ModelMeta.from_hub(model_name, revision)
+        return meta
     not_found_msg = f"Model '{model_name}' not found in MTEB registry"
     not_found_msg += " nor on the Huggingface Hub." if fetch_from_hf else "."
@@ -171,93 +141,3 @@ def get_model_meta(
             suggestion = f" Did you mean: '{close_matches[0]}'?"
     raise KeyError(not_found_msg + suggestion)
-def _model_meta_from_hf_hub(model_name: str) -> ModelMeta:
-    card = ModelCard.load(model_name)
-    card_data = card.data.to_dict()
-    frameworks = ["PyTorch"]
-    loader = None
-    if card_data.get("library_name", None) == "sentence-transformers":
-        frameworks.append("Sentence Transformers")
-        loader = sentence_transformers_loader
-    else:
-        msg = (
-            "Model library not recognized, defaulting to Sentence Transformers loader."
-        )
-        logger.warning(msg)
-        warnings.warn(msg)
-        loader = sentence_transformers_loader
-    revision = card_data.get("base_model_revision", None)
-    license = card_data.get("license", None)
-    return ModelMeta(
-        loader=loader,
-        name=model_name,
-        revision=revision,
-        release_date=None,
-        languages=None,
-        license=license,
-        framework=frameworks,  # type: ignore
-        training_datasets=None,
-        similarity_fn_name=None,
-        n_parameters=None,
-        memory_usage_mb=None,
-        max_tokens=None,
-        embed_dim=None,
-        open_weights=True,
-        public_training_code=None,
-        public_training_data=None,
-        use_instructions=None,
-    )
-def _model_meta_from_cross_encoder(model: CrossEncoder) -> ModelMeta:
-    return ModelMeta(
-        loader=CrossEncoderWrapper,
-        name=model.model.name_or_path,
-        revision=model.config._commit_hash,
-        release_date=None,
-        languages=None,
-        framework=["Sentence Transformers"],
-        similarity_fn_name=None,
-        n_parameters=None,
-        memory_usage_mb=None,
-        max_tokens=None,
-        embed_dim=None,
-        license=None,
-        open_weights=True,
-        public_training_code=None,
-        public_training_data=None,
-        use_instructions=None,
-        training_datasets=None,
-    )
-def _model_meta_from_sentence_transformers(model: SentenceTransformer) -> ModelMeta:
-    name: str | None = (
-        model.model_card_data.model_name
-        if model.model_card_data.model_name
-        else model.model_card_data.base_model
-    )
-    embeddings_dim = model.get_sentence_embedding_dimension()
-    meta = ModelMeta(
-        loader=sentence_transformers_loader,
-        name=name,
-        revision=model.model_card_data.base_model_revision,
-        release_date=None,
-        languages=None,
-        framework=["Sentence Transformers"],
-        similarity_fn_name=None,
-        n_parameters=None,
-        memory_usage_mb=None,
-        max_tokens=None,
-        embed_dim=embeddings_dim,
-        license=None,
-        open_weights=True,
-        public_training_code=None,
-        public_training_data=None,
-        use_instructions=None,
-        training_datasets=None,
-    )
-    return meta

mteb/models/instruct_wrapper.py CHANGED Viewed

@@ -122,7 +122,8 @@ class InstructSentenceTransformerModel(AbsEncoder):
             apply_instruction_to_passages: Whether to apply the instruction template to the passages.
             padding_side: Padding side. If None, the padding side will be read from the model config.
             add_eos_token: Whether to add the eos token to each input example.
-            prompts_dict: Dictionary of task names to prompt names. If None, the prompts will be read from the model config.
+            prompts_dict: Dictionary of task names to prompt names. If task name is missing in the dict or prompts dict is None, prompt from task metadata or
+                AbsTask.abstask_prompt will be used.
             **kwargs: Kwargs for Sentence Transformer model.
         """
         from sentence_transformers import SentenceTransformer

mteb/models/model_implementations/bica_model.py ADDED Viewed

@@ -0,0 +1,34 @@
+from mteb.models import ModelMeta, sentence_transformers_loader
+bica_base = ModelMeta(
+    name="bisectgroup/BiCA-base",
+    loader=sentence_transformers_loader,
+    languages=["eng-Latn"],
+    open_weights=True,
+    revision="31237a836e5ae908c308a256573e5f0986498574",
+    release_date="2025-11-14",
+    n_parameters=110_000_000,
+    memory_usage_mb=418,
+    embed_dim=768,
+    license="mit",
+    max_tokens=512,
+    reference="https://huggingface.co/bisectgroup/BiCA-base",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=False,
+    public_training_code="https://github.com/NiravBhattLab/BiCA",
+    public_training_data="https://huggingface.co/datasets/bisectgroup/hard-negatives-traversal",
+    adapted_from="thenlper/gte-base",
+    citation="""
+@misc{sinha2025bicaeffectivebiomedicaldense,
+      title={BiCA: Effective Biomedical Dense Retrieval with Citation-Aware Hard Negatives},
+      author={Aarush Sinha and Pavan Kumar S and Roshan Balaji and Nirav Pravinbhai Bhatt},
+      year={2025},
+      eprint={2511.08029},
+      archivePrefix={arXiv},
+      primaryClass={cs.IR},
+      url={https://arxiv.org/abs/2511.08029},
+}
+""",
+    training_datasets=set(),
+)

mteb/models/model_implementations/colpali_models.py CHANGED Viewed

@@ -1,8 +1,9 @@
+from __future__ import annotations
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import torch
-from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -15,6 +16,9 @@ from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
+if TYPE_CHECKING:
+    from PIL import Image
 logger = logging.getLogger(__name__)
@@ -89,6 +93,7 @@ class ColPaliEngineWrapper(AbsEncoder):
         **kwargs,
     ):
         import torchvision.transforms.functional as F
+        from PIL import Image
         all_embeds = []

mteb/models/model_implementations/colqwen_models.py CHANGED Viewed

@@ -2,7 +2,6 @@ import logging
 from typing import Any
 import torch
-from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -154,6 +153,7 @@ class ColQwen3Wrapper(AbsEncoder):
         **kwargs: Any,
     ):
         import torchvision.transforms.functional as F
+        from PIL import Image
         contains_image = "image" in image_texts_pairs.dataset.features
         contains_text = "text" in image_texts_pairs.dataset.features

mteb/models/model_implementations/gme_v_models.py CHANGED Viewed

@@ -1,9 +1,10 @@
+from __future__ import annotations
 import logging
 import math
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import torch
-from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm.autonotebook import tqdm
@@ -12,6 +13,9 @@ from mteb.models.abs_encoder import AbsEncoder
 from mteb.models.model_meta import ModelMeta, ScoringFunction
 from mteb.types import Array, BatchedInput, PromptType
+if TYPE_CHECKING:
+    from PIL import Image
 logger = logging.getLogger(__name__)
 GME_CITATION = """@misc{zhang2024gme,
@@ -267,9 +271,9 @@ def smart_resize(
     return h_bar, w_bar
-def fetch_image(
-    image: str | Image.Image, size_factor: int = IMAGE_FACTOR
-) -> Image.Image:
+def fetch_image(image: Image.Image, size_factor: int = IMAGE_FACTOR) -> Image.Image:
+    from PIL import Image
     image_obj = None
     if isinstance(image, Image.Image):
         image_obj = image

mteb/models/model_implementations/google_models.py CHANGED Viewed

@@ -272,4 +272,14 @@ embedding_gemma_300m = ModelMeta(
     training_datasets=GECKO_TRAINING_DATA,
     similarity_fn_name="cosine",
     memory_usage_mb=1155,
+    citation="""
+@misc{vera2025embeddinggemmapowerfullightweighttext,
+      title={EmbeddingGemma: Powerful and Lightweight Text Representations},
+      author={Henrique Schechter Vera and Sahil Dua and Biao Zhang and Daniel Salz and Ryan Mullins and Sindhu Raghuram Panyam and Sara Smoot and Iftekhar Naim and Joe Zou and Feiyang Chen and Daniel Cer and Alice Lisak and Min Choi and Lucas Gonzalez and Omar Sanseviero and Glenn Cameron and Ian Ballantyne and Kat Black and Kaifeng Chen and Weiyi Wang and Zhe Li and Gus Martins and Jinhyuk Lee and Mark Sherwood and Juyeong Ji and Renjie Wu and Jingxiao Zheng and Jyotinder Singh and Abheesht Sharma and Divyashree Sreepathihalli and Aashi Jain and Adham Elarabawy and AJ Co and Andreas Doumanoglou and Babak Samari and Ben Hora and Brian Potetz and Dahun Kim and Enrique Alfonseca and Fedor Moiseev and Feng Han and Frank Palma Gomez and Gustavo Hernández Ábrego and Hesen Zhang and Hui Hui and Jay Han and Karan Gill and Ke Chen and Koert Chen and Madhuri Shanbhogue and Michael Boratko and Paul Suganthan and Sai Meher Karthik Duddu and Sandeep Mariserla and Setareh Ariafar and Shanfeng Zhang and Shijie Zhang and Simon Baumgartner and Sonam Goenka and Steve Qiu and Tanmaya Dabral and Trevor Walker and Vikram Rao and Waleed Khawaja and Wenlei Zhou and Xiaoqi Ren and Ye Xia and Yichang Chen and Yi-Ting Chen and Zhe Dong and Zhongli Ding and Francesco Visin and Gaël Liu and Jiageng Zhang and Kathleen Kenealy and Michelle Casbon and Ravin Kumar and Thomas Mesnard and Zach Gleicher and Cormac Brick and Olivier Lacombe and Adam Roberts and Qin Yin and Yunhsuan Sung and Raphael Hoffmann and Tris Warkentin and Armand Joulin and Tom Duerig and Mojtaba Seyedhosseini},
+      year={2025},
+      eprint={2509.20354},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2509.20354},
+}""",
 )

mteb/models/model_implementations/granite_vision_embedding_models.py CHANGED Viewed

@@ -1,8 +1,9 @@
+from __future__ import annotations
 import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import torch
-from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
@@ -15,6 +16,9 @@ from mteb.types import Array, BatchedInput, PromptType
 logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from PIL import Image
 class GraniteVisionEmbeddingWrapper:
     def __init__(

mteb/models/model_implementations/jasper_models.py CHANGED Viewed

@@ -355,13 +355,13 @@ Jasper_Token_Compression_600M = ModelMeta(
     | qzhou_training_data,
     citation="""
 @misc{zhang2025jaspertokencompression600mtechnicalreport,
-      title={Jasper-Token-Compression-600M Technical Report},
+      title={Jasper-Token-Compression-600M Technical Report},
       author={Dun Zhang and Ziyang Zeng and Yudong Zhou and Shuyang Lu},
       year={2025},
       eprint={2511.14405},
       archivePrefix={arXiv},
       primaryClass={cs.IR},
-      url={https://arxiv.org/abs/2511.14405},
+      url={https://arxiv.org/abs/2511.14405},
 }
 """,
 )

mteb/models/model_implementations/jina_models.py CHANGED Viewed

@@ -740,7 +740,7 @@ jina_reranker_v3 = ModelMeta(
     training_datasets=JINARerankerV3_TRAINING_DATA,
     adapted_from="Qwen/Qwen3-0.6B",
     citation="""@misc{wang2025jinarerankerv3lateinteractionlistwise,
-      title={jina-reranker-v3: Last but Not Late Interaction for Listwise Document Reranking},
+      title={jina-reranker-v3: Last but Not Late Interaction for Listwise Document Reranking},
       author={Feng Wang and Yuqing Li and Han Xiao},
       year={2025},
       eprint={2509.25085},

mteb/models/model_implementations/mod_models.py ADDED Viewed

@@ -0,0 +1,204 @@
+from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ModelMeta
+from mteb.models.models_protocols import EncoderProtocol, PromptType
+def instruction_template(
+    instruction: str, prompt_type: PromptType | None = None
+) -> str:
+    if not instruction or prompt_type == PromptType.document:
+        return ""
+    if isinstance(instruction, dict):
+        if prompt_type is None:
+            instruction = next(iter(instruction.values()))  # TODO
+        else:
+            instruction = instruction[prompt_type]
+    return f"Instruct: {instruction}\nQuery:"
+multilingual_langs = [
+    "afr-Latn",
+    "ara-Arab",
+    "aze-Latn",
+    "bel-Cyrl",
+    "bul-Cyrl",
+    "ben-Beng",
+    "cat-Latn",
+    "ceb-Latn",
+    "ces-Latn",
+    "cym-Latn",
+    "dan-Latn",
+    "deu-Latn",
+    "ell-Grek",
+    "eng-Latn",
+    "spa-Latn",
+    "est-Latn",
+    "eus-Latn",
+    "fas-Arab",
+    "fin-Latn",
+    "fra-Latn",
+    "glg-Latn",
+    "guj-Gujr",
+    "heb-Hebr",
+    "hin-Deva",
+    "hrv-Latn",
+    "hat-Latn",
+    "hun-Latn",
+    "hye-Armn",
+    "ind-Latn",
+    "isl-Latn",
+    "ita-Latn",
+    "jpn-Jpan",
+    "jav-Latn",
+    "kat-Geor",
+    "kaz-Cyrl",
+    "khm-Khmr",
+    "kan-Knda",
+    "kor-Hang",
+    "kir-Cyrl",
+    "lao-Laoo",
+    "lit-Latn",
+    "lav-Latn",
+    "mkd-Cyrl",
+    "mal-Mlym",
+    "mon-Cyrl",
+    "mar-Deva",
+    "msa-Latn",
+    "mya-Mymr",
+    "nep-Deva",
+    "nld-Latn",
+    "nor-Latn",
+    "nob-Latn",
+    "nno-Latn",
+    "pan-Guru",
+    "pol-Latn",
+    "por-Latn",
+    "que-Latn",
+    "ron-Latn",
+    "rus-Cyrl",
+    "sin-Sinh",
+    "slk-Latn",
+    "slv-Latn",
+    "swa-Latn",
+    "tam-Taml",
+    "tel-Telu",
+    "tha-Thai",
+    "tgl-Latn",
+    "tur-Latn",
+    "ukr-Cyrl",
+    "urd-Arab",
+    "vie-Latn",
+    "yor-Latn",
+    "zho-Hans",
+]
+MOD_CITATION = """@misc{mod-embedding-2025,
+  title={MoD-Embedding: A Fine-tuned Multilingual Text Embedding Model},
+  author={MoD Team},
+  year={2025},
+  url={https://huggingface.co/bflhc/MoD-Embedding}
+}"""
+training_data = {
+    "T2Retrieval",
+    "DuRetrieval",
+    "MMarcoReranking",
+    "CMedQAv2-reranking",
+    "NQ",
+    "MSMARCO",
+    "HotpotQA",
+    "FEVER",
+    "MrTidyRetrieval",
+    "MIRACLRetrieval",
+    "CodeSearchNet",
+}
+# Predefined prompts for various RTEB tasks
+PREDEFINED_PROMPTS = {
+    # ========== Open Datasets ==========
+    # Legal domain
+    "AILACasedocs": "Given a legal case scenario, retrieve the most relevant case documents",
+    "AILAStatutes": "Given a legal scenario, retrieve the most relevant statute documents",
+    "LegalQuAD": "Given a legal question, retrieve relevant legal documents that answer the question",
+    "LegalSummarization": "Given a query, retrieve relevant legal documents for summarization",
+    # Code domain
+    "AppsRetrieval": "Given a query about mobile applications, retrieve relevant app information",
+    "HumanEvalRetrieval": "Given a code problem description, retrieve relevant code examples",
+    "MBPPRetrieval": "Given a programming problem description, retrieve relevant code solutions",
+    "DS1000Retrieval": "Given a data science problem, retrieve relevant code snippets",
+    "FreshStackRetrieval": "Given a programming question, retrieve relevant Stack Overflow posts",
+    # Finance domain
+    "FinQARetrieval": "Given a financial question, retrieve relevant financial documents",
+    "FinanceBenchRetrieval": "Given a financial query, retrieve relevant financial information",
+    "HC3FinanceRetrieval": "Given a finance-related query, retrieve relevant documents",
+    # Medical domain
+    "CUREv1": "Given a medical query, retrieve relevant clinical documents",
+    "ChatDoctorRetrieval": "Given a medical question, retrieve relevant medical information",
+    # SQL domain
+    "WikiSQLRetrieval": "Given a natural language query, retrieve relevant SQL examples",
+    # Multilingual
+    "MIRACLRetrievalHardNegatives": "Given a query, retrieve relevant passages",
+    # ========== Private/Closed Datasets ==========
+    # Code domain (Private)
+    "Code1Retrieval": "Given a code problem description, retrieve relevant code examples",
+    "JapaneseCode1Retrieval": "Given a code problem description, retrieve relevant code examples",
+    # Finance domain (Private)
+    "EnglishFinance1Retrieval": "Given a financial query, retrieve relevant financial documents",
+    "EnglishFinance2Retrieval": "Given a financial query, retrieve relevant financial documents",
+    "EnglishFinance3Retrieval": "Given a financial query, retrieve relevant financial documents",
+    "EnglishFinance4Retrieval": "Given a financial query, retrieve relevant financial documents",
+    # Healthcare domain (Private)
+    "EnglishHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
+    "GermanHealthcare1Retrieval": "Given a medical question, retrieve relevant medical information",
+    # Legal domain (Private)
+    "FrenchLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
+    "GermanLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
+    "JapaneseLegal1Retrieval": "Given a legal query, retrieve relevant legal documents",
+    # General/Multilingual (Private)
+    "French1Retrieval": "Given a query, retrieve relevant passages",
+    "German1Retrieval": "Given a query, retrieve relevant passages",
+}
+def mod_instruct_loader(
+    model_name_or_path: str, revision: str, **kwargs
+) -> EncoderProtocol:
+    # Set default prompts_dict if not provided
+    model = InstructSentenceTransformerModel(
+        model_name_or_path,
+        revision=revision,
+        instruction_template=instruction_template,
+        apply_instruction_to_passages=False,
+        prompt_dicts=PREDEFINED_PROMPTS,
+        **kwargs,
+    )
+    encoder = model.model._first_module()
+    if encoder.auto_model.config._attn_implementation == "flash_attention_2":
+        # The Qwen3 code only use left padding in flash_attention_2 mode.
+        encoder.tokenizer.padding_side = "left"
+    return model
+MoD_Embedding = ModelMeta(
+    loader=mod_instruct_loader,
+    name="bflhc/MoD-Embedding",
+    languages=multilingual_langs,
+    open_weights=True,
+    revision="acbb5b70fdab262226a6af2bc62001de8021b05c",
+    release_date="2025-12-14",
+    n_parameters=4021774336,
+    memory_usage_mb=7671,
+    embed_dim=2560,
+    max_tokens=32768,
+    license="apache-2.0",
+    reference="https://huggingface.co/bflhc/MoD-Embedding",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=training_data,
+    citation=MOD_CITATION,
+    adapted_from="Qwen/Qwen3-Embedding-4B",
+)

mteb 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl

mteb 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl