PyPI - mteb - Versions diffs - 2.7.18__py3-none-any.whl → 2.7.20__py3-none-any.whl - Mend

mteb 2.7.18__py3-none-any.whl → 2.7.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

mteb/models/model_implementations/ict_time_and_querit_models.py ADDED Viewed

@@ -0,0 +1,115 @@
+from __future__ import annotations
+from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ModelMeta
+from mteb.types import PromptType
+def instruction_template(
+    instruction: str | dict, prompt_type: PromptType | None = None
+) -> str:
+    """Format instruction for the model."""
+    if isinstance(instruction, dict):
+        instruction = instruction.get(prompt_type.value if prompt_type else "", "")
+    elif prompt_type == PromptType.document:
+        return ""
+    if not instruction:
+        return ""
+    return f"Instruct: {instruction}\nQuery:"
+multilingual_langs = [
+    "deu-Latn",
+    "ita-Latn",
+    "ara-Arab",
+    "fas-Arab",
+    "fra-Latn",
+    "hin-Deva",
+    "spa-Latn",
+    "zho-Hans",
+    "ben-Beng",
+    "eng-Latn",
+    "fin-Latn",
+    "ind-Latn",
+    "jpn-Jpan",
+    "kor-Hang",
+    "rus-Cyrl",
+    "swh-Latn",
+    "tel-Telu",
+    "tha-Thai",
+]
+training_data = [
+    "FEVER",
+    "DuRetrieval",
+    "HotpotQA",
+    "MSMARCO",
+    "T2Retrieval",
+    "NQ",
+    "MIRACLRetrieval",
+    "MrTidyRetrieval",
+    "AmazonCounterfactualClassification",
+    "Banking77Classification",
+    "ImdbClassification",
+    "MTOPDomainClassification",
+    "ToxicConversationsClassification",
+    "TweetSentimentExtractionClassification",
+]
+boom_4b_instructions = {
+    "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual.",
+    "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment.",
+    "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category.",
+    "Banking77Classification": "Given a online banking query, find the corresponding intents.",
+    "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise.",
+    "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset.",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents.",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios.",
+    "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation.",
+    "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation.",
+    "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic.",
+    "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
+    "TNews": "Classify the fine-grained category of the given news title.",
+    "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim.",
+    "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim.",
+    "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia.",
+    "FEVER": "Given a claim, retrieve documents that support or refute the claim.",
+    "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim.",
+    "FiQA2018": "Given a financial question, retrieve user replies that best answer the question.",
+    "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question.",
+    "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question.",
+    "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query.",
+    "NFCorpus": "Given a question, retrieve relevant documents that best answer the question.",
+    "NQ": "Given a question, retrieve Wikipedia passages that answer the question.",
+}
+# How the template actually renders each one at inference time:
+#   instruction_template(boom_4b_instructions["Banking77Classification"], PromptType.query)
+#   -> "Instruct: Given a online banking query, find the corresponding intents.\nQuery:"
+boom_4b_v1 = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=dict(
+        instruction_template=instruction_template,
+    ),
+    name="ICT-TIME-and-Querit/BOOM_4B_v1",
+    model_type=["dense"],
+    languages=multilingual_langs,
+    open_weights=True,
+    adapted_from="Qwen/Qwen3-4B",
+    revision="447ab88574d27e67c428acc2b429d7d4580a4ea7",
+    release_date="2026-01-31",
+    n_parameters=4021774336,
+    n_embedding_parameters=None,
+    memory_usage_mb=7671,
+    embed_dim=2560,
+    max_tokens=32768,
+    license="apache-2.0",
+    reference="https://huggingface.co/ICT-TIME-and-Querit/BOOM_4B_v1",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=training_data,
+)

mteb/models/model_implementations/ops_colqwen3_models.py CHANGED Viewed

@@ -26,6 +26,7 @@ class OpsColQwen3Wrapper(AbsEncoder):
         revision: str | None = None,
         device: str | None = None,
         attn_implementation: str | None = None,
+        trust_remote_code: bool = True,
         **kwargs,
     ):
         requires_image_dependencies()
@@ -42,15 +43,15 @@ class OpsColQwen3Wrapper(AbsEncoder):
             model_name,
             device_map=self.device,
             attn_implementation=attn_implementation,
-            trust_remote_code=True,
             revision=revision,
+            trust_remote_code=trust_remote_code,
             **kwargs,
         )
         self.mdl.eval()
         self.processor = AutoProcessor.from_pretrained(
             model_name,
-            trust_remote_code=True,
+            trust_remote_code=trust_remote_code,
         )
     def encode(

mteb/types/_encoder_io.py CHANGED Viewed

@@ -27,7 +27,7 @@ class EncodeKwargs(TypedDict):
 # --- Output types ---
-Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
+Array = NDArray[np.floating | np.integer | np.bool_] | torch.Tensor
 """General array type, can be a numpy array (float, int, or bool) or a torch tensor."""

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.18
+Version: 2.7.20
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/RECORD RENAMED Viewed

@@ -1539,6 +1539,7 @@ mteb/models/model_implementations/gte_models.py,sha256=-ASkoAuAiVytVtsYMtuKonUf3
 mteb/models/model_implementations/hinvec_models.py,sha256=SYWGFr8XALmM7B9tIHEQnrqq9kZOZIBkW7m7QpzerHI,1756
 mteb/models/model_implementations/human.py,sha256=k7vN6WTcSWyWS9wnluzr6yCOjuMi5LupQnT-4cfzNOk,600
 mteb/models/model_implementations/ibm_granite_models.py,sha256=ipLRDBerTQiL5NaoaDho410Fzy7eNFlF3jB54hGZrwI,8687
+mteb/models/model_implementations/ict_time_and_querit_models.py,sha256=2tR3tLruumZwr5gpyFqott4nznftdOBFOV47-iEl3oI,4993
 mteb/models/model_implementations/inf_models.py,sha256=q_hNNhzMjAxbnJnAT0N6KaNegX_3XZlmz-LXY5C891I,3093
 mteb/models/model_implementations/jasper_models.py,sha256=ourAMx1_L6b2AxX046wQcxDqvYzY1Mx3gaHww0WaMA8,16476
 mteb/models/model_implementations/jina_clip.py,sha256=OF-aC5L8V57-kLdgqyo74S80_q0pxEvc5cyH26Mtwbk,6711
@@ -1573,7 +1574,7 @@ mteb/models/model_implementations/octen_models.py,sha256=5z-t2O-iIFiOOLdZ_AK9f7G
 mteb/models/model_implementations/openai_models.py,sha256=fE8SfSAcl20GccR8D8s-7MR9w_kO6LlN5Pm80Iwx82c,9777
 mteb/models/model_implementations/openclip_models.py,sha256=z2gQum16O0QhJPyxqKor3oO-_uWfnep6wSXqOFQQ2Q8,11969
 mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=J5FEvKWQUiBusL6PHcrRuRRJOQ-iMwOSu1fX0pblXhk,8941
-mteb/models/model_implementations/ops_colqwen3_models.py,sha256=5vg5d1_WfVGMgtIwkh6zf2-Paum6V35XcKEvLfRyRzs,7437
+mteb/models/model_implementations/ops_colqwen3_models.py,sha256=tqQ9MZbUAygeeclliYFwxvclAt2OwATYRqs3taSkK2U,7503
 mteb/models/model_implementations/ops_moa_models.py,sha256=Ah7L78mqC9pH8t6sf1OWXOLjouVUpAutt6lZ0np7eMM,2655
 mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=xv1ftJeMND4lpeKYC3RLQB4nhdiYy0wCxrzEjUj4gSg,1114
 mteb/models/model_implementations/pawan_models.py,sha256=iyzh6NSPZKU9znJYEDPjJNIqvkyuKPAol5TcILuq1Is,1225
@@ -2641,14 +2642,14 @@ mteb/tasks/zeroshot_classification/eng/sun397.py,sha256=Nls7tXM2Svu008MmAUjt-o_N
 mteb/tasks/zeroshot_classification/eng/ucf101.py,sha256=kwNRYks-_Oe4VE3GyoHIvN-2OJ6zhkwFr76WDNL9ymU,1884
 mteb/tasks/zeroshot_classification/eng/templates/__init__.py,sha256=da1PTClDMl-IBkrSvq6JC1lnS-K_BASzCvxVhNxN5Ls,13
 mteb/types/__init__.py,sha256=O26vXPolPReX7iVUBgUsyCkCo4w8KeLs7uueQDWp3fc,1142
-mteb/types/_encoder_io.py,sha256=V7m_t7ZXm3COJ4SoHP8bcr23WgjFBRCGa9AIaqAX8v4,5939
+mteb/types/_encoder_io.py,sha256=vdIv0_HR1PnJXLPyM4kHmpocT3DvHIbcZP1ue7aU10c,5940
 mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
-mteb-2.7.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.7.18.dist-info/METADATA,sha256=wIhLzuH5ewCPsURRgO5-DyaJUE6UwGLoZuUfRPBQQzg,14348
-mteb-2.7.18.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-mteb-2.7.18.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.7.18.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.7.18.dist-info/RECORD,,
+mteb-2.7.20.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.7.20.dist-info/METADATA,sha256=TlZtKy_JecJva-vrjFYjqLen3vuDp3zWw-RfSvuwAFI,14348
+mteb-2.7.20.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mteb-2.7.20.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.7.20.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.7.20.dist-info/RECORD,,

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/WHEEL RENAMED Viewed

File without changes

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{mteb-2.7.18.dist-info → mteb-2.7.20.dist-info}/top_level.txt RENAMED Viewed

File without changes

mteb 2.7.18__py3-none-any.whl → 2.7.20__py3-none-any.whl

mteb 2.7.18__py3-none-any.whl → 2.7.20__py3-none-any.whl