mteb 2.7.18__py3-none-any.whl → 2.7.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+
3
+ from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
4
+ from mteb.models.model_meta import ModelMeta
5
+ from mteb.types import PromptType
6
+
7
+
8
+ def instruction_template(
9
+ instruction: str | dict, prompt_type: PromptType | None = None
10
+ ) -> str:
11
+ """Format instruction for the model."""
12
+ if isinstance(instruction, dict):
13
+ instruction = instruction.get(prompt_type.value if prompt_type else "", "")
14
+ elif prompt_type == PromptType.document:
15
+ return ""
16
+
17
+ if not instruction:
18
+ return ""
19
+ return f"Instruct: {instruction}\nQuery:"
20
+
21
+
22
+ multilingual_langs = [
23
+ "deu-Latn",
24
+ "ita-Latn",
25
+ "ara-Arab",
26
+ "fas-Arab",
27
+ "fra-Latn",
28
+ "hin-Deva",
29
+ "spa-Latn",
30
+ "zho-Hans",
31
+ "ben-Beng",
32
+ "eng-Latn",
33
+ "fin-Latn",
34
+ "ind-Latn",
35
+ "jpn-Jpan",
36
+ "kor-Hang",
37
+ "rus-Cyrl",
38
+ "swh-Latn",
39
+ "tel-Telu",
40
+ "tha-Thai",
41
+ ]
42
+
43
+ training_data = [
44
+ "FEVER",
45
+ "DuRetrieval",
46
+ "HotpotQA",
47
+ "MSMARCO",
48
+ "T2Retrieval",
49
+ "NQ",
50
+ "MIRACLRetrieval",
51
+ "MrTidyRetrieval",
52
+ "AmazonCounterfactualClassification",
53
+ "Banking77Classification",
54
+ "ImdbClassification",
55
+ "MTOPDomainClassification",
56
+ "ToxicConversationsClassification",
57
+ "TweetSentimentExtractionClassification",
58
+ ]
59
+
60
+ boom_4b_instructions = {
61
+ "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual.",
62
+ "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment.",
63
+ "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category.",
64
+ "Banking77Classification": "Given a online banking query, find the corresponding intents.",
65
+ "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise.",
66
+ "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset.",
67
+ "MassiveIntentClassification": "Given a user utterance as query, find the user intents.",
68
+ "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios.",
69
+ "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation.",
70
+ "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation.",
71
+ "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic.",
72
+ "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
73
+ "TNews": "Classify the fine-grained category of the given news title.",
74
+ "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim.",
75
+ "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim.",
76
+ "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia.",
77
+ "FEVER": "Given a claim, retrieve documents that support or refute the claim.",
78
+ "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim.",
79
+ "FiQA2018": "Given a financial question, retrieve user replies that best answer the question.",
80
+ "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question.",
81
+ "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question.",
82
+ "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query.",
83
+ "NFCorpus": "Given a question, retrieve relevant documents that best answer the question.",
84
+ "NQ": "Given a question, retrieve Wikipedia passages that answer the question.",
85
+ }
86
+ # Example: what instruction_template returns for one of these instruction strings with a query prompt type:
87
+ # instruction_template(boom_4b_instructions["Banking77Classification"], PromptType.query)
88
+ # -> "Instruct: Given a online banking query, find the corresponding intents.\nQuery:"
89
+
90
+ boom_4b_v1 = ModelMeta(
91
+ loader=InstructSentenceTransformerModel,
92
+ loader_kwargs=dict(
93
+ instruction_template=instruction_template,
94
+ ),
95
+ name="ICT-TIME-and-Querit/BOOM_4B_v1",
96
+ model_type=["dense"],
97
+ languages=multilingual_langs,
98
+ open_weights=True,
99
+ adapted_from="Qwen/Qwen3-4B",
100
+ revision="447ab88574d27e67c428acc2b429d7d4580a4ea7",
101
+ release_date="2026-01-31",
102
+ n_parameters=4021774336,
103
+ n_embedding_parameters=None,
104
+ memory_usage_mb=7671,
105
+ embed_dim=2560,
106
+ max_tokens=32768,
107
+ license="apache-2.0",
108
+ reference="https://huggingface.co/ICT-TIME-and-Querit/BOOM_4B_v1",
109
+ similarity_fn_name="cosine",
110
+ framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
111
+ use_instructions=True,
112
+ public_training_code=None,
113
+ public_training_data=None,
114
+ training_datasets=training_data,
115
+ )
@@ -26,6 +26,7 @@ class OpsColQwen3Wrapper(AbsEncoder):
26
26
  revision: str | None = None,
27
27
  device: str | None = None,
28
28
  attn_implementation: str | None = None,
29
+ trust_remote_code: bool = True,
29
30
  **kwargs,
30
31
  ):
31
32
  requires_image_dependencies()
@@ -42,15 +43,15 @@ class OpsColQwen3Wrapper(AbsEncoder):
42
43
  model_name,
43
44
  device_map=self.device,
44
45
  attn_implementation=attn_implementation,
45
- trust_remote_code=True,
46
46
  revision=revision,
47
+ trust_remote_code=trust_remote_code,
47
48
  **kwargs,
48
49
  )
49
50
  self.mdl.eval()
50
51
 
51
52
  self.processor = AutoProcessor.from_pretrained(
52
53
  model_name,
53
- trust_remote_code=True,
54
+ trust_remote_code=trust_remote_code,
54
55
  )
55
56
 
56
57
  def encode(
mteb/types/_encoder_io.py CHANGED
@@ -27,7 +27,7 @@ class EncodeKwargs(TypedDict):
27
27
 
28
28
 
29
29
  # --- Output types ---
30
- Array = NDArray[np.floating | np.integer | np.bool] | torch.Tensor
30
+ Array = NDArray[np.floating | np.integer | np.bool_] | torch.Tensor
31
31
  """General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
32
32
 
33
33
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.7.18
3
+ Version: 2.7.20
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -1539,6 +1539,7 @@ mteb/models/model_implementations/gte_models.py,sha256=-ASkoAuAiVytVtsYMtuKonUf3
1539
1539
  mteb/models/model_implementations/hinvec_models.py,sha256=SYWGFr8XALmM7B9tIHEQnrqq9kZOZIBkW7m7QpzerHI,1756
1540
1540
  mteb/models/model_implementations/human.py,sha256=k7vN6WTcSWyWS9wnluzr6yCOjuMi5LupQnT-4cfzNOk,600
1541
1541
  mteb/models/model_implementations/ibm_granite_models.py,sha256=ipLRDBerTQiL5NaoaDho410Fzy7eNFlF3jB54hGZrwI,8687
1542
+ mteb/models/model_implementations/ict_time_and_querit_models.py,sha256=2tR3tLruumZwr5gpyFqott4nznftdOBFOV47-iEl3oI,4993
1542
1543
  mteb/models/model_implementations/inf_models.py,sha256=q_hNNhzMjAxbnJnAT0N6KaNegX_3XZlmz-LXY5C891I,3093
1543
1544
  mteb/models/model_implementations/jasper_models.py,sha256=ourAMx1_L6b2AxX046wQcxDqvYzY1Mx3gaHww0WaMA8,16476
1544
1545
  mteb/models/model_implementations/jina_clip.py,sha256=OF-aC5L8V57-kLdgqyo74S80_q0pxEvc5cyH26Mtwbk,6711
@@ -1573,7 +1574,7 @@ mteb/models/model_implementations/octen_models.py,sha256=5z-t2O-iIFiOOLdZ_AK9f7G
1573
1574
  mteb/models/model_implementations/openai_models.py,sha256=fE8SfSAcl20GccR8D8s-7MR9w_kO6LlN5Pm80Iwx82c,9777
1574
1575
  mteb/models/model_implementations/openclip_models.py,sha256=z2gQum16O0QhJPyxqKor3oO-_uWfnep6wSXqOFQQ2Q8,11969
1575
1576
  mteb/models/model_implementations/opensearch_neural_sparse_models.py,sha256=J5FEvKWQUiBusL6PHcrRuRRJOQ-iMwOSu1fX0pblXhk,8941
1576
- mteb/models/model_implementations/ops_colqwen3_models.py,sha256=5vg5d1_WfVGMgtIwkh6zf2-Paum6V35XcKEvLfRyRzs,7437
1577
+ mteb/models/model_implementations/ops_colqwen3_models.py,sha256=tqQ9MZbUAygeeclliYFwxvclAt2OwATYRqs3taSkK2U,7503
1577
1578
  mteb/models/model_implementations/ops_moa_models.py,sha256=Ah7L78mqC9pH8t6sf1OWXOLjouVUpAutt6lZ0np7eMM,2655
1578
1579
  mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py,sha256=xv1ftJeMND4lpeKYC3RLQB4nhdiYy0wCxrzEjUj4gSg,1114
1579
1580
  mteb/models/model_implementations/pawan_models.py,sha256=iyzh6NSPZKU9znJYEDPjJNIqvkyuKPAol5TcILuq1Is,1225
@@ -2641,14 +2642,14 @@ mteb/tasks/zeroshot_classification/eng/sun397.py,sha256=Nls7tXM2Svu008MmAUjt-o_N
2641
2642
  mteb/tasks/zeroshot_classification/eng/ucf101.py,sha256=kwNRYks-_Oe4VE3GyoHIvN-2OJ6zhkwFr76WDNL9ymU,1884
2642
2643
  mteb/tasks/zeroshot_classification/eng/templates/__init__.py,sha256=da1PTClDMl-IBkrSvq6JC1lnS-K_BASzCvxVhNxN5Ls,13
2643
2644
  mteb/types/__init__.py,sha256=O26vXPolPReX7iVUBgUsyCkCo4w8KeLs7uueQDWp3fc,1142
2644
- mteb/types/_encoder_io.py,sha256=V7m_t7ZXm3COJ4SoHP8bcr23WgjFBRCGa9AIaqAX8v4,5939
2645
+ mteb/types/_encoder_io.py,sha256=vdIv0_HR1PnJXLPyM4kHmpocT3DvHIbcZP1ue7aU10c,5940
2645
2646
  mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2646
2647
  mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
2647
2648
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2648
2649
  mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
2649
- mteb-2.7.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2650
- mteb-2.7.18.dist-info/METADATA,sha256=wIhLzuH5ewCPsURRgO5-DyaJUE6UwGLoZuUfRPBQQzg,14348
2651
- mteb-2.7.18.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
2652
- mteb-2.7.18.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2653
- mteb-2.7.18.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2654
- mteb-2.7.18.dist-info/RECORD,,
2650
+ mteb-2.7.20.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2651
+ mteb-2.7.20.dist-info/METADATA,sha256=TlZtKy_JecJva-vrjFYjqLen3vuDp3zWw-RfSvuwAFI,14348
2652
+ mteb-2.7.20.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
2653
+ mteb-2.7.20.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2654
+ mteb-2.7.20.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2655
+ mteb-2.7.20.dist-info/RECORD,,
File without changes