mteb 2.7.18__py3-none-any.whl → 2.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/models/model_implementations/ict_time_and_querit_models.py +115 -0
- mteb/types/_encoder_io.py +1 -1
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/METADATA +1 -1
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/RECORD +8 -7
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/WHEEL +0 -0
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.18.dist-info → mteb-2.7.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
|
|
4
|
+
from mteb.models.model_meta import ModelMeta
|
|
5
|
+
from mteb.types import PromptType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def instruction_template(
    instruction: str | dict, prompt_type: PromptType | None = None
) -> str:
    """Build the instruction prefix used when encoding a text.

    Args:
        instruction: Either the instruction text itself, or a dict from which
            the instruction is looked up by ``prompt_type.value``. When
            ``prompt_type`` is falsy the empty string is used as lookup key.
        prompt_type: Prompt type of the input being encoded, if any.

    Returns:
        ``"Instruct: <instruction>\\nQuery:"``, or ``""`` when no instruction
        applies: the resolved instruction is empty/missing, or a non-dict
        instruction is combined with ``PromptType.document``.
    """
    if isinstance(instruction, dict):
        # Dict form: resolve the text for this prompt type; a missing key
        # yields "" and therefore an empty prefix.
        lookup_key = "" if not prompt_type else prompt_type.value
        resolved = instruction.get(lookup_key, "")
    elif prompt_type == PromptType.document:
        # Non-dict instructions are never applied to documents.
        return ""
    else:
        resolved = instruction

    return f"Instruct: {resolved}\nQuery:" if resolved else ""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Language-script codes (e.g. "eng-Latn") passed as `languages` to the
# model metadata defined in this module.
multilingual_langs = [
    "deu-Latn", "ita-Latn", "ara-Arab", "fas-Arab", "fra-Latn", "hin-Deva",
    "spa-Latn", "zho-Hans", "ben-Beng", "eng-Latn", "fin-Latn", "ind-Latn",
    "jpn-Jpan", "kor-Hang", "rus-Cyrl", "swh-Latn", "tel-Telu", "tha-Thai",
]
|
|
42
|
+
|
|
43
|
+
# Task names passed as `training_datasets` to the model metadata defined in
# this module. Order is kept exactly as declared.
training_data = [
    # Retrieval-style tasks
    "FEVER",
    "DuRetrieval",
    "HotpotQA",
    "MSMARCO",
    "T2Retrieval",
    "NQ",
    "MIRACLRetrieval",
    "MrTidyRetrieval",
    # Classification tasks
    "AmazonCounterfactualClassification",
    "Banking77Classification",
    "ImdbClassification",
    "MTOPDomainClassification",
    "ToxicConversationsClassification",
    "TweetSentimentExtractionClassification",
]
|
|
59
|
+
|
|
60
|
+
# Instruction text per task name. Long values are wrapped with implicit
# string-literal concatenation; the resulting strings are unchanged.
# NOTE(review): nothing in this module passes this dict to the loader of
# `boom_4b_v1` -- confirm where (or whether) it is consumed.
boom_4b_instructions = {
    # --- Classification ---
    "AmazonCounterfactualClassification": (
        "Classify a given Amazon customer review text as either "
        "counterfactual or not-counterfactual."
    ),
    "AmazonPolarityClassification": (
        "Classify Amazon reviews into positive or negative sentiment."
    ),
    "AmazonReviewsClassification": (
        "Classify the given Amazon review into its appropriate rating category."
    ),
    "Banking77Classification": (
        "Given a online banking query, find the corresponding intents."
    ),
    "EmotionClassification": (
        "Classify the emotion expressed in the given Twitter message into "
        "one of the six emotions: anger, fear, joy, love, sadness, and surprise."
    ),
    "ImdbClassification": (
        "Classify the sentiment expressed in the given movie review text "
        "from the IMDB dataset."
    ),
    "MassiveIntentClassification": (
        "Given a user utterance as query, find the user intents."
    ),
    "MassiveScenarioClassification": (
        "Given a user utterance as query, find the user scenarios."
    ),
    "MTOPDomainClassification": (
        "Classify the intent domain of the given utterance in "
        "task-oriented conversation."
    ),
    "MTOPIntentClassification": (
        "Classify the intent of the given utterance in "
        "task-oriented conversation."
    ),
    "ToxicConversationsClassification": (
        "Classify the given comments as either toxic or not toxic."
    ),
    "TweetSentimentExtractionClassification": (
        "Classify the sentiment of a given tweet as either positive, "
        "negative, or neutral."
    ),
    "TNews": (
        "Classify the fine-grained category of the given news title."
    ),
    # --- Retrieval ---
    "ClimateFEVER": (
        "Given a claim about climate change, retrieve documents that "
        "support or refute the claim."
    ),
    "ClimateFEVERHardNegatives": (
        "Given a claim about climate change, retrieve documents that "
        "support or refute the claim."
    ),
    "DBPedia": (
        "Given a query, retrieve relevant entity descriptions from DBPedia."
    ),
    "FEVER": (
        "Given a claim, retrieve documents that support or refute the claim."
    ),
    "FEVERHardNegatives": (
        "Given a claim, retrieve documents that support or refute the claim."
    ),
    "FiQA2018": (
        "Given a financial question, retrieve user replies that best "
        "answer the question."
    ),
    "HotpotQA": (
        "Given a multi-hop question, retrieve documents that can help "
        "answer the question."
    ),
    "HotpotQAHardNegatives": (
        "Given a multi-hop question, retrieve documents that can help "
        "answer the question."
    ),
    "MSMARCO": (
        "Given a web search query, retrieve relevant passages that "
        "answer the query."
    ),
    "NFCorpus": (
        "Given a question, retrieve relevant documents that best "
        "answer the question."
    ),
    "NQ": (
        "Given a question, retrieve Wikipedia passages that answer the question."
    ),
}
|
|
86
|
+
# How the template actually renders each one at inference time:
|
|
87
|
+
# instruction_template(boom_4b_instructions["Banking77Classification"], PromptType.query)
|
|
88
|
+
# -> "Instruct: Given a online banking query, find the corresponding intents.\nQuery:"
|
|
89
|
+
|
|
90
|
+
# Metadata entry for the ICT-TIME-and-Querit/BOOM_4B_v1 model.
# NOTE(review): only `instruction_template` is forwarded to the loader; the
# `boom_4b_instructions` dict defined in this module is not passed here --
# confirm that this is intended.
boom_4b_v1 = ModelMeta(
    # Identity and provenance
    name="ICT-TIME-and-Querit/BOOM_4B_v1",
    revision="447ab88574d27e67c428acc2b429d7d4580a4ea7",
    release_date="2026-01-31",
    reference="https://huggingface.co/ICT-TIME-and-Querit/BOOM_4B_v1",
    adapted_from="Qwen/Qwen3-4B",
    license="apache-2.0",
    open_weights=True,
    # Loading
    loader=InstructSentenceTransformerModel,
    loader_kwargs={"instruction_template": instruction_template},
    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
    use_instructions=True,
    # Model characteristics
    model_type=["dense"],
    languages=multilingual_langs,
    n_parameters=4021774336,
    n_embedding_parameters=None,
    memory_usage_mb=7671,
    embed_dim=2560,
    max_tokens=32768,
    similarity_fn_name="cosine",
    # Training disclosure
    public_training_code=None,
    public_training_data=None,
    training_datasets=training_data,
)
|
mteb/types/_encoder_io.py
CHANGED
|
@@ -27,7 +27,7 @@ class EncodeKwargs(TypedDict):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
# --- Output types ---
|
|
30
|
-
Array = NDArray[np.floating | np.integer | np.
|
|
30
|
+
Array = NDArray[np.floating | np.integer | np.bool_] | torch.Tensor
|
|
31
31
|
"""General array type, can be a numpy array (float, int, or bool) or a torch tensor."""
|
|
32
32
|
|
|
33
33
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.7.18
|
|
3
|
+
Version: 2.7.19
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -1539,6 +1539,7 @@ mteb/models/model_implementations/gte_models.py,sha256=-ASkoAuAiVytVtsYMtuKonUf3
|
|
|
1539
1539
|
mteb/models/model_implementations/hinvec_models.py,sha256=SYWGFr8XALmM7B9tIHEQnrqq9kZOZIBkW7m7QpzerHI,1756
|
|
1540
1540
|
mteb/models/model_implementations/human.py,sha256=k7vN6WTcSWyWS9wnluzr6yCOjuMi5LupQnT-4cfzNOk,600
|
|
1541
1541
|
mteb/models/model_implementations/ibm_granite_models.py,sha256=ipLRDBerTQiL5NaoaDho410Fzy7eNFlF3jB54hGZrwI,8687
|
|
1542
|
+
mteb/models/model_implementations/ict_time_and_querit_models.py,sha256=2tR3tLruumZwr5gpyFqott4nznftdOBFOV47-iEl3oI,4993
|
|
1542
1543
|
mteb/models/model_implementations/inf_models.py,sha256=q_hNNhzMjAxbnJnAT0N6KaNegX_3XZlmz-LXY5C891I,3093
|
|
1543
1544
|
mteb/models/model_implementations/jasper_models.py,sha256=ourAMx1_L6b2AxX046wQcxDqvYzY1Mx3gaHww0WaMA8,16476
|
|
1544
1545
|
mteb/models/model_implementations/jina_clip.py,sha256=OF-aC5L8V57-kLdgqyo74S80_q0pxEvc5cyH26Mtwbk,6711
|
|
@@ -2641,14 +2642,14 @@ mteb/tasks/zeroshot_classification/eng/sun397.py,sha256=Nls7tXM2Svu008MmAUjt-o_N
|
|
|
2641
2642
|
mteb/tasks/zeroshot_classification/eng/ucf101.py,sha256=kwNRYks-_Oe4VE3GyoHIvN-2OJ6zhkwFr76WDNL9ymU,1884
|
|
2642
2643
|
mteb/tasks/zeroshot_classification/eng/templates/__init__.py,sha256=da1PTClDMl-IBkrSvq6JC1lnS-K_BASzCvxVhNxN5Ls,13
|
|
2643
2644
|
mteb/types/__init__.py,sha256=O26vXPolPReX7iVUBgUsyCkCo4w8KeLs7uueQDWp3fc,1142
|
|
2644
|
-
mteb/types/_encoder_io.py,sha256=
|
|
2645
|
+
mteb/types/_encoder_io.py,sha256=vdIv0_HR1PnJXLPyM4kHmpocT3DvHIbcZP1ue7aU10c,5940
|
|
2645
2646
|
mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
2646
2647
|
mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
|
|
2647
2648
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2648
2649
|
mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
|
|
2649
|
-
mteb-2.7.
|
|
2650
|
-
mteb-2.7.
|
|
2651
|
-
mteb-2.7.
|
|
2652
|
-
mteb-2.7.
|
|
2653
|
-
mteb-2.7.
|
|
2654
|
-
mteb-2.7.
|
|
2650
|
+
mteb-2.7.19.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2651
|
+
mteb-2.7.19.dist-info/METADATA,sha256=y3sFllzuYQsMdkp6mwS6f6bCkQH4hibXb44oEMMCQsY,14348
|
|
2652
|
+
mteb-2.7.19.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
2653
|
+
mteb-2.7.19.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2654
|
+
mteb-2.7.19.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2655
|
+
mteb-2.7.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|