mteb 2.3.4__py3-none-any.whl → 2.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
12
12
  FA_MTEB_2,
13
13
  HUME,
14
14
  JINA_VDR,
15
+ JMTEB_V2,
15
16
  LONG_EMBED,
16
17
  MIEB_ENG,
17
18
  MIEB_IMG,
@@ -75,6 +76,7 @@ __all__ = [
75
76
  "HUME",
76
77
  "HUME",
77
78
  "JINA_VDR",
79
+ "JMTEB_V2",
78
80
  "LONG_EMBED",
79
81
  "MIEB_ENG",
80
82
  "MIEB_IMG",
@@ -2562,3 +2562,60 @@ HUME = HUMEBenchmark(
2562
2562
  citation=None,
2563
2563
  contacts=["AdnanElAssadi56", "KennethEnevoldsen", "isaac-chung", "Samoed"],
2564
2564
  )
2565
+
2566
+ JMTEB_V2 = Benchmark(
2567
+ name="JMTEB(v2)",
2568
+ display_name="Japanese",
2569
+ icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
2570
+ tasks=get_tasks(
2571
+ languages=["jpn"],
2572
+ tasks=[
2573
+ # Clustering (3)
2574
+ "LivedoorNewsClustering.v2",
2575
+ "MewsC16JaClustering",
2576
+ "SIB200ClusteringS2S",
2577
+ # Classification (7)
2578
+ "AmazonReviewsClassification",
2579
+ "AmazonCounterfactualClassification",
2580
+ "MassiveIntentClassification",
2581
+ "MassiveScenarioClassification",
2582
+ "JapaneseSentimentClassification",
2583
+ "SIB200Classification",
2584
+ "WRIMEClassification",
2585
+ # STS (2)
2586
+ "JSTS",
2587
+ "JSICK",
2588
+ # Retrieval (11)
2589
+ "JaqketRetrieval",
2590
+ "MrTidyRetrieval",
2591
+ "JaGovFaqsRetrieval",
2592
+ "NLPJournalTitleAbsRetrieval.V2",
2593
+ "NLPJournalTitleIntroRetrieval.V2",
2594
+ "NLPJournalAbsIntroRetrieval.V2",
2595
+ "NLPJournalAbsArticleRetrieval.V2",
2596
+ "JaCWIRRetrieval",
2597
+ "MIRACLRetrieval",
2598
+ "MintakaRetrieval",
2599
+ "MultiLongDocRetrieval",
2600
+ # Reranking (5)
2601
+ "ESCIReranking",
2602
+ "JQaRAReranking",
2603
+ "JaCWIRReranking",
2604
+ "MIRACLReranking",
2605
+ "MultiLongDocReranking",
2606
+ ],
2607
+ ),
2608
+ description="JMTEB is a benchmark for evaluating Japanese text embedding models. In v2, we have extended the benchmark to 28 datasets, enabling more comprehensive evaluation compared with v1 (MTEB(jpn, v1)).",
2609
+ reference="https://github.com/sbintuitions/JMTEB",
2610
+ citation=r"""
2611
+ @article{li2025jmteb,
2612
+ author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide and Kawahara, Daisuke},
2613
+ issue = {3},
2614
+ journal = {Vol.2025-NL-265,No.3,1-15},
2615
+ month = {sep},
2616
+ title = {{JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version}},
2617
+ year = {2025},
2618
+ }
2619
+ """,
2620
+ contacts=["lsz05"],
2621
+ )
mteb/cache.py CHANGED
@@ -243,7 +243,11 @@ class ResultCache:
243
243
  f"No results repository found in {results_directory}, cloning it from {remote}"
244
244
  )
245
245
 
246
- subprocess.run(["git", "clone", remote, "remote"], cwd=self.cache_path)
246
+ subprocess.run(
247
+ ["git", "clone", "--depth", "1", remote, "remote"],
248
+ cwd=self.cache_path,
249
+ check=True,
250
+ )
247
251
 
248
252
  return results_directory
249
253
 
@@ -0,0 +1,60 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 1085,
4
+ "number_texts_intersect_with_train": 0,
5
+ "text_statistics": {
6
+ "total_text_length": 115359,
7
+ "min_text_length": 8,
8
+ "average_text_length": 106.32165898617511,
9
+ "max_text_length": 2722,
10
+ "unique_texts": 1085
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 3,
18
+ "labels": {
19
+ "0": {
20
+ "count": 868
21
+ },
22
+ "1": {
23
+ "count": 190
24
+ },
25
+ "2": {
26
+ "count": 27
27
+ }
28
+ }
29
+ }
30
+ },
31
+ "train": {
32
+ "num_samples": 7176,
33
+ "number_texts_intersect_with_train": null,
34
+ "text_statistics": {
35
+ "total_text_length": 830248,
36
+ "min_text_length": 5,
37
+ "average_text_length": 115.69788182831661,
38
+ "max_text_length": 4759,
39
+ "unique_texts": 7176
40
+ },
41
+ "image_statistics": null,
42
+ "label_statistics": {
43
+ "min_labels_per_text": 1,
44
+ "average_label_per_text": 1.0,
45
+ "max_labels_per_text": 1,
46
+ "unique_labels": 3,
47
+ "labels": {
48
+ "0": {
49
+ "count": 4933
50
+ },
51
+ "1": {
52
+ "count": 2047
53
+ },
54
+ "2": {
55
+ "count": 196
56
+ }
57
+ }
58
+ }
59
+ }
60
+ }
@@ -117,7 +117,7 @@ def _performance_size_plot(df: pd.DataFrame) -> go.Figure:
117
117
  df["Max Tokens"] = df["Max Tokens"].map(_parse_float)
118
118
  df["Log(Tokens)"] = np.log10(df["Max Tokens"])
119
119
  df["Mean (Task)"] = df["Mean (Task)"].map(_parse_float)
120
- df = df.dropna(
120
+ df = df[df["Mean (Task)"] > 0].dropna(
121
121
  subset=["Mean (Task)", "Number of Parameters", "Embedding Dimensions"]
122
122
  )
123
123
  if not len(df.index):
@@ -0,0 +1,22 @@
1
+ from mteb.models import ModelMeta, sentence_transformers_loader
2
+
3
+ kowshik24_bangla_embedding_model = ModelMeta(
4
+ loader=sentence_transformers_loader,
5
+ name="Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
6
+ languages=["ben-Beng"], # Bengali using Bengali script
7
+ open_weights=True,
8
+ revision="6689c21e69be5950596bad084457cbaa138728d8",
9
+ release_date="2025-11-10",
10
+ n_parameters=278_000_000,
11
+ memory_usage_mb=1061,
12
+ embed_dim=768,
13
+ license="apache-2.0",
14
+ max_tokens=128,
15
+ reference="https://huggingface.co/Kowshik24/bangla-sentence-transformer-ft-matryoshka-paraphrase-multilingual-mpnet-base-v2",
16
+ similarity_fn_name="cosine",
17
+ framework=["Sentence Transformers", "PyTorch"],
18
+ use_instructions=False,
19
+ public_training_code="https://github.com/kowshik24/Bangla-Embedding",
20
+ public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
21
+ training_datasets=set(),
22
+ )
@@ -0,0 +1,312 @@
1
+ from mteb.models.model_meta import ModelMeta
2
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
3
+
4
+ RURI_V3_PROMPTS = {
5
+ "Retrieval-query": "検索クエリ: ",
6
+ "Retrieval-document": "検索文書: ",
7
+ "Reranking-query": "検索クエリ: ",
8
+ "Reranking-document": "検索文書: ",
9
+ "Classification": "トピック: ",
10
+ "Clustering": "トピック: ",
11
+ }
12
+
13
+ RURI_V1_V2_PROMPTS = {
14
+ "query": "クエリ: ",
15
+ "document": "文章: ",
16
+ }
17
+
18
+
19
+ RURI_CITATION = r"""@misc{Ruri,
20
+ title={{Ruri: Japanese General Text Embeddings}},
21
+ author={Hayato Tsukagoshi and Ryohei Sasano},
22
+ year={2024},
23
+ eprint={2409.07737},
24
+ archivePrefix={arXiv},
25
+ primaryClass={cs.CL},
26
+ url={https://arxiv.org/abs/2409.07737},
27
+ }"""
28
+
29
+ cl_nagoya_ruri_v3_30m = ModelMeta(
30
+ loader=sentence_transformers_loader,
31
+ loader_kwargs=dict(
32
+ model_prompts=RURI_V3_PROMPTS,
33
+ ),
34
+ name="cl-nagoya/ruri-v3-30m",
35
+ languages=["jpn-Jpan"],
36
+ open_weights=True,
37
+ revision="24899e5de370b56d179604a007c0d727bf144504",
38
+ release_date="2025-04-07",
39
+ n_parameters=36_705_536,
40
+ memory_usage_mb=140,
41
+ embed_dim=256,
42
+ license="apache-2.0",
43
+ max_tokens=8192,
44
+ reference="https://huggingface.co/cl-nagoya/ruri-v3-30m",
45
+ similarity_fn_name="cosine",
46
+ framework=["PyTorch", "Sentence Transformers"],
47
+ use_instructions=True,
48
+ superseded_by=None,
49
+ training_datasets={
50
+ "cl-nagoya/ruri-v3-dataset-ft",
51
+ },
52
+ adapted_from="sbintuitions/modernbert-ja-30m",
53
+ public_training_code=None,
54
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
55
+ citation=RURI_CITATION,
56
+ contacts=["hpprc"],
57
+ )
58
+
59
+ cl_nagoya_ruri_v3_70m = ModelMeta(
60
+ loader=sentence_transformers_loader,
61
+ loader_kwargs=dict(
62
+ model_prompts=RURI_V3_PROMPTS,
63
+ ),
64
+ name="cl-nagoya/ruri-v3-70m",
65
+ languages=["jpn-Jpan"],
66
+ open_weights=True,
67
+ revision="07a8b0aba47d29d2ca21f89b915c1efe2c23d1cc",
68
+ release_date="2025-04-09",
69
+ n_parameters=36_705_536,
70
+ memory_usage_mb=140,
71
+ embed_dim=256,
72
+ license="apache-2.0",
73
+ max_tokens=8192,
74
+ reference="https://huggingface.co/cl-nagoya/ruri-v3-70m",
75
+ similarity_fn_name="cosine",
76
+ framework=["PyTorch", "Sentence Transformers"],
77
+ use_instructions=True,
78
+ superseded_by=None,
79
+ training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
80
+ adapted_from="sbintuitions/modernbert-ja-70m",
81
+ public_training_code=None,
82
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
83
+ citation=RURI_CITATION,
84
+ contacts=["hpprc"],
85
+ )
86
+
87
+ cl_nagoya_ruri_v3_130m = ModelMeta(
88
+ loader=sentence_transformers_loader,
89
+ loader_kwargs=dict(
90
+ model_prompts=RURI_V3_PROMPTS,
91
+ ),
92
+ name="cl-nagoya/ruri-v3-130m",
93
+ languages=["jpn-Jpan"],
94
+ open_weights=True,
95
+ revision="e3114c6ee10dbab8b4b235fbc6dcf9dd4d5ac1a6",
96
+ release_date="2025-04-09",
97
+ n_parameters=132_140_544,
98
+ memory_usage_mb=504,
99
+ embed_dim=512,
100
+ license="apache-2.0",
101
+ max_tokens=8192,
102
+ reference="https://huggingface.co/cl-nagoya/ruri-v3-130m",
103
+ similarity_fn_name="cosine",
104
+ framework=["PyTorch", "Sentence Transformers"],
105
+ use_instructions=True,
106
+ superseded_by=None,
107
+ training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
108
+ adapted_from="sbintuitions/modernbert-ja-130m",
109
+ public_training_code=None,
110
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
111
+ citation=RURI_CITATION,
112
+ contacts=["hpprc"],
113
+ )
114
+
115
+ cl_nagoya_ruri_v3_310m = ModelMeta(
116
+ loader=sentence_transformers_loader,
117
+ loader_kwargs=dict(
118
+ model_prompts=RURI_V3_PROMPTS,
119
+ ),
120
+ name="cl-nagoya/ruri-v3-310m",
121
+ languages=["jpn-Jpan"],
122
+ open_weights=True,
123
+ revision="18b60fb8c2b9df296fb4212bb7d23ef94e579cd3",
124
+ release_date="2025-04-09",
125
+ n_parameters=314_611_968,
126
+ memory_usage_mb=1200,
127
+ embed_dim=768,
128
+ license="apache-2.0",
129
+ max_tokens=8192,
130
+ reference="https://huggingface.co/cl-nagoya/ruri-v3-310m",
131
+ similarity_fn_name="cosine",
132
+ framework=["PyTorch", "Sentence Transformers"],
133
+ use_instructions=True,
134
+ superseded_by=None,
135
+ training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
136
+ adapted_from="sbintuitions/modernbert-ja-310m",
137
+ public_training_code=None,
138
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-v3-dataset-ft",
139
+ citation=RURI_CITATION,
140
+ contacts=["hpprc"],
141
+ )
142
+
143
+ cl_nagoya_ruri_small_v2 = ModelMeta(
144
+ loader=sentence_transformers_loader,
145
+ loader_kwargs=dict(
146
+ model_prompts=RURI_V1_V2_PROMPTS,
147
+ trust_remote_code=True,
148
+ ),
149
+ name="cl-nagoya/ruri-small-v2",
150
+ languages=["jpn-Jpan"],
151
+ open_weights=True,
152
+ revision="db18646e673b713cd0518a5bb0fefdce21e77cd9",
153
+ release_date="2024-12-05",
154
+ n_parameters=68_087_808,
155
+ memory_usage_mb=260,
156
+ embed_dim=768,
157
+ license="apache-2.0",
158
+ max_tokens=512,
159
+ reference="https://huggingface.co/cl-nagoya/ruri-small-v2",
160
+ similarity_fn_name="cosine",
161
+ framework=["PyTorch", "Sentence Transformers"],
162
+ use_instructions=True,
163
+ adapted_from="line-corporation/line-distilbert-base-japanese",
164
+ superseded_by=None,
165
+ training_datasets={"MrTidyRetrieval", "MIRACLRetrieval"},
166
+ public_training_code=None,
167
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
168
+ citation=RURI_CITATION,
169
+ contacts=["hpprc"],
170
+ )
171
+
172
+ cl_nagoya_ruri_base_v2 = ModelMeta(
173
+ loader=sentence_transformers_loader,
174
+ loader_kwargs=dict(
175
+ model_prompts=RURI_V1_V2_PROMPTS,
176
+ ),
177
+ name="cl-nagoya/ruri-base-v2",
178
+ languages=["jpn-Jpan"],
179
+ open_weights=True,
180
+ revision="8ce03882903668a01c83ca3b8111ac025a3bc734",
181
+ release_date="2024-12-05",
182
+ n_parameters=111_207_168,
183
+ memory_usage_mb=424,
184
+ embed_dim=768,
185
+ license="apache-2.0",
186
+ max_tokens=512,
187
+ reference="https://huggingface.co/cl-nagoya/ruri-base-v2",
188
+ similarity_fn_name="cosine",
189
+ framework=["PyTorch", "Sentence Transformers"],
190
+ use_instructions=True,
191
+ adapted_from="tohoku-nlp/bert-base-japanese-v3",
192
+ superseded_by=None,
193
+ training_datasets=None,
194
+ public_training_code=None,
195
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
196
+ citation=RURI_CITATION,
197
+ contacts=["hpprc"],
198
+ )
199
+
200
+ cl_nagoya_ruri_large_v2 = ModelMeta(
201
+ loader=sentence_transformers_loader,
202
+ loader_kwargs=dict(
203
+ model_prompts=RURI_V1_V2_PROMPTS,
204
+ ),
205
+ name="cl-nagoya/ruri-large-v2",
206
+ languages=["jpn-Jpan"],
207
+ open_weights=True,
208
+ revision="42898ef34a5574977380ebf0dfd28cbfbd36438b",
209
+ release_date="2024-12-06",
210
+ n_parameters=337_441_792,
211
+ memory_usage_mb=1287,
212
+ embed_dim=1024,
213
+ license="apache-2.0",
214
+ max_tokens=512,
215
+ reference="https://huggingface.co/cl-nagoya/ruri-large-v2",
216
+ similarity_fn_name="cosine",
217
+ framework=["PyTorch", "Sentence Transformers"],
218
+ use_instructions=True,
219
+ adapted_from="tohoku-nlp/bert-large-japanese-v2",
220
+ superseded_by=None,
221
+ training_datasets=None,
222
+ public_training_code=None,
223
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-v2-ft",
224
+ citation=RURI_CITATION,
225
+ contacts=["hpprc"],
226
+ )
227
+
228
+ cl_nagoya_ruri_small_v1 = ModelMeta(
229
+ loader=sentence_transformers_loader,
230
+ loader_kwargs=dict(
231
+ model_prompts=RURI_V1_V2_PROMPTS,
232
+ trust_remote_code=True,
233
+ ),
234
+ name="cl-nagoya/ruri-small",
235
+ languages=["jpn-Jpan"],
236
+ open_weights=True,
237
+ revision="bc56ce90cd7a979f6eb199fc52dfe700bfd94bc3",
238
+ release_date="2024-08-28",
239
+ n_parameters=68_087_808,
240
+ memory_usage_mb=130,
241
+ embed_dim=768,
242
+ license="apache-2.0",
243
+ max_tokens=512,
244
+ reference="https://huggingface.co/cl-nagoya/ruri-small",
245
+ similarity_fn_name="cosine",
246
+ framework=["PyTorch", "Sentence Transformers"],
247
+ use_instructions=True,
248
+ adapted_from="line-corporation/line-distilbert-base-japanese",
249
+ superseded_by="cl-nagoya/ruri-small-v2",
250
+ training_datasets=None,
251
+ public_training_code=None,
252
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
253
+ citation=RURI_CITATION,
254
+ contacts=["hpprc"],
255
+ )
256
+
257
+ cl_nagoya_ruri_base_v1 = ModelMeta(
258
+ loader=sentence_transformers_loader,
259
+ loader_kwargs=dict(
260
+ model_prompts=RURI_V1_V2_PROMPTS,
261
+ ),
262
+ name="cl-nagoya/ruri-base",
263
+ languages=["jpn-Jpan"],
264
+ open_weights=True,
265
+ revision="1ae40b8b6c78518a499425086bab8fc16c2e4b0e",
266
+ release_date="2024-08-28",
267
+ n_parameters=111_207_168,
268
+ memory_usage_mb=212,
269
+ embed_dim=768,
270
+ license="apache-2.0",
271
+ max_tokens=512,
272
+ reference="https://huggingface.co/cl-nagoya/ruri-base",
273
+ similarity_fn_name="cosine",
274
+ framework=["PyTorch", "Sentence Transformers"],
275
+ use_instructions=True,
276
+ adapted_from="tohoku-nlp/bert-base-japanese-v3",
277
+ superseded_by="cl-nagoya/ruri-base-v2",
278
+ training_datasets=None,
279
+ public_training_code=None,
280
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
281
+ citation=RURI_CITATION,
282
+ contacts=["hpprc"],
283
+ )
284
+
285
+
286
+ cl_nagoya_ruri_large_v1 = ModelMeta(
287
+ loader=sentence_transformers_loader,
288
+ loader_kwargs=dict(
289
+ model_prompts=RURI_V1_V2_PROMPTS,
290
+ ),
291
+ name="cl-nagoya/ruri-large",
292
+ languages=["jpn-Jpan"],
293
+ open_weights=True,
294
+ revision="a011c39b13e8bc137ee13c6bc82191ece46c414c",
295
+ release_date="2024-08-28",
296
+ n_parameters=337_441_792,
297
+ memory_usage_mb=644,
298
+ embed_dim=1024,
299
+ license="apache-2.0",
300
+ max_tokens=512,
301
+ reference="https://huggingface.co/cl-nagoya/ruri-large",
302
+ similarity_fn_name="cosine",
303
+ framework=["PyTorch", "Sentence Transformers"],
304
+ use_instructions=True,
305
+ adapted_from="tohoku-nlp/bert-large-japanese-v2",
306
+ superseded_by="cl-nagoya/ruri-large-v2",
307
+ training_datasets=None,
308
+ public_training_code=None,
309
+ public_training_data="https://huggingface.co/datasets/cl-nagoya/ruri-dataset-ft",
310
+ citation=RURI_CITATION,
311
+ contacts=["hpprc"],
312
+ )
@@ -0,0 +1,166 @@
1
+ from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
2
+ from mteb.models.model_meta import ModelMeta
3
+ from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader
4
+ from mteb.types import PromptType
5
+
6
+ SARASHINA_V2_INSTRUCTIONS = {
7
+ "Retrieval": {
8
+ "query": "クエリを与えるので、もっともクエリに意味が似ている一節を探してください。",
9
+ "document": "text: ",
10
+ },
11
+ "Reranking": {
12
+ "query": "クエリを与えるので、もっともクエリに意味が似ている一節を探してください。",
13
+ "document": "text: ",
14
+ },
15
+ "Classification": "与えられたドキュメントを適切なカテゴリに分類してください。",
16
+ "Clustering": "与えられたドキュメントのトピックまたはテーマを特定してください。",
17
+ # Task-specific instructions for the individual JMTEB tasks
18
+ "LivedoorNewsClustering.v2": "与えられたニュース記事のトピックを特定してください。",
19
+ "MewsC16JaClustering": "与えられたニュース記事のトピックを特定してください。",
20
+ "SIB200ClusteringS2S": "与えられたテキストのトピックを特定してください。",
21
+ "AmazonReviewsClassification": "与えられたAmazonレビューを適切な評価カテゴリに分類してください。",
22
+ "AmazonCounterfactualClassification": "与えられたAmazonのカスタマーレビューのテキストを反事実か反事実でないかに分類してください。",
23
+ "MassiveIntentClassification": "ユーザーの発話をクエリとして与えるので、ユーザーの意図を見つけてください。",
24
+ "MassiveScenarioClassification": "ユーザーの発話をクエリとして与えるので、ユーザーシナリオを見つけてください。",
25
+ "JapaneseSentimentClassification": "与えられたテキストの感情極性をポジティブ(1)かネガティブか(0)に分類してください。",
26
+ "SIB200Classification": "与えられたテキストのトピックを特定してください。",
27
+ "WRIMEClassification": "与えられたテキストの感情極性(-2:強いネガティブ、-1:ネガティブ、0:ニュートラル、1:ポジティブ、2:強いポジティブ)を分類してください。",
28
+ "JSTS": "クエリを与えるので,もっともクエリに意味が似ている一節を探してください。",
29
+ "JSICK": "クエリを与えるので,もっともクエリに意味が似ている一節を探してください。",
30
+ "JaqketRetrieval": {
31
+ "query": "質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。",
32
+ "document": "text: ",
33
+ },
34
+ "MrTidyRetrieval": {
35
+ "query": "質問を与えるので、その質問に答えるWikipediaの文章を検索するしてください。",
36
+ "document": "text: ",
37
+ },
38
+ "JaGovFaqsRetrieval": {
39
+ "query": "質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。",
40
+ "document": "text: ",
41
+ },
42
+ "NLPJournalTitleAbsRetrieval.V2": {
43
+ "query": "論文のタイトルを与えるので、タイトルに対応する要約を検索してください。",
44
+ "document": "text: ",
45
+ },
46
+ "NLPJournalTitleIntroRetrieval.V2": {
47
+ "query": "論文のタイトルを与えるので、タイトルに対応する要約を検索してください。",
48
+ "document": "text: ",
49
+ },
50
+ "NLPJournalAbsIntroRetrieval.V2": {
51
+ "query": "論文の序論を与えるので、序論に対応する全文を検索してください。",
52
+ "document": "text: ",
53
+ },
54
+ "NLPJournalAbsArticleRetrieval.V2": {
55
+ "query": "論文の序論を与えるので、序論に対応する全文を検索してください。",
56
+ "document": "text: ",
57
+ },
58
+ "JaCWIRRetrieval": {
59
+ "query": "記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。",
60
+ "document": "text: ",
61
+ },
62
+ "MIRACLRetrieval": {
63
+ "query": "質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。",
64
+ "document": "text: ",
65
+ },
66
+ "MintakaRetrieval": {
67
+ "query": "質問を与えるので、その質問に答えられるテキストを検索してください。",
68
+ "document": "text: ",
69
+ },
70
+ "MultiLongDocRetrieval": {
71
+ "query": "質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。",
72
+ "document": "text: ",
73
+ },
74
+ "ESCIReranking": {
75
+ "query": "クエリを与えるので、与えられたWeb検索クエリに答える関連文章を検索してください。",
76
+ "document": "text: ",
77
+ },
78
+ "JQaRAReranking": {
79
+ "query": "質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。",
80
+ "document": "text: ",
81
+ },
82
+ "JaCWIRReranking": {
83
+ "query": "記事のタイトルを与えるので、そのタイトルと合っている記事の中身を検索してください。",
84
+ "document": "text: ",
85
+ },
86
+ "MIRACLReranking": {
87
+ "query": "質問を与えるので、その質問に答えるのに役立つ関連文書を検索してください。",
88
+ "document": "text: ",
89
+ },
90
+ "MultiLongDocReranking": {
91
+ "query": "質問を与えるので、その質問に答えるのに役立つWikipediaの文章を検索してください。",
92
+ "document": "text: ",
93
+ },
94
+ }
95
+
96
+
97
+ def sarashina_instruction_template(
98
+ instruction: str, prompt_type: PromptType | None = None
99
+ ) -> str:
100
+ """Instruction template for Sarashina v2 model.
101
+
102
+ Wraps a non-empty instruction as "task: <instruction>", a newline, and a trailing "query: " prefix; an empty instruction yields an empty string.
103
+ For document prompts with a non-empty instruction, returns the fixed prefix "text: " instead.
104
+ """
105
+ if not instruction:
106
+ return ""
107
+ if prompt_type == PromptType.document:
108
+ return "text: "
109
+ return f"task: {instruction}\nquery: "
110
+
111
+
112
+ sbintuitions_sarashina_embedding_v2_1b = ModelMeta(
113
+ loader=InstructSentenceTransformerModel,
114
+ loader_kwargs=dict(
115
+ instruction_template=sarashina_instruction_template,
116
+ apply_instruction_to_passages=True,
117
+ prompts_dict=SARASHINA_V2_INSTRUCTIONS,
118
+ max_seq_length=8192,
119
+ ),
120
+ name="sbintuitions/sarashina-embedding-v2-1b",
121
+ languages=["jpn-Jpan"],
122
+ open_weights=True,
123
+ revision="1f3408afaa7b617e3445d891310a9c26dd0c68a5",
124
+ release_date="2025-07-30",
125
+ n_parameters=1_224_038_144,
126
+ memory_usage_mb=4669,
127
+ embed_dim=1792,
128
+ license="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b/blob/main/LICENSE",
129
+ max_tokens=8192,
130
+ reference="https://huggingface.co/sbintuitions/sarashina-embedding-v2-1b",
131
+ similarity_fn_name="cosine",
132
+ framework=["Sentence Transformers", "PyTorch"],
133
+ use_instructions=True,
134
+ adapted_from="sbintuitions/sarashina2.2-1b",
135
+ superseded_by=None,
136
+ training_datasets={"NQ", "MrTidyRetrieval"},
137
+ public_training_code=None,
138
+ public_training_data="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b",
139
+ citation=None,
140
+ contacts=["Sraym1217", "akiFQC", "lsz05"],
141
+ )
142
+
143
+ sbintuitions_sarashina_embedding_v1_1b = ModelMeta(
144
+ loader=sentence_transformers_loader,
145
+ name="sbintuitions/sarashina-embedding-v1-1b",
146
+ languages=["jpn-Jpan"],
147
+ open_weights=True,
148
+ revision="d060fcd8984075071e7fad81baff035cbb3b6c7e",
149
+ release_date="2024-11-22",
150
+ n_parameters=1_224_038_144,
151
+ memory_usage_mb=4669,
152
+ embed_dim=1792,
153
+ license="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b/blob/main/LICENSE",
154
+ max_tokens=8192,
155
+ reference="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b",
156
+ similarity_fn_name="cosine",
157
+ framework=["Sentence Transformers", "PyTorch"],
158
+ use_instructions=False,
159
+ adapted_from="sbintuitions/sarashina2.1-1b",
160
+ superseded_by="sbintuitions/sarashina-embedding-v2-1b",
161
+ training_datasets={"NQ", "MrTidyRetrieval"},
162
+ public_training_code=None,
163
+ public_training_data="https://huggingface.co/sbintuitions/sarashina-embedding-v1-1b",
164
+ citation=None,
165
+ contacts=["akiFQC", "lsz05"],
166
+ )
@@ -250,7 +250,7 @@ class SearchEncoderWrapper:
250
250
 
251
251
  # get top-k values
252
252
  cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
253
- torch.tensor(scores),
253
+ torch.as_tensor(scores),
254
254
  min(
255
255
  top_k + 1,
256
256
  len(scores[1]) if len(scores) > 1 else len(scores[-1]),
@@ -1,6 +1,11 @@
1
1
  from .hebrew_sentiment_analysis import (
2
2
  HebrewSentimentAnalysis,
3
3
  HebrewSentimentAnalysisV2,
4
+ HebrewSentimentAnalysisV3,
4
5
  )
5
6
 
6
- __all__ = ["HebrewSentimentAnalysis", "HebrewSentimentAnalysisV2"]
7
+ __all__ = [
8
+ "HebrewSentimentAnalysis",
9
+ "HebrewSentimentAnalysisV2",
10
+ "HebrewSentimentAnalysisV3",
11
+ ]
@@ -9,7 +9,12 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
9
9
  "path": "mteb/HebrewSentimentAnalysis",
10
10
  "revision": "03eb0996c8234e0d8cd7206bf4763815deda12ed",
11
11
  },
12
- description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy.",
12
+ description=(
13
+ "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
14
+ "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
15
+ "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
16
+ "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
17
+ ),
13
18
  reference="https://huggingface.co/datasets/hebrew_sentiment",
14
19
  type="Classification",
15
20
  category="t2c",
@@ -37,7 +42,7 @@ class HebrewSentimentAnalysis(AbsTaskClassification):
37
42
  year = {2018},
38
43
  }
39
44
  """,
40
- superseded_by="HebrewSentimentAnalysis.v2",
45
+ superseded_by="HebrewSentimentAnalysis.v3",
41
46
  )
42
47
 
43
48
 
@@ -49,7 +54,61 @@ class HebrewSentimentAnalysisV2(AbsTaskClassification):
49
54
  "revision": "7ecd049fc8ac0d6f0a0121c8ff9fe44ea5bd935b",
50
55
  "name": "morph",
51
56
  },
52
- description="HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
57
+ description=(
58
+ "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
59
+ "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
60
+ "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
61
+ "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
62
+ "This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
63
+ ),
64
+ reference="https://huggingface.co/datasets/hebrew_sentiment",
65
+ type="Classification",
66
+ category="t2c",
67
+ modalities=["text"],
68
+ eval_splits=["test"],
69
+ eval_langs=["heb-Hebr"],
70
+ main_score="accuracy",
71
+ date=("2015-10-01", "2015-10-31"),
72
+ domains=["Reviews", "Written"],
73
+ task_subtypes=["Sentiment/Hate speech"],
74
+ license="mit",
75
+ annotations_creators="expert-annotated",
76
+ dialect=[],
77
+ sample_creation="found",
78
+ bibtex_citation=r"""
79
+ @inproceedings{amram-etal-2018-representations,
80
+ address = {Santa Fe, New Mexico, USA},
81
+ author = {Amram, Adam and Ben David, Anat and Tsarfaty, Reut},
82
+ booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
83
+ month = aug,
84
+ pages = {2242--2252},
85
+ publisher = {Association for Computational Linguistics},
86
+ title = {Representations and Architectures in Neural Sentiment Analysis for Morphologically Rich Languages: A Case Study from {M}odern {H}ebrew},
87
+ url = {https://www.aclweb.org/anthology/C18-1190},
88
+ year = {2018},
89
+ }
90
+ """,
91
+ adapted_from=["HebrewSentimentAnalysis"],
92
+ superseded_by="HebrewSentimentAnalysis.v3",
93
+ )
94
+
95
+
96
+ class HebrewSentimentAnalysisV3(AbsTaskClassification):
97
+ label_column_name = "labels"
98
+ metadata = TaskMetadata(
99
+ name="HebrewSentimentAnalysis.v3",
100
+ dataset={
101
+ "path": "mteb/HebrewSentimentAnalysisV4",
102
+ "revision": "aa0b83c4b16cd28daf7c41ef3402e3ffe9c70c59",
103
+ },
104
+ description=(
105
+ "HebrewSentiment is a data set consists of 12,804 user comments to posts on the official Facebook page of Israel’s president, Mr. Reuven Rivlin. "
106
+ "In October 2015, we used the open software application Netvizz (Rieder, 2013) to scrape all the comments to all of the president’s posts in the period of June – August 2014, "
107
+ "the first three months of Rivlin’s presidency.2 While the president’s posts aimed at reconciling tensions and called for tolerance and empathy, "
108
+ "the sentiment expressed in the comments to the president’s posts was polarized between citizens who warmly thanked the president, and citizens that fiercely critiqued his policy. "
109
+ "This version corrects texts (took pre-tokenized) [more details in this thread](https://huggingface.co/datasets/mteb/HebrewSentimentAnalysis/discussions/2). "
110
+ "This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
111
+ ),
53
112
  reference="https://huggingface.co/datasets/hebrew_sentiment",
54
113
  type="Classification",
55
114
  category="t2c",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.3.4
3
+ Version: 2.3.6
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -5,7 +5,7 @@ mteb/_helpful_enum.py,sha256=jh73N1jlcpg7RGz4bj8UpctiMNvqvHpp9wrB7SYEzIU,510
5
5
  mteb/_log_once.py,sha256=-tUKzxGQzf2LZSuQXi97oYFXMta1B6GEYXd7BPqssvY,1095
6
6
  mteb/_requires_package.py,sha256=eHg_TD9BVZRzNCcQQrUP17d8M1DF_vOd_tVx54AmAnM,3017
7
7
  mteb/_set_seed.py,sha256=HPlPRl__Pe6IG-4UgJqTfplcivJ_wA2kaClbXoHQedM,1178
8
- mteb/cache.py,sha256=77GtlwqHZxqkoCYcu76KCFL4AnHHkH8w-JY3oglMrbc,20102
8
+ mteb/cache.py,sha256=XiFuhjZ2C-o0LgP1YM8g9As_vigJCUNfTrOb9-EiFlM,20177
9
9
  mteb/deprecated_evaluator.py,sha256=t13Eluvm5ByVIOqgT7fqiVfLb8Ud3A4bbF2djRfs8iA,26901
10
10
  mteb/evaluate.py,sha256=B60CkqRHzkI-3zIfHyocp-YUeWrzeoOvX_RN5vSlGqE,19363
11
11
  mteb/filter_tasks.py,sha256=5XE1OYmgDDoJYnXwFf4ma_PIT_Lekzs420sQF_kpCiY,7240
@@ -55,8 +55,8 @@ mteb/benchmarks/__init__.py,sha256=MQEVeli-zLaJ7Xg0z7RhXQwsdmm7Ht_W2Ln0rZo1Szc,2
55
55
  mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBlAck,20195
56
56
  mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
57
57
  mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
58
- mteb/benchmarks/benchmarks/__init__.py,sha256=0ySgD14Mu3Y1nJzazR_eUir81ia3x6E23N57SzQNkF0,2150
59
- mteb/benchmarks/benchmarks/benchmarks.py,sha256=Ob2cHVXwFk328xbV-2ZmUibiVAMtT2RN1ygGgiP6UNQ,92662
58
+ mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
59
+ mteb/benchmarks/benchmarks/benchmarks.py,sha256=qHHmJfisT75VRVoZfPcHhShCG0jY6vSWZEx-D01XxKU,94757
60
60
  mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
61
61
  mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
62
62
  mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -252,6 +252,7 @@ mteb/descriptive_stats/Classification/HeadlineClassification.json,sha256=VfTqah7
252
252
  mteb/descriptive_stats/Classification/HeadlineClassification.v2.json,sha256=n-KiCmlKXb5QOzNG9QTdjwYR-cRV3Qvn96KOF2so7Cs,2110
253
253
  mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.json,sha256=j517xsonPntbr4k5Pa9ftGGIZqzaOLDvPqeGjWngdyI,1659
254
254
  mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v2.json,sha256=FaxgCr_lbS-ppsd_cEp_87kbCRCGqFUDMZv375XGvdk,1658
255
+ mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json,sha256=75u2ZWek2BT8JQElPqBHvCqTEXbeTwlrXcyQC5NK-wU,1658
255
256
  mteb/descriptive_stats/Classification/HinDialectClassification.json,sha256=HK13-QJaWc0uC4wOqhZHKzT6i05_wJhURdUJ6Upq8OQ,4745
256
257
  mteb/descriptive_stats/Classification/HindiDiscourseClassification.json,sha256=4XMoJL46RHYKYKdndxD5lZ2b3hGjL2CArx3eRalO8eg,1049
257
258
  mteb/descriptive_stats/Classification/HindiDiscourseClassification.v2.json,sha256=MGz2Ntd2JxyHtlP3GhkhbMQ_yH2Eladd4vZSFOme2K8,2088
@@ -1426,7 +1427,7 @@ mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZ
1426
1427
  mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
1427
1428
  mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
1428
1429
  mteb/leaderboard/benchmark_selector.py,sha256=uH66SI0iT1J4_fnebViWa83dQwhPi7toBv7PRL_epDw,7784
1429
- mteb/leaderboard/figures.py,sha256=cfOK82rRf-7sCjyP7GBxh4ezhOIt0OhD0_86mKtzLrg,7530
1430
+ mteb/leaderboard/figures.py,sha256=mPO0go_23QEhAm1RJdLiBxPFCoUiA74_ztyl6yimc7k,7553
1430
1431
  mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
1431
1432
  mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
1432
1433
  mteb/models/__init__.py,sha256=ABTuoqiBjBtBWW3LYY7ItBHdylR6jWoy06HH0g6j6fU,910
@@ -1435,7 +1436,7 @@ mteb/models/get_model_meta.py,sha256=GeofphZ8wFtwAHYQipgQlZzxNIFAVFGzo_E2sMzjZTc
1435
1436
  mteb/models/instruct_wrapper.py,sha256=Ty4nfEvioycL_uATkhd0PGuyeB5Xc9xrRd6HOGgb-tc,9005
1436
1437
  mteb/models/model_meta.py,sha256=b-Nel9nX5bJk4cgJnqkBzEKyMY7uXvxlCBSxmmH1Ios,14769
1437
1438
  mteb/models/models_protocols.py,sha256=D2hYWn_UBGMaKtRwBx3u0B0ni6lHJjSzTxX21XFNwIc,8917
1438
- mteb/models/search_wrappers.py,sha256=AcMhjQyKdeitUjnaqgnP3_zTeVSum8rz1sjBRddHUVQ,20328
1439
+ mteb/models/search_wrappers.py,sha256=zpCvxUVNQWekyC4Fiz7mvlI0VPdSrFq41A0GrCDvBK4,20331
1439
1440
  mteb/models/sentence_transformer_wrapper.py,sha256=n5CMsM6Lpg_CFHH0NkpJusMsaLUTt-L9vRmFINQ961k,12338
1440
1441
  mteb/models/cache_wrappers/__init__.py,sha256=1w1TnMwulWJSzNkLXjbh5MY3sqgHWc6vUntYn49i9X8,169
1441
1442
  mteb/models/cache_wrappers/cache_backend_protocol.py,sha256=TR7kD7KbN1J4piszIecpegtLZYGy7sRHZt3SDWlImKk,1665
@@ -1494,6 +1495,7 @@ mteb/models/model_implementations/jina_clip.py,sha256=CfiIxbhKspjQajNtObCfGPHOWP
1494
1495
  mteb/models/model_implementations/jina_models.py,sha256=HrHm2Io3g9gHwxU5icAaudy_E8rAVkAAIFSzVYWF-dM,34859
1495
1496
  mteb/models/model_implementations/kalm_models.py,sha256=FmW7Z5Qs6WYBLuKvql3u4IJW36kj4k-Ypah8qTBEBkg,59837
1496
1497
  mteb/models/model_implementations/kennethenevoldsen_models.py,sha256=DF-9nmsewYO9ikZ0kV81ujKGr7Ot36-9iPoxN7KX2mY,2993
1498
+ mteb/models/model_implementations/kowshik24_models.py,sha256=HoQpybjhquK2XSnawlq0aiSWFI5M7l6N4DNY4MQ-P10,976
1497
1499
  mteb/models/model_implementations/lens_models.py,sha256=fC7_NB1F8vBAlXD0p0-hALf6eZTPFJwpz57dy71OlwI,1696
1498
1500
  mteb/models/model_implementations/lgai_embedding_models.py,sha256=S83pbfkMH3YUNl4skusgbK-Rn-uLuScQVxgXwegR_N4,2333
1499
1501
  mteb/models/model_implementations/linq_models.py,sha256=EtvUyiNbjU-GJd1kS0Z0gBACkP2pFOjk0KfGMZz4K9Y,1872
@@ -1534,8 +1536,10 @@ mteb/models/model_implementations/rerankers_custom.py,sha256=ro73A9-hHudy3_qIMrh
1534
1536
  mteb/models/model_implementations/rerankers_monot5_based.py,sha256=rxVwzapNnHl4gCw79XVCaTXj3-wbToyj7XVL97tpAF4,34302
1535
1537
  mteb/models/model_implementations/richinfoai_models.py,sha256=llvYa0JUjyOOMbuTgOYoJ2qeqZ5rLHX1ZjZIYlYbdvA,989
1536
1538
  mteb/models/model_implementations/ru_sentence_models.py,sha256=GuZFwbzaooufvSMGNjIsL0DDLrqHjhdSsAQHHZo5H08,40480
1539
+ mteb/models/model_implementations/ruri_models.py,sha256=-BTYkZ8dEWZUbGqx3YB5yFSrzMwZtXX7sMUHzrlB8ws,10043
1537
1540
  mteb/models/model_implementations/salesforce_models.py,sha256=KslTK-IKeLvNG-vQir9k6swkaOgjk6eyozm_BOVgTpY,5160
1538
1541
  mteb/models/model_implementations/samilpwc_models.py,sha256=oMwKNwCxoH1jZgCy04oo2oVlBZWu253QMpnEEC6emz8,2021
1542
+ mteb/models/model_implementations/sarashina_embedding_models.py,sha256=TSmr2FEX79mJTA9mbEV3meEZYSelGv58Veiw__TTGFM,8415
1539
1543
  mteb/models/model_implementations/searchmap_models.py,sha256=XvVl99emIgnNUCxkTuFQXW6py2R8vgsArfpyHveCugw,1904
1540
1544
  mteb/models/model_implementations/seed_1_6_embedding_models.py,sha256=8J3htEddltyGTydIbnMUudgAV97FdD43-SQKaSA_Iuc,18534
1541
1545
  mteb/models/model_implementations/seed_models.py,sha256=SgK4kPVO6V33G3F1zSq06zSkWarPLEwBt1SWp4TUoVw,14142
@@ -1722,8 +1726,8 @@ mteb/tasks/classification/fra/french_book_reviews.py,sha256=Fsx8UznQVNDNUhcdsTeN
1722
1726
  mteb/tasks/classification/fra/movie_review_sentiment_classification.py,sha256=ov-fbReWP9T_RqhxFtS-gjNaZmbM9J8gQdBQyci5yqU,3290
1723
1727
  mteb/tasks/classification/guj/__init__.py,sha256=HZfimpBCywBLi5VGof_A9Ua6bqtMUoWGRhM1eqAEWKE,186
1724
1728
  mteb/tasks/classification/guj/gujarati_news_classification.py,sha256=VEdbzqlw8b8N8R3TQc275iiCxqGLAMAM8Nf_N7FnUGA,2303
1725
- mteb/tasks/classification/heb/__init__.py,sha256=Wa9-nATstuSXN2OFsO3tF0BZdlk3AzM0g9R1VzZUTc0,171
1726
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py,sha256=1DPoxWdGAZJB7LwEQWJzpR8KEg2fpbgGXe_cGv1bo5M,4523
1729
+ mteb/tasks/classification/heb/__init__.py,sha256=xQNtDxjUsCXQhA_ZYO_4kpBtHWQSBhWhQwAuWcTo6GE,246
1730
+ mteb/tasks/classification/heb/hebrew_sentiment_analysis.py,sha256=2wmKwq4Z4YKTG3QieuxmUE56ofiSOV63kV-jh8Zomh4,7281
1727
1731
  mteb/tasks/classification/hin/__init__.py,sha256=KdScMtYYmjsali0InoHP0PKQ8yCbaD-j2tLqc7_lhlo,356
1728
1732
  mteb/tasks/classification/hin/hindi_discourse_classification.py,sha256=HXLJZFEpnI7UjwA-NuadhrCplqaLiSb8Npla_6oXC48,3717
1729
1733
  mteb/tasks/classification/hin/sentiment_analysis_hindi.py,sha256=YIeEuNa2UTr8Jwh_wx15broRuD3NHNansGq7Bl9Vjl0,3101
@@ -2569,9 +2573,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2569
2573
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2570
2574
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2571
2575
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2572
- mteb-2.3.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2573
- mteb-2.3.4.dist-info/METADATA,sha256=IbQhbIhtR1KD2NPrrGl9vm0Ua3QQoeGVViqj4sUNRR8,13923
2574
- mteb-2.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2575
- mteb-2.3.4.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2576
- mteb-2.3.4.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2577
- mteb-2.3.4.dist-info/RECORD,,
2576
+ mteb-2.3.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2577
+ mteb-2.3.6.dist-info/METADATA,sha256=urz0_67bNhVt17rvN3pZdvMFt_mvxI7MFvamWkNoNjM,13923
2578
+ mteb-2.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2579
+ mteb-2.3.6.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2580
+ mteb-2.3.6.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2581
+ mteb-2.3.6.dist-info/RECORD,,
File without changes