mteb 2.3.7__py3-none-any.whl → 2.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+
3
+
4
def hamming_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the Hamming score (a.k.a. label-based accuracy) for multilabel classification.

    For every sample the score is the number of labels set in both ``y_true``
    and ``y_pred`` divided by the number of labels set in either of them; the
    per-sample scores are then averaged. A sample with no labels in either
    matrix counts as perfect agreement (1.0).

    Args:
        y_true: Binary matrix of true labels with shape (n_samples, n_labels)
        y_pred: Binary matrix of predicted labels with shape (n_samples, n_labels)

    Returns:
        float: Hamming score between 0.0 and 1.0

    Raises:
        ValueError: If the shapes differ, the arrays are empty, are not 2D,
            or contain values other than 0 and 1
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Check shapes
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"Shape mismatch: y_true {y_true.shape} != y_pred {y_pred.shape}"
        )

    # Check if arrays are empty
    if y_true.size == 0:
        raise ValueError("Input arrays cannot be empty")

    # Ensure 2D arrays
    if y_true.ndim != 2:
        raise ValueError(f"Arrays must be 2D, got {y_true.ndim}D")

    # Check for binary values
    if not (np.all(np.isin(y_true, [0, 1])) and np.all(np.isin(y_pred, [0, 1]))):
        raise ValueError("Arrays must contain only binary values (0 and 1)")

    # Convert to boolean for bitwise operations
    y_true_bool = y_true.astype(bool)
    y_pred_bool = y_pred.astype(bool)

    # Calculate intersection and union for each sample
    intersection = (y_true_bool & y_pred_bool).sum(axis=1)
    union = (y_true_bool | y_pred_bool).sum(axis=1)

    # Rows with an empty union keep the pre-filled 1.0. Dividing only where
    # union > 0 avoids the 0/0 RuntimeWarning that an unconditional
    # ``intersection / union`` emits for such rows.
    scores = np.ones(union.shape, dtype=float)
    np.divide(intersection, union, out=scores, where=union > 0)

    return float(scores.mean())
File without changes
@@ -0,0 +1,125 @@
1
+ """Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
2
+
3
+ import logging
4
+
5
+ import datasets
6
+ import pandas as pd
7
+ from datasets import Dataset, DatasetDict
8
+
9
+ from mteb import TaskMetadata
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def deduplicate(dataset: Dataset, input_column: str) -> Dataset:
    """Keep only the first row for each distinct text in ``input_column``.

    Texts are compared after stripping surrounding whitespace; the stored
    values themselves are not modified.
    """
    seen = set()
    kept_positions = []
    for position, raw_text in enumerate(dataset[input_column]):
        stripped = raw_text.strip()
        if stripped in seen:
            continue
        seen.add(stripped)
        kept_positions.append(position)

    total = len(dataset)
    logger.info(f"[deduplicate] removed={total - len(kept_positions)}/{total}")
    return dataset.select(kept_positions)
28
+
29
+
30
def filter_empty(dataset: Dataset, input_column: str) -> Dataset:
    """Drop rows whose ``input_column`` text is empty once whitespace is stripped."""

    def _has_content(row) -> bool:
        return len(row[input_column].strip()) > 0

    total = len(dataset)
    kept = dataset.filter(_has_content)
    logger.info(f"[filter_empty] removed={total - len(kept)}/{total}")
    return kept
36
+
37
+
38
def filter_train_leakage(
    train_dataset: Dataset, test_dataset: Dataset, input_column: str
) -> Dataset:
    """Drop test rows whose text also occurs, verbatim, in the training data."""
    seen_in_train = set(train_dataset[input_column])
    total = len(test_dataset)

    kept_positions = []
    for position, text in enumerate(test_dataset[input_column]):
        if text not in seen_in_train:
            kept_positions.append(position)

    logger.info(
        f"[filter_train_leakage] removed={total - len(kept_positions)}/{total}"
    )
    return test_dataset.select(kept_positions)
51
+
52
+
53
def filter_unclear_label(
    dataset_dict: DatasetDict, input_column: str, label_column: str
) -> DatasetDict:
    """Drop every row whose text carries more than one distinct label.

    Texts are compared after stripping and lower-casing, and labels are
    collected across all splits, so a conflict between two splits removes the
    text from both. Labels that are not ``str``/``int``/``float`` are turned
    into tuples so they can be stored in a set.
    """
    labels_by_text: dict[str, set[str | tuple[str, ...]]] = {}
    logger.debug("[filter_controversial] scanning dataset for label conflicts...")

    # Pass 1: collect the set of labels seen for each normalized text.
    for split_data in dataset_dict.values():
        pairs = zip(split_data[input_column], split_data[label_column])
        for text, label in pairs:
            if isinstance(label, (str, int, float)):
                hashable_label = label
            else:
                hashable_label = tuple(label)
            labels_by_text.setdefault(text.strip().lower(), set()).add(hashable_label)

    bad_texts = {
        text for text, labels in labels_by_text.items() if len(labels) > 1
    }
    logger.info(f"[filter_controversial] Removing {len(bad_texts)} conflicting texts")

    def _is_unambiguous(row) -> bool:
        return row[input_column].strip().lower() not in bad_texts

    # Pass 2: filter each split against the conflicting texts.
    cleaned = {}
    for split, split_data in dataset_dict.items():
        total = len(split_data)
        kept = split_data.filter(_is_unambiguous)
        logger.debug(
            f"[filter_controversial:{split}] removed={total - len(kept)}/{total}"
        )
        cleaned[split] = kept

    return DatasetDict(cleaned)
80
+
81
+
82
def filter_short(dataset: Dataset, input_column: str, min_words: int = 3) -> Dataset:
    """Keep only rows whose text has at least `min_words` whitespace-separated words."""

    def _long_enough(row) -> bool:
        words = row[input_column].strip().split()
        return len(words) >= min_words

    total = len(dataset)
    kept = dataset.filter(_long_enough)
    logger.debug(f"[filter_short] removed={total - len(kept)}/{total}")
    return kept
88
+
89
+
90
def split_train_test(
    ds: DatasetDict,
    metadata: TaskMetadata,
    train_split: str,
    label_column: str,
) -> DatasetDict:
    """Carve a stratified test split out of the train split when it is the only eval split.

    When ``train_split`` exists in ``ds`` and is the task's sole evaluation
    split, the label column is cast to ``ClassLabel``, labels that occur only
    once are dropped, and the remaining rows are split (seed 42, stratified on
    the label) into a train part and a ``"test"`` part of at most 2048 rows.
    ``metadata.eval_splits`` is then set to ``["test"]``. In every other case
    ``ds`` is returned unchanged.

    Args:
        ds: Splits of the dataset; ``ds[train_split]`` is replaced in place
            before the new ``DatasetDict`` is built.
        metadata: Task metadata; its ``eval_splits`` is read and, when a split
            is performed, overwritten.
        train_split: Name of the training split.
        label_column: Name of the column holding the labels.

    Returns:
        Either ``ds`` itself, or a new ``DatasetDict`` containing only
        ``train_split`` and ``"test"``.
    """
    eval_splits = metadata.eval_splits
    # ``eval_splits`` is a list of split names (it is assigned ``["test"]``
    # below), and a list never compares equal to a bare string, so the list
    # form has to be matched explicitly. The plain comparison is kept so a
    # string-valued ``eval_splits`` behaves as before.
    evaluates_on_train = eval_splits == train_split or (
        isinstance(eval_splits, (list, tuple)) and list(eval_splits) == [train_split]
    )
    if train_split not in ds or not evaluates_on_train:
        return ds

    before = len(ds[train_split])
    logger.info(
        f"[split_train_test] eval_splits == train_split; performing split on {before} examples"
    )

    # Sorted so the label -> class-id mapping does not depend on set
    # iteration order and stays reproducible between runs.
    label_names = sorted(set(ds[train_split][label_column]))
    ds[train_split] = ds[train_split].cast_column(
        label_column,
        datasets.ClassLabel(names=label_names),
    )
    label_counts = pd.Series(ds[train_split][label_column]).value_counts()
    one_sample_labels = set(label_counts[label_counts == 1].index.tolist())

    if one_sample_labels:
        # A label with a single row cannot appear on both sides of a
        # stratified split, so those rows are removed first.
        logger.info(
            f"[split_train_test] Removing {len(one_sample_labels)} labels with only one instance"
        )
        ds[train_split] = ds[train_split].filter(
            lambda x: x[label_column] not in one_sample_labels
        )

    # Size the test split from the rows that are actually left after the
    # singleton-label filter, not from the original row count.
    remaining = len(ds[train_split])
    splits = ds[train_split].train_test_split(
        test_size=min(2048, remaining // 2), seed=42, stratify_by_column=label_column
    )
    # ``train_test_split`` always names its outputs "train" and "test",
    # whatever the original split was called.
    ds = DatasetDict({train_split: splits["train"], "test": splits["test"]})
    metadata.eval_splits = ["test"]
    logger.info(
        f"[split_train_test] Train size={len(ds[train_split])}, Test size={len(ds['test'])}"
    )

    return ds
@@ -0,0 +1,102 @@
1
+ import logging
2
+
3
+ from datasets import DatasetDict
4
+
5
+ from mteb import TaskMetadata
6
+ from mteb.abstasks import AbsTaskClassification
7
+ from mteb.abstasks._data_filter.filters import (
8
+ deduplicate,
9
+ filter_empty,
10
+ filter_short,
11
+ filter_train_leakage,
12
+ filter_unclear_label,
13
+ split_train_test,
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def clean_dataset(
    ds: DatasetDict,
    metadata: TaskMetadata,
    train_split: str,
    input_column: str,
    label_column: str,
    subset: str | None = None,
) -> DatasetDict:
    """Apply the full cleaning pipeline with logging.

    Steps, in order:
      1. On the train split and every eval split: remove empty texts, remove
         duplicates, and (unless one of the task languages is in the skip
         list) remove texts that are too short.
      2. ``split_train_test``: create a test split when needed.
      3. Remove from every eval split the texts that also occur in training.
      4. Remove texts that appear with conflicting labels.

    Args:
        ds: Splits of the dataset; entries are replaced in place.
        metadata: Task metadata; ``eval_langs`` and ``eval_splits`` are read,
            and ``split_train_test`` may update ``eval_splits``.
        train_split: Name of the training split.
        input_column: Name of the text column.
        label_column: Name of the label column.
        subset: Key into ``metadata.eval_langs`` when that is a dict.

    Returns:
        The cleaned ``DatasetDict``.
    """
    logger.info("[clean_dataset] Starting dataset cleaning pipeline...")

    transforms = [
        ("filter_empty", filter_empty),
        ("deduplicate", deduplicate),
    ]

    skip_cjk_codes = {"zho", "jpn", "tha", "mya", "cmn"}
    cur_langs = (
        metadata.eval_langs[subset]
        if isinstance(metadata.eval_langs, dict) and subset
        else metadata.eval_langs
    )
    # Language entries look like "eng-Latn"; only the part before the first
    # dash is compared against the skip list.
    apply_short = not any(lang.split("-")[0] in skip_cjk_codes for lang in cur_langs)
    if apply_short:
        # Logged only when the filter is really added to the pipeline.
        logger.info("[clean_dataset] Applying short-text filter")
        transforms.append(("filter_short", filter_short))
    else:
        logger.info("[clean_dataset] Skipping short-text filter")

    # dict.fromkeys de-duplicates while keeping order, so a train split that
    # is also an eval split is not cleaned twice.
    for split in dict.fromkeys([train_split, *metadata.eval_splits]):
        if split not in ds:
            logger.warning(f"[clean_dataset] Split '{split}' missing; skipping.")
            continue

        for name, fn in transforms:
            before = len(ds[split])
            ds[split] = fn(ds[split], input_column=input_column)
            logger.info(
                f"[clean_dataset:{split}] {name} removed={before - len(ds[split])}"
            )

    ds = split_train_test(ds, metadata, train_split, label_column)

    # eval_splits is re-read here because split_train_test may have changed it.
    for split in metadata.eval_splits:
        if split == train_split:
            continue
        if split not in ds or train_split not in ds:
            # Same tolerance as the first loop: warn instead of raising a
            # bare KeyError for a split that is not present.
            logger.warning(
                f"[clean_dataset] Split '{split}' or train split '{train_split}' "
                "missing; skipping leakage filter."
            )
            continue
        before = len(ds[split])
        ds[split] = filter_train_leakage(ds[train_split], ds[split], input_column)
        logger.info(
            f"[clean_dataset:{split}] leakage_removed={before - len(ds[split])}"
        )

    ds = filter_unclear_label(ds, input_column=input_column, label_column=label_column)

    logger.info("[clean_dataset] Cleaning pipeline complete.")
    return ds
74
+
75
+
76
def process_classification(
    task: AbsTaskClassification,
) -> DatasetDict | dict[str, DatasetDict]:
    """Run ``clean_dataset`` over a classification task's data.

    The task's data is loaded first when needed. A task whose ``dataset`` is a
    single ``DatasetDict`` yields one cleaned ``DatasetDict``; otherwise each
    subset is cleaned on its own and a dict keyed by subset is returned.
    """
    if not task.data_loaded:
        task.load_data()

    def _clean(data: DatasetDict, subset_name: str | None) -> DatasetDict:
        return clean_dataset(
            data,
            task.metadata,
            task.train_split,
            task.input_column_name,
            task.label_column_name,
            subset=subset_name,
        )

    if isinstance(task.dataset, DatasetDict):
        return _clean(task.dataset, None)

    return {subset: _clean(task.dataset[subset], subset) for subset in task.dataset}
@@ -14,6 +14,7 @@ from sklearn.preprocessing import MultiLabelBinarizer
14
14
  from typing_extensions import override
15
15
 
16
16
  from mteb._create_dataloaders import create_dataloader
17
+ from mteb._evaluators.classification_metrics import hamming_score
17
18
  from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
18
19
  from mteb.models import EncoderProtocol
19
20
 
@@ -40,11 +41,13 @@ class MultilabelClassificationMetrics(TypedDict):
40
41
  accuracy: Accuracy of the classifier.
41
42
  lrap: Label Ranking Average Precision (LRAP) score.
42
43
  f1: Macro F1 score.
44
+ hamming: Hamming score (label-based accuracy).
43
45
  """
44
46
 
45
47
  accuracy: float
46
48
  lrap: float
47
49
  f1: float
50
+ hamming: float
48
51
 
49
52
 
50
53
  class FullMultilabelClassificationMetrics(MultilabelClassificationMetrics):
@@ -157,7 +160,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
157
160
 
158
161
  logger.info("Running multilabel classification - Evaluating classifiers...")
159
162
  all_predictions = []
160
- for i_experiment, sample_indices in enumerate(train_samples):
163
+ for _, sample_indices in enumerate(train_samples):
161
164
  X_train = np.stack([unique_train_embeddings[idx] for idx in sample_indices])
162
165
  y_train = train_split.select(sample_indices)[self.label_column_name]
163
166
  y_train = binarizer.transform(y_train)
@@ -207,10 +210,12 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
207
210
  else:
208
211
  lrap = label_ranking_average_precision_score(y_test, y_pred)
209
212
  f1 = f1_score(y_test, y_pred, average="macro")
213
+ hamming = hamming_score(y_test, y_pred)
210
214
  return MultilabelClassificationMetrics(
211
215
  accuracy=accuracy,
212
216
  lrap=lrap,
213
217
  f1=f1,
218
+ hamming=hamming,
214
219
  )
215
220
 
216
221
  def _undersample_data_indices(
@@ -218,6 +223,8 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
218
223
  ) -> tuple[list[int], list[int]]:
219
224
  """Undersample data to have samples_per_label samples of each label.
220
225
 
226
+ Currently ensures that each label has at least samples_per_label samples.
227
+
221
228
  Returns:
222
229
  A tuple containing:
223
230
  - List of sampled indices.
@@ -309,7 +309,7 @@ RU_SCI_BENCH = Benchmark(
309
309
  tasks=get_tasks(
310
310
  tasks=[
311
311
  # BitextMining
312
- "RuSciBenchBitextMining",
312
+ "RuSciBenchBitextMining.v2",
313
313
  # Classification
314
314
  "RuSciBenchCoreRiscClassification",
315
315
  "RuSciBenchGRNTIClassification.v2",
@@ -963,7 +963,7 @@ MTEB_multilingual_v2 = Benchmark(
963
963
 
964
964
  MTEB_JPN = Benchmark(
965
965
  name="MTEB(jpn, v1)",
966
- display_name="Japanese",
966
+ display_name="Japanese Legacy",
967
967
  icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/jp.svg",
968
968
  tasks=get_tasks(
969
969
  languages=["jpn"],
@@ -0,0 +1,61 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 19928,
4
+ "number_of_characters": 35466331,
5
+ "unique_pairs": 19928,
6
+ "sentence1_statistics": {
7
+ "total_text_length": 17733346,
8
+ "min_text_length": 103,
9
+ "average_text_length": 889.8708350060217,
10
+ "max_text_length": 11576,
11
+ "unique_texts": 19928
12
+ },
13
+ "sentence2_statistics": {
14
+ "total_text_length": 17732985,
15
+ "min_text_length": 103,
16
+ "average_text_length": 889.8527197912485,
17
+ "max_text_length": 11576,
18
+ "unique_texts": 19928
19
+ },
20
+ "hf_subset_descriptive_stats": {
21
+ "ru-en": {
22
+ "num_samples": 9965,
23
+ "number_of_characters": 17734926,
24
+ "unique_pairs": 9965,
25
+ "sentence1_statistics": {
26
+ "total_text_length": 8685585,
27
+ "min_text_length": 103,
28
+ "average_text_length": 871.6091319618665,
29
+ "max_text_length": 5675,
30
+ "unique_texts": 9965
31
+ },
32
+ "sentence2_statistics": {
33
+ "total_text_length": 9049341,
34
+ "min_text_length": 106,
35
+ "average_text_length": 908.1124937280482,
36
+ "max_text_length": 11576,
37
+ "unique_texts": 9965
38
+ }
39
+ },
40
+ "en-ru": {
41
+ "num_samples": 9963,
42
+ "number_of_characters": 17731405,
43
+ "unique_pairs": 9963,
44
+ "sentence1_statistics": {
45
+ "total_text_length": 9047761,
46
+ "min_text_length": 106,
47
+ "average_text_length": 908.1362039546322,
48
+ "max_text_length": 11576,
49
+ "unique_texts": 9963
50
+ },
51
+ "sentence2_statistics": {
52
+ "total_text_length": 8683644,
53
+ "min_text_length": 103,
54
+ "average_text_length": 871.5892803372478,
55
+ "max_text_length": 5675,
56
+ "unique_texts": 9963
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
@@ -71,7 +71,7 @@ GP_BENCHMARK_ENTRIES = [
71
71
  "MTEB(cmn, v1)",
72
72
  "MTEB(deu, v1)",
73
73
  "MTEB(fra, v1)",
74
- "MTEB(jpn, v1)",
74
+ "JMTEB(v2)",
75
75
  "MTEB(kor, v1)",
76
76
  "MTEB(nld, v1)",
77
77
  "MTEB(pol, v1)",
@@ -84,7 +84,12 @@ GP_BENCHMARK_ENTRIES = [
84
84
  MenuEntry(
85
85
  "Other",
86
86
  mteb.get_benchmarks(
87
- ["MTEB(eng, v1)", "MTEB(fas, v1)", "MTEB(rus, v1)"]
87
+ [
88
+ "MTEB(eng, v1)",
89
+ "MTEB(fas, v1)",
90
+ "MTEB(rus, v1)",
91
+ "MTEB(jpn, v1)",
92
+ ]
88
93
  ),
89
94
  )
90
95
  ],
@@ -0,0 +1,67 @@
1
+ from mteb.models.model_meta import ModelMeta, ScoringFunction
2
+ from mteb.models.sentence_transformer_wrapper import (
3
+ SentenceTransformerEncoderWrapper,
4
+ )
5
+
6
# Metadata for the NbAiLab models; all three are loaded through the
# Sentence Transformers wrapper and scored with cosine similarity.

# NOTE(review): n_parameters (1.78e9) and embed_dim (4096) are far larger than
# nb_bert_base below (177_853_440 / 768), while memory_usage_mb is nearly the
# same (678 vs 681). These two values look inconsistent — confirm against the
# model card before relying on them.
nb_sbert = ModelMeta(
    loader=SentenceTransformerEncoderWrapper,  # type: ignore[arg-type]
    name="NbAiLab/nb-sbert-base",
    languages=["nno-Latn", "nob-Latn", "swe-Latn", "dan-Latn"],
    open_weights=True,
    revision="b95656350a076aeafd2d23763660f80655408cc6",
    release_date="2022-11-23",
    n_parameters=1_780_000_000,
    memory_usage_mb=678,
    embed_dim=4096,
    license="apache-2.0",
    max_tokens=75,
    reference="https://huggingface.co/NbAiLab/nb-sbert-base",
    similarity_fn_name=ScoringFunction.COSINE,
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=False,
    public_training_code=None,
    public_training_data="https://huggingface.co/datasets/NbAiLab/mnli-norwegian",
    training_datasets=set(),
)

nb_bert_large = ModelMeta(
    loader=SentenceTransformerEncoderWrapper,  # type: ignore[arg-type]
    name="NbAiLab/nb-bert-large",
    languages=["nno-Latn", "nob-Latn"],
    open_weights=True,
    revision="f9d0fc184adab4dc354d85e1854b7634540d7550",
    release_date="2021-04-29",
    n_parameters=355087360,
    memory_usage_mb=1359,
    embed_dim=1024,
    license="cc-by-4.0",
    max_tokens=512,
    reference="https://huggingface.co/NbAiLab/nb-bert-large",
    similarity_fn_name=ScoringFunction.COSINE,
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=False,
    public_training_code=None,
    public_training_data="https://huggingface.co/NbAiLab/nb-bert-large#training-data",
    training_datasets=set(),
)

nb_bert_base = ModelMeta(
    loader=SentenceTransformerEncoderWrapper,  # type: ignore[arg-type]
    name="NbAiLab/nb-bert-base",
    languages=["nno-Latn", "nob-Latn"],
    open_weights=True,
    revision="9417c3f62a3adc99f17ff92bff446f35d011f994",
    release_date="2021-01-13",
    n_parameters=177853440,
    memory_usage_mb=681,
    embed_dim=768,
    license="cc-by-4.0",
    max_tokens=512,
    reference="https://huggingface.co/NbAiLab/nb-bert-base",
    similarity_fn_name=ScoringFunction.COSINE,
    framework=["Sentence Transformers", "PyTorch"],
    use_instructions=False,
    public_training_code=None,
    public_training_data="https://huggingface.co/NbAiLab/nb-bert-base#training-data",
    training_datasets=set(),
)
@@ -16,7 +16,7 @@ from .nusa_translation_bitext_mining import NusaTranslationBitextMining
16
16
  from .nusa_x_bitext_mining import NusaXBitextMining
17
17
  from .phinc_bitext_mining import PhincBitextMining
18
18
  from .roma_tales_bitext_mining import RomaTalesBitextMining
19
- from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining
19
+ from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining, RuSciBenchBitextMiningV2
20
20
  from .tatoeba_bitext_mining import TatoebaBitextMining
21
21
  from .web_faq_bitext_mining import WebFAQBitextMiningQAs, WebFAQBitextMiningQuestions
22
22
 
@@ -40,6 +40,7 @@ __all__ = [
40
40
  "PhincBitextMining",
41
41
  "RomaTalesBitextMining",
42
42
  "RuSciBenchBitextMining",
43
+ "RuSciBenchBitextMiningV2",
43
44
  "TatoebaBitextMining",
44
45
  "WebFAQBitextMiningQAs",
45
46
  "WebFAQBitextMiningQuestions",
@@ -42,6 +42,52 @@ class RuSciBenchBitextMining(AbsTaskBitextMining):
42
42
  volume = {110},
43
43
  year = {2024},
44
44
  }
45
+ """,
46
+ prompt="Given the following title and abstract of the scientific article, find its translation",
47
+ superseded_by="RuSciBenchBitextMining.v2",
48
+ )
49
+
50
+
51
+ class RuSciBenchBitextMiningV2(AbsTaskBitextMining):
52
+ fast_loading = True
53
+ metadata = TaskMetadata(
54
+ name="RuSciBenchBitextMining.v2",
55
+ dataset={
56
+ "path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
57
+ "revision": "20e815e8ac8787331546386dfd177821510f79a3",
58
+ },
59
+ description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa. Compared to the previous version, 6 erroneous examples have been removed.",
60
+ reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
61
+ type="BitextMining",
62
+ category="t2c",
63
+ modalities=["text"],
64
+ eval_splits=["test"],
65
+ eval_langs={
66
+ "ru-en": ["rus-Cyrl", "eng-Latn"],
67
+ "en-ru": ["eng-Latn", "rus-Cyrl"],
68
+ },
69
+ main_score="f1",
70
+ date=("2007-01-01", "2023-01-01"),
71
+ domains=["Academic", "Non-fiction", "Written"],
72
+ task_subtypes=[],
73
+ license="not specified",
74
+ dialect=[],
75
+ sample_creation="found",
76
+ annotations_creators="derived",
77
+ bibtex_citation=r"""
78
+ @article{vatolin2024ruscibench,
79
+ author = {Vatolin, A. and Gerasimenko, N. and Ianina, A. and Vorontsov, K.},
80
+ doi = {10.1134/S1064562424602191},
81
+ issn = {1531-8362},
82
+ journal = {Doklady Mathematics},
83
+ month = {12},
84
+ number = {1},
85
+ pages = {S251--S260},
86
+ title = {RuSciBench: Open Benchmark for Russian and English Scientific Document Representations},
87
+ url = {https://doi.org/10.1134/S1064562424602191},
88
+ volume = {110},
89
+ year = {2024},
90
+ }
45
91
  """,
46
92
  prompt="Given the following title and abstract of the scientific article, find its translation",
47
93
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.3.7
3
+ Version: 2.3.8
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -15,6 +15,7 @@ mteb/similarity_functions.py,sha256=ySSnrKl4cSKOWfyIKQPVTJtxuy2ZNfcv0COXDp22QlQ,
15
15
  mteb/_evaluators/__init__.py,sha256=Ag1_RWpxBGMpujzd3FZjI40gY_KQKIpY31tJPuk-hFg,1013
16
16
  mteb/_evaluators/_download.py,sha256=jntlcURbJxcxUjTmn2D9Tu6ZnWgDc9t5bY8p9CZCqv4,586
17
17
  mteb/_evaluators/any_sts_evaluator.py,sha256=f0V3NDP5Bfp8qEeBwP8E-Enj5F5NbFze-kGmzlkObQA,3762
18
+ mteb/_evaluators/classification_metrics.py,sha256=TI-cMPWrIpMqpsNhhwSBY4bZUu2yM469fbcu44zolW0,1926
18
19
  mteb/_evaluators/clustering_evaluator.py,sha256=5XoKHl5LcG9jQ9oBzNAWYVpZWWUxrars3t7TdIV7xS0,2052
19
20
  mteb/_evaluators/evaluator.py,sha256=gwaeftcAKoGcIQs8jIaafynbcYrYErj6AitHBxgjn2w,807
20
21
  mteb/_evaluators/pair_classification_evaluator.py,sha256=6lgDI9wRfEK937YTS4l0W1OL1IQpHYZ4l34-Lxi9KdA,6401
@@ -37,7 +38,7 @@ mteb/abstasks/classification.py,sha256=Es9pmRdjv6xbc-KnGqVdO6dR1cc7yAHhekCZES7n5
37
38
  mteb/abstasks/clustering.py,sha256=4KcaU8_sNLmLvMhwDpNmcY2nD3BNyx_LcM-ddSv-wtY,14410
38
39
  mteb/abstasks/clustering_legacy.py,sha256=HZY8zgBgqqs5urF_to9wzqm3MnjFivs59hU6P3NrzcI,8684
39
40
  mteb/abstasks/dataset_card_template.md,sha256=aD6l8qc3_jxwoIGJNYLzse-jpRa8hu92AxpnUtNgges,5122
40
- mteb/abstasks/multilabel_classification.py,sha256=feLlpSKoe3b_Sb58N-9cx_5hzti1a2iA8QxcSBWSfjE,8922
41
+ mteb/abstasks/multilabel_classification.py,sha256=rpIwI3jV2YKtmXlFS2_Ytg4yYjdjPy0q5OU4MsRJFqo,9211
41
42
  mteb/abstasks/pair_classification.py,sha256=ToOBFDiokZOz9ea-klMLj_37slbVFR3lSuihP81x9Lc,13263
42
43
  mteb/abstasks/regression.py,sha256=SeacOErZUXGLGOcwqAvht6BlbD8fcsn9QhNiFIuJGyc,8832
43
44
  mteb/abstasks/retrieval.py,sha256=7QTKYlGaGvF1lOQkB_B4qj8Vm2FxxFXNVTHhfwZO8Bw,26439
@@ -45,6 +46,9 @@ mteb/abstasks/retrieval_dataset_loaders.py,sha256=WukcFAn54rUpXULCG43eysHozXHAxo
45
46
  mteb/abstasks/sts.py,sha256=aKTivjvDtAaoYb1hz1NBv2o3UpDR-3AaeHgkDFHMBGI,9077
46
47
  mteb/abstasks/task_metadata.py,sha256=7CzYK1y-vwLUiWaEGPgU3HiolpW3UCul8Y2KJ-WSpeE,26892
47
48
  mteb/abstasks/zeroshot_classification.py,sha256=4UxBIZ1e1iRK8PRAhCWnnSDirK2vi5-w2N5ZomCnaIM,5882
49
+ mteb/abstasks/_data_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
+ mteb/abstasks/_data_filter/filters.py,sha256=p1QLy7V9jYVFicef61fwzCpbSpTA6rOv8CxkwEUTMvc,4585
51
+ mteb/abstasks/_data_filter/task_pipelines.py,sha256=L56nKTGwLH3QqmzkO-Wx4Vi5vfovnnKIDN_f3M8FSiA,3078
48
52
  mteb/abstasks/image/__init__.py,sha256=NgvMJnp1g2mUv27RL-TvzA7s1BOdMG-EB1CrZfdbWdg,136
49
53
  mteb/abstasks/image/image_text_pair_classification.py,sha256=SejETTXc3g2VSWYafTe-VAHZcNpX98bgzsWsqQisIzI,7712
50
54
  mteb/abstasks/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,7 +60,7 @@ mteb/benchmarks/_create_table.py,sha256=OAiR44ynJ2fMzoBmVITQtOTYQzxIu9KUdS_HzlBl
56
60
  mteb/benchmarks/benchmark.py,sha256=70RlMyyg_wkWTlU_IbfLl-KaqRWXGCKTd8fWe9X-AQE,4173
57
61
  mteb/benchmarks/get_benchmark.py,sha256=-n_O-gitRKZi48gJKNgGuI36hsP7yLVSiwulnMHN7Gw,3935
58
62
  mteb/benchmarks/benchmarks/__init__.py,sha256=Ig5dSFunzI-F-OamruuKJVSstbG3xQNkXCxRY3Bj_Ck,2180
59
- mteb/benchmarks/benchmarks/benchmarks.py,sha256=qHHmJfisT75VRVoZfPcHhShCG0jY6vSWZEx-D01XxKU,94757
63
+ mteb/benchmarks/benchmarks/benchmarks.py,sha256=vWX6QZgqF9iKAE1tIQwaXw9f8q_WiBtdgo8yj4_CHFI,94767
60
64
  mteb/benchmarks/benchmarks/rteb_benchmarks.py,sha256=QnCSrTTaBfcRlAQp2Nu81tgv1idMXqiM16Fp2zKJ5Ys,10607
61
65
  mteb/cli/__init__.py,sha256=v-csUr3eUZElIvrGB6QGtaIdndDfNWEe9oZchsGsJpg,64
62
66
  mteb/cli/_display_tasks.py,sha256=7A06dT9sSoTz6shyMvskPxuc5eHY_H7PGPlROzMP0yw,2196
@@ -83,6 +87,7 @@ mteb/descriptive_stats/BitextMining/PhincBitextMining.json,sha256=bC31IS_N3-eehB
83
87
  mteb/descriptive_stats/BitextMining/PubChemSMILESBitextMining.json,sha256=v4OzVwYV3Q-J3VitfK8zX_t2ZOZzvD4WtPeJoFeVNgI,3737
84
88
  mteb/descriptive_stats/BitextMining/RomaTalesBitextMining.json,sha256=ciHSHuKgi4Ip0eH4f6G52w4MQ0BFvjizBN1Mh-2tPNE,1415
85
89
  mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.json,sha256=f_lS0ua_dtwhmw-zqqZGGPJ4b_4u82VDicM8a71SId8,2209
90
+ mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json,sha256=jm6tsOyOooX64kgPl_rLTv_aiHy34Nc1MudlqXgrSpQ,2214
86
91
  mteb/descriptive_stats/BitextMining/SAMSumFa.json,sha256=A1o7RjIwoNwjxRoMS6Qmn-4VGy5vX_QDK4sQAjJCM5Y,613
87
92
  mteb/descriptive_stats/BitextMining/SRNCorpusBitextMining.json,sha256=41u--q1IXxBXqFb8f_BXYdv9fIlUkbfSPldzwCNbo-w,2136
88
93
  mteb/descriptive_stats/BitextMining/SynPerChatbotRAGSumSRetrieval.json,sha256=nNEK8VHlRxDRmMjwuBA4U5RuXNZwxWyCHFCJHQbqIAQ,612
@@ -1426,7 +1431,7 @@ mteb/languages/language_scripts.py,sha256=5wix9HTYolNIpTiS5oXf2pGJyL7ftdGKs_m432
1426
1431
  mteb/languages/programming_languages.py,sha256=zxAakT3OSUnAuTnQ34VyeFIECnNXMlleZmAake6jsZE,211
1427
1432
  mteb/leaderboard/__init__.py,sha256=991roXmtRwEQysV-37hWEzWpkvPgMCGRqZTHR-hm2io,88
1428
1433
  mteb/leaderboard/app.py,sha256=29MxFLKEVT-roULHG5boHmsQVhld1rDGNS94r7MWlz8,33118
1429
- mteb/leaderboard/benchmark_selector.py,sha256=uH66SI0iT1J4_fnebViWa83dQwhPi7toBv7PRL_epDw,7784
1434
+ mteb/leaderboard/benchmark_selector.py,sha256=qd-2L20RQ4ACke01UlytkhZok1dkWgfUlXzfET52kGc,7956
1430
1435
  mteb/leaderboard/figures.py,sha256=mPO0go_23QEhAm1RJdLiBxPFCoUiA74_ztyl6yimc7k,7553
1431
1436
  mteb/leaderboard/table.py,sha256=6SnrYC5GcBlvVSO6vOk6ObuqtoveBLv3JUuXqdKueG8,8333
1432
1437
  mteb/leaderboard/text_segments.py,sha256=iMIkS04QQjPbT-SkU0x6fOcS8xRbUYevryu9HydipKM,6570
@@ -1510,7 +1515,7 @@ mteb/models/model_implementations/moco_models.py,sha256=Kl0nBsqkG3crYoo5YulFq1fv
1510
1515
  mteb/models/model_implementations/model2vec_models.py,sha256=D-EY-6P-cKKunbgzk4DHzJL1ogpWYFhpHbTLb8qQjJw,13765
1511
1516
  mteb/models/model_implementations/moka_models.py,sha256=Y5do7Z4JyGxabYrjHhkBLqCKTQKotniS-f4kOgXJjag,4995
1512
1517
  mteb/models/model_implementations/mxbai_models.py,sha256=33ta2BnhvKYBUgE89wFgPNf-CnOb7ooumZvqHOvbZsA,3593
1513
- mteb/models/model_implementations/nb_sbert.py,sha256=dF3WBn6ERIK7Oqp-tXdLn11Gf0Z7RKLhAoCq0YHxEug,861
1518
+ mteb/models/model_implementations/nbailab.py,sha256=bqqR0qs10IH2g5HC6K962tDMBciw8qFsNVHADNS72jk,2396
1514
1519
  mteb/models/model_implementations/no_instruct_sentence_models.py,sha256=6i-xbLRRNKuDpU-hwklwdQjgu1wnz5CecLSoc6kyd7Q,3976
1515
1520
  mteb/models/model_implementations/nomic_models.py,sha256=mT-v5Gs5-sRH8-ziCw_CtxB9ox3C6FtwWJjNghNrunw,11334
1516
1521
  mteb/models/model_implementations/nomic_models_vision.py,sha256=gEEieMThvw4p-QhRH0G_9-WWTvj-jqOlgFsh6O07dbc,6731
@@ -1596,7 +1601,7 @@ mteb/tasks/bitext_mining/fas/__init__.py,sha256=srw2I-yfcLgIkRzJv-p_qZ9fg0cCKr-D
1596
1601
  mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py,sha256=yj8l1K3CIdESwl4sl2M4HhyOrZZYQZb_EDo-EgCAJdg,3330
1597
1602
  mteb/tasks/bitext_mining/kat/__init__.py,sha256=a-KcFJ3Ol7R8yq02RcGjaOxEfqnwJeo7AAib-RU-JFw,116
1598
1603
  mteb/tasks/bitext_mining/kat/tbilisi_city_hall_bitext_mining.py,sha256=xVCxpJr7UW2KadNdn7Gsw-wZ65uz5vhRDhQZ7eILokQ,1918
1599
- mteb/tasks/bitext_mining/multilingual/__init__.py,sha256=qDgixbHEvV3xz6JN3kFQDnvtSL-yVjl-Z8inUwimI6I,1954
1604
+ mteb/tasks/bitext_mining/multilingual/__init__.py,sha256=LcPygeOuvrka67aDkktT-2lSqcxpWPSMmd_BaxIsl24,2012
1600
1605
  mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py,sha256=lNbCz3dN9o3F04Y7vtNBhF-lPUNyVbAOKgUR-QKZn_8,29082
1601
1606
  mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py,sha256=tTKvS-v7d9V_zymCn_ZonUKlo9NI7vTyppxS9iAu8I0,2873
1602
1607
  mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py,sha256=P_UHMWh2gKG6CloXmP5J2kjrCTQwoJAU1MKdLl6JFKc,1836
@@ -1615,7 +1620,7 @@ mteb/tasks/bitext_mining/multilingual/nusa_translation_bitext_mining.py,sha256=e
1615
1620
  mteb/tasks/bitext_mining/multilingual/nusa_x_bitext_mining.py,sha256=BphnEDdG1-IsCklJWRCs2yK7I1zVuPh7PQrrYAI540c,2309
1616
1621
  mteb/tasks/bitext_mining/multilingual/phinc_bitext_mining.py,sha256=53xLXpgIDk55JfCoe3pa93T_9T9sfRJAryBVcWZx5co,1477
1617
1622
  mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py,sha256=5uwf4vhud5bQuPAcufWGcA7UBmp5YPKsyvc5OUYgK-M,1730
1618
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py,sha256=lxaWPFLZpWwAy6CIGAENkBvH7iVLShE4sGf0eT0-Mac,2075
1623
+ mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py,sha256=09e8XDy-zvqpvGMuj8AIbYUGmrggPi6XvOZi9Fbw0G4,4162
1619
1624
  mteb/tasks/bitext_mining/multilingual/tatoeba_bitext_mining.py,sha256=Y6QnjbmL4fIuTgK8tuZfNnWmM-GwBVaAjUiAtTSgPqM,5898
1620
1625
  mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py,sha256=vWoRmAJKR0ed8mUKDqLUMe_IpFjj7Xlsw0NFhcIOJOc,14978
1621
1626
  mteb/tasks/bitext_mining/srn/__init__.py,sha256=XaMVvUagmgLUG6tZw2jo6fMKiVTfQpaaWZGQZo-1YYk,97
@@ -2573,9 +2578,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2573
2578
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2574
2579
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2575
2580
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2576
- mteb-2.3.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2577
- mteb-2.3.7.dist-info/METADATA,sha256=8WsmZm9cDT-XjdHIF04XscnqFbTmCiudzikksCUJSiw,13923
2578
- mteb-2.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2579
- mteb-2.3.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2580
- mteb-2.3.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2581
- mteb-2.3.7.dist-info/RECORD,,
2581
+ mteb-2.3.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2582
+ mteb-2.3.8.dist-info/METADATA,sha256=QMpRmhMLXi45L0d29kCoNcEugCwDl8IWCc3wE_r-fb4,13923
2583
+ mteb-2.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2584
+ mteb-2.3.8.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2585
+ mteb-2.3.8.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2586
+ mteb-2.3.8.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- from mteb.models.model_meta import ModelMeta
2
- from mteb.models.sentence_transformer_wrapper import (
3
- SentenceTransformerEncoderWrapper,
4
- )
5
-
6
- nb_sbert = ModelMeta(
7
- loader=SentenceTransformerEncoderWrapper,
8
- name="NbAiLab/nb-sbert-base",
9
- languages=["nno-Latn", "nob-Latn", "swe-Latn", "dan-Latn"],
10
- open_weights=True,
11
- revision="b95656350a076aeafd2d23763660f80655408cc6",
12
- release_date="2022-11-23",
13
- n_parameters=1_780_000_000,
14
- memory_usage_mb=678,
15
- embed_dim=4096,
16
- license="apache-2.0",
17
- max_tokens=75,
18
- reference="https://huggingface.co/NbAiLab/nb-sbert-base",
19
- similarity_fn_name="cosine",
20
- framework=["Sentence Transformers", "PyTorch"],
21
- use_instructions=False,
22
- public_training_code=None,
23
- public_training_data="https://huggingface.co/datasets/NbAiLab/mnli-norwegian",
24
- training_datasets=set(),
25
- )
File without changes