PyPI - mteb - Versions diffs - 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl - Mend

mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

mteb/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from importlib.metadata import version
+from mteb import types
 from mteb.abstasks import AbsTask
 from mteb.abstasks.task_metadata import TaskMetadata
 from mteb.deprecated_evaluator import MTEB
@@ -7,7 +8,12 @@ from mteb.evaluate import evaluate
 from mteb.filter_tasks import filter_tasks
 from mteb.get_tasks import get_task, get_tasks
 from mteb.load_results import load_results
-from mteb.models import EncoderProtocol, SentenceTransformerEncoderWrapper
+from mteb.models import (
+    CrossEncoderProtocol,
+    EncoderProtocol,
+    SearchProtocol,
+    SentenceTransformerEncoderWrapper,
+)
 from mteb.models.get_model_meta import get_model, get_model_meta, get_model_metas
 from mteb.results import BenchmarkResults, TaskResult
@@ -21,7 +27,9 @@ __all__ = [
     "AbsTask",
     "Benchmark",
     "BenchmarkResults",
+    "CrossEncoderProtocol",
     "EncoderProtocol",
+    "SearchProtocol",
     "SentenceTransformerEncoderWrapper",
     "TaskMetadata",
     "TaskResult",
@@ -35,4 +43,5 @@ __all__ = [
     "get_task",
     "get_tasks",
     "load_results",
+    "types",
 ]

mteb/benchmarks/benchmarks/__init__.py CHANGED Viewed

@@ -27,6 +27,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
     MTEB_KOR,
     MTEB_MAIN_RU,
     MTEB_MINERS_BITEXT_MINING,
+    MTEB_NL,
     MTEB_POL,
     MTEB_RETRIEVAL_LAW,
     MTEB_RETRIEVAL_MEDICAL,
@@ -87,6 +88,7 @@ __all__ = [
     "MTEB_KOR",
     "MTEB_MAIN_RU",
     "MTEB_MINERS_BITEXT_MINING",
+    "MTEB_NL",
     "MTEB_POL",
     "MTEB_RETRIEVAL_LAW",
     "MTEB_RETRIEVAL_MEDICAL",

mteb/benchmarks/benchmarks/benchmarks.py CHANGED Viewed

@@ -1636,6 +1636,81 @@ BEIR_NL = Benchmark(
 """,
 )
+MTEB_NL = Benchmark(
+    name="MTEB(nld, v1)",
+    display_name="MTEB-NL",
+    icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/nl.svg",
+    tasks=MTEBTasks(
+        get_tasks(
+            languages=["nld"],
+            exclusive_language_filter=True,
+            tasks=[
+                # Classification
+                "DutchBookReviewSentimentClassification",
+                "MassiveIntentClassification",
+                "MassiveScenarioClassification",
+                "SIB200Classification",
+                "MultiHateClassification",
+                "VaccinChatNLClassification",
+                "DutchColaClassification",
+                "DutchGovernmentBiasClassification",
+                "DutchSarcasticHeadlinesClassification",
+                "DutchNewsArticlesClassification",
+                "OpenTenderClassification",
+                "IconclassClassification",
+                # # PairClassification
+                "SICKNLPairClassification",
+                "XLWICNLPairClassification",
+                # # MultiLabelClassification
+                "CovidDisinformationNLMultiLabelClassification",
+                "MultiEURLEXMultilabelClassification",
+                "VABBMultiLabelClassification",
+                # # Clustering
+                "DutchNewsArticlesClusteringS2S",
+                "DutchNewsArticlesClusteringP2P",
+                "SIB200ClusteringS2S",
+                "VABBClusteringS2S",
+                "VABBClusteringP2P",
+                "OpenTenderClusteringS2S",
+                "OpenTenderClusteringP2P",
+                "IconclassClusteringS2S",
+                # # Reranking
+                "WikipediaRerankingMultilingual",
+                # # Retrieval
+                "ArguAna-NL",
+                "SCIDOCS-NL",
+                "SciFact-NL",
+                "NFCorpus-NL",
+                "BelebeleRetrieval",
+                # "WebFAQRetrieval",
+                "DutchNewsArticlesRetrieval",
+                "bBSARDNLRetrieval",
+                "LegalQANLRetrieval",
+                "OpenTenderRetrieval",
+                "VABBRetrieval",
+                "WikipediaRetrievalMultilingual",
+                # # STS
+                "SICK-NL-STS",
+                "STSBenchmarkMultilingualSTS",
+            ],
+        )
+    ),
+    description="MTEB-NL",
+    reference="https://arxiv.org/abs/2509.12340",
+    contacts=["nikolay-banar"],
+    citation=r"""
+@misc{banar2025mtebnle5nlembeddingbenchmark,
+  archiveprefix = {arXiv},
+  author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
+  eprint = {2509.12340},
+  primaryclass = {cs.CL},
+  title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
+  url = {https://arxiv.org/abs/2509.12340},
+  year = {2025},
+}
+""",
+)
 MIEB_common_tasks = [
     # Image Classification
     "Birdsnap",  # fine

mteb/descriptive_stats/BitextMining/BUCC.json CHANGED Viewed

@@ -3,66 +3,96 @@
         "num_samples": 35000,
         "number_of_characters": 146737556,
         "unique_pairs": 35000,
-        "min_sentence1_length": 16,
-        "average_sentence1_length": 99.10931428571429,
-        "max_sentence1_length": 204,
-        "unique_sentence1": 34978,
-        "min_sentence2_length": 17,
-        "average_sentence2_length": 101.14933691422246,
-        "max_sentence2_length": 339,
-        "unique_sentence2": 1133728,
+        "sentence1_statistics": {
+            "total_text_length": 3468826,
+            "min_text_length": 16,
+            "average_text_length": 99.10931428571429,
+            "max_text_length": 204,
+            "unique_texts": 34978
+        },
+        "sentence2_statistics": {
+            "total_text_length": 143268730,
+            "min_text_length": 17,
+            "average_text_length": 101.14933691422246,
+            "max_text_length": 339,
+            "unique_texts": 1133728
+        },
         "hf_subset_descriptive_stats": {
             "de-en": {
                 "num_samples": 9580,
                 "number_of_characters": 41450074,
                 "unique_pairs": 9580,
-                "min_sentence1_length": 50,
-                "average_sentence1_length": 109.07974947807934,
-                "max_sentence1_length": 204,
-                "unique_sentence1": 9573,
-                "min_sentence2_length": 17,
-                "average_sentence2_length": 101.18043156531952,
-                "max_sentence2_length": 293,
-                "unique_sentence2": 397151
+                "sentence1_statistics": {
+                    "total_text_length": 1044984,
+                    "min_text_length": 50,
+                    "average_text_length": 109.07974947807934,
+                    "max_text_length": 204,
+                    "unique_texts": 9573
+                },
+                "sentence2_statistics": {
+                    "total_text_length": 40405090,
+                    "min_text_length": 17,
+                    "average_text_length": 101.18043156531952,
+                    "max_text_length": 293,
+                    "unique_texts": 397151
+                }
             },
             "fr-en": {
                 "num_samples": 9086,
                 "number_of_characters": 38272453,
                 "unique_pairs": 9086,
-                "min_sentence1_length": 43,
-                "average_sentence1_length": 99.31785163988553,
-                "max_sentence1_length": 174,
-                "unique_sentence1": 9081,
-                "min_sentence2_length": 21,
-                "average_sentence2_length": 101.05202942051324,
-                "max_sentence2_length": 319,
-                "unique_sentence2": 368033
+                "sentence1_statistics": {
+                    "total_text_length": 902402,
+                    "min_text_length": 43,
+                    "average_text_length": 99.31785163988553,
+                    "max_text_length": 174,
+                    "unique_texts": 9081
+                },
+                "sentence2_statistics": {
+                    "total_text_length": 37370051,
+                    "min_text_length": 21,
+                    "average_text_length": 101.05202942051324,
+                    "max_text_length": 319,
+                    "unique_texts": 368033
+                }
             },
             "ru-en": {
                 "num_samples": 14435,
                 "number_of_characters": 57904085,
                 "unique_pairs": 14435,
-                "min_sentence1_length": 40,
-                "average_sentence1_length": 101.6593003117423,
-                "max_sentence1_length": 186,
-                "unique_sentence1": 14425,
-                "min_sentence2_length": 21,
-                "average_sentence2_length": 101.06828784332406,
-                "max_sentence2_length": 339,
-                "unique_sentence2": 555503
+                "sentence1_statistics": {
+                    "total_text_length": 1467452,
+                    "min_text_length": 40,
+                    "average_text_length": 101.6593003117423,
+                    "max_text_length": 186,
+                    "unique_texts": 14425
+                },
+                "sentence2_statistics": {
+                    "total_text_length": 56436633,
+                    "min_text_length": 21,
+                    "average_text_length": 101.06828784332406,
+                    "max_text_length": 339,
+                    "unique_texts": 555503
+                }
             },
             "zh-en": {
                 "num_samples": 1899,
                 "number_of_characters": 9110944,
                 "unique_pairs": 1899,
-                "min_sentence1_length": 16,
-                "average_sentence1_length": 28.429699842022117,
-                "max_sentence1_length": 40,
-                "unique_sentence1": 1899,
-                "min_sentence2_length": 22,
-                "average_sentence2_length": 101.92388026108485,
-                "max_sentence2_length": 249,
-                "unique_sentence2": 88360
+                "sentence1_statistics": {
+                    "total_text_length": 53988,
+                    "min_text_length": 16,
+                    "average_text_length": 28.429699842022117,
+                    "max_text_length": 40,
+                    "unique_texts": 1899
+                },
+                "sentence2_statistics": {
+                    "total_text_length": 9056956,
+                    "min_text_length": 22,
+                    "average_text_length": 101.92388026108485,
+                    "max_text_length": 249,
+                    "unique_texts": 88360
+                }
             }
         }
     }

mteb/descriptive_stats/Classification/DKHateClassification.json CHANGED Viewed

@@ -1,37 +1,53 @@
 {
     "test": {
         "num_samples": 329,
-        "number_of_characters": 29011,
         "number_texts_intersect_with_train": 4,
-        "min_text_length": 1,
-        "average_text_length": 88.17933130699087,
-        "max_text_length": 2434,
-        "unique_text": 326,
-        "unique_labels": 2,
-        "labels": {
-            "0": {
-                "count": 288
-            },
-            "1": {
-                "count": 41
+        "text_statistics": {
+            "total_text_length": 29011,
+            "min_text_length": 1,
+            "average_text_length": 88.17933130699087,
+            "max_text_length": 2434,
+            "unique_texts": 326
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "0": {
+                    "count": 288
+                },
+                "1": {
+                    "count": 41
+                }
             }
         }
     },
     "train": {
         "num_samples": 2960,
-        "number_of_characters": 307722,
         "number_texts_intersect_with_train": null,
-        "min_text_length": 1,
-        "average_text_length": 103.96013513513513,
-        "max_text_length": 5403,
-        "unique_text": 2902,
-        "unique_labels": 2,
-        "labels": {
-            "0": {
-                "count": 2576
-            },
-            "1": {
-                "count": 384
+        "text_statistics": {
+            "total_text_length": 307722,
+            "min_text_length": 1,
+            "average_text_length": 103.96013513513513,
+            "max_text_length": 5403,
+            "unique_texts": 2902
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "0": {
+                    "count": 2576
+                },
+                "1": {
+                    "count": 384
+                }
             }
         }
     }

mteb/descriptive_stats/Classification/DutchColaClassification.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+    "test": {
+        "num_samples": 2400,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 92146,
+            "min_text_length": 5,
+            "average_text_length": 38.39416666666666,
+            "max_text_length": 138,
+            "unique_texts": 2400
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "1": {
+                    "count": 1200
+                },
+                "0": {
+                    "count": 1200
+                }
+            }
+        }
+    },
+    "train": {
+        "num_samples": 19893,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 761416,
+            "min_text_length": 4,
+            "average_text_length": 38.27557432262605,
+            "max_text_length": 152,
+            "unique_texts": 19893
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "1": {
+                    "count": 12604
+                },
+                "0": {
+                    "count": 7289
+                }
+            }
+        }
+    }
+}

mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+    "test": {
+        "num_samples": 752,
+        "number_texts_intersect_with_train": 100,
+        "text_statistics": {
+            "total_text_length": 171956,
+            "min_text_length": 32,
+            "average_text_length": 228.66489361702128,
+            "max_text_length": 2746,
+            "unique_texts": 752
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "0.0": {
+                    "count": 555
+                },
+                "1.0": {
+                    "count": 197
+                }
+            }
+        }
+    },
+    "train": {
+        "num_samples": 1718,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 390362,
+            "min_text_length": 18,
+            "average_text_length": 227.2188591385332,
+            "max_text_length": 2662,
+            "unique_texts": 1718
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "1.0": {
+                    "count": 470
+                },
+                "0.0": {
+                    "count": 1248
+                }
+            }
+        }
+    }
+}

mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json ADDED Viewed

@@ -0,0 +1,90 @@
+{
+    "test": {
+        "num_samples": 1200,
+        "number_texts_intersect_with_train": 1,
+        "text_statistics": {
+            "total_text_length": 2034506,
+            "min_text_length": 184,
+            "average_text_length": 1695.4216666666666,
+            "max_text_length": 8825,
+            "unique_texts": 1200
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 8,
+            "labels": {
+                "Opmerkelijk": {
+                    "count": 150
+                },
+                "Buitenland": {
+                    "count": 150
+                },
+                "Cultuur & Media": {
+                    "count": 150
+                },
+                "Binnenland": {
+                    "count": 150
+                },
+                "Politiek": {
+                    "count": 150
+                },
+                "Economie": {
+                    "count": 150
+                },
+                "Tech": {
+                    "count": 150
+                },
+                "Regionaal nieuws": {
+                    "count": 150
+                }
+            }
+        }
+    },
+    "train": {
+        "num_samples": 5600,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 9620538,
+            "min_text_length": 106,
+            "average_text_length": 1717.9532142857142,
+            "max_text_length": 29389,
+            "unique_texts": 5600
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 8,
+            "labels": {
+                "Cultuur & Media": {
+                    "count": 700
+                },
+                "Binnenland": {
+                    "count": 700
+                },
+                "Buitenland": {
+                    "count": 700
+                },
+                "Regionaal nieuws": {
+                    "count": 700
+                },
+                "Politiek": {
+                    "count": 700
+                },
+                "Economie": {
+                    "count": 700
+                },
+                "Opmerkelijk": {
+                    "count": 700
+                },
+                "Tech": {
+                    "count": 700
+                }
+            }
+        }
+    }
+}

mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json ADDED Viewed

@@ -0,0 +1,54 @@
+{
+    "test": {
+        "num_samples": 1326,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 82644,
+            "min_text_length": 17,
+            "average_text_length": 62.32579185520362,
+            "max_text_length": 117,
+            "unique_texts": 1326
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "0": {
+                    "count": 826
+                },
+                "1": {
+                    "count": 500
+                }
+            }
+        }
+    },
+    "train": {
+        "num_samples": 10609,
+        "number_texts_intersect_with_train": null,
+        "text_statistics": {
+            "total_text_length": 658787,
+            "min_text_length": 7,
+            "average_text_length": 62.09699311904986,
+            "max_text_length": 161,
+            "unique_texts": 10609
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 2,
+            "labels": {
+                "1": {
+                    "count": 4000
+                },
+                "0": {
+                    "count": 6609
+                }
+            }
+        }
+    }
+}

mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json CHANGED Viewed

@@ -1,22 +1,30 @@
 {
     "train": {
         "num_samples": 2264,
-        "number_of_characters": 276123,
         "number_texts_intersect_with_train": null,
-        "min_text_length": 9,
-        "average_text_length": 121.96245583038869,
-        "max_text_length": 315,
-        "unique_text": 2259,
-        "unique_labels": 3,
-        "labels": {
-            "1": {
-                "count": 1391
-            },
-            "2": {
-                "count": 570
-            },
-            "0": {
-                "count": 303
+        "text_statistics": {
+            "total_text_length": 276123,
+            "min_text_length": 9,
+            "average_text_length": 121.96245583038869,
+            "max_text_length": 315,
+            "unique_texts": 2259
+        },
+        "image_statistics": null,
+        "label_statistics": {
+            "min_labels_per_text": 1,
+            "average_label_per_text": 1.0,
+            "max_labels_per_text": 1,
+            "unique_labels": 3,
+            "labels": {
+                "1": {
+                    "count": 1391
+                },
+                "2": {
+                    "count": 570
+                },
+                "0": {
+                    "count": 303
+                }
             }
         }
     }

mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl