mteb-2.7.15-py3-none-any.whl → mteb-2.7.16-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective registries. It is provided for informational purposes only.
--- a/mteb/models/model_implementations/misc_models.py
+++ b/mteb/models/model_implementations/misc_models.py
@@ -1007,54 +1007,6 @@ thenlper__gte_small = ModelMeta(
   year={2023}
 }""",
 )
-OrlikB__KartonBERT_USE_base_v1 = ModelMeta(
-    name="OrlikB/KartonBERT-USE-base-v1",
-    model_type=["dense"],
-    revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
-    release_date="2024-09-30",
-    languages=["pol-Latn"],
-    loader=sentence_transformers_loader,
-    n_parameters=103705344,
-    n_embedding_parameters=None,
-    memory_usage_mb=396,
-    max_tokens=512.0,
-    embed_dim=768,
-    license="gpl-3.0",
-    open_weights=True,
-    public_training_code=None,
-    public_training_data=None,
-    framework=["PyTorch"],
-    reference="https://huggingface.co/OrlikB/KartonBERT-USE-base-v1",
-    similarity_fn_name=ScoringFunction.COSINE,
-    use_instructions=None,
-    training_datasets=None,
-    adapted_from="KartonBERT-USE-base-v1",
-    superseded_by=None,
-)
-OrlikB__st_polish_kartonberta_base_alpha_v1 = ModelMeta(
-    name="OrlikB/st-polish-kartonberta-base-alpha-v1",
-    model_type=["dense"],
-    revision="5590a0e2d7bb43674e44d7076b3ff157f7d4a1cb",
-    release_date="2023-11-12",
-    languages=["pol-Latn"],
-    loader=sentence_transformers_loader,
-    n_parameters=None,
-    n_embedding_parameters=None,
-    memory_usage_mb=None,
-    max_tokens=514.0,
-    embed_dim=768,
-    license="lgpl",
-    open_weights=True,
-    public_training_code=None,
-    public_training_data=None,
-    framework=["PyTorch"],
-    reference="https://huggingface.co/OrlikB/st-polish-kartonberta-base-alpha-v1",
-    similarity_fn_name=ScoringFunction.COSINE,
-    use_instructions=None,
-    training_datasets=None,
-    adapted_from="st-polish-kartonberta-base-alpha-v1",
-    superseded_by=None,
-)
 sdadas__mmlw_e5_base = ModelMeta(
     name="sdadas/mmlw-e5-base",
     model_type=["dense"],
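
Note: the deletions above remove only MTEB's registry entries for these two Polish models. If the checkpoints themselves are still published on the Hugging Face Hub (an assumption, not something this diff confirms), they can be loaded directly with sentence-transformers, independent of MTEB; a minimal sketch, pinning the revision recorded in the removed entry:

    # Loading the delisted checkpoint directly; assumes it is still on the Hub.
    # The revision pin and embed_dim come from the removed ModelMeta above.
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer(
        "OrlikB/KartonBERT-USE-base-v1",
        revision="1f59dd58fe57995c0e867d5e29f03763eae99645",
    )
    embeddings = model.encode(["Przykładowe polskie zdanie."])  # a Polish sample sentence
    print(embeddings.shape)  # (1, 768), matching embed_dim=768 above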
--- a/mteb/models/model_implementations/rerankers_custom.py
+++ b/mteb/models/model_implementations/rerankers_custom.py
@@ -103,68 +103,6 @@ class BGEReranker(RerankerWrapper):
         return scores


-class MonoBERTReranker(RerankerWrapper):
-    name: str = "MonoBERT"
-
-    def __init__(
-        self,
-        model_name_or_path="castorini/monobert-large-msmarco",
-        torch_compile=False,
-        **kwargs,
-    ):
-        from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-        super().__init__(model_name_or_path, **kwargs)
-        if not self.device:
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model_args = {}
-        if self.fp_options:
-            model_args["torch_dtype"] = self.fp_options
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            model_name_or_path,
-            **model_args,
-        )
-        self.model.to(self.device)
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
-        self.max_length = self.tokenizer.model_max_length
-        logger.info(f"Using max_length of {self.max_length}")
-
-        self.model.eval()
-
-    @torch.inference_mode()
-    def predict(
-        self,
-        inputs1: DataLoader[BatchedInput],
-        inputs2: DataLoader[BatchedInput],
-        *,
-        task_metadata: TaskMetadata,
-        hf_split: str,
-        hf_subset: str,
-        prompt_type: PromptType | None = None,
-        **kwargs: Any,
-    ) -> Array:
-        queries = [text for batch in inputs1 for text in batch["query"]]
-        instructions = None
-        if "instruction" in inputs2.dataset.features:
-            instructions = [text for batch in inputs1 for text in batch["instruction"]]
-        passages = [text for batch in inputs2 for text in batch["text"]]
-
-        if instructions is not None and instructions[0] is not None:
-            queries = [f"{q} {i}".strip() for i, q in zip(instructions, queries)]
-
-        tokens = self.tokenizer(
-            queries,
-            passages,
-            padding=True,
-            truncation="only_second",
-            return_tensors="pt",
-            max_length=self.max_length,
-        ).to(self.device)
-        output = self.model(**tokens)[0]
-        batch_scores = torch.nn.functional.log_softmax(output, dim=1)
-        return batch_scores[:, 1].exp()
-
-
 class JinaReranker(RerankerWrapper):
     name = "Jina"

@@ -219,31 +157,6 @@ class JinaReranker(RerankerWrapper):
         return scores


-monobert_large = ModelMeta(
-    loader=MonoBERTReranker,
-    loader_kwargs=dict(
-        fp_options="float16",
-    ),
-    name="castorini/monobert-large-msmarco",
-    model_type=["cross-encoder"],
-    languages=["eng-Latn"],
-    open_weights=True,
-    revision="0a97706f3827389da43b83348d5d18c9d53876fa",
-    release_date="2020-05-28",
-    n_parameters=None,
-    n_embedding_parameters=31_254_528,
-    memory_usage_mb=None,
-    max_tokens=None,
-    embed_dim=None,
-    license=None,
-    public_training_code=None,
-    public_training_data=None,
-    similarity_fn_name=None,
-    use_instructions=None,
-    training_datasets=None,
-    framework=["Sentence Transformers", "PyTorch", "Transformers"],
-)
-
 # languages unclear: https://huggingface.co/jinaai/jina-reranker-v2-base-multilingual/discussions/28
 jina_reranker_multilingual = ModelMeta(
     loader=JinaReranker,
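
Both removals above concern the monoBERT cross-encoder. For reference, a minimal, self-contained sketch of the same scoring pattern as the deleted predict() method, using plain transformers (the MTEB wrapper plumbing, batching, and instruction handling are omitted; sample texts are illustrative):

    # monoBERT-style cross-encoder scoring: tokenize (query, passage) pairs
    # jointly, log_softmax over the two classes, keep p(relevant) = class 1.
    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    name = "castorini/monobert-large-msmarco"
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSequenceClassification.from_pretrained(name).eval()

    queries = ["how do cross-encoders rank passages?"]
    passages = ["A cross-encoder reads query and passage together and emits a score."]

    with torch.inference_mode():
        tokens = tokenizer(
            queries,
            passages,
            padding=True,
            truncation="only_second",  # truncate the passage, never the query
            return_tensors="pt",
        )
        logits = model(**tokens).logits
        scores = torch.nn.functional.log_softmax(logits, dim=1)[:, 1].exp()
    print(scores)  # one relevance probability per (query, passage) pair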
--- a/mteb/models/model_implementations/rerankers_monot5_based.py
+++ b/mteb/models/model_implementations/rerankers_monot5_based.py
@@ -34,7 +34,6 @@ prediction_tokens = {
     "unicamp-dl/mt5-base-en-msmarco": ["▁no", "▁yes"],
     "unicamp-dl/mt5-base-mmarco-v2": ["▁no", "▁yes"],
     "unicamp-dl/mt5-base-mmarco-v1": ["▁no", "▁yes"],
-    "unicamp-dl/mt5-13b-mmarco-100k": ["▁", "▁true"],
 }

@@ -919,28 +918,3 @@ mt5_base_mmarco_v2 = ModelMeta(
     use_instructions=None,
     framework=["PyTorch", "Transformers"],
 )
-
-mt5_13b_mmarco_100k = ModelMeta(
-    loader=MonoT5Reranker,
-    loader_kwargs=dict(
-        fp_options="float16",
-    ),
-    name="unicamp-dl/mt5-13b-mmarco-100k",
-    model_type=["cross-encoder"],
-    languages=mt5_languages,
-    open_weights=True,
-    revision="e1a4317e102a525ea9e16745ad21394a4f1bffbc",
-    release_date="2022-11-04",
-    n_parameters=None,
-    n_embedding_parameters=1_024_458_752,
-    memory_usage_mb=None,
-    max_tokens=None,
-    embed_dim=None,
-    license=None,
-    public_training_code=None,
-    public_training_data=None,
-    similarity_fn_name=None,
-    use_instructions=None,
-    training_datasets=None,
-    framework=["PyTorch", "Transformers"],
-)
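
The prediction_tokens table edited above maps each monoT5-style checkpoint to its (negative, positive) sentinel tokens; relevance is read off the logits of those two tokens at the first decoding step. A minimal sketch of that scheme, using a checkpoint that remains in the table (the "Query: … Document: … Relevant:" prompt is the conventional monoT5 input format; sample texts are illustrative):

    # MonoT5-style relevance scoring with the "▁no"/"▁yes" sentinel tokens.
    import torch
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

    name = "unicamp-dl/mt5-base-mmarco-v2"
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name).eval()

    # Sentinel token ids from the prediction_tokens table.
    neg_id, pos_id = tokenizer.convert_tokens_to_ids(["▁no", "▁yes"])

    query = "what does mteb benchmark?"
    passage = "MTEB benchmarks text embedding models across many tasks."
    inputs = tokenizer(
        f"Query: {query} Document: {passage} Relevant:", return_tensors="pt"
    )

    with torch.inference_mode():
        out = model.generate(
            **inputs,
            max_new_tokens=1,               # only the first decoded token matters
            output_scores=True,
            return_dict_in_generate=True,
        )
    # Softmax over just the two sentinel logits; keep the positive one.
    logits = out.scores[0][0, [neg_id, pos_id]]
    score = torch.nn.functional.log_softmax(logits, dim=0)[1].exp()
    print(float(score))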
--- a/mteb-2.7.15.dist-info/METADATA
+++ b/mteb-2.7.16.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.7.15
+Version: 2.7.16
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
--- a/mteb-2.7.15.dist-info/RECORD
+++ b/mteb-2.7.16.dist-info/RECORD
@@ -1556,7 +1556,7 @@ mteb/models/model_implementations/llm2clip_models.py,sha256=X3W16uipaZ0t4Mco4lhh
 mteb/models/model_implementations/llm2vec_models.py,sha256=n86YQ8fAHU1gVtlY7tZcXq-1ab_ISxBmuk-X4MDnY4o,13348
 mteb/models/model_implementations/mcinext_models.py,sha256=T3vO9JQSmh3BICp6Y_q7j4anuA8P8LGZ4ZWnwGnF7cs,19299
 mteb/models/model_implementations/mdbr_models.py,sha256=AqsRZ-IDekIjq-FDWu0zx7Nk9ySJxaWTdRb8YhUZeu4,2828
-mteb/models/model_implementations/misc_models.py,sha256=0FkvheqPYh3JwM65F4CDlQKBDQQdjyMyfJPUdP1X2Ns,74780
+mteb/models/model_implementations/misc_models.py,sha256=JkJsyha-B5M8myLvHIwFUV14yo2lnSuBzHeO5fE9i74,73191
 mteb/models/model_implementations/mixedbread_ai_models.py,sha256=1-RD4M-16M-Rcf5CTD_R7LVoLv3cNFbmEjataQ__q94,10666
 mteb/models/model_implementations/mme5_models.py,sha256=V7BCGFkfZxkZ3ANJImvSFfP7in8OSfmkbqX-zXc_iF8,1574
 mteb/models/model_implementations/moco_models.py,sha256=6eEGpGTlI4StFRYsaNtXejhYE9GCqasUYCqB_SQy9cE,5714
@@ -1590,8 +1590,8 @@ mteb/models/model_implementations/random_baseline.py,sha256=YsITQoLbea_Iz2X84WNG
 mteb/models/model_implementations/rasgaard_models.py,sha256=_uNYP_nqJcOyoKnHNcvfJnP9gRvsv7HCWhZX2LJzQ9s,1322
 mteb/models/model_implementations/reasonir_model.py,sha256=WNWGqa9wANBL9vTdcFx51TEFXz6yHq_ygK0rij3LCL8,5217
 mteb/models/model_implementations/repllama_models.py,sha256=k6BgN2Cn41p0gQ0F1FdOTQ9OXlmFgG-2RtdvzOcCSZg,7543
-mteb/models/model_implementations/rerankers_custom.py,sha256=Bjgg_UbeHarupzzCk2rdy_Dd0_W0ZsE-DCD5v1EshnI,10953
-mteb/models/model_implementations/rerankers_monot5_based.py,sha256=6por4DPCycS8gljqKRZWUNM093bjjSVvmyQ3dzj9H6U,35321
+mteb/models/model_implementations/rerankers_custom.py,sha256=WBSA7kBRqxgb1549UwRYdtYzUovdwmW8C0PWzvGR54g,8087
+mteb/models/model_implementations/rerankers_monot5_based.py,sha256=U9ChokUEDXtkoFno-o4GeT4fXEEoFtnZn2denIafxi8,34583
 mteb/models/model_implementations/richinfoai_models.py,sha256=FsXamY-bvR5LLagtKK8fP-I5oc6B_bKp_i6_xzUYL8Y,1069
 mteb/models/model_implementations/ru_sentence_models.py,sha256=W4R985LnThJ-9XFbPnTGKb3L1QnoS3i3VXBFq94DK_w,43034
 mteb/models/model_implementations/ruri_models.py,sha256=3zYOqacB3JEnGJkMGYHqFgVkbmLo4uceJs9kzV54ivU,10819
@@ -2646,9 +2646,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=UKNokV9pu3G74MGebocU512aU_fFU9I9nPKnrG9Q0iE,1035
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=gElgSShKBXpfcqaZHhU_d2UHln1CyzUj8FN8KFun_UA,4087
-mteb-2.7.15.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.7.15.dist-info/METADATA,sha256=EoUeroRRdre5jYbplBGCJuWs-6M7cZGpzwLqSQyJKgI,14348
-mteb-2.7.15.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-mteb-2.7.15.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.7.15.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.7.15.dist-info/RECORD,,
+mteb-2.7.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.7.16.dist-info/METADATA,sha256=a-Rt1xa9ZgNdKf-JlM6EUZE_pKzEHoT6KGpFZUvnPo0,14348
+mteb-2.7.16.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+mteb-2.7.16.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.7.16.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.7.16.dist-info/RECORD,,
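
The RECORD entries above can be checked against a downloaded wheel: per the wheel format (PEP 427, building on PEP 376), each hash is the urlsafe-base64 SHA-256 digest with trailing "=" padding stripped, followed by the file size in bytes. A small sketch, using the updated misc_models.py entry from this diff (the wheel filename is illustrative):

    # Verify one RECORD entry of the downloaded wheel.
    import base64
    import hashlib
    import zipfile

    wheel = "mteb-2.7.16-py3-none-any.whl"
    path = "mteb/models/model_implementations/misc_models.py"
    expected = "JkJsyha-B5M8myLvHIwFUV14yo2lnSuBzHeO5fE9i74"  # from RECORD above

    with zipfile.ZipFile(wheel) as zf:
        data = zf.read(path)

    # urlsafe base64 of the SHA-256 digest, without "=" padding.
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    assert digest == expected, f"hash mismatch: {digest}"
    print(f"{path}: OK ({len(data)} bytes)")  # RECORD lists the size as 73191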