PyPI - mteb - Versions diffs - 2.1.17__py3-none-any.whl → 2.1.18__py3-none-any.whl - Mend

mteb 2.1.17__py3-none-any.whl → 2.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

mteb/models/model_implementations/andersborges.py ADDED Viewed

@@ -0,0 +1,51 @@
+import numpy as np
+from mteb.models.model_implementations.model2vec_models import Model2VecModel
+from mteb.models.model_meta import ModelMeta, ScoringFunction
+model2vecdk = ModelMeta(
+    loader=Model2VecModel,  # type: ignore
+    name="andersborges/model2vecdk",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="cb576c78dcc1b729e4612645f61db59929d69e61",
+    release_date="2025-11-21",
+    n_parameters=48042496,
+    memory_usage_mb=183,
+    max_tokens=np.inf,
+    embed_dim=256,
+    license="mit",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["NumPy", "Sentence Transformers"],
+    reference="https://huggingface.co/andersborges/model2vecdk",
+    use_instructions=False,
+    adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
+    superseded_by=None,
+    training_datasets=set(),  # distilled
+    public_training_code="https://github.com/andersborges/dkmodel2vec",
+    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+)
+model2vecdk_stem = ModelMeta(
+    loader=Model2VecModel,  # type: ignore
+    name="andersborges/model2vecdk-stem",
+    languages=["dan-Latn"],
+    open_weights=True,
+    revision="cb576c78dcc1b729e4612645f61db59929d69e61",
+    release_date="2025-11-21",
+    n_parameters=48578560,
+    memory_usage_mb=185,
+    max_tokens=np.inf,
+    embed_dim=256,
+    license="mit",
+    similarity_fn_name=ScoringFunction.COSINE,
+    framework=["NumPy", "Sentence Transformers"],
+    reference="https://huggingface.co/andersborges/model2vecdk",
+    use_instructions=False,
+    adapted_from="https://huggingface.co/jealk/TTC-L2V-supervised-2",
+    superseded_by=None,
+    training_datasets=set(),  # distilled
+    public_training_code="https://github.com/andersborges/dkmodel2vec",
+    public_training_data="https://huggingface.co/datasets/DDSC/nordic-embedding-training-data",
+)

mteb/models/model_implementations/cohere_models.py CHANGED Viewed

@@ -221,7 +221,7 @@ class CohereTextEmbeddingModel(AbsEncoder):
     ) -> None:
         import cohere  # type: ignore
-        self.model_name = model_name.lstrip("Cohere/Cohere-")
+        self.model_name = model_name.removeprefix("Cohere/Cohere-")
         self.sep = sep
         self.model_prompts = self.validate_task_to_prompt_name(model_prompts)
         if embedding_type not in get_args(EmbeddingType):

mteb/models/model_implementations/tarka_models.py CHANGED Viewed

@@ -342,7 +342,6 @@ tarka_embedding_150m_v1 = ModelMeta(
 tark_embedding_350_v1_kwargs = dict(
     model_kwargs={
-        "attn_implementation": "flash_attention_2",
         "torch_dtype": "bfloat16",
     },  # use low-precision
     trust_remote_code=True,
@@ -357,7 +356,7 @@ tarka_embedding_350m_v1 = ModelMeta(
     name="Tarka-AIR/Tarka-Embedding-350M-V1",
     languages=MULTILINGUAL_EVALUATED_LANGUAGES,
     open_weights=True,
-    revision="f4b5de82060cf3a833e52580e7ce59adeacb6fb5",
+    revision="a850d6a329145474727424fed6b12b62096b8ba3",
     release_date="2025-11-11",
     n_parameters=354_483_968,
     memory_usage_mb=676,

mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py CHANGED Viewed

@@ -23,7 +23,7 @@ class BUCCBitextMining(AbsTaskBitextMining):
             "path": "mteb/BUCC",
             "revision": "414572247440f0ccacf7eb0bb70a31533a0e5443",
         },
-        description="BUCC bitext mining dataset",
+        description="BUCC bitext mining dataset train split.",
         reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
         type="BitextMining",
         category="t2t",
@@ -71,7 +71,9 @@ Rapp, Reinhard},
                 sentence1 = data["sentence1"][0]
                 sentence2 = data["sentence2"][0]
-                sentence1 = [sentence1[i] for (i, j) in gold]
+                sentence1 = [
+                    sentence1[i] for (i, j) in gold
+                ]  # keep only sentences in gold. The 2nd value is meant for sentence2 but not used here. This is fixed in BUCC.v2.
                 logger.info(f"Lang {lang} num gold {len(gold)}")
                 logger.info(f"Lang {lang} num sentence1 {len(sentence1)}")
                 logger.info(f"Lang {lang} num sentence2 {len(sentence2)}")

mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py CHANGED Viewed

@@ -20,7 +20,7 @@ class BUCCBitextMiningFast(AbsTaskBitextMining):
             "path": "mteb/bucc-bitext-mining",
             "revision": "1739dc11ffe9b7bfccd7f3d585aeb4c544fc6677",
         },
-        description="BUCC bitext mining dataset",
+        description="BUCC bitext mining dataset train split, gold set only.",
         reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
         type="BitextMining",
         category="t2t",

mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py CHANGED Viewed

@@ -26,5 +26,20 @@ class GreenNodeTableMarkdownRetrieval(AbsTaskRetrieval):
         annotations_creators="human-annotated",
         dialect=[],
         sample_creation="found",
-        bibtex_citation="",  # TODO: Add bibtex citation when the paper is published
+        bibtex_citation=r"""
+@inproceedings{10.1007/978-981-95-1746-6_17,
+  abstract = {Information retrieval often comes in plain text, lacking semi-structured text such as HTML and markdown, retrieving data that contains rich format such as table became non-trivial. In this paper, we tackle this challenge by introducing a new dataset, GreenNode Table Retrieval VN (GN-TRVN), which is collected from a massive corpus, a wide range of topics, and a longer context compared to ViQuAD2.0. To evaluate the effectiveness of our proposed dataset, we introduce two versions, M3-GN-VN and M3-GN-VN-Mixed, by fine-tuning the M3-Embedding model on this dataset. Experimental results show that our models consistently outperform the baselines, including the base model, across most evaluation criteria on various datasets such as VieQuADRetrieval, ZacLegalTextRetrieval, and GN-TRVN. In general, we release a more comprehensive dataset and two model versions that improve response performance for Vietnamese Markdown Table Retrieval.},
+  address = {Singapore},
+  author = {Pham, Bao Loc
+and Hoang, Quoc Viet
+and Luu, Quy Tung
+and Vo, Trong Thu},
+  booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
+  isbn = {978-981-95-1746-6},
+  pages = {153--163},
+  publisher = {Springer Nature Singapore},
+  title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
+  year = {2026},
+}
+""",
     )

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mteb
-Version: 2.1.17
+Version: 2.1.18
 Summary: Massive Text Embedding Benchmark
 Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
 Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/RECORD RENAMED Viewed

@@ -1445,6 +1445,7 @@ mteb/models/cache_wrappers/cache_backends/numpy_cache.py,sha256=GyTVC5DLph3EeRnD
 mteb/models/model_implementations/__init__.py,sha256=BZDdde6ajKv-yroy9mqE2YS3Hw1KBdKoxBPg8aPTZEs,1164
 mteb/models/model_implementations/align_models.py,sha256=DUdVWxETiwC2IrXI90zQwlvHMjeI7JJCNOmFVd2RNws,4518
 mteb/models/model_implementations/amazon_models.py,sha256=pdRU2QGAB5ccQnAfbRSzHE1G3ZUdjvsAgeJwkB_olDQ,694
+mteb/models/model_implementations/andersborges.py,sha256=QUFpASdcCy-IMz2O2C3OAOhMWA2ksNHM4GFWlkELIT4,1879
 mteb/models/model_implementations/ara_models.py,sha256=zS0t9rI21wwEwTlrlX94GqkmPKLnb8ktUaAOY-ZLmw0,1421
 mteb/models/model_implementations/arctic_models.py,sha256=eaMRaN9WdpVq1W6cbtNcJMdrJUTXrTSYUjTJufCdZRY,10350
 mteb/models/model_implementations/b1ade_models.py,sha256=aEKmXWVX8iJ_OotAYPOMxsOHTDEOJYdSwkR6iJsZ-ms,1609
@@ -1459,7 +1460,7 @@ mteb/models/model_implementations/cde_models.py,sha256=3nNU3nq3VZZcImFqH1VPj57-Q
 mteb/models/model_implementations/clip_models.py,sha256=zrfgNmZszu0JMtMNdCMzEohixsrnQ7xFhCqgsiucH_Q,6107
 mteb/models/model_implementations/codefuse_models.py,sha256=19Y-d_qetVU64quzEvuUJ_K8DHo1JEEKEGqjRR48dFg,9113
 mteb/models/model_implementations/codesage_models.py,sha256=D4CdISGyv5f2GMYq4_efgm5qNq80SWAX5R2u5mjEiXM,2998
-mteb/models/model_implementations/cohere_models.py,sha256=H7Mjn57kmeBhIaJx6riaCEEgZS01YBWBBGPagEU87ZQ,13690
+mteb/models/model_implementations/cohere_models.py,sha256=LiYYRT3clhFlh0RE654KyZtO66vnIO22h79HJLmXYwk,13696
 mteb/models/model_implementations/cohere_v.py,sha256=K6VEw1NkyM2PuMd18kHE6aqPrcByYSwEmAKjvLods_w,15760
 mteb/models/model_implementations/colpali_models.py,sha256=uVmK3jXO-GDn-7i6cJFWdc0u0-MU3INHHL1rXUPhBec,8944
 mteb/models/model_implementations/colqwen_models.py,sha256=Y6IBhYKbxjkC3AePa0l37-F50xcX1mtPCXpgW597HyA,8754
@@ -1538,7 +1539,7 @@ mteb/models/model_implementations/siglip_models.py,sha256=tvi8QB2ayBoeXsxwHrl5RF
 mteb/models/model_implementations/sonar_models.py,sha256=Nc6kAJRWSrxA57DPRrgOPHqS1dNhz2vsE_1ZA2JtigQ,4784
 mteb/models/model_implementations/spartan8806_atles_champion.py,sha256=9sWQH7tOT0uxXA7sbQcnqGt2f5O9xcw9HqFpRCzoQAA,918
 mteb/models/model_implementations/stella_models.py,sha256=NL3tk-rnuBdznsQ-nmelqun4tFO2xKoNPPOOVKqnPGU,8062
-mteb/models/model_implementations/tarka_models.py,sha256=xC6olJs9PSe_lrYsScw5hDHTjYSjcxgbvfK_7IoBFnk,27397
+mteb/models/model_implementations/tarka_models.py,sha256=UwSb3e-k7dCgQAJv3176ZvKpkjLZfpdPzwf-b0Oxuuo,27345
 mteb/models/model_implementations/text2vec_models.py,sha256=zaHWRc2W0RYZAOetinqRzug9UGW0HmY5U-jYsLXA8wo,4160
 mteb/models/model_implementations/ua_sentence_models.py,sha256=fcvXR4-Rrt-UDTlDkh2ZAO1gO_ufCOHiT6EhoeKiHx8,1224
 mteb/models/model_implementations/uae_models.py,sha256=KZxH5a3t-sfh33xUBkLizEuyFAyPlGfnRsn-S7mjq74,3112
@@ -1581,8 +1582,8 @@ mteb/tasks/bitext_mining/kat/__init__.py,sha256=a-KcFJ3Ol7R8yq02RcGjaOxEfqnwJeo7
 mteb/tasks/bitext_mining/kat/tbilisi_city_hall_bitext_mining.py,sha256=xVCxpJr7UW2KadNdn7Gsw-wZ65uz5vhRDhQZ7eILokQ,1918
 mteb/tasks/bitext_mining/multilingual/__init__.py,sha256=qDgixbHEvV3xz6JN3kFQDnvtSL-yVjl-Z8inUwimI6I,1954
 mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py,sha256=lNbCz3dN9o3F04Y7vtNBhF-lPUNyVbAOKgUR-QKZn_8,29082
-mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py,sha256=lg-4of5K_6mn7iU-TpxwP_HFRk10qILJWprR-QX9Jug,2708
-mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py,sha256=Zv6QCyTchTmlgWegS-iNJKfcc1Tr-ZmGftub5xZO5-w,1808
+mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py,sha256=tTKvS-v7d9V_zymCn_ZonUKlo9NI7vTyppxS9iAu8I0,2873
+mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py,sha256=P_UHMWh2gKG6CloXmP5J2kjrCTQwoJAU1MKdLl6JFKc,1836
 mteb/tasks/bitext_mining/multilingual/danish_medicines_agency_bitext_mining.py,sha256=5iengckKv1NCHILjrX6WDEgBNJlmbSV5y-WWdaLZYrs,1703
 mteb/tasks/bitext_mining/multilingual/diabla_bitext_mining.py,sha256=Ua6DfpJYgoaIRSWB284WNj6wQWqxPiC3kUPcmKNGDWQ,1498
 mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py,sha256=Dh43H2NWZIAzN9Zeib3TtKhUcK1jldUQX-GvTsm1MnI,5616
@@ -2441,7 +2442,7 @@ mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py,sha256=ppFPam-3A
 mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py,sha256=hOiwz2bcayDW6VrCvsIGeYh1TT7koByM76rZZwtp9KA,1754
 mteb/tasks/retrieval/vie/fevervn_retrieval.py,sha256=xLGoXefGk1l1AFiOSf2Ja0fM_rAQp4tdaR8H6jJqYlI,1853
 mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py,sha256=FGfFuLzRCTuupRxZdjVbBiwCOSspb3vwvtNAKvyXjso,1714
-mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py,sha256=AlvGHXqXwOTrEvZNcMxlWL_2a31iNrrZzNz2i6dcJec,1074
+mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py,sha256=O7iIcuvqhrHjB7J1VxH9YJ3v6cuFFBQdrrnYwLgeRfE,2429
 mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py,sha256=FYWj8EhnfwDuPRxZ8uTeGkfa2Q-jDU2bliTmp975Coc,1837
 mteb/tasks/retrieval/vie/msmarcovn_retrieval.py,sha256=xtJ1-rjx4slwSR8p6NedqItTk-79ZzT2f9FlDOhbzkE,1958
 mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py,sha256=4S8IDJ-TVjKEy2teM8GOeDzHIZR8txkPvX0sGDYIyqs,1780
@@ -2555,9 +2556,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
 mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
 mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
 mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
-mteb-2.1.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mteb-2.1.17.dist-info/METADATA,sha256=HZ7_vl0KWjqAR65easMGKslMQs6wsc3lIpZZfR_yi5w,13574
-mteb-2.1.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mteb-2.1.17.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
-mteb-2.1.17.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
-mteb-2.1.17.dist-info/RECORD,,
+mteb-2.1.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mteb-2.1.18.dist-info/METADATA,sha256=OF7o0Df2GbEQuuNXdXxByJR0atmGv4XdRrwMSWuOcx0,13574
+mteb-2.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mteb-2.1.18.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
+mteb-2.1.18.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
+mteb-2.1.18.dist-info/RECORD,,

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/WHEEL RENAMED Viewed

File without changes

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{mteb-2.1.17.dist-info → mteb-2.1.18.dist-info}/top_level.txt RENAMED Viewed

File without changes

mteb 2.1.17__py3-none-any.whl → 2.1.18__py3-none-any.whl

mteb 2.1.17__py3-none-any.whl → 2.1.18__py3-none-any.whl