mteb 2.7.17__py3-none-any.whl → 2.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +16 -16
- mteb/_evaluators/any_sts_evaluator.py +1 -1
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +1 -1
- mteb/_evaluators/retrieval_evaluator.py +1 -1
- mteb/_evaluators/sklearn_evaluator.py +4 -2
- mteb/_evaluators/text/bitext_mining_evaluator.py +1 -1
- mteb/_evaluators/text/summarization_evaluator.py +1 -1
- mteb/_evaluators/zeroshot_classification_evaluator.py +1 -1
- mteb/abstasks/abstask.py +4 -4
- mteb/abstasks/classification.py +2 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/clustering_legacy.py +1 -1
- mteb/abstasks/image/image_text_pair_classification.py +1 -1
- mteb/abstasks/multilabel_classification.py +1 -1
- mteb/abstasks/pair_classification.py +1 -1
- mteb/abstasks/retrieval.py +8 -5
- mteb/abstasks/retrieval_dataset_loaders.py +27 -8
- mteb/abstasks/sts.py +1 -1
- mteb/abstasks/text/bitext_mining.py +2 -2
- mteb/abstasks/text/reranking.py +1 -1
- mteb/abstasks/text/summarization.py +1 -1
- mteb/abstasks/zeroshot_classification.py +1 -1
- mteb/evaluate.py +2 -2
- mteb/models/model_implementations/bm25.py +2 -2
- mteb/models/model_implementations/ict_time_and_querit_models.py +115 -0
- mteb/models/model_implementations/pylate_models.py +4 -4
- mteb/models/models_protocols.py +2 -2
- mteb/models/search_wrappers.py +4 -4
- mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +1 -1
- mteb/tasks/classification/ben/bengali_document_classification.py +2 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/hin_dialect_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/language_classification.py +1 -1
- mteb/tasks/classification/multilingual/south_african_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +2 -2
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/nob/vg_hierarchical_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
- mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/multilingual/sem_rel24_sts.py +1 -1
- mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py +1 -1
- mteb/tasks/sts/por/assin2_sts.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/METADATA +1 -1
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/RECORD +156 -155
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/WHEEL +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.17.dist-info → mteb-2.7.19.dist-info}/top_level.txt +0 -0
mteb/models/model_implementations/ict_time_and_querit_models.py
ADDED
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ModelMeta
+from mteb.types import PromptType
+
+
+def instruction_template(
+    instruction: str | dict, prompt_type: PromptType | None = None
+) -> str:
+    """Format instruction for the model."""
+    if isinstance(instruction, dict):
+        instruction = instruction.get(prompt_type.value if prompt_type else "", "")
+    elif prompt_type == PromptType.document:
+        return ""
+
+    if not instruction:
+        return ""
+    return f"Instruct: {instruction}\nQuery:"
+
+
+multilingual_langs = [
+    "deu-Latn",
+    "ita-Latn",
+    "ara-Arab",
+    "fas-Arab",
+    "fra-Latn",
+    "hin-Deva",
+    "spa-Latn",
+    "zho-Hans",
+    "ben-Beng",
+    "eng-Latn",
+    "fin-Latn",
+    "ind-Latn",
+    "jpn-Jpan",
+    "kor-Hang",
+    "rus-Cyrl",
+    "swh-Latn",
+    "tel-Telu",
+    "tha-Thai",
+]
+
+training_data = [
+    "FEVER",
+    "DuRetrieval",
+    "HotpotQA",
+    "MSMARCO",
+    "T2Retrieval",
+    "NQ",
+    "MIRACLRetrieval",
+    "MrTidyRetrieval",
+    "AmazonCounterfactualClassification",
+    "Banking77Classification",
+    "ImdbClassification",
+    "MTOPDomainClassification",
+    "ToxicConversationsClassification",
+    "TweetSentimentExtractionClassification",
+]
+
+boom_4b_instructions = {
+    "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual.",
+    "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment.",
+    "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category.",
+    "Banking77Classification": "Given a online banking query, find the corresponding intents.",
+    "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise.",
+    "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset.",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents.",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios.",
+    "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation.",
+    "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation.",
+    "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic.",
+    "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
+    "TNews": "Classify the fine-grained category of the given news title.",
+    "ClimateFEVER": "Given a claim about climate change, retrieve documents that support or refute the claim.",
+    "ClimateFEVERHardNegatives": "Given a claim about climate change, retrieve documents that support or refute the claim.",
+    "DBPedia": "Given a query, retrieve relevant entity descriptions from DBPedia.",
+    "FEVER": "Given a claim, retrieve documents that support or refute the claim.",
+    "FEVERHardNegatives": "Given a claim, retrieve documents that support or refute the claim.",
+    "FiQA2018": "Given a financial question, retrieve user replies that best answer the question.",
+    "HotpotQA": "Given a multi-hop question, retrieve documents that can help answer the question.",
+    "HotpotQAHardNegatives": "Given a multi-hop question, retrieve documents that can help answer the question.",
+    "MSMARCO": "Given a web search query, retrieve relevant passages that answer the query.",
+    "NFCorpus": "Given a question, retrieve relevant documents that best answer the question.",
+    "NQ": "Given a question, retrieve Wikipedia passages that answer the question.",
+}
+# How the template actually renders each one at inference time:
+# instruction_template(boom_4b_instructions["Banking77Classification"], PromptType.query)
+# -> "Instruct: Given a online banking query, find the corresponding intents.\nQuery:"
+
+boom_4b_v1 = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=dict(
+        instruction_template=instruction_template,
+    ),
+    name="ICT-TIME-and-Querit/BOOM_4B_v1",
+    model_type=["dense"],
+    languages=multilingual_langs,
+    open_weights=True,
+    adapted_from="Qwen/Qwen3-4B",
+    revision="447ab88574d27e67c428acc2b429d7d4580a4ea7",
+    release_date="2026-01-31",
+    n_parameters=4021774336,
+    n_embedding_parameters=None,
+    memory_usage_mb=7671,
+    embed_dim=2560,
+    max_tokens=32768,
+    license="apache-2.0",
+    reference="https://huggingface.co/ICT-TIME-and-Querit/BOOM_4B_v1",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch", "safetensors", "Transformers"],
+    use_instructions=True,
+    public_training_code=None,
+    public_training_data=None,
+    training_datasets=training_data,
+)
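The template above is query-side only: dict instructions are resolved by prompt type, while plain-string instructions are applied to queries and dropped entirely for documents. A minimal standalone sketch of that behavior; the local PromptType Enum is a stub standing in for mteb.types.PromptType so the snippet runs without mteb installed:

# Sketch: mirrors the instruction_template added above; PromptType is stubbed.
from enum import Enum


class PromptType(Enum):
    query = "query"
    document = "document"


def instruction_template(instruction, prompt_type=None):
    if isinstance(instruction, dict):
        # Dict instructions are keyed by prompt type ("query" / "document").
        instruction = instruction.get(prompt_type.value if prompt_type else "", "")
    elif prompt_type == PromptType.document:
        # Plain-string instructions are never applied to documents.
        return ""
    if not instruction:
        return ""
    return f"Instruct: {instruction}\nQuery:"


print(instruction_template("Given a claim, retrieve documents that support or refute the claim.", PromptType.query))
# Instruct: Given a claim, retrieve documents that support or refute the claim.
# Query:
print(repr(instruction_template("anything", PromptType.document)))  # ''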
mteb/models/model_implementations/pylate_models.py
CHANGED
@@ -53,7 +53,7 @@ class PylateSearchEncoder:
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int,
+        num_proc: int | None,
     ) -> None:
         """Index the corpus for retrieval.

@@ -89,7 +89,7 @@ class PylateSearchEncoder:
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int,
+        num_proc: int | None,
    ) -> RetrievalOutputType:
         queries_dataloader = create_dataloader(
             queries,
@@ -150,7 +150,7 @@ class PylateSearchEncoder:
         hf_split: str,
         top_k: int,
         encode_kwargs: EncodeKwargs,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> dict[str, list[tuple[float, str]]]:
         from pylate import indexes, retrieve

@@ -216,7 +216,7 @@ class PylateSearchEncoder:
         hf_subset: str,
         hf_split: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> dict[str, list[tuple[float, str]]]:
         """Rerank with PyLate's rank.rerank using per-query candidates.
mteb/models/models_protocols.py
CHANGED
@@ -32,7 +32,7 @@ class SearchProtocol(Protocol):
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int,
+        num_proc: int | None,
     ) -> None:
         """Index the corpus for retrieval.

@@ -56,7 +56,7 @@ class SearchProtocol(Protocol):
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int,
+        num_proc: int | None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.
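Every remaining hunk in this release is the same mechanical change: num_proc widens from a required int to int | None, with the concrete wrappers and tasks defaulting it to None. Custom searchers written against SearchProtocol should adopt the widened signature. A sketch of a conforming method; the corpus parameter name, the keyword-only marker, and the Any stubs for the EncodeKwargs type are illustrative assumptions, not mteb's exact definitions:

from __future__ import annotations

from typing import Any, Protocol


class IndexLike(Protocol):
    # Shape of the updated index() signature; mteb-specific types stubbed as Any.
    def index(
        self,
        corpus: Any,
        *,
        hf_split: str,
        hf_subset: str,
        encode_kwargs: Any,
        num_proc: int | None,  # was: num_proc: int
    ) -> None: ...


class MySearcher:
    def index(
        self,
        corpus: Any,
        *,
        hf_split: str,
        hf_subset: str,
        encode_kwargs: Any,
        num_proc: int | None = None,  # None defers the worker count to downstream code
    ) -> None:
        pass


searcher: IndexLike = MySearcher()  # structurally compatible; a type checker verifies it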
mteb/models/search_wrappers.py
CHANGED
@@ -59,7 +59,7 @@ class SearchEncoderWrapper:
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> None:
         """Index the corpus for retrieval.

@@ -101,7 +101,7 @@ class SearchEncoderWrapper:
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus for the given queries.

@@ -485,7 +485,7 @@ class SearchCrossEncoderWrapper:
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> None:
         """Index the corpus for retrieval.

@@ -509,7 +509,7 @@ class SearchCrossEncoderWrapper:
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int =
+        num_proc: int | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.
mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py
CHANGED
@@ -914,7 +914,7 @@ class BibleNLPBitextMining(AbsTaskBitextMining):
         self.dataset_transform()
         self.data_loaded = True

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         # Convert to standard format
         for lang in self.hf_subsets:
             l1, l2 = (l.split("_")[0] for l in lang.split("-"))

mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py
CHANGED
@@ -32,7 +32,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
         bibtex_citation="",
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/classification/ben/bengali_document_classification.py
CHANGED
@@ -43,7 +43,7 @@ Islam, Tanvir},
         superseded_by="BengaliDocumentClassification.v2",
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"article": "text", "category": "label"}
         )
@@ -92,7 +92,7 @@ Islam, Tanvir},
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )
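The task-level changes that follow are the same widening applied to load_data and dataset_transform. Hugging Face datasets uses the identical convention (num_proc: int | None, where None means single-process), so a transform can forward the value directly. A free-standing sketch under that assumption; the real tasks mutate self.dataset in place, and the .map() call here is purely illustrative:

from datasets import Dataset


def dataset_transform(dataset: Dataset, num_proc: int | None = None) -> Dataset:
    # Rename to the standard columns, as in the hunks above, then forward
    # num_proc to Dataset.map(); None falls back to single-process mapping.
    dataset = dataset.rename_columns({"article": "text", "category": "label"})
    return dataset.map(lambda row: {"text": row["text"].strip()}, num_proc=num_proc)


ds = Dataset.from_dict({"article": [" a ", " b "], "category": [0, 1]})
print(dataset_transform(ds, num_proc=None)["text"])  # ['a', 'b']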
mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py
CHANGED
@@ -46,7 +46,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "rating_str": "label"}
         )
@@ -99,7 +99,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py
CHANGED
@@ -46,7 +46,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "sentiment_int": "label"}
         )

mteb/tasks/classification/multilingual/hin_dialect_classification.py
CHANGED
@@ -60,7 +60,7 @@ class HinDialectClassification(AbsTaskClassification):
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"folksong": "text", "language": "label"}
         )

mteb/tasks/classification/multilingual/indic_lang_classification.py
CHANGED
@@ -137,6 +137,6 @@ Okazaki, Naoaki},
         self.dataset_transform()
         self.data_loaded = True

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.remove_columns(["language", "script"])
         self.dataset = self.dataset.rename_columns({"native sentence": "text"})

mteb/tasks/classification/multilingual/indic_sentiment_classification.py
CHANGED
@@ -52,7 +52,7 @@ class IndicSentimentClassification(AbsTaskClassification):
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         label_map = {"Negative": 0, "Positive": 1}
         # Convert to standard format
         for lang in self.hf_subsets:

mteb/tasks/classification/multilingual/language_classification.py
CHANGED
@@ -66,7 +66,7 @@ in Natural Language Processing},
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns({"labels": "label"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]

mteb/tasks/classification/multilingual/south_african_lang_classification.py
CHANGED
@@ -49,7 +49,7 @@ class SouthAfricanLangClassification(AbsTaskClassification):
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {" text": "text", "lang_id": "label"}
         )

mteb/tasks/classification/multilingual/turkic_classification.py
CHANGED
@@ -49,7 +49,7 @@ class TurkicClassification(AbsTaskClassification):
         )
         return dataset_lang["train"]

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py
CHANGED
@@ -35,7 +35,7 @@ class SlovakMovieReviewSentimentClassification(AbsTaskClassification):
         superseded_by="SlovakMovieReviewSentimentClassification.v2",
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns({"comment": "text"})

         self.dataset = self.stratified_subsampling(
@@ -76,7 +76,7 @@ class SlovakMovieReviewSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["SlovakMovieReviewSentimentClassification"],
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/swa/swahili_news_classification.py
CHANGED
@@ -37,7 +37,7 @@ class SwahiliNewsClassification(AbsTaskClassification):
         superseded_by="SwahiliNewsClassification.v2",
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"content": "text", "category": "label"}
         )
@@ -81,7 +81,7 @@ class SwahiliNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SwahiliNewsClassification"],
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py
CHANGED
@@ -63,7 +63,7 @@ class TenKGnadClusteringP2PFast(AbsTaskClustering):
         adapted_from=["TenKGnadClusteringP2P"],
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         ds = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py
CHANGED
@@ -63,7 +63,7 @@ class TenKGnadClusteringS2SFast(AbsTaskClustering):
         adapted_from=["TenKGnadClusteringS2S"],
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         ds = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py
CHANGED
@@ -51,7 +51,7 @@ class MLSUMClusteringP2P(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringP2P.v2",
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -124,7 +124,7 @@ class MLSUMClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringP2P"],
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py
CHANGED
@@ -51,7 +51,7 @@ class MLSUMClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringS2S.v2",
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -119,7 +119,7 @@ class MLSUMClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringS2S"],
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/nob/vg_hierarchical_clustering.py
CHANGED
@@ -45,7 +45,7 @@ class VGHierarchicalClusteringP2P(AbsTaskClustering):
         prompt="Identify the categories (e.g. sports) of given articles in Norwegian",
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"article": "sentences", "classes": "labels"}
         )
@@ -92,7 +92,7 @@ class VGHierarchicalClusteringS2S(AbsTaskClustering):
         prompt="Identify the categories (e.g. sports) of given articles in Norwegian",
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"ingress": "sentences", "classes": "labels"}
         )

mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py
CHANGED
@@ -45,7 +45,7 @@ class SugarCrepe(AbsTaskImageTextPairClassification):
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py
CHANGED
@@ -175,7 +175,7 @@ class mFollowIRCrossLingual(AbsTaskRetrieval):  # noqa: N801
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

@@ -243,7 +243,7 @@ class mFollowIR(AbsTaskRetrieval):  # noqa: N801
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/multichoice/eng/cv_bench.py
CHANGED
@@ -123,7 +123,7 @@ class CVBenchCount(AbsTaskRetrieval):
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -165,7 +165,7 @@ class CVBenchRelation(AbsTaskRetrieval):
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -207,7 +207,7 @@ class CVBenchDepth(AbsTaskRetrieval):
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -249,7 +249,7 @@ class CVBenchDistance(AbsTaskRetrieval):
         """,
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,

mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py
CHANGED
@@ -60,7 +60,7 @@ class PubChemWikiPairClassification(AbsTaskPairClassification):
         """,
     )

-    def dataset_transform(self, num_proc: int =
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         _dataset = {}
         for lang in self.hf_subsets:
             _dataset[lang] = {}

mteb/tasks/pair_classification/multilingual/rte3.py
CHANGED
@@ -52,7 +52,7 @@ Dolan, Bill},
         # sum of 4 languages after neutral filtering
     )

-    def load_data(self, num_proc: int =
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return