PyPI - mteb - Versions diffs - 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl - Mend

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (173) hide show

mteb/models/model_implementations/voyage_models.py CHANGED Viewed

@@ -150,7 +150,7 @@ class VoyageModel(AbsEncoder):
         sentences: list[str],
         batch_size: int,
         input_type: Literal["query", "document"],
-    ) -> np.ndarray:
+    ) -> Array:
         embeddings, index = [], 0
         output_dtype = VOYAGE_DTYPE_TRANSLATION.get(

mteb/models/models_protocols.py CHANGED Viewed

@@ -32,7 +32,7 @@ class SearchProtocol(Protocol):
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int,
+        num_proc: int | None,
     ) -> None:
         """Index the corpus for retrieval.
@@ -56,7 +56,7 @@ class SearchProtocol(Protocol):
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int,
+        num_proc: int | None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.

mteb/models/search_wrappers.py CHANGED Viewed

@@ -59,7 +59,7 @@ class SearchEncoderWrapper:
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int = 1,
+        num_proc: int | None = None,
     ) -> None:
         """Index the corpus for retrieval.
@@ -101,7 +101,7 @@ class SearchEncoderWrapper:
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int = 1,
+        num_proc: int | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus for the given queries.
@@ -485,7 +485,7 @@ class SearchCrossEncoderWrapper:
         hf_split: str,
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
-        num_proc: int = 1,
+        num_proc: int | None = None,
     ) -> None:
         """Index the corpus for retrieval.
@@ -509,7 +509,7 @@ class SearchCrossEncoderWrapper:
         top_k: int,
         encode_kwargs: EncodeKwargs,
         top_ranked: TopRankedDocumentsType | None = None,
-        num_proc: int = 1,
+        num_proc: int | None = None,
     ) -> RetrievalOutputType:
         """Search the corpus using the given queries.

mteb/tasks/bitext_mining/multilingual/bible_nlp_bitext_mining.py CHANGED Viewed

@@ -914,7 +914,7 @@ class BibleNLPBitextMining(AbsTaskBitextMining):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         # Convert to standard format
         for lang in self.hf_subsets:
             l1, l2 = (l.split("_")[0] for l in lang.split("-"))

mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py CHANGED Viewed

@@ -265,7 +265,7 @@ class FloresBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py CHANGED Viewed

@@ -99,7 +99,7 @@ class IN22ConvBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py CHANGED Viewed

@@ -93,7 +93,7 @@ class IN22GenBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py CHANGED Viewed

@@ -280,7 +280,7 @@ class NTREXBitextMining(AbsTaskBitextMining):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py CHANGED Viewed

@@ -32,7 +32,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
         bibtex_citation="",
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/classification/ben/bengali_document_classification.py CHANGED Viewed

@@ -43,7 +43,7 @@ Islam, Tanvir},
         superseded_by="BengaliDocumentClassification.v2",
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"article": "text", "category": "label"}
         )
@@ -92,7 +92,7 @@ Islam, Tanvir},
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py CHANGED Viewed

@@ -46,7 +46,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "rating_str": "label"}
         )
@@ -99,7 +99,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py CHANGED Viewed

@@ -46,7 +46,7 @@ Montoyo, Andres},
     )
     samples_per_label = 16
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"comment": "text", "sentiment_int": "label"}
         )

mteb/tasks/classification/multilingual/hin_dialect_classification.py CHANGED Viewed

@@ -60,7 +60,7 @@ class HinDialectClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"folksong": "text", "language": "label"}
         )

mteb/tasks/classification/multilingual/indic_lang_classification.py CHANGED Viewed

@@ -137,6 +137,6 @@ Okazaki, Naoaki},
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.remove_columns(["language", "script"])
         self.dataset = self.dataset.rename_columns({"native sentence": "text"})

mteb/tasks/classification/multilingual/indic_sentiment_classification.py CHANGED Viewed

@@ -52,7 +52,7 @@ class IndicSentimentClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         label_map = {"Negative": 0, "Positive": 1}
         # Convert to standard format
         for lang in self.hf_subsets:

mteb/tasks/classification/multilingual/language_classification.py CHANGED Viewed

@@ -66,7 +66,7 @@ in Natural Language Processing},
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns({"labels": "label"})
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]

mteb/tasks/classification/multilingual/south_african_lang_classification.py CHANGED Viewed

@@ -49,7 +49,7 @@ class SouthAfricanLangClassification(AbsTaskClassification):
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {" text": "text", "lang_id": "label"}
         )

mteb/tasks/classification/multilingual/turkic_classification.py CHANGED Viewed

@@ -49,7 +49,7 @@ class TurkicClassification(AbsTaskClassification):
         )
         return dataset_lang["train"]
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py CHANGED Viewed

@@ -35,7 +35,7 @@ class SlovakMovieReviewSentimentClassification(AbsTaskClassification):
         superseded_by="SlovakMovieReviewSentimentClassification.v2",
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns({"comment": "text"})
         self.dataset = self.stratified_subsampling(
@@ -76,7 +76,7 @@ class SlovakMovieReviewSentimentClassificationV2(AbsTaskClassification):
         adapted_from=["SlovakMovieReviewSentimentClassification"],
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["test"]
         )

mteb/tasks/classification/swa/swahili_news_classification.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SwahiliNewsClassification(AbsTaskClassification):
         superseded_by="SwahiliNewsClassification.v2",
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"content": "text", "category": "label"}
         )
@@ -81,7 +81,7 @@ class SwahiliNewsClassificationV2(AbsTaskClassification):
         adapted_from=["SwahiliNewsClassification"],
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.stratified_subsampling(
             self.dataset, seed=self.seed, splits=["train"]
         )

mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py CHANGED Viewed

@@ -63,7 +63,7 @@ class TenKGnadClusteringP2PFast(AbsTaskClustering):
         adapted_from=["TenKGnadClusteringP2P"],
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         ds = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py CHANGED Viewed

@@ -63,7 +63,7 @@ class TenKGnadClusteringS2SFast(AbsTaskClustering):
         adapted_from=["TenKGnadClusteringS2S"],
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         ds = _convert_to_fast(
             self.dataset, self.input_column_name, self.label_column_name, self.seed
         )

mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringP2P(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringP2P.v2",
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -124,7 +124,7 @@ class MLSUMClusteringP2PFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringP2P"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py CHANGED Viewed

@@ -51,7 +51,7 @@ class MLSUMClusteringS2S(AbsTaskClusteringLegacy):
         superseded_by="MLSUMClusteringS2S.v2",
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return
@@ -119,7 +119,7 @@ class MLSUMClusteringS2SFast(AbsTaskClustering):
         adapted_from=["MLSUMClusteringS2S"],
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub and convert it to the standard format."""
         if self.data_loaded:
             return

mteb/tasks/clustering/nob/vg_hierarchical_clustering.py CHANGED Viewed

@@ -45,7 +45,7 @@ class VGHierarchicalClusteringP2P(AbsTaskClustering):
         prompt="Identify the categories (e.g. sports) of given articles in Norwegian",
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"article": "sentences", "classes": "labels"}
         )
@@ -92,7 +92,7 @@ class VGHierarchicalClusteringS2S(AbsTaskClustering):
         prompt="Identify the categories (e.g. sports) of given articles in Norwegian",
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         self.dataset = self.dataset.rename_columns(
             {"ingress": "sentences", "classes": "labels"}
         )

mteb/tasks/image_text_pair_classification/eng/image_co_de.py CHANGED Viewed

@@ -53,7 +53,7 @@ class ImageCoDe(AbsTaskImageTextPairClassification):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py CHANGED Viewed

@@ -45,7 +45,7 @@ class SugarCrepe(AbsTaskImageTextPairClassification):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py CHANGED Viewed

@@ -175,7 +175,7 @@ class mFollowIRCrossLingual(AbsTaskRetrieval):  # noqa: N801
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return
@@ -243,7 +243,7 @@ class mFollowIR(AbsTaskRetrieval):  # noqa: N801
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/multichoice/eng/cv_bench.py CHANGED Viewed

@@ -123,7 +123,7 @@ class CVBenchCount(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -165,7 +165,7 @@ class CVBenchRelation(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -207,7 +207,7 @@ class CVBenchDepth(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,
@@ -249,7 +249,7 @@ class CVBenchDistance(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         self.corpus, self.queries, self.relevant_docs, self.top_ranked = _load_data(
             path=self.metadata.dataset["path"],
             splits=self.metadata.eval_splits,

mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py CHANGED Viewed

@@ -66,7 +66,7 @@ Yih, Scott Wen-tau},
         },
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         labels = [
             "q2_label",
             "q3_label",

mteb/tasks/pair_classification/eng/pub_chem_smilespc.py CHANGED Viewed

@@ -76,7 +76,7 @@ class PubChemSMILESPC(AbsTaskPairClassification):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs: Any) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs: Any) -> None:
         if self.data_loaded:
             return

mteb/tasks/pair_classification/multilingual/pub_chem_wiki_pair_classification.py CHANGED Viewed

@@ -60,7 +60,7 @@ class PubChemWikiPairClassification(AbsTaskPairClassification):
 """,
     )
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         _dataset = {}
         for lang in self.hf_subsets:
             _dataset[lang] = {}

mteb/tasks/pair_classification/multilingual/rte3.py CHANGED Viewed

@@ -52,7 +52,7 @@ Dolan, Bill},
         # sum of 4 languages after neutral filtering
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return

mteb/tasks/retrieval/ara/sadeem_question_retrieval.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SadeemQuestionRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/code_edit_search_retrieval.py CHANGED Viewed

@@ -53,7 +53,7 @@ class CodeEditSearchRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/code_rag.py CHANGED Viewed

@@ -51,7 +51,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
         **common_args,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -59,7 +59,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -108,7 +108,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
         **common_args,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -116,7 +116,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -168,7 +168,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
         **common_args,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -176,7 +176,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
@@ -225,7 +225,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
         **common_args,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
@@ -233,7 +233,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
         self.dataset_transform()
         self.data_loaded = True
-    def dataset_transform(self, num_proc: int = 1, **kwargs) -> None:
+    def dataset_transform(self, num_proc: int | None = None, **kwargs) -> None:
         """And transform to a retrieval dataset, which have the following attributes
         self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text

mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py CHANGED Viewed

@@ -99,7 +99,7 @@ class CodeSearchNetCCRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py CHANGED Viewed

@@ -97,7 +97,7 @@ class COIRCodeSearchNetRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/ds1000_retrieval.py CHANGED Viewed

@@ -34,7 +34,7 @@ class DS1000Retrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/fresh_stack_retrieval.py CHANGED Viewed

@@ -37,7 +37,7 @@ class FreshStackRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/human_eval_retrieval.py CHANGED Viewed

@@ -34,7 +34,7 @@ class HumanEvalRetrieval(AbsTaskRetrieval):
 }""",
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/mbpp_retrieval.py CHANGED Viewed

@@ -34,7 +34,7 @@ class MBPPRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb/tasks/retrieval/code/wiki_sql_retrieval.py CHANGED Viewed

@@ -36,7 +36,7 @@ class WikiSQLRetrieval(AbsTaskRetrieval):
 """,
     )
-    def load_data(self, num_proc: int = 1, **kwargs) -> None:
+    def load_data(self, num_proc: int | None = None, **kwargs) -> None:
         if self.data_loaded:
             return

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl

mteb 2.7.16__py3-none-any.whl → 2.7.18__py3-none-any.whl