mteb 2.7.4__py3-none-any.whl → 2.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +47 -5
- mteb/_evaluators/any_sts_evaluator.py +2 -0
- mteb/_evaluators/clustering_evaluator.py +2 -0
- mteb/_evaluators/evaluator.py +2 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +8 -1
- mteb/_evaluators/pair_classification_evaluator.py +3 -0
- mteb/_evaluators/retrieval_evaluator.py +3 -0
- mteb/_evaluators/sklearn_evaluator.py +6 -1
- mteb/_evaluators/text/bitext_mining_evaluator.py +2 -0
- mteb/_evaluators/text/summarization_evaluator.py +2 -0
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -0
- mteb/abstasks/abstask.py +31 -12
- mteb/abstasks/classification.py +10 -3
- mteb/abstasks/clustering.py +6 -2
- mteb/abstasks/clustering_legacy.py +8 -2
- mteb/abstasks/image/image_text_pair_classification.py +6 -2
- mteb/abstasks/multilabel_classification.py +2 -0
- mteb/abstasks/pair_classification.py +8 -2
- mteb/abstasks/retrieval.py +26 -11
- mteb/abstasks/retrieval_dataset_loaders.py +29 -19
- mteb/abstasks/sts.py +10 -3
- mteb/abstasks/text/bitext_mining.py +9 -5
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +2 -1
- mteb/abstasks/zeroshot_classification.py +8 -2
- mteb/evaluate.py +13 -2
- mteb/models/model_implementations/bm25.py +2 -0
- mteb/models/model_implementations/pylate_models.py +10 -0
- mteb/models/models_protocols.py +4 -0
- mteb/models/search_wrappers.py +12 -0
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +1 -1
- mteb/tasks/clustering/nob/vg_clustering.py +1 -1
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/bright_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -2
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/METADATA +1 -1
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/RECORD +287 -287
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/WHEEL +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.4.dist-info → mteb-2.7.6.dist-info}/top_level.txt +0 -0
mteb/_create_dataloaders.py
CHANGED
@@ -30,6 +30,7 @@ logger = logging.getLogger(__name__)
 def _create_dataloader_from_texts(
     text: list[str],
     batch_size: int = 32,
+    num_proc: int = 1,
     **kwargs: Any,
 ) -> DataLoader[TextInput]:
     """Create a dataloader from a list of text.
@@ -37,15 +38,17 @@ def _create_dataloader_from_texts(
     Args:
         text: A list of text to create a dataloader from.
         batch_size: Batch size for the dataloader.
+        num_proc: Number of processes to use.
         kwargs: Not used, present catching extra arguments.

     Returns:
         A dataloader with the text.
     """
     dataset = Dataset.from_dict({"text": text})
     return DataLoader(
         dataset,
         batch_size=batch_size,
+        num_workers=num_proc if num_proc > 1 else 0,
     )


@@ -71,20 +74,27 @@ def _corpus_to_dict(
 def _create_dataloader_for_retrieval_corpus(
     dataset: Dataset,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[CorpusInput]:
     """Create a dataloader from a corpus.

     Args:
         dataset: Corpus
         batch_size: Batch size for the dataloader.
+        num_proc: Number of processes to use.

     Returns:
         A dataloader with the corpus.
     """
-    new_ds = dataset.map(_corpus_to_dict, desc="Converting corpus dict")
-    return DataLoader(
+    new_ds = dataset.map(
+        _corpus_to_dict,
+        desc="Converting corpus dict",
+        num_proc=num_proc,
+    )
+    return DataLoader(
         new_ds,
         batch_size=batch_size,
+        num_workers=num_proc if num_proc > 1 else 0,
     )


@@ -101,12 +111,14 @@ def _combine_queries_with_instruction_text(row: dict[str, str]) -> dict[str, str]:
 def _create_text_dataloader_for_queries(
     queries: QueryDatasetType,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[QueryInput]:
     """Create a dataloader from a list of queries.

     Args:
         queries: A list of queries.
         batch_size: Batch size for the dataloader.
+        num_proc: Number of processes to use.

     Returns:
         A dataloader with the queries.
@@ -114,10 +126,12 @@ def _create_text_dataloader_for_queries(
     queries = queries.map(
         _combine_queries_with_instruction_text,
         desc="Processing queries for dataloading",
+        num_proc=num_proc,
     )
     return DataLoader(
         queries,
         batch_size=batch_size,
+        num_workers=num_proc if num_proc > 1 else 0,
     )


@@ -186,12 +200,14 @@ def _convert_conv_history_to_query(
 def _create_dataloader_for_queries_conversation(
     queries: QueryDatasetType,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[QueryInput]:
     """Create a dataloader from a list of queries.

     Args:
         queries: A list of queries.
         batch_size: Batch size for the dataloader.
+        num_proc: Number of processes to use.

     Returns:
         A dataloader with the queries.
@@ -200,9 +216,11 @@ def _create_dataloader_for_queries_conversation(
         queries.map(
             _convert_conv_history_to_query,
             desc="Converting conversations to queries",
+            num_proc=num_proc,
         ),
         collate_fn=_custom_collate_fn,
         batch_size=batch_size,
+        num_workers=num_proc if num_proc > 1 else 0,
     )


@@ -247,6 +265,7 @@ def _prepare_image_dataset(
     dataset: Dataset,
     image_column_name: str | None = None,
     transform: Callable[[Any], Any] | None = None,
+    num_proc: int = 1,
 ) -> Dataset:
     """Prepare the image dataset by converting images to RGB and applying transformations."""
     if (
@@ -262,6 +281,7 @@ def _prepare_image_dataset(
         _convert_images_to_rgb,
         fn_kwargs={"image_col_name": "image", "transform": transform},
         desc="Converting images to RGB",
+        num_proc=num_proc,
     )


@@ -295,6 +315,7 @@ def _create_image_dataloader(
     batch_size: int = 32,
     transform: Callable[[Any], Any] | None = None,
     collate_fn: Callable[[list[dict[str, Any]]], dict[str, Any]] = _custom_collate_fn,
+    num_proc: int = 1,
 ) -> DataLoader[ImageInput]:
     """Creates a DataLoader with the image dataset prepared using the explicit transformation.

@@ -304,33 +325,41 @@ def _create_image_dataloader(
         batch_size: Batch size for the dataloader.
         transform: A transformation function to apply to each image (e.g., converting to tensor).
         collate_fn: A custom collate function to handle batching.
+        num_proc: Number of processes to use.

     Returns:
         A DataLoader with the image dataset.
     """
     dataset = _prepare_image_dataset(
-        dataset, image_column_name, transform
+        dataset,
+        image_column_name,
+        transform,
+        num_proc=num_proc,
     ).select_columns(["image"])
     return DataLoader(
         dataset,
         batch_size=batch_size,
         collate_fn=collate_fn,
         shuffle=False,
+        num_workers=num_proc if num_proc > 1 else 0,
     )


 def _create_text_queries_dataloader(
     dataset: Dataset,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[QueryInput]:
     if not isinstance(dataset["text"][0], list):
         return _create_text_dataloader_for_queries(
             dataset,
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     return _create_dataloader_for_queries_conversation(
         dataset,
         batch_size=batch_size,
+        num_proc=num_proc,
     )


@@ -339,6 +368,7 @@ def _create_queries_dataloader(
     task_metadata: TaskMetadata,
     input_column: str | None = None,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[QueryInput | ImageInput]:
     """Create a dataloader for queries."""
     queries_type = task_metadata.get_modalities(PromptType.query)
@@ -346,12 +376,14 @@
         return _create_text_queries_dataloader(
             dataset,
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     if "image" in queries_type:  # contains image
         return _create_image_dataloader(
             dataset,
             image_column_name="image",
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     raise ValueError(f"Can't handle queries type {queries_type}")

@@ -361,6 +393,7 @@ def _create_document_dataloader(
     task_metadata: TaskMetadata,
     input_column: str | None = None,
     batch_size: int = 32,
+    num_proc: int = 1,
 ) -> DataLoader[CorpusInput | ImageInput]:
     """Create a dataloader for documents.

@@ -369,6 +402,7 @@
         task_metadata: Metadata of the task to determine the document type.
         input_column: The column to use as input. If None, it will use the first column that matches the modality.
         batch_size: Batch size for the dataloader.
+        num_proc: Number of processes to use.

     Returns:
         A dataloader for the documents.
@@ -378,12 +412,14 @@
         return _create_dataloader_for_retrieval_corpus(
             dataset,
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     if "image" in document_type:  # contains image
         return _create_image_dataloader(
             dataset,
             image_column_name="image",
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     raise ValueError(f"Can't handle queries type {document_type}")

@@ -394,6 +430,7 @@ def create_dataloader(
     prompt_type: PromptType | None = None,
     input_column: str | None = None,
     batch_size: int = 32,
+    num_proc: int = 1,
     **kwargs: Any,
 ) -> DataLoader[BatchedInput]:
     """Create a dataloader from a dataset.
@@ -407,6 +444,7 @@
         prompt_type: The type of prompt to create a dataloader for. If None, it will be inferred from the task metadata.
         input_column: The column to use as input. If None, it will use the first column that matches the modality.
         batch_size: The batch size for the dataloader.
+        num_proc: The number of processes to use for dataset processing.
         **kwargs: Additional arguments to pass to the dataloader creation functions.

     Returns:
@@ -418,6 +456,7 @@
             task_metadata,
             batch_size=batch_size,
             input_column=input_column,
+            num_proc=num_proc,
         )
     if prompt_type == PromptType.document:
         return _create_document_dataloader(
@@ -425,6 +464,7 @@
             task_metadata,
             input_column=input_column,
             batch_size=batch_size,
+            num_proc=num_proc,
         )

     if "image" in task_metadata.modalities:
@@ -432,6 +472,7 @@
             dataset,
             image_column_name=input_column,
             batch_size=batch_size,
+            num_proc=num_proc,
         )
     if "text" in task_metadata.modalities and input_column is not None:
         return _create_dataloader_from_texts(
@@ -441,4 +482,5 @@
     return DataLoader(
         dataset,
         batch_size=batch_size,
+        num_workers=num_proc if num_proc > 1 else 0,
     )
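The change in this file is mechanical but worth reading as one pattern: every dataloader helper now accepts `num_proc` and forwards it twice, once to `datasets.Dataset.map` for parallel preprocessing and once to the `DataLoader` as `num_workers`, with `num_proc if num_proc > 1 else 0` preserving the single-process default. A minimal sketch of the same pattern outside mteb (the transform and data are hypothetical stand-ins):

```python
from datasets import Dataset
from torch.utils.data import DataLoader


def add_prefix(row: dict) -> dict:
    # Hypothetical transform standing in for helpers like _corpus_to_dict.
    row["text"] = "doc: " + row["text"]
    return row


num_proc = 2  # 1 keeps all work in the main process
ds = Dataset.from_dict({"text": ["a", "b", "c", "d"]})
ds = ds.map(add_prefix, num_proc=num_proc)  # preprocessing fans out across processes
loader = DataLoader(
    ds,
    batch_size=2,
    num_workers=num_proc if num_proc > 1 else 0,  # 0 = load in the main process
)
```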
mteb/_evaluators/any_sts_evaluator.py
CHANGED
@@ -66,6 +66,7 @@ class AnySTSEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> STSEvaluatorScores:
         logger.info("Running semantic similarity - Encoding samples (1/2)")
         embeddings1 = model.encode(
@@ -73,6 +74,7 @@ class AnySTSEvaluator(Evaluator):
                 self.dataset,
                 self.task_metadata,
                 input_column=self.input_columns[0],
+                num_proc=num_proc,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
mteb/_evaluators/clustering_evaluator.py
CHANGED
@@ -45,11 +45,13 @@ class ClusteringEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> list[int]:
         data_loader = create_dataloader(
             self.dataset,
             self.task_metadata,
             input_column=self.input_column_name,
+            num_proc=num_proc,
             **encode_kwargs,
         )
mteb/_evaluators/evaluator.py
CHANGED
@@ -24,7 +24,7 @@ class Evaluator(ABC):

     @abstractmethod
     def __call__(
-        self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs
+        self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs, num_proc: int = 1
     ) -> Mapping[str, float] | Iterable[Any]:
         """This is called during training to evaluate the model.

@@ -33,5 +33,6 @@ class Evaluator(ABC):
         Args:
             model: the model to evaluate
             encode_kwargs: kwargs to pass to the model's encode method
+            num_proc: number of processes to use for data loading
         """
         pass
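Because `__call__` gives `num_proc` a default of 1, existing subclasses and callers keep working unchanged; only evaluators that want parallel data loading need to accept and forward it. A sketch of a conforming subclass (hypothetical class and trivial return value, module path as in this diff):

```python
from collections.abc import Mapping
from typing import Any

from mteb._evaluators.evaluator import Evaluator


class NoopEvaluator(Evaluator):
    """Hypothetical evaluator illustrating the widened signature."""

    def __call__(
        self, model: Any, *, encode_kwargs: dict[str, Any], num_proc: int = 1
    ) -> Mapping[str, float]:
        # A real evaluator would forward num_proc to its dataloader helpers here.
        return {"main_score": 0.0}
```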
mteb/_evaluators/image/imagetext_pairclassification_evaluator.py
CHANGED
@@ -91,6 +91,7 @@ class ImageTextPairClassificationEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> list[torch.Tensor]:
         images = []
         if isinstance(self.images_column_names, str):
@@ -113,6 +114,7 @@ class ImageTextPairClassificationEvaluator(Evaluator):
         text_embeddings = model.encode(
             _create_dataloader_from_texts(
                 texts,
+                num_proc=num_proc,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
@@ -129,10 +131,15 @@ class ImageTextPairClassificationEvaluator(Evaluator):
             dim=-1,
         ).view(len(self.dataset), self.num_texts_per_sample, -1)

+        def _image_collate_fn(batch):
+            """Collate function for image batches."""
+            return {"image": [item["image"] for item in batch]}
+
         image_embeddings = model.encode(
             DataLoader(
                 CustomImageDataset(images),
-                collate_fn=
+                collate_fn=_image_collate_fn,
+                num_workers=num_proc if num_proc > 1 else 0,
             ),
             task_metadata=self.task_metadata,
             hf_subset=self.hf_subset,
mteb/_evaluators/pair_classification_evaluator.py
CHANGED
@@ -91,6 +91,7 @@ class PairClassificationEvaluator(Evaluator):
         self,
         model: EncoderProtocol,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> PairClassificationDistances:
         logger.info("Running pair classification - Encoding samples (1/2)")
         embeddings1 = model.encode(
@@ -98,6 +99,7 @@ class PairClassificationEvaluator(Evaluator):
                 self.dataset,
                 task_metadata=self.task_metadata,
                 input_column=self.input1_column_name,
+                num_proc=num_proc,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
@@ -112,6 +114,7 @@ class PairClassificationEvaluator(Evaluator):
                 self.dataset,
                 task_metadata=self.task_metadata,
                 input_column=self.input2_column_name,
+                num_proc=num_proc,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
mteb/_evaluators/retrieval_evaluator.py
CHANGED
@@ -55,6 +55,7 @@ class RetrievalEvaluator(Evaluator):
         self,
         search_model: SearchProtocol,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> RetrievalOutputType:
         logger.info("Running retrieval task - Indexing corpus...")
         search_model.index(
@@ -63,6 +64,7 @@ class RetrievalEvaluator(Evaluator):
             hf_split=self.hf_split,
             hf_subset=self.hf_subset,
             encode_kwargs=encode_kwargs,
+            num_proc=num_proc,
         )
         logger.info("Running retrieval task - Searching queries...")
         return search_model.search(
@@ -73,6 +75,7 @@ class RetrievalEvaluator(Evaluator):
             hf_subset=self.hf_subset,
             encode_kwargs=encode_kwargs,
             top_ranked=self.top_ranked,
+            num_proc=num_proc,
         )

     def evaluate(
mteb/_evaluators/sklearn_evaluator.py
CHANGED
@@ -54,18 +54,20 @@ class SklearnEvaluator(Evaluator):
         self.evaluator_model = evaluator_model

     def create_dataloaders(
-        self, encode_kwargs: EncodeKwargs
+        self, encode_kwargs: EncodeKwargs, num_proc: int
     ) -> tuple[DataLoader[BatchedInput], DataLoader[BatchedInput]]:
         dataloader_train = create_dataloader(
             self.train_dataset,
             self.task_metadata,
             input_column=self.values_column_name,
+            num_proc=num_proc,
             **encode_kwargs,
         )
         dataloader_test = create_dataloader(
             self.eval_dataset,
             self.task_metadata,
             input_column=self.values_column_name,
+            num_proc=num_proc,
             **encode_kwargs,
         )
         return dataloader_train, dataloader_test
@@ -76,6 +78,7 @@ class SklearnEvaluator(Evaluator):
         *,
         encode_kwargs: EncodeKwargs,
         test_cache: Array | None = None,
+        num_proc: int = 1,
     ) -> tuple[np.ndarray, Array]:
         """Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.

@@ -83,6 +86,7 @@ class SklearnEvaluator(Evaluator):
             model: Encoder
             encode_kwargs: encode kwargs
             test_cache: embeddings of the test set, if already computed
+            num_proc: number of processes to use

         Returns:
             Tuple of test predictions and embeddings
@@ -90,6 +94,7 @@ class SklearnEvaluator(Evaluator):
         """
         dataloader_train, dataloader_test = self.create_dataloaders(
             encode_kwargs=encode_kwargs,
+            num_proc=num_proc,
         )

         logger.info("Running - Encoding samples...")
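Note that, unlike the other helpers in this release, `create_dataloaders` gains `num_proc` without a default, so code calling it directly must now pass the argument explicitly; the evaluator's own `__call__` supplies it from its `num_proc=1` default. A sketch of a direct call (assuming `evaluator` is an already-constructed SklearnEvaluator):

```python
train_loader, test_loader = evaluator.create_dataloaders(
    encode_kwargs={}, num_proc=1
)
```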
mteb/_evaluators/text/bitext_mining_evaluator.py
CHANGED
@@ -41,6 +41,7 @@ class BitextMiningEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> dict[str, list[dict[str, float]]]:
         pair_elements = {p for pair in self.pairs for p in pair}
         if isinstance(self.sentences, Dataset):
@@ -55,6 +56,7 @@ class BitextMiningEvaluator(Evaluator):
         for sub in tqdm(subsets):
             dataloader = _create_dataloader_from_texts(
                 self.sentences[sub],
+                num_proc=num_proc,
                 **encode_kwargs,
             )
             embeddings[sub] = model.encode(
mteb/_evaluators/text/summarization_evaluator.py
CHANGED
@@ -100,6 +100,7 @@ class SummarizationEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> SummarizationDistances:
         # Get the human & machine summaries for the text in one go for all
         human_lens = [len(human_summaries) for human_summaries in self.human_summaries]
@@ -115,6 +116,7 @@ class SummarizationEvaluator(Evaluator):
                     for human_summaries in self.human_summaries
                     for summary in human_summaries
                 ],
+                num_proc=num_proc,
                 **encode_kwargs,
             ),
             task_metadata=self.task_metadata,
mteb/_evaluators/zeroshot_classification_evaluator.py
CHANGED
@@ -48,11 +48,13 @@ class ZeroShotClassificationEvaluator(Evaluator):
         model: EncoderProtocol,
         *,
         encode_kwargs: EncodeKwargs,
+        num_proc: int = 1,
     ) -> Array:
         dataloader = create_dataloader(
             self.dataset,
             input_column=self.input_column_name,
             task_metadata=self.task_metadata,
+            num_proc=num_proc,
             **encode_kwargs,
         )
mteb/abstasks/abstask.py
CHANGED
@@ -116,11 +116,14 @@ class AbsTask(ABC):
             logger.warning(msg)
             warnings.warn(msg)

-    def dataset_transform(self):
+    def dataset_transform(self, num_proc: int = 1):
         """A transform operations applied to the dataset after loading.

         This method is useful when the dataset from Huggingface is not in an `mteb` compatible format.
         Override this method if your dataset requires additional transformation.
+
+        Args:
+            num_proc: Number of processes to use for the transformation.
         """
         pass

@@ -132,6 +135,7 @@ class AbsTask(ABC):
         *,
         encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
+        num_proc: int = 1,
         **kwargs: Any,
     ) -> Mapping[HFSubset, ScoresDict]:
         """Evaluates an MTEB compatible model on the task.
@@ -142,6 +146,7 @@ class AbsTask(ABC):
             subsets_to_run: List of huggingface subsets (HFSubsets) to evaluate. If None, all subsets are evaluated.
             encode_kwargs: Additional keyword arguments that are passed to the model's `encode` method.
             prediction_folder: Folder to save model predictions
+            num_proc: Number of processes to use for loading the dataset or processing.
             kwargs: Additional keyword arguments that are passed to the _evaluate_subset method.

         Returns:
@@ -197,6 +202,7 @@ class AbsTask(ABC):
                 hf_subset=hf_subset,
                 encode_kwargs=encode_kwargs,
                 prediction_folder=prediction_folder,
+                num_proc=num_proc,
                 **kwargs,
             )
             self._add_main_score(scores[hf_subset])
@@ -212,6 +218,7 @@ class AbsTask(ABC):
         hf_subset: str,
         encode_kwargs: EncodeKwargs,
         prediction_folder: Path | None = None,
+        num_proc: int = 1,
         **kwargs: Any,
     ) -> ScoresDict:
         raise NotImplementedError(
@@ -316,11 +323,15 @@ class AbsTask(ABC):
         )  # only take the specified test split.
         return dataset_dict

-    def load_data(self) -> None:
+    def load_data(self, num_proc: int = 1, **kwargs: Any) -> None:
         """Loads dataset from HuggingFace hub

         This is the main loading function for Task. Do not overwrite this, instead we recommend using `dataset_transform`, which is called after the
         dataset is loaded using `datasets.load_dataset`.
+
+        Args:
+            num_proc: Number of processes to use for loading the dataset.
+            kwargs: Additional keyword arguments passed to the load_dataset function. Keep for forward compatibility.
         """
         if self.data_loaded:
             return
@@ -333,11 +344,12 @@ class AbsTask(ABC):
                 self.dataset[hf_subset] = load_dataset(
                     name=hf_subset,
                     **self.metadata.dataset,
+                    num_proc=num_proc,
                 )
         else:
             # some of monolingual datasets explicitly adding the split name to the dataset name
-            self.dataset = load_dataset(**self.metadata.dataset)
-        self.dataset_transform()
+            self.dataset = load_dataset(**self.metadata.dataset, num_proc=num_proc)
+        self.dataset_transform(num_proc=num_proc)
         self.data_loaded = True

     def fast_load(self) -> None:
@@ -360,12 +372,13 @@ class AbsTask(ABC):
             self.dataset[lang] = DatasetDict(subset)

     def calculate_descriptive_statistics(
-        self, overwrite_results: bool = False
+        self, overwrite_results: bool = False, num_proc: int = 1
     ) -> dict[str, DescriptiveStatistics]:
         """Calculates descriptive statistics from the dataset.

         Args:
             overwrite_results: Whether to overwrite existing results. If False and results already exist, the existing results will be loaded from cache.
+            num_proc: Number of processes to use for loading the dataset.

         Returns:
             A dictionary containing descriptive statistics for each split.
@@ -379,7 +392,7 @@ class AbsTask(ABC):
             return existing_stats

         if not self.data_loaded:
-            self.load_data()
+            self.load_data(num_proc=num_proc)

         descriptive_stats: dict[str, DescriptiveStatistics] = {}
         hf_subset_stat: Literal["hf_subset_descriptive_stats"] = (
@@ -517,7 +530,7 @@ class AbsTask(ABC):
         scores["main_score"] = scores[self.metadata.main_score]

     def _upload_dataset_to_hub(
-        self, repo_name: str, fields: list[str] | dict[str, str]
+        self, repo_name: str, fields: list[str] | dict[str, str], num_proc: int = 1
     ) -> None:
         if self.dataset is None:
             raise ValueError("Dataset not loaded")
@@ -542,7 +555,10 @@ class AbsTask(ABC):
                 )
                 sentences = DatasetDict(sentences)
                 sentences.push_to_hub(
-                    repo_name, config, commit_message=f"Add {config} dataset"
+                    repo_name,
+                    config,
+                    commit_message=f"Add {config} dataset",
+                    num_proc=num_proc,
                 )
         else:
             sentences = {}
@@ -559,16 +575,19 @@ class AbsTask(ABC):
                     {field: self.dataset[split][field] for field in fields}
                 )
             sentences = DatasetDict(sentences)
-            sentences.push_to_hub(repo_name, commit_message="Add dataset")
+            sentences.push_to_hub(
+                repo_name, commit_message="Add dataset", num_proc=num_proc
+            )

-    def _push_dataset_to_hub(self, repo_name: str) -> None:
+    def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
         raise NotImplementedError

-    def push_dataset_to_hub(self, repo_name: str) -> None:
+    def push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
         """Push the dataset to the HuggingFace Hub.

         Args:
             repo_name: The name of the repository to push the dataset to.
+            num_proc: Number of processes to use for loading the dataset.

         Examples:
             >>> import mteb
@@ -580,7 +599,7 @@ class AbsTask(ABC):
         if not self.data_loaded:
             self.load_data()

-        self._push_dataset_to_hub(repo_name)
+        self._push_dataset_to_hub(repo_name, num_proc)
         # dataset repo not creating when pushing card
         self.metadata.push_dataset_card_to_hub(repo_name)
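Taken together with the dataloader changes above, the new keyword can now be threaded from task-level entry points all the way down to `datasets`. A short usage sketch (the task name is an arbitrary example drawn from this package's task list):

```python
import mteb

task = mteb.get_task("BUCC.v2")  # any registered task name works here
task.load_data(num_proc=4)  # parallel datasets.load_dataset + dataset_transform
stats = task.calculate_descriptive_statistics(num_proc=4)
```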