kreuzberg 3.17.3__py3-none-any.whl → 3.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kreuzberg/_api/main.py +45 -3
- kreuzberg/_entity_extraction.py +108 -18
- kreuzberg/_error_handling.py +182 -0
- kreuzberg/_extractors/_base.py +2 -2
- kreuzberg/_extractors/_html.py +2 -2
- kreuzberg/_extractors/_pdf.py +33 -54
- kreuzberg/_extractors/_structured.py +1 -1
- kreuzberg/_language_detection.py +2 -0
- kreuzberg/_ocr/_tesseract.py +28 -6
- kreuzberg/_types.py +18 -0
- kreuzberg/cli.py +36 -22
- kreuzberg/extraction.py +251 -107
- {kreuzberg-3.17.3.dist-info → kreuzberg-3.19.0.dist-info}/METADATA +7 -4
- {kreuzberg-3.17.3.dist-info → kreuzberg-3.19.0.dist-info}/RECORD +17 -16
- {kreuzberg-3.17.3.dist-info → kreuzberg-3.19.0.dist-info}/WHEEL +0 -0
- {kreuzberg-3.17.3.dist-info → kreuzberg-3.19.0.dist-info}/entry_points.txt +0 -0
- {kreuzberg-3.17.3.dist-info → kreuzberg-3.19.0.dist-info}/licenses/LICENSE +0 -0
kreuzberg/_api/main.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import base64
 import io
+import os
 import traceback
 from json import dumps
 from typing import TYPE_CHECKING, Annotated, Any, Literal
@@ -100,6 +101,35 @@ def exception_handler(request: Request[Any, Any, Any], exception: KreuzbergError
     )
 
 
+def _get_max_upload_size() -> int:
+    """Get the maximum upload size from environment variable.
+
+    Returns:
+        Maximum upload size in bytes. Defaults to 1GB if not set.
+
+    Environment Variables:
+        KREUZBERG_MAX_UPLOAD_SIZE: Maximum upload size in bytes (default: 1073741824 = 1GB)
+    """
+    default_size = 1024 * 1024 * 1024
+    try:
+        size = int(os.environ.get("KREUZBERG_MAX_UPLOAD_SIZE", default_size))
+        return size if size >= 0 else default_size
+    except ValueError:
+        return default_size
+
+
+def _is_opentelemetry_enabled() -> bool:
+    """Check if OpenTelemetry should be enabled.
+
+    Returns:
+        True if OpenTelemetry should be enabled, False otherwise.
+
+    Environment Variables:
+        KREUZBERG_ENABLE_OPENTELEMETRY: Enable OpenTelemetry tracing (true/false) (default: true)
+    """
+    return os.environ.get("KREUZBERG_ENABLE_OPENTELEMETRY", "true").lower() in ("true", "1", "yes", "on")
+
+
 def general_exception_handler(request: Request[Any, Any, Any], exception: Exception) -> Response[Any]:
     error_type = type(exception).__name__
     error_message = str(exception)
@@ -242,7 +272,7 @@ async def handle_files_upload(  # noqa: PLR0913
     - Language detection (if enabled)
 
     Supports various file formats including PDF, Office documents, images, and more.
-    Maximum file size: 1GB per file.
+    Maximum file size: Configurable via KREUZBERG_MAX_UPLOAD_SIZE environment variable (default: 1GB per file).
 
     Args:
         request: The HTTP request object
@@ -280,6 +310,9 @@ async def handle_files_upload(  # noqa: PLR0913
     """
     static_config = discover_config_cached()
 
+    if not data:
+        raise ValidationError("No files provided for extraction", context={"file_count": 0})
+
     min_dims = _create_dimension_tuple(image_ocr_min_width, image_ocr_min_height)
     max_dims = _create_dimension_tuple(image_ocr_max_width, image_ocr_max_height)
 
@@ -379,9 +412,18 @@ type_encoders = {
     Image.Image: _pil_image_encoder,
 }
 
+
+def _get_plugins() -> list[Any]:
+    """Get configured plugins based on environment variables."""
+    plugins = []
+    if _is_opentelemetry_enabled():
+        plugins.append(OpenTelemetryPlugin(OpenTelemetryConfig()))
+    return plugins
+
+
 app = Litestar(
     route_handlers=[handle_files_upload, health_check, get_configuration],
-    plugins=
+    plugins=_get_plugins(),
     logging_config=StructLoggingConfig(),
     openapi_config=openapi_config,
     exception_handlers={
@@ -389,5 +431,5 @@ app = Litestar(
         Exception: general_exception_handler,
     },
     type_encoders=type_encoders,
-    request_max_body_size=
+    request_max_body_size=_get_max_upload_size(),
 )
kreuzberg/_entity_extraction.py
CHANGED
@@ -2,19 +2,77 @@ from __future__ import annotations
 
 import os
 import re
+import shutil
 import subprocess
-import sys
 from functools import lru_cache
 from itertools import chain
 from typing import TYPE_CHECKING, Any
 
+import anyio
+
 from kreuzberg._types import Entity, SpacyEntityExtractionConfig
+from kreuzberg._utils._sync import run_sync
 from kreuzberg.exceptions import KreuzbergError, MissingDependencyError
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
 
+def is_uv_available() -> bool:
+    """Check if uv is available in the environment."""
+    return shutil.which("uv") is not None
+
+
+def get_spacy_model_url(model_name: str, version: str = "3.8.0") -> str:
+    """Get the direct download URL for a spaCy model.
+
+    Args:
+        model_name: Name of the spaCy model (e.g., 'en_core_web_sm')
+        version: Model version to download (default: 3.8.0)
+
+    Returns:
+        Direct download URL for the model
+    """
+    return f"https://github.com/explosion/spacy-models/releases/download/{model_name}-{version}/{model_name}-{version}-py3-none-any.whl"
+
+
+async def install_spacy_model_with_uv(model_name: str) -> subprocess.CompletedProcess[str]:
+    """Install spaCy model using uv.
+
+    Args:
+        model_name: Name of the spaCy model to install
+
+    Returns:
+        Completed process result
+    """
+    model_url = get_spacy_model_url(model_name)
+    return await run_sync(
+        subprocess.run,
+        ["uv", "pip", "install", model_url],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+
+
+async def install_spacy_model_with_spacy(model_name: str) -> bool:
+    """Install spaCy model using spacy download function.
+
+    Args:
+        model_name: Name of the spaCy model to install
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        import spacy.cli.download  # noqa: PLC0415
+
+        await run_sync(spacy.cli.download, model_name)  # type: ignore[attr-defined]
+        return True
+    except (ImportError, OSError, RuntimeError):
+        return False
+
+
 def extract_entities(
     text: str,
     entity_types: Sequence[str] = ("PERSON", "ORGANIZATION", "LOCATION", "DATE", "EMAIL", "PHONE"),
@@ -46,11 +104,11 @@ def extract_entities(
             functionality="Entity Extraction",
         ) from e
 
-    model_name =
+    model_name = select_spacy_model(languages, spacy_config)
     if not model_name:
         return entities
 
-    nlp =
+    nlp = load_spacy_model(model_name, spacy_config)
 
     if len(text) > spacy_config.max_doc_length:
         text = text[: spacy_config.max_doc_length]
@@ -74,7 +132,7 @@ def extract_entities(
 
 
 @lru_cache(maxsize=32)
-def
+def load_spacy_model(model_name: str, spacy_config: SpacyEntityExtractionConfig) -> Any:
     try:
         import spacy  # noqa: PLC0415
     except ImportError:
@@ -86,22 +144,54 @@ def _load_spacy_model(model_name: str, spacy_config: SpacyEntityExtractionConfig
     try:
         nlp = spacy.load(model_name)
     except OSError:
-        result = subprocess.run(
-            [sys.executable, "-m", "spacy", "download", model_name],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
 
-
+        async def install_model() -> tuple[bool, str | None]:
+            """Install model and return success status and error message."""
+            try:
+                success = await install_spacy_model_with_spacy(model_name)
+                if success:
+                    return True, None
+            except (ImportError, OSError, RuntimeError) as e:
+                spacy_error = str(e)
+            else:
+                spacy_error = "spaCy download failed"
+
+            if is_uv_available():
+                try:
+                    result = await install_spacy_model_with_uv(model_name)
+                    return result.returncode == 0, result.stderr
+                except (OSError, subprocess.SubprocessError) as e:
+                    return False, f"spaCy: {spacy_error}, uv: {e!s}"
+
+            return False, spacy_error
+
+        try:
+            success, error_details = anyio.run(install_model)
+        except SystemExit as e:
+            success, error_details = False, f"spaCy CLI exit code: {e.code}"
+
+        if not success:
+            if is_uv_available():
+                model_url = get_spacy_model_url(model_name)
+                manual_install_cmd = f"uv pip install {model_url}"
+            else:
+                manual_install_cmd = f"python -m spacy download {model_name}"
+
             error_msg = (
-                f"Failed to download spaCy model '{model_name}'. "
-                f"Please install it manually with: python -m spacy download {model_name}"
+                f"Failed to download spaCy model '{model_name}'. Please install it manually with: {manual_install_cmd}"
            )
-
-
+
+            if error_details:
+                error_msg += f"\nError details: {error_details}"
+
             raise KreuzbergError(
-                error_msg,
+                error_msg,
+                context={
+                    "model": model_name,
+                    "manual_install_cmd": manual_install_cmd,
+                    "error_details": error_details,
+                    "uv_available": is_uv_available(),
+                },
             ) from None
 
     try:
@@ -118,7 +208,7 @@ def _load_spacy_model(model_name: str, spacy_config: SpacyEntityExtractionConfig
     return nlp
 
 
-def
+def select_spacy_model(languages: list[str] | None, spacy_config: SpacyEntityExtractionConfig) -> str | None:
     if not languages:
         return spacy_config.get_model_for_language("en")
 
@@ -140,7 +230,7 @@ def extract_keywords(
         kw_model = KeyBERT()
         keywords = kw_model.extract_keywords(text, top_n=keyword_count)
         return [(kw, float(score)) for kw, score in keywords]
-    except
+    except ValueError:
        return []
    except ImportError as e:  # pragma: no cover
        raise MissingDependencyError.create_for_package(
kreuzberg/_error_handling.py
ADDED
@@ -0,0 +1,182 @@
+"""Type-safe error handling utilities for extraction pipeline."""
+
+from __future__ import annotations
+
+import traceback
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+from kreuzberg._types import ErrorContextType, ExtractionResult, Metadata, ProcessingErrorDict
+from kreuzberg.exceptions import KreuzbergError, MissingDependencyError, ValidationError
+
+
+def should_exception_bubble_up(exception: Exception, context: ErrorContextType = "unknown") -> bool:
+    """Determine if an exception should bubble up or be handled gracefully.
+
+    Args:
+        exception: The exception to classify
+        context: The context where the exception occurred (e.g., "batch_processing", "single_extraction", "optional_feature")
+
+    Returns:
+        True if the exception should bubble up, False if it should be handled gracefully
+    """
+    if isinstance(exception, (SystemExit, KeyboardInterrupt, MemoryError, OSError, RuntimeError)):
+        return True
+
+    if isinstance(exception, MissingDependencyError):
+        return True
+
+    if isinstance(exception, ValidationError):
+        if context == "batch_processing":
+            return False
+
+        return context != "optional_feature"
+
+    if isinstance(exception, KreuzbergError) and context == "optional_feature":
+        return False
+
+    if context == "batch_processing":
+        return isinstance(exception, (SystemExit, KeyboardInterrupt, MemoryError, OSError, RuntimeError))
+
+    return not (context == "optional_feature" and isinstance(exception, (IOError, ImportError)))
+
+
+class FeatureProcessingError:
+    """Type-safe processing error for extraction features."""
+
+    def __init__(self, feature: str, error: Exception) -> None:
+        self._feature = feature
+        self._error = error
+        self._traceback = traceback.format_exc()
+
+    @property
+    def feature(self) -> str:
+        return self._feature
+
+    @property
+    def error_type(self) -> str:
+        return type(self._error).__name__
+
+    @property
+    def error_message(self) -> str:
+        return str(self._error)
+
+    @property
+    def traceback(self) -> str:
+        return self._traceback
+
+    def to_dict(self) -> ProcessingErrorDict:
+        return {
+            "feature": self.feature,
+            "error_type": self.error_type,
+            "error_message": self.error_message,
+            "traceback": self.traceback,
+        }
+
+
+def safe_feature_execution(
+    feature_name: str,
+    execution_func: Callable[[], Any],
+    default_value: Any,
+    result: ExtractionResult,
+    context: ErrorContextType = "optional_feature",
+) -> Any:
+    """Safely execute a feature extraction function with proper error handling.
+
+    Args:
+        feature_name: Name of the feature being executed
+        execution_func: Function to execute that may raise exceptions
+        default_value: Default value to return if execution fails
+        result: ExtractionResult to update with error information
+        context: The context for exception handling decisions
+
+    Returns:
+        Either the successful result or the default value
+    """
+    try:
+        return execution_func()
+    except Exception as e:
+        if should_exception_bubble_up(e, context):
+            raise
+
+        _add_processing_error(result, FeatureProcessingError(feature_name, e))
+        return default_value
+
+
+def _add_processing_error(result: ExtractionResult, error: FeatureProcessingError) -> None:
+    """Add a processing error to the result metadata in a type-safe way."""
+    if result.metadata is None:
+        result.metadata = {}
+
+    if "processing_errors" not in result.metadata:
+        result.metadata["processing_errors"] = []
+
+    errors_list = result.metadata["processing_errors"]
+    if isinstance(errors_list, list):
+        errors_list.append(error.to_dict())
+    else:
+        result.metadata["processing_errors"] = [error.to_dict()]
+
+
+def preserve_result_with_errors(
+    result: ExtractionResult,
+    errors: list[FeatureProcessingError],
+) -> ExtractionResult:
+    """Preserve a successful extraction result while adding error information.
+
+    This is used when core extraction succeeds but optional features fail.
+
+    Args:
+        result: The successful extraction result
+        errors: List of errors that occurred during optional processing
+
+    Returns:
+        The result with error information added to metadata
+    """
+    for error in errors:
+        _add_processing_error(result, error)
+
+    return result
+
+
+def create_error_result(
+    content: str,
+    mime_type: str,
+    errors: list[FeatureProcessingError],
+    **metadata_kwargs: Any,
+) -> ExtractionResult:
+    """Create an error result with proper type safety.
+
+    Args:
+        content: Error content to include
+        mime_type: MIME type of the result
+        errors: List of errors that occurred
+        **metadata_kwargs: Additional metadata to include
+
+    Returns:
+        An ExtractionResult with error information
+    """
+    metadata: Metadata = {
+        "error": f"Multiple processing errors occurred: {len(errors)} errors",
+        "error_context": {
+            "error_count": len(errors),
+            "errors": [error.to_dict() for error in errors],
+            **metadata_kwargs,
+        },
+        "processing_errors": [error.to_dict() for error in errors],
+    }
+
+    return ExtractionResult(
+        content=content,
+        chunks=[],
+        mime_type=mime_type,
+        metadata=metadata,
+        entities=[],
+        keywords=[],
+        detected_languages=[],
+        tables=[],
+        images=[],
+        image_ocr_results=[],
+    )
kreuzberg/_extractors/_base.py
CHANGED
@@ -230,13 +230,13 @@ class Extractor(ABC):
                 confidence_score=None,
                 processing_time=duration,
             )
-        except
+        except ValueError as e:  # pragma: no cover
             return ImageOCRResult(
                 image=target,
                 ocr_result=ExtractionResult(content="", mime_type="text/plain", metadata={}),
                 skipped_reason=f"OCR failed: {type(e).__name__}: {e}",
             )
-        except
+        except TypeError as e:  # pragma: no cover
             return ImageOCRResult(
                 image=target,
                 ocr_result=ExtractionResult(content="", mime_type="text/plain", metadata={}),
kreuzberg/_extractors/_html.py
CHANGED
@@ -75,7 +75,7 @@ class HTMLExtractor(Extractor):
         soup = BeautifulSoup(html_content, "xml")
 
         for img in soup.find_all("img"):
-            src_val = img.get("src")
+            src_val = img.get("src")
             if isinstance(src_val, str) and src_val.startswith("data:image/"):
                 try:
                     header, data = src_val.split(",", 1)
@@ -105,7 +105,7 @@ class HTMLExtractor(Extractor):
                 except (OSError, ValueError) as e:  # pragma: no cover
                     logger.debug("Could not determine image dimensions for %s: %s", format_name, e)
 
-            alt_val = img.get("alt")
+            alt_val = img.get("alt")
             desc = alt_val if isinstance(alt_val, str) else None
             images.append(
                 ExtractedImage(
kreuzberg/_extractors/_pdf.py
CHANGED
@@ -6,7 +6,6 @@ import logging
 import os
 import tempfile
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from dataclasses import asdict
 from itertools import count
 from multiprocessing import cpu_count
 from pathlib import Path
@@ -27,14 +26,11 @@ from kreuzberg._mime_types import PDF_MIME_TYPE, PLAIN_TEXT_MIME_TYPE
 from kreuzberg._ocr import get_ocr_backend
 from kreuzberg._playa import extract_pdf_metadata, extract_pdf_metadata_sync
 from kreuzberg._types import (
-    EasyOCRConfig,
     ExtractedImage,
     ExtractionResult,
     ImageOCRResult,
     Metadata,
     OcrBackendType,
-    PaddleOCRConfig,
-    TesseractConfig,
 )
 from kreuzberg._utils._errors import create_error_context, should_retry
 from kreuzberg._utils._image_preprocessing import calculate_optimal_dpi
@@ -134,48 +130,47 @@ class PDFExtractor(Extractor):
     def extract_path_sync(self, path: Path) -> ExtractionResult:
         content_bytes = path.read_bytes()
 
+        result: ExtractionResult | None = None
+
         document: Document | None = None
         if self.config.extract_images or self.config.extract_tables:
             document = self._parse_with_password_attempts(content_bytes)
 
-
-
-
-
+        if not self.config.force_ocr:
+            try:
+                content = self._extract_pdf_searchable_text_sync(path)
+                if self._validate_extracted_text(content):
+                    result = ExtractionResult(content=content, mime_type=PLAIN_TEXT_MIME_TYPE, metadata={})
+            except ParsingError:
+                pass
 
-        if
-
+        if not result and self.config.ocr_backend is not None:
+            result = self._extract_pdf_text_with_ocr_sync(path, self.config.ocr_backend)
+
+        if not result:
+            result = ExtractionResult(content="", mime_type=PLAIN_TEXT_MIME_TYPE, metadata={})
+
+        metadata = self._extract_metadata_with_password_attempts_sync(content_bytes)
+        result.metadata = metadata
 
-        tables = []
         if self.config.extract_tables:
             # GMFT is optional dependency ~keep
             try:
                 from kreuzberg._gmft import extract_tables_sync  # noqa: PLC0415
 
                 tables = extract_tables_sync(path)
+                result.tables = tables
             except ImportError:  # pragma: no cover
-                tables = []
-
-        if not self.config.force_ocr and self._validate_extracted_text(text):
-            text = self._extract_with_playa_sync(path, fallback_text=text)
-
-        text = normalize_spaces(text)
-
-        result = ExtractionResult(
-            content=text,
-            mime_type=PLAIN_TEXT_MIME_TYPE,
-            metadata={},
-            tables=list(tables),
-        )
+                result.tables = []
 
-
-
-
-
-
-
-
-
+        if result.tables:
+            table_summary = generate_table_summary(result.tables)
+            result.metadata = result.metadata | {
+                "table_count": table_summary["table_count"],
+                "tables_summary": f"Document contains {table_summary['table_count']} tables "
+                f"across {table_summary['pages_with_tables']} pages with "
+                f"{table_summary['total_rows']} total rows",
+            }
 
         if self.config.extract_images and document:
             images = self._extract_images_from_playa_sync(document)
@@ -405,7 +400,7 @@ class PDFExtractor(Extractor):
         except Exception as e:
             raise ParsingError(f"Failed to extract PDF text: {e}") from e
 
-    def
+    def _extract_pdf_text_with_ocr_sync(self, path: Path, ocr_backend: OcrBackendType) -> ExtractionResult:
         temp_files: list[Path] = []
         try:
             with pdf_document_sync(path) as pdf:
@@ -443,7 +438,8 @@ class PDFExtractor(Extractor):
                     with pdf_resources_sync(bitmap, page):
                         pil_image.close()
 
-
+            content = self._process_pdf_images_with_ocr([str(p) for p in temp_files], ocr_backend)
+            return ExtractionResult(content=content, mime_type=PLAIN_TEXT_MIME_TYPE, metadata={})
 
         except Exception as e:
             raise ParsingError(f"Failed to OCR PDF: {e}") from e
@@ -452,28 +448,11 @@ class PDFExtractor(Extractor):
             with contextlib.suppress(OSError):
                 p.unlink()
 
-    def _process_pdf_images_with_ocr(self, image_paths: list[str]) -> str:
-        backend = get_ocr_backend(
+    def _process_pdf_images_with_ocr(self, image_paths: list[str], ocr_backend: OcrBackendType) -> str:
+        backend = get_ocr_backend(ocr_backend)
         paths = [Path(p) for p in image_paths]
 
-
-            case "tesseract":
-                config = (
-                    self.config.ocr_config if isinstance(self.config.ocr_config, TesseractConfig) else TesseractConfig()
-                )
-                results = backend.process_batch_sync(paths, **asdict(config))
-            case "paddleocr":
-                paddle_config = (
-                    self.config.ocr_config if isinstance(self.config.ocr_config, PaddleOCRConfig) else PaddleOCRConfig()
-                )
-                results = backend.process_batch_sync(paths, **asdict(paddle_config))
-            case "easyocr":
-                easy_config = (
-                    self.config.ocr_config if isinstance(self.config.ocr_config, EasyOCRConfig) else EasyOCRConfig()
-                )
-                results = backend.process_batch_sync(paths, **asdict(easy_config))
-            case _:
-                raise NotImplementedError(f"Sync OCR not implemented for {self.config.ocr_backend}")
+        results = backend.process_batch_sync(paths, **self.config.get_config_dict())
 
         return "\n\n".join(result.content for result in results)
 
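Note: the per-backend match block is gone; _process_pdf_images_with_ocr now forwards self.config.get_config_dict(), a helper added to kreuzberg/_types.py in this release whose body is not shown in this diff. Purely as an illustration of the shape such a helper would need, and assuming the real implementation may differ, something like:

from __future__ import annotations

from dataclasses import asdict
from typing import Any

from kreuzberg._types import EasyOCRConfig, PaddleOCRConfig, TesseractConfig

# Hypothetical stand-in for the unseen ExtractionConfig.get_config_dict().
_DEFAULT_OCR_CONFIGS = {
    "tesseract": TesseractConfig,
    "paddleocr": PaddleOCRConfig,
    "easyocr": EasyOCRConfig,
}

def get_config_dict(ocr_backend: str | None, ocr_config: Any) -> dict[str, Any]:
    """Return process_batch_sync kwargs for the selected backend, falling back to defaults."""
    if ocr_backend is None:
        return {}
    default_cls = _DEFAULT_OCR_CONFIGS[ocr_backend]
    config = ocr_config if isinstance(ocr_config, default_cls) else default_cls()
    return asdict(config)  # the removed match statement did exactly this per backend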
kreuzberg/_language_detection.py
CHANGED
@@ -31,5 +31,7 @@ def detect_languages(text: str, config: LanguageDetectionConfig | None = None) -
             langs = [result["lang"].lower() for result in results if result.get("lang")]
             return langs if langs else None
         return None
+    except (RuntimeError, OSError, MemoryError):
+        raise
     except Exception:  # noqa: BLE001
         return None