PyPI - natural-pdf - Versions diffs - 0.1.32__py3-none-any.whl → 0.1.34__py3-none-any.whl - Mend

natural-pdf 0.1.32__py3-none-any.whl → 0.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

natural_pdf/analyzers/__init__.py +2 -2
natural_pdf/analyzers/guides.py +670 -595
natural_pdf/analyzers/layout/base.py +53 -6
natural_pdf/analyzers/layout/layout_analyzer.py +3 -1
natural_pdf/analyzers/layout/layout_manager.py +18 -14
natural_pdf/analyzers/layout/layout_options.py +1 -0
natural_pdf/analyzers/layout/paddle.py +102 -64
natural_pdf/analyzers/layout/table_structure_utils.py +3 -1
natural_pdf/analyzers/layout/yolo.py +2 -6
natural_pdf/analyzers/shape_detection_mixin.py +15 -6
natural_pdf/classification/manager.py +92 -77
natural_pdf/classification/mixin.py +49 -5
natural_pdf/classification/results.py +1 -1
natural_pdf/cli.py +7 -3
natural_pdf/collections/pdf_collection.py +96 -101
natural_pdf/core/element_manager.py +188 -82
natural_pdf/core/highlighting_service.py +5 -6
natural_pdf/core/page.py +132 -16
natural_pdf/core/pdf.py +486 -71
natural_pdf/describe/__init__.py +18 -12
natural_pdf/describe/base.py +179 -172
natural_pdf/describe/elements.py +155 -155
natural_pdf/describe/mixin.py +27 -19
natural_pdf/describe/summary.py +44 -55
natural_pdf/elements/base.py +134 -18
natural_pdf/elements/collections.py +90 -18
natural_pdf/elements/image.py +2 -1
natural_pdf/elements/line.py +0 -31
natural_pdf/elements/rect.py +0 -14
natural_pdf/elements/region.py +238 -111
natural_pdf/elements/text.py +18 -12
natural_pdf/exporters/__init__.py +4 -1
natural_pdf/exporters/original_pdf.py +12 -4
natural_pdf/extraction/mixin.py +66 -10
natural_pdf/extraction/result.py +1 -1
natural_pdf/flows/flow.py +63 -4
natural_pdf/flows/region.py +4 -4
natural_pdf/ocr/engine.py +83 -2
natural_pdf/ocr/engine_paddle.py +5 -5
natural_pdf/ocr/ocr_factory.py +2 -1
natural_pdf/ocr/ocr_manager.py +24 -13
natural_pdf/ocr/ocr_options.py +3 -10
natural_pdf/qa/document_qa.py +21 -8
natural_pdf/qa/qa_result.py +3 -7
natural_pdf/search/__init__.py +3 -2
natural_pdf/search/lancedb_search_service.py +5 -6
natural_pdf/search/numpy_search_service.py +5 -2
natural_pdf/selectors/parser.py +51 -6
natural_pdf/tables/__init__.py +2 -2
natural_pdf/tables/result.py +7 -6
natural_pdf/utils/bidi_mirror.py +2 -1
natural_pdf/utils/reading_order.py +3 -2
natural_pdf/utils/visualization.py +3 -3
natural_pdf/widgets/viewer.py +0 -1
{natural_pdf-0.1.32.dist-info → natural_pdf-0.1.34.dist-info}/METADATA +1 -1
natural_pdf-0.1.34.dist-info/RECORD +121 -0
optimization/memory_comparison.py +73 -58
optimization/pdf_analyzer.py +141 -96
optimization/performance_analysis.py +111 -110
optimization/test_cleanup_methods.py +47 -36
optimization/test_memory_fix.py +40 -39
tools/bad_pdf_eval/__init__.py +0 -1
tools/bad_pdf_eval/analyser.py +35 -18
tools/bad_pdf_eval/collate_summaries.py +22 -18
tools/bad_pdf_eval/compile_attempts_markdown.py +127 -0
tools/bad_pdf_eval/eval_suite.py +21 -9
tools/bad_pdf_eval/evaluate_quality.py +198 -0
tools/bad_pdf_eval/export_enrichment_csv.py +12 -8
tools/bad_pdf_eval/llm_enrich.py +71 -39
tools/bad_pdf_eval/llm_enrich_with_retry.py +289 -0
tools/bad_pdf_eval/reporter.py +1 -1
tools/bad_pdf_eval/utils.py +7 -4
natural_pdf-0.1.32.dist-info/RECORD +0 -118
{natural_pdf-0.1.32.dist-info → natural_pdf-0.1.34.dist-info}/WHEEL +0 -0
{natural_pdf-0.1.32.dist-info → natural_pdf-0.1.34.dist-info}/entry_points.txt +0 -0
{natural_pdf-0.1.32.dist-info → natural_pdf-0.1.34.dist-info}/licenses/LICENSE +0 -0
{natural_pdf-0.1.32.dist-info → natural_pdf-0.1.34.dist-info}/top_level.txt +0 -0

optimization/test_memory_fix.py CHANGED Viewed

@@ -10,9 +10,10 @@ This test verifies that:
 import gc
 import os
-import psutil
 import sys
 from pathlib import Path
+import psutil
 import pytest
 import natural_pdf as npdf
@@ -26,7 +27,7 @@ def get_memory_usage():
 class TestCharacterMemoryFix:
     """Test suite for character memory optimization"""
     @pytest.fixture
     def test_pdf_path(self):
         """Get path to a test PDF"""
@@ -35,128 +36,128 @@ class TestCharacterMemoryFix:
         if not test_path.exists():
             pytest.skip("Test PDF not found")
         return str(test_path)
     def test_character_access_still_works(self, test_pdf_path):
         """Test that character access through words still works after optimization"""
         pdf = npdf.PDF(test_pdf_path)
         page = pdf.pages[0]
-        # Force loading of elements
+        # Force loading of elements
         text_elements = page.find_all("text")
         # Test that we have text elements
         assert len(text_elements) > 0, "Should have text elements"
         print(f"Found {len(text_elements)} text elements")
-        # Test that words can access their constituent characters
+        # Test that words can access their constituent characters
         for word in text_elements[:5]:  # Test first 5 words
-            if hasattr(word, '_char_indices') and word._char_indices:
+            if hasattr(word, "_char_indices") and word._char_indices:
                 # New optimized approach
                 constituent_chars = word.chars
                 assert isinstance(constituent_chars, list), "word.chars should return a list"
                 assert len(constituent_chars) > 0, "Should have constituent characters"
                 # Test character properties
                 for char in constituent_chars[:3]:  # Test first 3 chars of each word
-                    assert hasattr(char, 'text'), "Character should have text attribute"
-                    assert hasattr(char, 'x0'), "Character should have x0 coordinate"
-            elif hasattr(word, '_char_dicts') and word._char_dicts:
+                    assert hasattr(char, "text"), "Character should have text attribute"
+                    assert hasattr(char, "x0"), "Character should have x0 coordinate"
+            elif hasattr(word, "_char_dicts") and word._char_dicts:
                 # Old approach - should still work for compatibility
                 char_dicts = word._char_dicts
                 assert isinstance(char_dicts, list), "word._char_dicts should be a list"
                 assert len(char_dicts) > 0, "Should have character dictionaries"
     def test_memory_usage_improvement(self, test_pdf_path):
         """Test that memory usage is improved with the optimization"""
         # This test will compare memory usage patterns
         # Note: Exact numbers will vary, but we should see improvement
         pdf = npdf.PDF(test_pdf_path)
         page = pdf.pages[0]
         # Measure memory before loading elements
         gc.collect()
         memory_before = get_memory_usage()
         # Load elements (this triggers the optimization)
         chars = page.find_all("text")
         words = page.find_all("words")
         # Measure memory after loading
         gc.collect()
         memory_after = get_memory_usage()
         memory_used = memory_after - memory_before
         # Log the memory usage for analysis
         print(f"\nMemory usage analysis:")
         print(f"Characters loaded: {len(chars)}")
         print(f"Words loaded: {len(words)}")
         print(f"Memory used: {memory_used:.2f} MB")
         print(f"Memory per character: {memory_used / len(chars) * 1024:.2f} KB" if chars else "N/A")
         # The memory usage should be reasonable (not exact test due to variability)
         # Main goal is to verify no crashes and reasonable memory usage
         assert memory_used < 100, f"Memory usage seems too high: {memory_used:.2f} MB"
     def test_word_text_extraction_works(self, test_pdf_path):
         """Test that text extraction from words still works correctly"""
         pdf = npdf.PDF(test_pdf_path)
         page = pdf.pages[0]
         words = page.find_all("text")  # All text elements are words in this PDF
         # Test text extraction from words
         for word in words[:10]:  # Test first 10 words
             word_text = word.text
             assert isinstance(word_text, str), "Word text should be a string"
             # Text should not be empty for actual words
             if word_text.strip():  # Skip empty/whitespace words
                 assert len(word_text) > 0, "Non-empty words should have text content"
     def test_backwards_compatibility(self, test_pdf_path):
         """Test that existing code patterns still work"""
         pdf = npdf.PDF(test_pdf_path)
         page = pdf.pages[0]
         # Test that existing element access patterns work
         all_elements = page.find_all("text")
         assert len(all_elements) > 0, "Should find text elements"
         # Test that element properties are accessible
         for element in all_elements[:5]:
-            assert hasattr(element, 'text'), "Element should have text attribute"
-            assert hasattr(element, 'x0'), "Element should have x0 coordinate"
-            assert hasattr(element, 'top'), "Element should have top coordinate"
-            assert hasattr(element, 'width'), "Element should have width"
-            assert hasattr(element, 'height'), "Element should have height"
+            assert hasattr(element, "text"), "Element should have text attribute"
+            assert hasattr(element, "x0"), "Element should have x0 coordinate"
+            assert hasattr(element, "top"), "Element should have top coordinate"
+            assert hasattr(element, "width"), "Element should have width"
+            assert hasattr(element, "height"), "Element should have height"
 def main():
     """Run the memory fix test"""
     print("Running character memory optimization test...")
     # Check if test PDF exists
     test_pdf = Path("pdfs/01-practice.pdf")
     if not test_pdf.exists():
         print(f"ERROR: Test PDF not found at {test_pdf}")
         print("Please ensure the test PDF exists before running this test.")
         return 1
     # Run pytest on just this file
     exit_code = pytest.main([__file__, "-v", "-s"])
     if exit_code == 0:
         print("\n✅ All memory optimization tests passed!")
         print("The character duplication fix is working correctly.")
     else:
         print("\n❌ Some tests failed!")
         print("The memory optimization needs investigation.")
     return exit_code
 if __name__ == "__main__":
-    exit(main())
+    exit(main())

tools/bad_pdf_eval/__init__.py CHANGED Viewed

@@ -1 +0,0 @@
-

tools/bad_pdf_eval/analyser.py CHANGED Viewed

@@ -3,15 +3,16 @@ from __future__ import annotations
 import re
 import time
 from pathlib import Path
-from typing import Dict, List, Optional, Any
+from typing import Any, Dict, List, Optional
-import natural_pdf as npdf
 from PIL import Image
-from rich.table import Table
 from rich.console import Console
+from rich.table import Table
+import natural_pdf as npdf
+from .reporter import log_section, save_json
 from .utils import slugify
-from .reporter import save_json, log_section
 console = Console()
@@ -201,7 +202,10 @@ class BadPDFAnalyzer:
                 page_result["goal_tag"] = "unknown"
             # Difficulties determination
-            if page_result.get("text_len", 0) < 100 and page_result.get("ocr_text_elements", 0) > 20:
+            if (
+                page_result.get("text_len", 0) < 100
+                and page_result.get("ocr_text_elements", 0) > 20
+            ):
                 difficulties.append("scanned_image")
             page_result["difficulties"] = difficulties
@@ -235,40 +239,48 @@ class BadPDFAnalyzer:
                 import_lines.append("import pandas as pd")
             code_lines: List[str] = import_lines + [
-                f"pdf = PDF(\"{self.pdf_path}\")",
+                f'pdf = PDF("{self.pdf_path}")',
                 f"page = pdf.pages[{page_idx_1based - 1}]  # page {page_idx_1based}",
             ]
             thought_lines: List[str] = []
             # build reasoning
-            thought_lines.append(f"Goal tag: {page_result['goal_tag']}. Detected difficulties: {', '.join(difficulties) or 'none'}.")
+            thought_lines.append(
+                f"Goal tag: {page_result['goal_tag']}. Detected difficulties: {', '.join(difficulties) or 'none'}."
+            )
             if page_result["goal_tag"] == "table_extraction":
-                thought_lines.append("Plan: rely on layout models to locate tables, then extract with Natural-PDF helper.")
+                thought_lines.append(
+                    "Plan: rely on layout models to locate tables, then extract with Natural-PDF helper."
+                )
                 if page_result.get("layout_tatr_count", 0) > 0:
                     code_lines.append("page.analyze_layout('tatr')  # adds 'table' regions")
                 else:
                     code_lines.append("page.analyze_layout()  # YOLO fallback")
                 if page_result.get("layout_tatr_count", 0) > 1:
-                    thought_lines.append("Multiple tables detected, choose second as goal mentions 'second table'.")
+                    thought_lines.append(
+                        "Multiple tables detected, choose second as goal mentions 'second table'."
+                    )
                     code_lines.append("tables = page.find_all('table')")
                     code_lines.append("tbl = tables[1]")
                 else:
                     code_lines.append("tbl = page.find('table')  # first table")
-                code_lines.extend([
-                    "data = tbl.extract_table()",
-                    "columns, rows = data[0], data[1:]",
-                    "df = pd.DataFrame(rows, columns=columns)",
-                ])
+                code_lines.extend(
+                    [
+                        "data = tbl.extract_table()",
+                        "columns, rows = data[0], data[1:]",
+                        "df = pd.DataFrame(rows, columns=columns)",
+                    ]
+                )
             elif page_result["goal_tag"] == "text_extraction":
                 anchor = _first_anchor_from_goal(goal_str)
                 if "scanned_image" in difficulties:
                     thought_lines.append("No native text detected; need OCR before querying.")
                     code_lines.append("page.apply_ocr(engine='paddle')")
                 thought_lines.append(f"Anchor on text '{anchor}' then read below region.")
-                code_lines.append(f"section = page.find(\"text:contains({anchor})\").below(0, 50)")
+                code_lines.append(f'section = page.find("text:contains({anchor})").below(0, 50)')
                 code_lines.append("text = section.extract_text()")
             else:
                 thought_lines.append("Goal unclear; placeholder snippet provided.")
@@ -282,8 +294,13 @@ class BadPDFAnalyzer:
             # Provide quick heuristic comment
             if page_result.get("text_len", 0) == 0 and page_result.get("ocr_text_elements", 0) > 20:
                 page_result["auto_comment"] = "Likely scanned/needs OCR; no native text."
-            elif page_result.get("text_len", 0) > 1000 and page_result.get("layout_yolo_count", 0) == 0:
-                page_result["auto_comment"] = "Native dense text; YOLO found no regions – may be fine, fonts just small."
+            elif (
+                page_result.get("text_len", 0) > 1000
+                and page_result.get("layout_yolo_count", 0) == 0
+            ):
+                page_result["auto_comment"] = (
+                    "Native dense text; YOLO found no regions – may be fine, fonts just small."
+                )
             else:
                 page_result.setdefault("auto_comment", "")
@@ -299,4 +316,4 @@ PAGE_REGEX = re.compile(r"page\s*(\d{1,4})", re.IGNORECASE)
 def extract_page_hints(text: str) -> List[int]:
-    return [int(m.group(1)) for m in PAGE_REGEX.finditer(text)]
+    return [int(m.group(1)) for m in PAGE_REGEX.finditer(text)]

tools/bad_pdf_eval/collate_summaries.py CHANGED Viewed

@@ -36,11 +36,13 @@ def collect() -> List[dict]:
             tp = (p.get("thought_process") or "").strip()
             if not cs and not tp:
                 continue
-            page_snippets.append({
-                "page": p.get("page_number"),
-                "code": cs,
-                "thought": tp,
-            })
+            page_snippets.append(
+                {
+                    "page": p.get("page_number"),
+                    "code": cs,
+                    "thought": tp,
+                }
+            )
             # --- lightweight feature tagging --------------------------------
             gt = (p.get("goal_tag") or "").lower()
             if "table" in gt:
@@ -64,18 +66,20 @@ def collect() -> List[dict]:
         if language and language.lower() not in {"english", "en", "en-us"}:
             features.add("non_english")
-        rows.append({
-            "id": submission_id,
-            "language": language,
-            "issues": issues,
-            "description": description,
-            "doc_thought": doc_tp,
-            "doc_code": doc_cs,
-            "doc_difficult": doc_diff,
-            "doc_test": doc_test,
-            "snippets": page_snippets,
-            "features": sorted(features),
-        })
+        rows.append(
+            {
+                "id": submission_id,
+                "language": language,
+                "issues": issues,
+                "description": description,
+                "doc_thought": doc_tp,
+                "doc_code": doc_cs,
+                "doc_difficult": doc_diff,
+                "doc_test": doc_test,
+                "snippets": page_snippets,
+                "features": sorted(features),
+            }
+        )
     return rows
@@ -127,4 +131,4 @@ def export_markdown(rows: List[dict]):
 if __name__ == "__main__":
     rows = collect()
     export_markdown(rows)
-    print(f"Wrote {len(rows)} summaries to eval_results/collated_summary.md")
+    print(f"Wrote {len(rows)} summaries to eval_results/collated_summary.md")

tools/bad_pdf_eval/compile_attempts_markdown.py ADDED Viewed

@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+"""Compile multi-try enrichment attempts into a single Markdown report.
+For every `summary.json` produced by the retry enrichment pipeline, this script
+collects all attempts (initial + retries) and writes a human-readable markdown
+file that shows, *per PDF*, every attempt alongside its quality score.
+Example
+-------
+$ python -m tools.bad_pdf_eval.compile_attempts_markdown \
+    --output eval_results/attempts_progress.md
+The resulting markdown looks like::
+    # Attempts Progress Report
+    ## obe1Vq5 — obe1Vq5.pdf
+    ### Attempt 0 (Score: 3/12)
+    ...
+    ### Attempt 1 (Score: 6/12)
+    ...
+This file can then be fed into an LLM for meta-analysis of score improvements
+and guidance quality.
+"""
+from __future__ import annotations
+import argparse
+import json
+from pathlib import Path
+from typing import Iterable, List
+# Re-use the same constants as other evaluation utilities ---------------------
+ROOT = Path(__file__).resolve().parent.parent.parent  # repo root
+EVAL_DIR = ROOT / "eval_results"
+# ---------------------------------------------------------------------------
+def iter_summary_paths(submission: str | None) -> Iterable[Path]:
+    """Yield all summary.json paths (optionally filtered by submission ID)."""
+    if submission:
+        p = EVAL_DIR / submission / "summary.json"
+        if not p.exists():
+            raise FileNotFoundError(
+                f"No summary.json found for submission '{submission}' – expected {p}"
+            )
+        yield p
+    else:
+        yield from EVAL_DIR.glob("*/summary.json")
+def load_summary(path: Path) -> dict:
+    """Return the parsed JSON for the given summary path."""
+    try:
+        return json.loads(path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise ValueError(f"Invalid JSON in {path}: {exc}") from exc
+def build_markdown_for_summary(submission_id: str, summary: dict) -> str:
+    """Return a markdown string for a single submission (all attempts)."""
+    pdf_name = Path(summary.get("pdf", "")).name or "<unknown.pdf>"
+    header = f"## {submission_id} — {pdf_name}"
+    attempts: List[dict] = sorted(summary.get("attempts", []), key=lambda d: d.get("attempt", 0))
+    if not attempts:
+        return header + "\n\n_No attempts recorded – run the enrichment retry pipeline first._\n"
+    sections: List[str] = [header]
+    for att in attempts:
+        num = att.get("attempt", "?")
+        score = att.get("score", "?")
+        tp = att.get("thought_process", "").strip()
+        code = att.get("code_suggestion", "").rstrip()
+        sections.append(f"### Attempt {num} (Score: {score}/12)")
+        if tp:
+            sections.append("**Thought Process**")
+            # indent each line with > for blockquote formatting
+            quoted_tp = "\n".join(f"> {line}" for line in tp.splitlines())
+            sections.append(quoted_tp)
+        if code:
+            sections.append("```python")
+            sections.append(code)
+            sections.append("```")
+    return "\n\n".join(sections)
+def compile_report(paths: Iterable[Path]) -> str:
+    """Aggregate individual submission markdown into one report."""
+    pieces: List[str] = ["# Attempts Progress Report", ""]
+    for p in sorted(paths):
+        submission_id = p.parent.name
+        summary = load_summary(p)
+        pieces.append(build_markdown_for_summary(submission_id, summary))
+        pieces.append("---")  # horizontal rule between PDFs
+    return "\n\n".join(pieces).rstrip("-\n")
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Compile multi-retry attempts into markdown.")
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=EVAL_DIR / "attempts_progress.md",
+        help="Destination .md file (default: eval_results/attempts_progress.md).",
+    )
+    parser.add_argument("--submission", help="Only compile a single submission ID.")
+    args = parser.parse_args()
+    summary_paths = list(iter_summary_paths(args.submission))
+    if not summary_paths:
+        raise SystemExit("No summary.json files found.")
+    md = compile_report(summary_paths)
+    args.output.write_text(md, encoding="utf-8")
+    print(
+        f"[ok] Wrote markdown report to {args.output.relative_to(ROOT)} (covers {len(summary_paths)} PDFs)"
+    )
+if __name__ == "__main__":
+    main()

tools/bad_pdf_eval/eval_suite.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import argparse
 import re
 from pathlib import Path
-from typing import List, Dict
+from typing import Dict, List
 import pandas as pd
 from rich.console import Console
-from .utils import find_local_pdf, slugify
 from .analyser import BadPDFAnalyzer, extract_page_hints
 from .reporter import save_json
+from .utils import find_local_pdf, slugify
 console = Console()
@@ -43,9 +43,15 @@ def main():
         default="eval_results",
         help="Directory to write results into (will be git-ignored)",
     )
-    parser.add_argument("--max-row", type=int, default=None, help="debug: process only first n CSV rows")
-    parser.add_argument("--limit", type=int, default=None, help="process at most N PDFs with local files")
-    parser.add_argument("--overwrite", action="store_true", help="re-run analysis even if summary.json exists")
+    parser.add_argument(
+        "--max-row", type=int, default=None, help="debug: process only first n CSV rows"
+    )
+    parser.add_argument(
+        "--limit", type=int, default=None, help="process at most N PDFs with local files"
+    )
+    parser.add_argument(
+        "--overwrite", action="store_true", help="re-run analysis even if summary.json exists"
+    )
     args = parser.parse_args()
     csv_path = Path(args.csv)
@@ -70,7 +76,9 @@ def main():
             # Ignore files that are not .pdf (e.g. ZIPs mistakenly included)
             if pdf_path.suffix.lower() != ".pdf":
-                console.print(f"[yellow]Not a PDF ({pdf_path.suffix}) for {submission_id}; skipping.")
+                console.print(
+                    f"[yellow]Not a PDF ({pdf_path.suffix}) for {submission_id}; skipping."
+                )
                 continue
             sub_output = output_root / submission_id
@@ -88,12 +96,16 @@ def main():
                 console.print(f"[yellow]Could not copy PDF into results folder: {copy_err}")
             if summary_path.exists() and not args.overwrite:
-                console.print(f"[yellow]Summary exists for {submission_id}; skipping (use --overwrite to refresh)")
+                console.print(
+                    f"[yellow]Summary exists for {submission_id}; skipping (use --overwrite to refresh)"
+                )
                 continue
             pages = build_pages_list(row)
             try:
-                analyser = BadPDFAnalyzer(pdf_path=pdf_path, output_dir=sub_output, submission_meta=row, pages=pages)
+                analyser = BadPDFAnalyzer(
+                    pdf_path=pdf_path, output_dir=sub_output, submission_meta=row, pages=pages
+                )
                 summary = analyser.run()
                 master_records.append(summary)
             except Exception as e:
@@ -113,4 +125,4 @@ def main():
 if __name__ == "__main__":
-    main()
+    main()

natural-pdf 0.1.32__py3-none-any.whl → 0.1.34__py3-none-any.whl

natural-pdf 0.1.32__py3-none-any.whl → 0.1.34__py3-none-any.whl