PyPI - natocr - Versions diffs - 1.3.3__tar.gz - Mend

natocr 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

natocr-1.3.3/LICENSE +21 -0
natocr-1.3.3/PKG-INFO +203 -0
natocr-1.3.3/README.md +158 -0
natocr-1.3.3/natocr/__init__.py +21 -0
natocr-1.3.3/natocr/core.py +112 -0
natocr-1.3.3/natocr/macos.py +177 -0
natocr-1.3.3/natocr/models.py +108 -0
natocr-1.3.3/natocr/windows.py +174 -0
natocr-1.3.3/natocr.egg-info/PKG-INFO +203 -0
natocr-1.3.3/natocr.egg-info/SOURCES.txt +20 -0
natocr-1.3.3/natocr.egg-info/dependency_links.txt +1 -0
natocr-1.3.3/natocr.egg-info/requires.txt +17 -0
natocr-1.3.3/natocr.egg-info/top_level.txt +1 -0
natocr-1.3.3/pyproject.toml +139 -0
natocr-1.3.3/setup.cfg +4 -0
natocr-1.3.3/tests/test_integration_macos.py +79 -0
natocr-1.3.3/tests/test_integration_windows.py +81 -0
natocr-1.3.3/tests/test_macos.py +164 -0
natocr-1.3.3/tests/test_models.py +59 -0
natocr-1.3.3/tests/test_ocr.py +117 -0
natocr-1.3.3/tests/test_package.py +12 -0
natocr-1.3.3/tests/test_windows.py +123 -0

natocr-1.3.3/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

natocr-1.3.3/PKG-INFO ADDED Viewed

@@ -0,0 +1,203 @@
+Metadata-Version: 2.4
+Name: natocr
+Version: 1.3.3
+Summary: Native OCR library using platform-specific frameworks (macOS Vision, Windows Runtime OCR)
+Author-email: alfredchiesa <alfred.personal@icloud.com>
+Maintainer-email: alfredchiesa <alfred.personal@icloud.com>
+License: MIT
+Project-URL: Homepage, https://github.com/alfredchiesa/natocr
+Project-URL: Documentation, https://alfredchiesa.github.io/natocr
+Project-URL: Repository, https://github.com/alfredchiesa/natocr.git
+Project-URL: Issues, https://github.com/alfredchiesa/natocr/issues
+Project-URL: Changelog, https://github.com/alfredchiesa/natocr/blob/main/CHANGELOG.md
+Keywords: ocr,text-recognition,vision,macos,windows,native
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: MacOS
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Scientific/Engineering :: Image Recognition
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: Pillow>=9.0.0
+Requires-Dist: numpy>=1.21
+Provides-Extra: macos
+Requires-Dist: pyobjc-framework-Vision>=11.1; extra == "macos"
+Requires-Dist: pyobjc-framework-Quartz>=11.1; extra == "macos"
+Provides-Extra: windows
+Requires-Dist: pywin32>=311; extra == "windows"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: python-semantic-release>=8.0.0; extra == "dev"
+Requires-Dist: mkdocs>=1.6.1; extra == "dev"
+Requires-Dist: mkdocs-material>=9.6.21; extra == "dev"
+Requires-Dist: mkdocstrings[python]>=0.26; extra == "dev"
+Dynamic: license-file
+# natocr
+**natocr** (*native ocr*) is a small Python wrapper around the OCR engines that
+already ship with macOS and Windows: Vision framework on macOS and Windows
+Runtime OCR on Windows.
+These built-in engines are generally faster, more efficient, and more accurate
+than third-party alternatives like Tesseract. **natocr** makes reaching for them
+painless via one clean Python API instead of wrangling with Objective-C bridges
+or WinRT async plumbing.
+## Install
+```bash
+pip install natocr[macos]      # on macOS
+pip install natocr[windows]    # on Windows
+```
+## Quick start
+```python
+from natocr import OCR
+ocr = OCR()                    # defaults to english
+result = ocr.recognize("invoice.png")
+print(result.text)
+```
+```text
+Invoice #1042 Total $58.20 Thank you!
+```
+### Confidence Scores and Bounding Boxes
+`recognize()` returns an `OCRResult`. Beyond the flat `.text`, you get a
+per-detection breakdown with bounding boxes and (*on macOS*) confidence scores:
+```python
+result = ocr.recognize("receipt.png")
+print(result.confidence)          # average confidence, or None if unavailable
+for element in result.elements:
+    box = element.bounds.bounds   # (x, y, width, height) in pixels
+    print(f"{element.text!r} @ {box} conf={element.confidence}")
+```
+```text
+0.93
+'Acme Coffee' @ (24.0, 18.0, 180.0, 32.0) conf=0.97
+'Latte' @ (24.0, 70.0, 96.0, 28.0) conf=0.95
+'$4.50' @ (220.0, 70.0, 80.0, 28.0) conf=0.88
+```
+### Lines and Words
+There are also convenience views for grouping results by reading order:
+```python
+result.lines      # ['Acme Coffee', 'Latte $4.50']  - elements grouped into lines
+result.words      # list of TextElement with non-empty text
+```
+### Detection Language
+Pick a different recognition language, and inspect what the current platform
+supports:
+```python
+ocr = OCR(language="fr")
+print(ocr.platform)               # 'darwin' or 'win32'
+print(ocr.supported_languages)    # ['en-US', 'fr-FR', 'de-DE', ...]
+```
+The supported set is decided by the OS and queried live, so
+`supported_languages` always reflects the current machine. On macOS it's
+Vision's built-in set for your macOS version; on Windows it's whatever OCR
+language packs are installed. See the [Usage guide](https://alfredchiesa.github.io/natocr/usage/#supported-languages)
+for the full list and how to add Windows language packs.
+### Alternative Inputs
+`recognize()` accepts more than file paths - hand it whatever you already have
+in memory:
+```python
+from PIL import Image
+import numpy as np
+ocr.recognize("page.png")              # a file path
+ocr.recognize(Image.open("page.png"))  # a PIL image
+ocr.recognize(np.array(image))         # a numpy array (e.g. from OpenCV)
+ocr.recognize(open("page.png", "rb").read())  # raw image bytes
+```
+## Supported File Types
+Images are decoded with [Pillow](https://python-pillow.org/), so any raster
+format Pillow can open works as an input file or byte string.
+| Format | Extensions | Notes |
+| --- | --- | --- |
+| PNG | `.png` | recommended - lossless |
+| JPEG | `.jpg`, `.jpeg` | great for photos of documents |
+| TIFF | `.tif`, `.tiff` | common for scans |
+| BMP | `.bmp` | uncompressed bitmap |
+| GIF | `.gif` | first frame is used |
+| WebP | `.webp` | modern lossy/lossless |
+| PPM/PGM | `.ppm`, `.pgm` | netpbm bitmaps |
+In addition to file paths, `recognize()` accepts these in-memory types:
+| Input type | Example |
+| --- | --- |
+| `str` (file path) | `ocr.recognize("page.png")` |
+| `PIL.Image.Image` | `ocr.recognize(Image.open("page.png"))` |
+| `numpy.ndarray` | `ocr.recognize(np.array(image))` |
+| `bytes` (encoded image) | `ocr.recognize(data)` |
+> [!NOTE]
+> PDFs and other multi-page documents aren't decoded directly - rasterize a page
+> to one of the formats above first (e.g. with `pdf2image` or `pymupdf`).
+## Testing
+Install the dev dependencies (in a virtualenv), then run the suite. The tests
+mock the native macOS Vision and Windows Runtime backends, so they run anywhere
+without those frameworks installed.
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e ".[dev]"
+```
+Run everything with coverage (coverage is wired up in `pyproject.toml`, so plain
+`pytest` already reports it):
+```bash
+pytest
+```
+Other handy invocations:
+```bash
+# run a single test file
+pytest tests/test_models.py
+# run one test by name
+pytest -k test_lines_groups_close_y_into_single_line
+# verbose output
+pytest -v
+```
+Coverage reports land in the terminal, in `htmlcov/index.html`, and in
+`coverage.xml`.

natocr-1.3.3/README.md ADDED Viewed

@@ -0,0 +1,158 @@
+# natocr
+**natocr** (*native ocr*) is a small Python wrapper around the OCR engines that
+already ship with macOS and Windows: Vision framework on macOS and Windows
+Runtime OCR on Windows.
+These built-in engines are generally faster, more efficient, and more accurate
+than third-party alternatives like Tesseract. **natocr** makes reaching for them
+painless via one clean Python API instead of wrangling with Objective-C bridges
+or WinRT async plumbing.
+## Install
+```bash
+pip install natocr[macos]      # on macOS
+pip install natocr[windows]    # on Windows
+```
+## Quick start
+```python
+from natocr import OCR
+ocr = OCR()                    # defaults to english
+result = ocr.recognize("invoice.png")
+print(result.text)
+```
+```text
+Invoice #1042 Total $58.20 Thank you!
+```
+### Confidence Scores and Bounding Boxes
+`recognize()` returns an `OCRResult`. Beyond the flat `.text`, you get a
+per-detection breakdown with bounding boxes and (*on macOS*) confidence scores:
+```python
+result = ocr.recognize("receipt.png")
+print(result.confidence)          # average confidence, or None if unavailable
+for element in result.elements:
+    box = element.bounds.bounds   # (x, y, width, height) in pixels
+    print(f"{element.text!r} @ {box} conf={element.confidence}")
+```
+```text
+0.93
+'Acme Coffee' @ (24.0, 18.0, 180.0, 32.0) conf=0.97
+'Latte' @ (24.0, 70.0, 96.0, 28.0) conf=0.95
+'$4.50' @ (220.0, 70.0, 80.0, 28.0) conf=0.88
+```
+### Lines and Words
+There are also convenience views for grouping results by reading order:
+```python
+result.lines      # ['Acme Coffee', 'Latte $4.50']  - elements grouped into lines
+result.words      # list of TextElement with non-empty text
+```
+### Detection Language
+Pick a different recognition language, and inspect what the current platform
+supports:
+```python
+ocr = OCR(language="fr")
+print(ocr.platform)               # 'darwin' or 'win32'
+print(ocr.supported_languages)    # ['en-US', 'fr-FR', 'de-DE', ...]
+```
+The supported set is decided by the OS and queried live, so
+`supported_languages` always reflects the current machine. On macOS it's
+Vision's built-in set for your macOS version; on Windows it's whatever OCR
+language packs are installed. See the [Usage guide](https://alfredchiesa.github.io/natocr/usage/#supported-languages)
+for the full list and how to add Windows language packs.
+### Alternative Inputs
+`recognize()` accepts more than file paths - hand it whatever you already have
+in memory:
+```python
+from PIL import Image
+import numpy as np
+ocr.recognize("page.png")              # a file path
+ocr.recognize(Image.open("page.png"))  # a PIL image
+ocr.recognize(np.array(image))         # a numpy array (e.g. from OpenCV)
+ocr.recognize(open("page.png", "rb").read())  # raw image bytes
+```
+## Supported File Types
+Images are decoded with [Pillow](https://python-pillow.org/), so any raster
+format Pillow can open works as an input file or byte string.
+| Format | Extensions | Notes |
+| --- | --- | --- |
+| PNG | `.png` | recommended - lossless |
+| JPEG | `.jpg`, `.jpeg` | great for photos of documents |
+| TIFF | `.tif`, `.tiff` | common for scans |
+| BMP | `.bmp` | uncompressed bitmap |
+| GIF | `.gif` | first frame is used |
+| WebP | `.webp` | modern lossy/lossless |
+| PPM/PGM | `.ppm`, `.pgm` | netpbm bitmaps |
+In addition to file paths, `recognize()` accepts these in-memory types:
+| Input type | Example |
+| --- | --- |
+| `str` (file path) | `ocr.recognize("page.png")` |
+| `PIL.Image.Image` | `ocr.recognize(Image.open("page.png"))` |
+| `numpy.ndarray` | `ocr.recognize(np.array(image))` |
+| `bytes` (encoded image) | `ocr.recognize(data)` |
+> [!NOTE]
+> PDFs and other multi-page documents aren't decoded directly - rasterize a page
+> to one of the formats above first (e.g. with `pdf2image` or `pymupdf`).
+## Testing
+Install the dev dependencies (in a virtualenv), then run the suite. The tests
+mock the native macOS Vision and Windows Runtime backends, so they run anywhere
+without those frameworks installed.
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e ".[dev]"
+```
+Run everything with coverage (coverage is wired up in `pyproject.toml`, so plain
+`pytest` already reports it):
+```bash
+pytest
+```
+Other handy invocations:
+```bash
+# run a single test file
+pytest tests/test_models.py
+# run one test by name
+pytest -k test_lines_groups_close_y_into_single_line
+# verbose output
+pytest -v
+```
+Coverage reports land in the terminal, in `htmlcov/index.html`, and in
+`coverage.xml`.

natocr-1.3.3/natocr/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""
+natocr - native ocr library using platform-specific frameworks
+this package provides ocr functionality using native frameworks:
+- macos: vision framework
+- windows: windows runtime ocr
+"""
+from .core import OCR
+from .models import BoundingBox, OCRResult, TextElement
+# package metadata exposed at import time; the version string matches the
+# "Version: 1.3.3" field of the distribution's PKG-INFO
+__version__ = "1.3.3"
+__author__ = "alfredchiesa"
+__email__ = "alfred.personal@icloud.com"
+# public api: the OCR entry point plus the result model classes imported above
+__all__ = [
+    "OCR",
+    "OCRResult",
+    "TextElement",
+    "BoundingBox",
+]

natocr-1.3.3/natocr/core.py ADDED Viewed

@@ -0,0 +1,112 @@
+"""
+main ocr class with platform detection and delegation
+"""
+import io
+import os
+import sys
+from typing import List, Union
+import numpy as np
+from PIL import Image
+from .macos import MacOSOCR
+from .models import OCRResult
+from .windows import WindowsOCR
+class OCR:
+    """Run OCR using the operating system's native engine.
+    Picks the right backend for the current platform - the Vision framework on
+    macOS, Windows Runtime OCR on Windows - and gives you one API over both.
+    Example:
+        ```python
+        from natocr import OCR
+        ocr = OCR()                       # english by default
+        result = ocr.recognize("invoice.png")
+        print(result.text)
+        ```
+    Args:
+        language: language code for text recognition (default: ``"en"``).
+    Raises:
+        RuntimeError: on an unsupported platform, or when the platform's native
+            OCR dependencies aren't installed.
+    """
+    def __init__(self, language: str = "en"):
+        self.language = language
+        self._backend = None
+        self._initialize_backend()
+    def _initialize_backend(self):
+        """initialize platform-specific ocr backend"""
+        if sys.platform == "darwin":
+            try:
+                self._backend = MacOSOCR(self.language)
+            except ImportError:
+                raise RuntimeError(
+                    "macos dependencies not installed. install with: pip install natocr[macos]"
+                )
+        elif sys.platform == "win32":
+            try:
+                self._backend = WindowsOCR(self.language)
+            except ImportError:
+                raise RuntimeError(
+                    "windows dependencies not installed. install with: pip install natocr[windows]"
+                )
+        else:
+            raise RuntimeError(f"unsupported platform: {sys.platform}")
+    def recognize(self, image: Union[str, Image.Image, np.ndarray, bytes]) -> OCRResult:
+        """Recognize text in an image.
+        Args:
+            image: what to read. One of: a file path (``str``), a
+                ``PIL.Image.Image``, a ``numpy.ndarray``, or raw encoded image
+                ``bytes``.
+        Returns:
+            An [OCRResult][natocr.OCRResult] with the detected text and
+            per-element metadata.
+        Raises:
+            ValueError: if ``image`` isn't one of the supported types.
+        """
+        # convert input to pil image for consistent processing
+        pil_image = self._convert_to_pil(image)
+        # delegate to platform-specific implementation
+        return self._backend.recognize(pil_image)
+    def _convert_to_pil(
+        self, image: Union[str, Image.Image, np.ndarray, bytes]
+    ) -> Image.Image:
+        """convert various image formats to pil image"""
+        if isinstance(image, str):
+            # file path
+            return Image.open(image)
+        elif isinstance(image, Image.Image):
+            # already a pil image
+            return image
+        elif isinstance(image, np.ndarray):
+            # numpy array
+            return Image.fromarray(image)
+        elif isinstance(image, bytes):
+            # raw bytes
+            return Image.open(io.BytesIO(image))
+        else:
+            raise ValueError(f"unsupported image type: {type(image)}")
+    @property
+    def supported_languages(self) -> List[str]:
+        """Language codes the current platform's backend supports."""
+        return self._backend.supported_languages if self._backend else []
+    @property
+    def platform(self) -> str:
+        """The current platform identifier (e.g. ``"darwin"`` or ``"win32"``)."""
+        return sys.platform

natocr-1.3.3/natocr/macos.py ADDED Viewed

@@ -0,0 +1,177 @@
+"""
+macos implementation using vision framework
+"""
+import io
+from typing import List
+from PIL import Image
+try:
+    from Foundation import NSData
+    from Vision import (
+        VNImageRequestHandler,
+        VNRecognizeTextRequest,
+        VNRequestTextRecognitionLevelAccurate,
+    )
+    VISION_AVAILABLE = True
+except ImportError:
+    VISION_AVAILABLE = False
+from .models import BoundingBox, OCRResult, TextElement
+# curated fallback if vision's live query fails - the accurate recognizer's set
+# as of macos 15 (bcp-47 tags, exactly what vision returns).
+# MacOSOCR.supported_languages returns a copy of this list when the live query
+# errors out or comes back empty.
+# NOTE(review): "vi-VT" is not a standard region subtag (vietnam is "VN");
+# presumably kept verbatim because vision reports it this way - confirm before
+# "correcting" it.
+COMMON_LANGUAGES = [
+    "en-US",
+    "fr-FR",
+    "it-IT",
+    "de-DE",
+    "es-ES",
+    "pt-BR",
+    "zh-Hans",
+    "zh-Hant",
+    "yue-Hans",
+    "yue-Hant",
+    "ko-KR",
+    "ja-JP",
+    "ru-RU",
+    "uk-UA",
+    "th-TH",
+    "vi-VT",
+    "ar-SA",
+    "ars-SA",
+]
+class MacOSOCR:
+    """macos ocr implementation using vision framework"""
+    def __init__(self, language: str = "en"):
+        """
+        initialize macos ocr
+        args:
+            language: language code for text recognition
+        """
+        if not VISION_AVAILABLE:
+            raise ImportError("vision framework not available")
+        self.language = language
+        self._setup_request()
+    def _setup_request(self):
+        """setup vision text recognition request"""
+        self.request = VNRecognizeTextRequest.alloc().init()
+        # pyobjc needs the objc setters, plain attribute assignment is read-only
+        self.request.setRecognitionLanguages_([self.language])
+        self.request.setRecognitionLevel_(VNRequestTextRecognitionLevelAccurate)
+        self.request.setUsesLanguageCorrection_(True)
+    def recognize(self, image: Image.Image) -> OCRResult:
+        """
+        perform ocr on pil image
+        args:
+            image: pil image to process
+        returns:
+            OCRResult with detected text and metadata
+        """
+        # convert pil image to nsdata for vision framework
+        ns_image_data = self._pil_to_nsdata(image)
+        # create image request handler
+        handler = VNImageRequestHandler.alloc().initWithData_options_(ns_image_data, {})
+        # perform text recognition
+        success, error = handler.performRequests_error_([self.request], None)
+        if not success:
+            raise RuntimeError(f"vision framework error: {error}")
+        # extract results
+        observations = self.request.results()
+        if not observations:
+            return OCRResult(text="", confidence=None, elements=[])
+        # process observations into structured result
+        return self._process_observations(observations, image.size)
+    def _pil_to_nsdata(self, image: Image.Image) -> NSData:
+        """convert pil image to nsdata for vision framework"""
+        # convert to rgb if needed
+        if image.mode != "RGB":
+            image = image.convert("RGB")
+        # save to bytes
+        buffer = io.BytesIO()
+        image.save(buffer, format="PNG")
+        image_data = buffer.getvalue()
+        # create nsdata
+        return NSData.dataWithBytes_length_(image_data, len(image_data))
+    def _process_observations(self, observations, image_size) -> OCRResult:
+        """process vision observations into ocr result"""
+        elements = []
+        full_text_parts = []
+        total_confidence = 0.0
+        valid_observations = 0
+        for observation in observations:
+            # get recognized text
+            text = observation.topCandidates_(1)[0].string()
+            confidence = observation.topCandidates_(1)[0].confidence()
+            if text.strip():
+                # get bounding box
+                bbox = observation.boundingBox()
+                # convert normalized coordinates to pixel coordinates
+                x = bbox.origin.x * image_size[0]
+                y = (1.0 - bbox.origin.y - bbox.size.height) * image_size[1]  # flip y
+                width = bbox.size.width * image_size[0]
+                height = bbox.size.height * image_size[1]
+                # create bounding box and text element
+                bounds = BoundingBox(x=x, y=y, width=width, height=height)
+                element = TextElement(text=text, bounds=bounds, confidence=confidence)
+                elements.append(element)
+                # accumulate text and confidence
+                full_text_parts.append(text)
+                total_confidence += confidence
+                valid_observations += 1
+        # calculate average confidence
+        avg_confidence = (
+            total_confidence / valid_observations if valid_observations > 0 else None
+        )
+        # join text parts
+        full_text = " ".join(full_text_parts)
+        return OCRResult(text=full_text, confidence=avg_confidence, elements=elements)
+    @property
+    def supported_languages(self) -> List[str]:
+        """Language codes Vision can recognize on this machine.
+        Queried live from Vision for the request's recognition level, so it
+        always matches what the installed macOS version actually supports
+        (returned as BCP-47 tags like ``en-US``). Falls back to the curated
+        [`COMMON_LANGUAGES`][natocr.macos.COMMON_LANGUAGES] set if the query
+        fails.
+        """
+        # ask vision directly instead of guessing - the set changes per os version
+        try:
+            languages, error = (
+                self.request.supportedRecognitionLanguagesAndReturnError_(None)
+            )
+            if error or not languages:
+                return list(COMMON_LANGUAGES)
+            return list(languages)
+        except Exception:
+            return list(COMMON_LANGUAGES)