natocr 1.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
natocr/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """
2
+ natocr - native ocr library using platform-specific frameworks
3
+
4
+ this package provides ocr functionality using native frameworks:
5
+ - macos: vision framework
6
+ - windows: windows runtime ocr
7
+ """
8
+
9
+ from .core import OCR
10
+ from .models import BoundingBox, OCRResult, TextElement
11
+
12
# package metadata exposed as module attributes
__version__ = "1.3.3"
__author__ = "alfredchiesa"
__email__ = "alfred.personal@icloud.com"

# public api: the names exported by `from natocr import *`
__all__ = [
    "OCR",
    "OCRResult",
    "TextElement",
    "BoundingBox",
]
natocr/core.py ADDED
@@ -0,0 +1,112 @@
1
+ """
2
+ main ocr class with platform detection and delegation
3
+ """
4
+
5
+ import io
6
+ import sys
7
+ from typing import List, Union
8
+
9
+ import numpy as np
10
+ from PIL import Image
11
+
12
+ from .macos import MacOSOCR
13
+ from .models import OCRResult
14
+ from .windows import WindowsOCR
15
+
16
+
17
class OCR:
    """Run OCR using the operating system's native engine.

    Picks the right backend for the current platform - the Vision framework on
    macOS, Windows Runtime OCR on Windows - and gives you one API over both.

    Example:
        ```python
        from natocr import OCR

        ocr = OCR()  # english by default
        result = ocr.recognize("invoice.png")
        print(result.text)
        ```

    Args:
        language: language code for text recognition (default: ``"en"``).

    Raises:
        RuntimeError: on an unsupported platform, or when the platform's native
            OCR dependencies aren't installed.
    """

    def __init__(self, language: str = "en"):
        self.language = language
        self._backend = None
        self._initialize_backend()

    def _initialize_backend(self):
        """Create the backend matching ``sys.platform``.

        Raises:
            RuntimeError: if the platform is neither ``"darwin"`` nor
                ``"win32"``, or if the backend raised ``ImportError`` because
                its native dependencies are missing. The original
                ``ImportError`` is kept as ``__cause__``.
        """
        if sys.platform == "darwin":
            try:
                self._backend = MacOSOCR(self.language)
            except ImportError as err:
                # chain explicitly so the real import failure stays visible
                raise RuntimeError(
                    "macos dependencies not installed. install with: pip install natocr[macos]"
                ) from err
        elif sys.platform == "win32":
            try:
                self._backend = WindowsOCR(self.language)
            except ImportError as err:
                raise RuntimeError(
                    "windows dependencies not installed. install with: pip install natocr[windows]"
                ) from err
        else:
            raise RuntimeError(f"unsupported platform: {sys.platform}")

    def recognize(self, image: Union[str, Image.Image, np.ndarray, bytes]) -> OCRResult:
        """Recognize text in an image.

        Args:
            image: what to read. One of: a file path (``str`` or any
                ``os.PathLike`` such as ``pathlib.Path``), a
                ``PIL.Image.Image``, a ``numpy.ndarray``, or raw encoded image
                ``bytes`` / ``bytearray``.

        Returns:
            An [OCRResult][natocr.OCRResult] with the detected text and
            per-element metadata.

        Raises:
            ValueError: if ``image`` isn't one of the supported types.
        """
        # normalize every accepted input to a pil image, then hand it to the
        # platform backend chosen in __init__
        pil_image = self._convert_to_pil(image)
        return self._backend.recognize(pil_image)

    def _convert_to_pil(
        self, image: Union[str, Image.Image, np.ndarray, bytes]
    ) -> Image.Image:
        """Convert any supported input into a ``PIL.Image.Image``.

        Args:
            image: file path (``str`` / ``os.PathLike``), pil image, numpy
                array, or encoded image ``bytes`` / ``bytearray``.

        Returns:
            The pil image. A pil image passed in is returned unchanged.

        Raises:
            ValueError: if ``image`` is none of the supported types.
        """
        # function-scope import keeps this block self-contained
        import os

        if isinstance(image, (str, os.PathLike)):
            # file path - pillow opens str and path-like objects alike
            return Image.open(image)
        if isinstance(image, Image.Image):
            # already a pil image
            return image
        if isinstance(image, np.ndarray):
            # numpy array
            return Image.fromarray(image)
        if isinstance(image, (bytes, bytearray)):
            # raw encoded bytes
            return Image.open(io.BytesIO(image))
        raise ValueError(f"unsupported image type: {type(image)}")

    @property
    def supported_languages(self) -> List[str]:
        """Language codes the current platform's backend supports."""
        return self._backend.supported_languages if self._backend else []

    @property
    def platform(self) -> str:
        """The current platform identifier (e.g. ``"darwin"`` or ``"win32"``)."""
        return sys.platform
natocr/macos.py ADDED
@@ -0,0 +1,177 @@
1
+ """
2
+ macos implementation using vision framework
3
+ """
4
+
5
+ import io
6
+ from typing import List
7
+
8
+ from PIL import Image
9
+
10
+ try:
11
+ from Foundation import NSData
12
+ from Vision import (
13
+ VNImageRequestHandler,
14
+ VNRecognizeTextRequest,
15
+ VNRequestTextRecognitionLevelAccurate,
16
+ )
17
+
18
+ VISION_AVAILABLE = True
19
+ except ImportError:
20
+ VISION_AVAILABLE = False
21
+
22
+ from .models import BoundingBox, OCRResult, TextElement
23
+
24
# curated fallback if vision's live query fails - the accurate recognizer's set
# as of macos 15 (bcp-47 tags, exactly what vision returns)
# returned (as a copy) by MacOSOCR.supported_languages when the live query
# errors out or comes back empty
COMMON_LANGUAGES = [
    "en-US",
    "fr-FR",
    "it-IT",
    "de-DE",
    "es-ES",
    "pt-BR",
    "zh-Hans",
    "zh-Hant",
    "yue-Hans",
    "yue-Hant",
    "ko-KR",
    "ja-JP",
    "ru-RU",
    "uk-UA",
    "th-TH",
    "vi-VT",
    "ar-SA",
    "ars-SA",
]
46
+
47
+
48
class MacOSOCR:
    """macos ocr implementation using vision framework"""

    def __init__(self, language: str = "en"):
        """
        initialize macos ocr

        args:
            language: language code for text recognition

        raises:
            ImportError: if the vision / foundation bindings could not be
                imported (``VISION_AVAILABLE`` is false)
        """
        if not VISION_AVAILABLE:
            raise ImportError("vision framework not available")

        self.language = language
        self._setup_request()

    def _setup_request(self):
        """setup vision text recognition request"""
        self.request = VNRecognizeTextRequest.alloc().init()
        # pyobjc needs the objc setters, plain attribute assignment is read-only
        self.request.setRecognitionLanguages_([self.language])
        self.request.setRecognitionLevel_(VNRequestTextRecognitionLevelAccurate)
        self.request.setUsesLanguageCorrection_(True)

    def recognize(self, image: Image.Image) -> OCRResult:
        """
        perform ocr on pil image

        args:
            image: pil image to process

        returns:
            OCRResult with detected text and metadata; an empty result
            (``text=""``, ``confidence=None``) when vision reports nothing

        raises:
            RuntimeError: if vision reports that the request failed
        """
        # vision consumes encoded image data, so re-encode the pil image
        ns_image_data = self._pil_to_nsdata(image)
        handler = VNImageRequestHandler.alloc().initWithData_options_(ns_image_data, {})

        # perform text recognition with the request configured in _setup_request
        success, error = handler.performRequests_error_([self.request], None)
        if not success:
            raise RuntimeError(f"vision framework error: {error}")

        observations = self.request.results()
        if not observations:
            return OCRResult(text="", confidence=None, elements=[])

        # image.size is needed to turn normalized boxes into pixel boxes
        return self._process_observations(observations, image.size)

    # the return annotation is a string on purpose: NSData only exists when the
    # guarded pyobjc import succeeded, and an evaluated annotation would raise
    # NameError while this module is being imported on other platforms
    def _pil_to_nsdata(self, image: Image.Image) -> "NSData":
        """convert pil image to png-encoded nsdata for vision framework"""
        # convert to rgb if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # encode to png in memory
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        image_data = buffer.getvalue()

        return NSData.dataWithBytes_length_(image_data, len(image_data))

    def _process_observations(self, observations, image_size) -> OCRResult:
        """
        process vision observations into ocr result

        args:
            observations: the request's results (text observations)
            image_size: ``(width, height)`` of the source image in pixels

        returns:
            OCRResult whose text is the kept strings joined by single spaces
            and whose confidence is the mean over kept observations (``None``
            when nothing was kept)
        """
        image_width, image_height = image_size[0], image_size[1]
        elements = []
        full_text_parts = []
        total_confidence = 0.0

        for observation in observations:
            # ask for the best candidate once; skip observations without one
            # instead of failing on an empty candidate list
            candidates = observation.topCandidates_(1)
            if not candidates:
                continue
            best = candidates[0]
            text = best.string()
            confidence = best.confidence()

            # whitespace-only detections are dropped
            if not text.strip():
                continue

            bbox = observation.boundingBox()

            # scale the normalized box to pixels; the y term flips the axis so
            # the result is measured from the top of the image
            x = bbox.origin.x * image_width
            y = (1.0 - bbox.origin.y - bbox.size.height) * image_height
            width = bbox.size.width * image_width
            height = bbox.size.height * image_height

            bounds = BoundingBox(x=x, y=y, width=width, height=height)
            elements.append(TextElement(text=text, bounds=bounds, confidence=confidence))

            full_text_parts.append(text)
            total_confidence += confidence

        # average confidence over the observations that were kept
        avg_confidence = total_confidence / len(elements) if elements else None

        return OCRResult(
            text=" ".join(full_text_parts),
            confidence=avg_confidence,
            elements=elements,
        )

    @property
    def supported_languages(self) -> List[str]:
        """Language codes Vision can recognize on this machine.

        Queried live from Vision for the request's recognition level, so it
        always matches what the installed macOS version actually supports
        (returned as BCP-47 tags like ``en-US``). Falls back to the curated
        [`COMMON_LANGUAGES`][natocr.macos.COMMON_LANGUAGES] set if the query
        fails.
        """
        # ask vision directly instead of guessing - the set changes per os version
        try:
            languages, error = (
                self.request.supportedRecognitionLanguagesAndReturnError_(None)
            )
            if error or not languages:
                return list(COMMON_LANGUAGES)
            return list(languages)
        except Exception:
            # deliberate best-effort: any bridge failure yields the curated list
            return list(COMMON_LANGUAGES)
natocr/models.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ data models for ocr results
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from typing import List, Optional, Tuple
7
+
8
+
9
@dataclass
class BoundingBox:
    """Rectangle, in image pixels, around one piece of detected text.

    Coordinates are measured from the image's top-left corner; ``y`` increases
    toward the bottom of the image.

    Attributes:
        x: pixel position of the left edge.
        y: pixel position of the top edge.
        width: horizontal extent, in pixels.
        height: vertical extent, in pixels.
    """

    x: float
    y: float
    width: float
    height: float

    @property
    def bounds(self) -> Tuple[float, float, float, float]:
        """Return the rectangle as ``(x, y, width, height)``."""
        left, top = self.x, self.y
        extent_w, extent_h = self.width, self.height
        return left, top, extent_w, extent_h
31
+
32
+
33
@dataclass
class TextElement:
    """One recognized piece of text together with where it was found.

    Attributes:
        text: the recognized string.
        bounds: pixel-space box locating the text in the image.
        confidence: recognition confidence in ``0.0..1.0``, or ``None`` when the
            backend doesn't report one (Windows OCR doesn't).
    """

    text: str
    bounds: BoundingBox
    # defaults to None so backends without confidence scores can omit it
    confidence: Optional[float] = None
47
+
48
+
49
@dataclass
class OCRResult:
    """Everything an OCR pass found in one image.

    Attributes:
        text: all detected text joined into a single string.
        confidence: average confidence across detections, or ``None`` if the
            backend doesn't report confidence.
        elements: per-detection breakdown with text, bounds, and confidence.
            Always a list after construction (``None`` becomes ``[]``).
    """

    text: str
    confidence: Optional[float] = None
    # None is a stand-in for "no list given"; __post_init__ swaps in a fresh
    # list so instances never share one mutable default
    elements: List["TextElement"] = None

    def __post_init__(self):
        # default the mutable elements list when none was passed
        if self.elements is None:
            self.elements = []

    @property
    def words(self) -> List["TextElement"]:
        """The elements that contain non-whitespace text."""
        return [elem for elem in self.elements if elem.text.strip()]

    @property
    def lines(self) -> List[str]:
        """Detected text grouped into lines by vertical position.

        Elements are visited top-to-bottom. An element joins the current line
        when its ``y`` differs from the previously visited element's ``y`` by
        less than half of its own height; otherwise it starts a new line. Each
        line is then joined left-to-right (by ``x``) with single spaces. Falls
        back to ``[text]`` (or ``[]``) when there are no elements.
        """
        if not self.elements:
            return [self.text] if self.text else []

        # walk the elements top-to-bottom, collecting runs of similar y
        grouped = []
        for elem in sorted(self.elements, key=lambda e: (e.bounds.y, e.bounds.x)):
            same_line = (
                bool(grouped)
                and abs(elem.bounds.y - grouped[-1][-1].bounds.y)
                < elem.bounds.height * 0.5
            )
            if same_line:
                grouped[-1].append(elem)
            else:
                grouped.append([elem])

        # the walk above is y-ordered, so elements on one line with slightly
        # different y values can be out of reading order; sort each line by x
        # before joining
        return [
            " ".join(
                member.text for member in sorted(group, key=lambda e: e.bounds.x)
            )
            for group in grouped
        ]
natocr/windows.py ADDED
@@ -0,0 +1,174 @@
1
+ """
2
+ windows implementation using windows runtime ocr
3
+ """
4
+
5
+ import io
6
+ from typing import List, Optional
7
+
8
+ from PIL import Image
9
+
10
+ try:
11
+ import asyncio
12
+
13
+ import winrt.windows.foundation as foundation
14
+ import winrt.windows.graphics.imaging as imaging
15
+ import winrt.windows.media.ocr as ocr
16
+ import winrt.windows.storage.streams as streams
17
+
18
+ WINDOWS_OCR_AVAILABLE = True
19
+ except ImportError:
20
+ WINDOWS_OCR_AVAILABLE = False
21
+
22
+ from .models import BoundingBox, OCRResult, TextElement
23
+
24
# curated fallback if the engine's live query fails - common windows ocr packs
# (bcp-47 tags). actual availability depends on which packs are installed.
# returned (as a copy) by WindowsOCR.supported_languages when the live query
# raises
COMMON_LANGUAGES = [
    "en-US",
    "en-GB",
    "fr-FR",
    "de-DE",
    "es-ES",
    "it-IT",
    "pt-BR",
    "nl-NL",
    "ru-RU",
    "ja-JP",
    "ko-KR",
    "zh-Hans-CN",
    "zh-Hant-TW",
]
41
+
42
+
43
class WindowsOCR:
    """windows ocr implementation using windows runtime ocr"""

    def __init__(self, language: str = "en"):
        """
        initialize windows ocr

        args:
            language: language code for text recognition

        raises:
            ImportError: if the winrt bindings could not be imported
            RuntimeError: if no ocr engine could be created
        """
        if not WINDOWS_OCR_AVAILABLE:
            raise ImportError("windows runtime ocr not available")

        self.language = language
        self._setup_engine()

    def _setup_engine(self):
        """
        setup windows ocr engine

        tries the requested language first and falls back to the user profile
        languages when no engine is available for it

        raises:
            RuntimeError: if neither attempt yields an engine
        """
        # WinRT declares Language in Windows.Globalization, not in
        # Windows.Media.Ocr, so `ocr.Language` is not expected to exist on the
        # real projection. keep honouring it when present (e.g. a stubbed
        # module) and otherwise import it from its own namespace.
        language_cls = getattr(ocr, "Language", None)
        if language_cls is None:
            from winrt.windows.globalization import Language as language_cls

        # create ocr engine with specified language
        self.engine = ocr.OcrEngine.try_create_from_language(
            language_cls(self.language)
        )

        if not self.engine:
            # fallback to default language if specified not available
            self.engine = ocr.OcrEngine.try_create_from_user_profile_languages()

        if not self.engine:
            raise RuntimeError("could not create ocr engine")

    def recognize(self, image: Image.Image) -> OCRResult:
        """
        perform ocr on pil image

        runs the async pipeline to completion with ``asyncio.run``, so it must
        not be called from a thread that already has a running event loop

        args:
            image: pil image to process

        returns:
            OCRResult with detected text and metadata
        """
        return asyncio.run(self._recognize_async(image))

    async def _recognize_async(self, image: Image.Image) -> OCRResult:
        """async ocr recognition: convert, recognize, then build the result"""
        bitmap = await self._pil_to_bitmap(image)
        result = await self.engine.recognize_async(bitmap)
        return self._process_result(result, image.size)

    async def _pil_to_bitmap(self, image: Image.Image):
        """convert pil image to windows bitmap via an in-memory png stream"""
        # convert to rgb if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # encode to png in memory
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        image_data = buffer.getvalue()

        # copy the png bytes into an in-memory random access stream
        stream = streams.InMemoryRandomAccessStream()
        writer = streams.DataWriter(stream)
        writer.write_bytes(image_data)
        await writer.store_async()
        await writer.flush_async()
        # rewind so the decoder reads from the start
        stream.seek(0)

        # decode the stream into a software bitmap
        decoder = await imaging.BitmapDecoder.create_async(stream)
        bitmap = await decoder.get_software_bitmap_async()

        return bitmap

    def _process_result(self, result, image_size) -> OCRResult:
        """
        process windows ocr result into ocr result

        args:
            result: engine result exposing ``lines``; each line has ``text``
                and ``words``, each word a ``bounding_rect``
            image_size: ``(width, height)`` of the source image; unused,
                the rects are copied through as-is

        returns:
            OCRResult with one element per non-blank line, text joined by
            single spaces and ``confidence=None``
        """
        elements = []
        full_text_parts = []

        for line in result.lines:
            line_text = line.text
            # whitespace-only lines are dropped
            if not line_text.strip():
                continue

            words = list(line.words or [])
            if words:
                # the element carries the whole line's text, so its box must
                # cover every word rather than only the first one
                rects = [word.bounding_rect for word in words]
                x = min(rect.x for rect in rects)
                y = min(rect.y for rect in rects)
                width = max(rect.x + rect.width for rect in rects) - x
                height = max(rect.y + rect.height for rect in rects) - y
            else:
                # no words to measure: use the line's own rect
                bbox = line.bounding_rect
                x, y, width, height = bbox.x, bbox.y, bbox.width, bbox.height

            bounds = BoundingBox(x=x, y=y, width=width, height=height)
            elements.append(TextElement(text=line_text, bounds=bounds, confidence=None))
            full_text_parts.append(line_text)

        return OCRResult(
            text=" ".join(full_text_parts),
            confidence=None,  # windows ocr doesn't provide confidence scores
            elements=elements,
        )

    @property
    def supported_languages(self) -> List[str]:
        """Language codes with an OCR pack installed on this machine.

        Queried live from the engine, so it reflects whatever Windows OCR
        language packs are installed (returned as BCP-47 tags like ``en-US``).
        Falls back to the curated
        [`COMMON_LANGUAGES`][natocr.windows.COMMON_LANGUAGES] set if the query
        fails.
        """
        # the set depends on installed ocr language packs, so ask the engine
        # NOTE(review): AvailableRecognizerLanguages is a static member of
        # OcrEngine in WinRT - confirm it is reachable through an instance,
        # otherwise this always lands in the fallback below
        try:
            languages = self.engine.available_recognizer_languages
            return [lang.language_tag for lang in languages]
        except Exception:
            # deliberate best-effort: any failure yields the curated list
            return list(COMMON_LANGUAGES)
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: natocr
3
+ Version: 1.3.3
4
+ Summary: Native OCR library using platform-specific frameworks (macOS Vision, Windows Runtime OCR)
5
+ Author-email: alfredchiesa <alfred.personal@icloud.com>
6
+ Maintainer-email: alfredchiesa <alfred.personal@icloud.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/alfredchiesa/natocr
9
+ Project-URL: Documentation, https://alfredchiesa.github.io/natocr
10
+ Project-URL: Repository, https://github.com/alfredchiesa/natocr.git
11
+ Project-URL: Issues, https://github.com/alfredchiesa/natocr/issues
12
+ Project-URL: Changelog, https://github.com/alfredchiesa/natocr/blob/main/CHANGELOG.md
13
+ Keywords: ocr,text-recognition,vision,macos,windows,native
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Programming Language :: Python :: 3.14
25
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Requires-Python: >=3.10
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: Pillow>=9.0.0
31
+ Requires-Dist: numpy>=1.21
32
+ Provides-Extra: macos
33
+ Requires-Dist: pyobjc-framework-Vision>=11.1; extra == "macos"
34
+ Requires-Dist: pyobjc-framework-Quartz>=11.1; extra == "macos"
35
+ Provides-Extra: windows
36
+ Requires-Dist: pywin32>=311; extra == "windows"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
40
+ Requires-Dist: python-semantic-release>=8.0.0; extra == "dev"
41
+ Requires-Dist: mkdocs>=1.6.1; extra == "dev"
42
+ Requires-Dist: mkdocs-material>=9.6.21; extra == "dev"
43
+ Requires-Dist: mkdocstrings[python]>=0.26; extra == "dev"
44
+ Dynamic: license-file
45
+
46
+ # natocr
47
+
48
+ **natocr** (*native ocr*) is a small Python wrapper around the OCR engines that
49
+ already ship with macOS and Windows: the Vision framework on macOS and Windows
50
+ Runtime OCR on Windows.
51
+
52
+ These built-in engines are generally faster, more efficient, and more accurate
53
+ than third-party alternatives like Tesseract. **natocr** makes reaching for them
54
+ painless via one clean Python API instead of wrangling with Objective-C bridges
55
+ or WinRT async plumbing.
56
+
57
+ ## Install
58
+
59
+ ```bash
60
+ pip install natocr[macos] # on macOS
61
+ pip install natocr[windows] # on Windows
62
+ ```
63
+
64
+ ## Quick start
65
+
66
+ ```python
67
+ from natocr import OCR
68
+
69
+ ocr = OCR() # defaults to english
70
+ result = ocr.recognize("invoice.png")
71
+
72
+ print(result.text)
73
+ ```
74
+
75
+ ```text
76
+ Invoice #1042 Total $58.20 Thank you!
77
+ ```
78
+
79
+ ### Confidence Scores and Bounding Boxes
80
+
81
+ `recognize()` returns an `OCRResult`. Beyond the flat `.text`, you get a
82
+ per-detection breakdown with bounding boxes and (*on macOS*) confidence scores:
83
+
84
+ ```python
85
+ result = ocr.recognize("receipt.png")
86
+
87
+ print(result.confidence) # average confidence, or None if unavailable
88
+
89
+ for element in result.elements:
90
+ box = element.bounds.bounds # (x, y, width, height) in pixels
91
+ print(f"{element.text!r} @ {box} conf={element.confidence}")
92
+ ```
93
+
94
+ ```text
95
+ 0.93
96
+ 'Acme Coffee' @ (24.0, 18.0, 180.0, 32.0) conf=0.97
97
+ 'Latte' @ (24.0, 70.0, 96.0, 28.0) conf=0.95
98
+ '$4.50' @ (220.0, 70.0, 80.0, 28.0) conf=0.88
99
+ ```
100
+
101
+ ### Lines and Words
102
+
103
+ There are also convenience views for grouping results by reading order:
104
+
105
+ ```python
106
+ result.lines # ['Acme Coffee', 'Latte $4.50'] - elements grouped into lines
107
+ result.words # list of TextElement with non-empty text
108
+ ```
109
+
110
+ ### Detection Language
111
+
112
+ Pick a different recognition language, and inspect what the current platform
113
+ supports:
114
+
115
+ ```python
116
+ ocr = OCR(language="fr")
117
+ print(ocr.platform) # 'darwin' or 'win32'
118
+ print(ocr.supported_languages) # ['en-US', 'fr-FR', 'de-DE', ...]
119
+ ```
120
+
121
+ The supported set is decided by the OS and queried live, so
122
+ `supported_languages` always reflects the current machine. On macOS it's
123
+ Vision's built-in set for your macOS version; on Windows it's whatever OCR
124
+ language packs are installed. See the [Usage guide](https://alfredchiesa.github.io/natocr/usage/#supported-languages)
125
+ for the full list and how to add Windows language packs.
126
+
127
+ ### Alternative Inputs
128
+
129
+ `recognize()` accepts more than file paths - hand it whatever you already have
130
+ in memory:
131
+
132
+ ```python
133
+ from PIL import Image
134
+ import numpy as np
135
+
136
+ ocr.recognize("page.png") # a file path
137
+ ocr.recognize(Image.open("page.png")) # a PIL image
138
+ ocr.recognize(np.array(image)) # a numpy array (e.g. from OpenCV)
139
+ ocr.recognize(open("page.png", "rb").read()) # raw image bytes
140
+ ```
141
+
142
+ ## Supported File Types
143
+
144
+ Images are decoded with [Pillow](https://python-pillow.org/), so any raster
145
+ format Pillow can open works as an input file or byte string.
146
+
147
+ | Format | Extensions | Notes |
148
+ | --- | --- | --- |
149
+ | PNG | `.png` | recommended - lossless |
150
+ | JPEG | `.jpg`, `.jpeg` | great for photos of documents |
151
+ | TIFF | `.tif`, `.tiff` | common for scans |
152
+ | BMP | `.bmp` | uncompressed bitmap |
153
+ | GIF | `.gif` | first frame is used |
154
+ | WebP | `.webp` | modern lossy/lossless |
155
+ | PPM/PGM | `.ppm`, `.pgm` | netpbm bitmaps |
156
+
157
+ In addition to file paths, `recognize()` accepts these in-memory types:
158
+
159
+ | Input type | Example |
160
+ | --- | --- |
161
+ | `str` (file path) | `ocr.recognize("page.png")` |
162
+ | `PIL.Image.Image` | `ocr.recognize(Image.open("page.png"))` |
163
+ | `numpy.ndarray` | `ocr.recognize(np.array(image))` |
164
+ | `bytes` (encoded image) | `ocr.recognize(data)` |
165
+
166
+ > [!NOTE]
167
+ > PDFs and other multi-page documents aren't decoded directly - rasterize a page
168
+ > to one of the formats above first (e.g. with `pdf2image` or `pymupdf`).
169
+
170
+ ## Testing
171
+
172
+ Install the dev dependencies (in a virtualenv), then run the suite. The tests
173
+ mock the native macOS Vision and Windows Runtime backends, so they run anywhere
174
+ without those frameworks installed.
175
+
176
+ ```bash
177
+ python3 -m venv .venv
178
+ source .venv/bin/activate
179
+ pip install -e ".[dev]"
180
+ ```
181
+
182
+ Run everything with coverage (coverage is wired up in `pyproject.toml`, so plain
183
+ `pytest` already reports it):
184
+
185
+ ```bash
186
+ pytest
187
+ ```
188
+
189
+ Other handy invocations:
190
+
191
+ ```bash
192
+ # run a single test file
193
+ pytest tests/test_models.py
194
+
195
+ # run one test by name
196
+ pytest -k test_lines_groups_close_y_into_single_line
197
+
198
+ # verbose output
199
+ pytest -v
200
+ ```
201
+
202
+ Coverage reports land in the terminal, in `htmlcov/index.html`, and in
203
+ `coverage.xml`.
@@ -0,0 +1,10 @@
1
+ natocr/__init__.py,sha256=9A3lC52yGZu7MMPd6AcG1AVdR7c8vNHC1heCvwX6kb4,446
2
+ natocr/core.py,sha256=V_ipHNIImTOXxwGSGSmDT9oXxi8qDatbkBGjZW596Hw,3632
3
+ natocr/macos.py,sha256=g6MKtNHJ0SInrkAm_nndetTEv8otfkx6Ag5RxW8LP0Q,5639
4
+ natocr/models.py,sha256=Em4qOpBwSUylB_SGLDmkJ5IoMAxzS-HdoA5RzPlukJI,3178
5
+ natocr/windows.py,sha256=EiE7xeIxbe5e3zxNcfdELAfLYUfHgLyBGldCvmNbPh4,5261
6
+ natocr-1.3.3.dist-info/licenses/LICENSE,sha256=lSQGjh39IyIvxFdkgGVGuvuZS0mEdXVRkRQTqogJv7g,1056
7
+ natocr-1.3.3.dist-info/METADATA,sha256=3GUlmZJVF0UoijHc_fNp54SKsv1G_4c2rKzhMFrWAwQ,6588
8
+ natocr-1.3.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ natocr-1.3.3.dist-info/top_level.txt,sha256=8YbBgCvaZjOLyplN9SbSI04u1zPWJO_2pnf1n8hwoNY,7
10
+ natocr-1.3.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ natocr