natocr 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
natocr-1.3.3/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
natocr-1.3.3/PKG-INFO ADDED
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: natocr
3
+ Version: 1.3.3
4
+ Summary: Native OCR library using platform-specific frameworks (macOS Vision, Windows Runtime OCR)
5
+ Author-email: alfredchiesa <alfred.personal@icloud.com>
6
+ Maintainer-email: alfredchiesa <alfred.personal@icloud.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/alfredchiesa/natocr
9
+ Project-URL: Documentation, https://alfredchiesa.github.io/natocr
10
+ Project-URL: Repository, https://github.com/alfredchiesa/natocr.git
11
+ Project-URL: Issues, https://github.com/alfredchiesa/natocr/issues
12
+ Project-URL: Changelog, https://github.com/alfredchiesa/natocr/blob/main/CHANGELOG.md
13
+ Keywords: ocr,text-recognition,vision,macos,windows,native
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: Microsoft :: Windows
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Programming Language :: Python :: 3.14
25
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Requires-Python: >=3.10
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: Pillow>=9.0.0
31
+ Requires-Dist: numpy>=1.21
32
+ Provides-Extra: macos
33
+ Requires-Dist: pyobjc-framework-Vision>=11.1; extra == "macos"
34
+ Requires-Dist: pyobjc-framework-Quartz>=11.1; extra == "macos"
35
+ Provides-Extra: windows
36
+ Requires-Dist: pywin32>=311; extra == "windows"
37
+ Provides-Extra: dev
38
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
39
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
40
+ Requires-Dist: python-semantic-release>=8.0.0; extra == "dev"
41
+ Requires-Dist: mkdocs>=1.6.1; extra == "dev"
42
+ Requires-Dist: mkdocs-material>=9.6.21; extra == "dev"
43
+ Requires-Dist: mkdocstrings[python]>=0.26; extra == "dev"
44
+ Dynamic: license-file
45
+
46
+ # natocr
47
+
48
+ **natocr** (*native ocr*) is a small Python wrapper around the OCR engines that
49
+ already ship with macOS and Windows: Vision framework on macOS and Windows
50
+ Runtime OCR on Windows.
51
+
52
+ These built-in engines are generally faster, more efficient, and more accurate
53
+ than third-party alternatives like Tesseract. **natocr** makes reaching for them
54
+ painless via one clean Python API instead of wrangling with Objective-C bridges
55
+ or WinRT async plumbing.
56
+
57
+ ## Install
58
+
59
+ ```bash
60
+ pip install natocr[macos] # on macOS
61
+ pip install natocr[windows] # on Windows
62
+ ```
63
+
64
+ ## Quick start
65
+
66
+ ```python
67
+ from natocr import OCR
68
+
69
+ ocr = OCR() # defaults to english
70
+ result = ocr.recognize("invoice.png")
71
+
72
+ print(result.text)
73
+ ```
74
+
75
+ ```text
76
+ Invoice #1042 Total $58.20 Thank you!
77
+ ```
78
+
79
+ ### Confidence Scores and Bounding Boxes
80
+
81
+ `recognize()` returns an `OCRResult`. Beyond the flat `.text`, you get a
82
+ per-detection breakdown with bounding boxes and (*on macOS*) confidence scores:
83
+
84
+ ```python
85
+ result = ocr.recognize("receipt.png")
86
+
87
+ print(result.confidence) # average confidence, or None if unavailable
88
+
89
+ for element in result.elements:
90
+ box = element.bounds.bounds # (x, y, width, height) in pixels
91
+ print(f"{element.text!r} @ {box} conf={element.confidence}")
92
+ ```
93
+
94
+ ```text
95
+ 0.93
96
+ 'Acme Coffee' @ (24.0, 18.0, 180.0, 32.0) conf=0.97
97
+ 'Latte' @ (24.0, 70.0, 96.0, 28.0) conf=0.95
98
+ '$4.50' @ (220.0, 70.0, 80.0, 28.0) conf=0.88
99
+ ```
100
+
101
+ ### Lines and Words
102
+
103
+ There are also convenience views for grouping results by reading order:
104
+
105
+ ```python
106
+ result.lines # ['Acme Coffee', 'Latte $4.50'] - elements grouped into lines
107
+ result.words # list of TextElement with non-empty text
108
+ ```
109
+
110
+ ### Detection Language
111
+
112
+ Pick a different recognition language, and inspect what the current platform
113
+ supports:
114
+
115
+ ```python
116
+ ocr = OCR(language="fr")
117
+ print(ocr.platform) # 'darwin' or 'win32'
118
+ print(ocr.supported_languages) # ['en-US', 'fr-FR', 'de-DE', ...]
119
+ ```
120
+
121
+ The supported set is decided by the OS and queried live, so
122
+ `supported_languages` always reflects the current machine. On macOS it's
123
+ Vision's built-in set for your macOS version; on Windows it's whatever OCR
124
+ language packs are installed. See the [Usage guide](https://alfredchiesa.github.io/natocr/usage/#supported-languages)
125
+ for the full list and how to add Windows language packs.
126
+
127
+ ### Alternative Inputs
128
+
129
+ `recognize()` accepts more than file paths - hand it whatever you already have
130
+ in memory:
131
+
132
+ ```python
133
+ from PIL import Image
134
+ import numpy as np
135
+
136
+ ocr.recognize("page.png") # a file path
137
+ ocr.recognize(Image.open("page.png")) # a PIL image
138
+ ocr.recognize(np.array(image)) # a numpy array (e.g. from OpenCV)
139
+ ocr.recognize(open("page.png", "rb").read()) # raw image bytes
140
+ ```
141
+
142
+ ## Supported File Types
143
+
144
+ Images are decoded with [Pillow](https://python-pillow.org/), so any raster
145
+ format Pillow can open works as an input file or byte string.
146
+
147
+ | Format | Extensions | Notes |
148
+ | --- | --- | --- |
149
+ | PNG | `.png` | recommended - lossless |
150
+ | JPEG | `.jpg`, `.jpeg` | great for photos of documents |
151
+ | TIFF | `.tif`, `.tiff` | common for scans |
152
+ | BMP | `.bmp` | uncompressed bitmap |
153
+ | GIF | `.gif` | first frame is used |
154
+ | WebP | `.webp` | modern lossy/lossless |
155
+ | PPM/PGM | `.ppm`, `.pgm` | netpbm bitmaps |
156
+
157
+ In addition to file paths, `recognize()` accepts these in-memory types:
158
+
159
+ | Input type | Example |
160
+ | --- | --- |
161
+ | `str` (file path) | `ocr.recognize("page.png")` |
162
+ | `PIL.Image.Image` | `ocr.recognize(Image.open("page.png"))` |
163
+ | `numpy.ndarray` | `ocr.recognize(np.array(image))` |
164
+ | `bytes` (encoded image) | `ocr.recognize(data)` |
165
+
166
+ > [!NOTE]
167
+ > PDFs and other multi-page documents aren't decoded directly - rasterize a page
168
+ > to one of the formats above first (e.g. with `pdf2image` or `pymupdf`).
169
+
170
+ ## Testing
171
+
172
+ Install the dev dependencies (in a virtualenv), then run the suite. The tests
173
+ mock the native macOS Vision and Windows Runtime backends, so they run anywhere
174
+ without those frameworks installed.
175
+
176
+ ```bash
177
+ python3 -m venv .venv
178
+ source .venv/bin/activate
179
+ pip install -e ".[dev]"
180
+ ```
181
+
182
+ Run everything with coverage (coverage is wired up in `pyproject.toml`, so plain
183
+ `pytest` already reports it):
184
+
185
+ ```bash
186
+ pytest
187
+ ```
188
+
189
+ Other handy invocations:
190
+
191
+ ```bash
192
+ # run a single test file
193
+ pytest tests/test_models.py
194
+
195
+ # run one test by name
196
+ pytest -k test_lines_groups_close_y_into_single_line
197
+
198
+ # verbose output
199
+ pytest -v
200
+ ```
201
+
202
+ Coverage reports land in the terminal, in `htmlcov/index.html`, and in
203
+ `coverage.xml`.
natocr-1.3.3/README.md ADDED
@@ -0,0 +1,158 @@
1
+ # natocr
2
+
3
+ **natocr** (*native ocr*) is a small Python wrapper around the OCR engines that
4
+ already ship with macOS and Windows: Vision framework on macOS and Windows
5
+ Runtime OCR on Windows.
6
+
7
+ These built-in engines are generally faster, more efficient, and more accurate
8
+ than third-party alternatives like Tesseract. **natocr** makes reaching for them
9
+ painless via one clean Python API instead of wrangling with Objective-C bridges
10
+ or WinRT async plumbing.
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ pip install natocr[macos] # on macOS
16
+ pip install natocr[windows] # on Windows
17
+ ```
18
+
19
+ ## Quick start
20
+
21
+ ```python
22
+ from natocr import OCR
23
+
24
+ ocr = OCR() # defaults to english
25
+ result = ocr.recognize("invoice.png")
26
+
27
+ print(result.text)
28
+ ```
29
+
30
+ ```text
31
+ Invoice #1042 Total $58.20 Thank you!
32
+ ```
33
+
34
+ ### Confidence Scores and Bounding Boxes
35
+
36
+ `recognize()` returns an `OCRResult`. Beyond the flat `.text`, you get a
37
+ per-detection breakdown with bounding boxes and (*on macOS*) confidence scores:
38
+
39
+ ```python
40
+ result = ocr.recognize("receipt.png")
41
+
42
+ print(result.confidence) # average confidence, or None if unavailable
43
+
44
+ for element in result.elements:
45
+ box = element.bounds.bounds # (x, y, width, height) in pixels
46
+ print(f"{element.text!r} @ {box} conf={element.confidence}")
47
+ ```
48
+
49
+ ```text
50
+ 0.93
51
+ 'Acme Coffee' @ (24.0, 18.0, 180.0, 32.0) conf=0.97
52
+ 'Latte' @ (24.0, 70.0, 96.0, 28.0) conf=0.95
53
+ '$4.50' @ (220.0, 70.0, 80.0, 28.0) conf=0.88
54
+ ```
55
+
56
+ ### Lines and Words
57
+
58
+ There are also convenience views for grouping results by reading order:
59
+
60
+ ```python
61
+ result.lines # ['Acme Coffee', 'Latte $4.50'] - elements grouped into lines
62
+ result.words # list of TextElement with non-empty text
63
+ ```
64
+
65
+ ### Detection Language
66
+
67
+ Pick a different recognition language, and inspect what the current platform
68
+ supports:
69
+
70
+ ```python
71
+ ocr = OCR(language="fr")
72
+ print(ocr.platform) # 'darwin' or 'win32'
73
+ print(ocr.supported_languages) # ['en-US', 'fr-FR', 'de-DE', ...]
74
+ ```
75
+
76
+ The supported set is decided by the OS and queried live, so
77
+ `supported_languages` always reflects the current machine. On macOS it's
78
+ Vision's built-in set for your macOS version; on Windows it's whatever OCR
79
+ language packs are installed. See the [Usage guide](https://alfredchiesa.github.io/natocr/usage/#supported-languages)
80
+ for the full list and how to add Windows language packs.
81
+
82
+ ### Alternative Inputs
83
+
84
+ `recognize()` accepts more than file paths - hand it whatever you already have
85
+ in memory:
86
+
87
+ ```python
88
+ from PIL import Image
89
+ import numpy as np
90
+
91
+ ocr.recognize("page.png") # a file path
92
+ ocr.recognize(Image.open("page.png")) # a PIL image
93
+ ocr.recognize(np.array(image)) # a numpy array (e.g. from OpenCV)
94
+ ocr.recognize(open("page.png", "rb").read()) # raw image bytes
95
+ ```
96
+
97
+ ## Supported File Types
98
+
99
+ Images are decoded with [Pillow](https://python-pillow.org/), so any raster
100
+ format Pillow can open works as an input file or byte string.
101
+
102
+ | Format | Extensions | Notes |
103
+ | --- | --- | --- |
104
+ | PNG | `.png` | recommended - lossless |
105
+ | JPEG | `.jpg`, `.jpeg` | great for photos of documents |
106
+ | TIFF | `.tif`, `.tiff` | common for scans |
107
+ | BMP | `.bmp` | uncompressed bitmap |
108
+ | GIF | `.gif` | first frame is used |
109
+ | WebP | `.webp` | modern lossy/lossless |
110
+ | PPM/PGM | `.ppm`, `.pgm` | netpbm bitmaps |
111
+
112
+ In addition to file paths, `recognize()` accepts these in-memory types:
113
+
114
+ | Input type | Example |
115
+ | --- | --- |
116
+ | `str` (file path) | `ocr.recognize("page.png")` |
117
+ | `PIL.Image.Image` | `ocr.recognize(Image.open("page.png"))` |
118
+ | `numpy.ndarray` | `ocr.recognize(np.array(image))` |
119
+ | `bytes` (encoded image) | `ocr.recognize(data)` |
120
+
121
+ > [!NOTE]
122
+ > PDFs and other multi-page documents aren't decoded directly - rasterize a page
123
+ > to one of the formats above first (e.g. with `pdf2image` or `pymupdf`).
124
+
125
+ ## Testing
126
+
127
+ Install the dev dependencies (in a virtualenv), then run the suite. The tests
128
+ mock the native macOS Vision and Windows Runtime backends, so they run anywhere
129
+ without those frameworks installed.
130
+
131
+ ```bash
132
+ python3 -m venv .venv
133
+ source .venv/bin/activate
134
+ pip install -e ".[dev]"
135
+ ```
136
+
137
+ Run everything with coverage (coverage is wired up in `pyproject.toml`, so plain
138
+ `pytest` already reports it):
139
+
140
+ ```bash
141
+ pytest
142
+ ```
143
+
144
+ Other handy invocations:
145
+
146
+ ```bash
147
+ # run a single test file
148
+ pytest tests/test_models.py
149
+
150
+ # run one test by name
151
+ pytest -k test_lines_groups_close_y_into_single_line
152
+
153
+ # verbose output
154
+ pytest -v
155
+ ```
156
+
157
+ Coverage reports land in the terminal, in `htmlcov/index.html`, and in
158
+ `coverage.xml`.
@@ -0,0 +1,21 @@
1
+ """
2
+ natocr - native ocr library using platform-specific frameworks
3
+
4
+ this package provides ocr functionality using native frameworks:
5
+ - macos: vision framework
6
+ - windows: windows runtime ocr
7
+ """
8
+
9
+ from .core import OCR
10
+ from .models import BoundingBox, OCRResult, TextElement
11
+
12
+ __version__ = "1.3.3"
13
+ __author__ = "alfredchiesa"
14
+ __email__ = "alfred.personal@icloud.com"
15
+
16
+ __all__ = [
17
+ "OCR",
18
+ "OCRResult",
19
+ "TextElement",
20
+ "BoundingBox",
21
+ ]
@@ -0,0 +1,112 @@
1
+ """
2
+ main ocr class with platform detection and delegation
3
+ """
4
+
5
+ import io
6
+ import sys
7
+ from typing import List, Union
8
+
9
+ import numpy as np
10
+ from PIL import Image
11
+
12
+ from .macos import MacOSOCR
13
+ from .models import OCRResult
14
+ from .windows import WindowsOCR
15
+
16
+
17
+ class OCR:
18
+ """Run OCR using the operating system's native engine.
19
+
20
+ Picks the right backend for the current platform - the Vision framework on
21
+ macOS, Windows Runtime OCR on Windows - and gives you one API over both.
22
+
23
+ Example:
24
+ ```python
25
+ from natocr import OCR
26
+
27
+ ocr = OCR() # english by default
28
+ result = ocr.recognize("invoice.png")
29
+ print(result.text)
30
+ ```
31
+
32
+ Args:
33
+ language: language code for text recognition (default: ``"en"``).
34
+
35
+ Raises:
36
+ RuntimeError: on an unsupported platform, or when the platform's native
37
+ OCR dependencies aren't installed.
38
+ """
39
+
40
+ def __init__(self, language: str = "en"):
41
+ self.language = language
42
+ self._backend = None
43
+ self._initialize_backend()
44
+
45
+ def _initialize_backend(self):
46
+ """initialize platform-specific ocr backend"""
47
+ if sys.platform == "darwin":
48
+ try:
49
+ self._backend = MacOSOCR(self.language)
50
+ except ImportError:
51
+ raise RuntimeError(
52
+ "macos dependencies not installed. install with: pip install natocr[macos]"
53
+ )
54
+ elif sys.platform == "win32":
55
+ try:
56
+ self._backend = WindowsOCR(self.language)
57
+ except ImportError:
58
+ raise RuntimeError(
59
+ "windows dependencies not installed. install with: pip install natocr[windows]"
60
+ )
61
+ else:
62
+ raise RuntimeError(f"unsupported platform: {sys.platform}")
63
+
64
+ def recognize(self, image: Union[str, Image.Image, np.ndarray, bytes]) -> OCRResult:
65
+ """Recognize text in an image.
66
+
67
+ Args:
68
+ image: what to read. One of: a file path (``str``), a
69
+ ``PIL.Image.Image``, a ``numpy.ndarray``, or raw encoded image
70
+ ``bytes``.
71
+
72
+ Returns:
73
+ An [OCRResult][natocr.OCRResult] with the detected text and
74
+ per-element metadata.
75
+
76
+ Raises:
77
+ ValueError: if ``image`` isn't one of the supported types.
78
+ """
79
+ # convert input to pil image for consistent processing
80
+ pil_image = self._convert_to_pil(image)
81
+
82
+ # delegate to platform-specific implementation
83
+ return self._backend.recognize(pil_image)
84
+
85
+ def _convert_to_pil(
86
+ self, image: Union[str, Image.Image, np.ndarray, bytes]
87
+ ) -> Image.Image:
88
+ """convert various image formats to pil image"""
89
+ if isinstance(image, str):
90
+ # file path
91
+ return Image.open(image)
92
+ elif isinstance(image, Image.Image):
93
+ # already a pil image
94
+ return image
95
+ elif isinstance(image, np.ndarray):
96
+ # numpy array
97
+ return Image.fromarray(image)
98
+ elif isinstance(image, bytes):
99
+ # raw bytes
100
+ return Image.open(io.BytesIO(image))
101
+ else:
102
+ raise ValueError(f"unsupported image type: {type(image)}")
103
+
104
+ @property
105
+ def supported_languages(self) -> List[str]:
106
+ """Language codes the current platform's backend supports."""
107
+ return self._backend.supported_languages if self._backend else []
108
+
109
+ @property
110
+ def platform(self) -> str:
111
+ """The current platform identifier (e.g. ``"darwin"`` or ``"win32"``)."""
112
+ return sys.platform
@@ -0,0 +1,177 @@
1
+ """
2
+ macos implementation using vision framework
3
+ """
4
+
5
+ import io
6
+ from typing import List
7
+
8
+ from PIL import Image
9
+
10
+ try:
11
+ from Foundation import NSData
12
+ from Vision import (
13
+ VNImageRequestHandler,
14
+ VNRecognizeTextRequest,
15
+ VNRequestTextRecognitionLevelAccurate,
16
+ )
17
+
18
+ VISION_AVAILABLE = True
19
+ except ImportError:
20
+ VISION_AVAILABLE = False
21
+
22
+ from .models import BoundingBox, OCRResult, TextElement
23
+
24
+ # curated fallback if vision's live query fails - the accurate recognizer's set
25
+ # as of macos 15 (bcp-47 tags, exactly what vision returns)
26
+ COMMON_LANGUAGES = [
27
+ "en-US",
28
+ "fr-FR",
29
+ "it-IT",
30
+ "de-DE",
31
+ "es-ES",
32
+ "pt-BR",
33
+ "zh-Hans",
34
+ "zh-Hant",
35
+ "yue-Hans",
36
+ "yue-Hant",
37
+ "ko-KR",
38
+ "ja-JP",
39
+ "ru-RU",
40
+ "uk-UA",
41
+ "th-TH",
42
+ "vi-VT",
43
+ "ar-SA",
44
+ "ars-SA",
45
+ ]
46
+
47
+
48
class MacOSOCR:
    """macOS OCR implementation using the Vision framework."""

    def __init__(self, language: str = "en"):
        """Initialize the macOS backend.

        Args:
            language: language code for text recognition.

        Raises:
            ImportError: if the Vision/Foundation bindings could not be
                imported (``VISION_AVAILABLE`` is false).
        """
        if not VISION_AVAILABLE:
            raise ImportError("vision framework not available")

        self.language = language
        self._setup_request()

    def _setup_request(self):
        """Build the Vision text recognition request and store it on ``self.request``."""
        self.request = VNRecognizeTextRequest.alloc().init()
        # pyobjc needs the objc setters, plain attribute assignment is read-only
        self.request.setRecognitionLanguages_([self.language])
        self.request.setRecognitionLevel_(VNRequestTextRecognitionLevelAccurate)
        self.request.setUsesLanguageCorrection_(True)

    def recognize(self, image: Image.Image) -> OCRResult:
        """Perform OCR on a pil image.

        Args:
            image: pil image to process.

        Returns:
            OCRResult with detected text and metadata. When Vision reports no
            observations the result has empty text, ``None`` confidence and no
            elements.

        Raises:
            RuntimeError: if Vision reports that the request failed.
        """
        # convert pil image to nsdata for vision framework
        ns_image_data = self._pil_to_nsdata(image)

        # create image request handler
        handler = VNImageRequestHandler.alloc().initWithData_options_(ns_image_data, {})

        # perform text recognition
        success, error = handler.performRequests_error_([self.request], None)

        if not success:
            raise RuntimeError(f"vision framework error: {error}")

        # extract results
        observations = self.request.results()
        if not observations:
            return OCRResult(text="", confidence=None, elements=[])

        # process observations into structured result
        return self._process_observations(observations, image.size)

    def _pil_to_nsdata(self, image: Image.Image) -> "NSData":
        """Encode a pil image as PNG and wrap the bytes in an ``NSData``.

        The return annotation is a string on purpose: ``NSData`` only exists
        when the pyobjc import at the top of the module succeeded, and an
        unquoted annotation would be evaluated while the class body runs,
        making the whole module fail to import on machines without pyobjc.

        Args:
            image: pil image to encode; converted to RGB first if needed.

        Returns:
            An ``NSData`` holding the PNG-encoded image.
        """
        # convert to rgb if needed
        if image.mode != "RGB":
            image = image.convert("RGB")

        # save to bytes
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")
        image_data = buffer.getvalue()

        # create nsdata
        return NSData.dataWithBytes_length_(image_data, len(image_data))

    def _process_observations(self, observations, image_size) -> OCRResult:
        """Turn Vision observations into an ``OCRResult``.

        Args:
            observations: the results of the text recognition request.
            image_size: ``(width, height)`` of the source image in pixels,
                used to scale Vision's normalized boxes.

        Returns:
            OCRResult whose text is the non-blank observation strings joined
            with single spaces, whose confidence is the mean of their
            confidences (``None`` if there were none), and whose elements
            carry pixel-space bounding boxes.
        """
        img_width, img_height = image_size[0], image_size[1]

        elements = []
        full_text_parts = []
        total_confidence = 0.0

        for observation in observations:
            # ask for the best candidate once; an observation without any
            # candidate carries no text, so skip it rather than index into
            # an empty result
            candidates = observation.topCandidates_(1)
            if candidates is None or len(candidates) == 0:
                continue

            best = candidates[0]
            text = best.string()
            if not text.strip():
                continue
            confidence = best.confidence()

            # get bounding box
            bbox = observation.boundingBox()

            # convert normalized coordinates to pixel coordinates
            x = bbox.origin.x * img_width
            y = (1.0 - bbox.origin.y - bbox.size.height) * img_height  # flip y
            width = bbox.size.width * img_width
            height = bbox.size.height * img_height

            # create bounding box and text element
            bounds = BoundingBox(x=x, y=y, width=width, height=height)
            elements.append(TextElement(text=text, bounds=bounds, confidence=confidence))

            # accumulate text and confidence
            full_text_parts.append(text)
            total_confidence += confidence

        # every kept observation contributed exactly one element
        avg_confidence = total_confidence / len(elements) if elements else None

        # join text parts
        full_text = " ".join(full_text_parts)

        return OCRResult(text=full_text, confidence=avg_confidence, elements=elements)

    @property
    def supported_languages(self) -> List[str]:
        """Language codes Vision can recognize on this machine.

        Queried live from Vision for the request's recognition level, so it
        always matches what the installed macOS version actually supports
        (returned as BCP-47 tags like ``en-US``). Falls back to the curated
        [`COMMON_LANGUAGES`][natocr.macos.COMMON_LANGUAGES] set if the query
        fails.
        """
        # ask vision directly instead of guessing - the set changes per os version
        try:
            languages, error = (
                self.request.supportedRecognitionLanguagesAndReturnError_(None)
            )
        except Exception:
            # deliberate best-effort: any failure of the live query degrades
            # to the curated list instead of breaking a read-only property
            return list(COMMON_LANGUAGES)

        if error or not languages:
            return list(COMMON_LANGUAGES)
        return list(languages)