macos-vision 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,164 @@
1
+ # macos-vision
2
+
3
+ > Apple Vision for Node.js — native, fast, offline, no API keys required.
4
+
5
+ Uses macOS's built-in [Vision framework](https://developer.apple.com/documentation/vision) via a compiled Swift binary. Works completely offline. No cloud services, no API keys, no Python, zero runtime dependencies.
6
+
7
+ ## Requirements
8
+
9
+ - macOS 12+
10
+ - Node.js 18+
11
+ - Xcode Command Line Tools
12
+
13
+ ```bash
14
+ xcode-select --install
15
+ ```
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ npm install macos-vision
21
+ ```
22
+
23
+ The native Swift binary is compiled automatically by the package's `postinstall` script, which is why the Xcode Command Line Tools must be installed first.
24
+
25
+ ## Usage
26
+
27
+ ```js
28
+ import { ocr, detectFaces, detectBarcodes, detectRectangles, detectDocument, classify } from 'macos-vision'
29
+
30
+ // OCR — plain text
31
+ const text = await ocr('photo.jpg')
32
+
33
+ // OCR — structured blocks with bounding boxes
34
+ const blocks = await ocr('photo.jpg', { format: 'blocks' })
35
+
36
+ // Detect faces
37
+ const faces = await detectFaces('photo.jpg')
38
+
39
+ // Detect barcodes and QR codes
40
+ const codes = await detectBarcodes('invoice.jpg')
41
+
42
+ // Detect rectangular shapes (tables, forms, cards)
43
+ const rects = await detectRectangles('document.jpg')
44
+
45
+ // Find document boundary in a photo
46
+ const doc = await detectDocument('photo.jpg') // DocumentBounds | null
47
+
48
+ // Classify image content
49
+ const labels = await classify('photo.jpg')
50
+ ```
51
+
52
+ ## API
53
+
54
+ ### `ocr(imagePath, options?)`
55
+
56
+ Extracts text from an image.
57
+
58
+ | Parameter | Type | Default | Description |
59
+ |-----------|------|---------|-------------|
60
+ | `imagePath` | `string` | — | Path to image (PNG, JPG, JPEG, WEBP) |
61
+ | `options.format` | `'text' \| 'blocks'` | `'text'` | Plain text or structured blocks with coordinates |
62
+
63
+ Returns `Promise<string>` by default (`format: 'text'`), or `Promise<VisionBlock[]>` when `format` is `'blocks'`.
64
+
65
+ ```ts
66
+ interface VisionBlock {
67
+ text: string
68
+ x: number // 0–1 from left
69
+ y: number // 0–1 from top
70
+ width: number // 0–1
71
+ height: number // 0–1
72
+ }
73
+ ```
74
+
75
+ ---
76
+
77
+ ### `detectFaces(imagePath)`
78
+
79
+ Detects human faces and returns their bounding boxes.
80
+
81
+ ```ts
82
+ interface Face {
83
+ x: number; y: number; width: number; height: number
84
+ confidence: number // 0–1
85
+ }
86
+ ```
87
+
88
+ ---
89
+
90
+ ### `detectBarcodes(imagePath)`
91
+
92
+ Detects barcodes and QR codes and decodes their payload.
93
+
94
+ ```ts
95
+ interface Barcode {
96
+ type: string // e.g. 'org.iso.QRCode', 'org.gs1.EAN-13'
97
+ value: string // decoded content
98
+ x: number; y: number; width: number; height: number
99
+ }
100
+ ```
101
+
102
+ ---
103
+
104
+ ### `detectRectangles(imagePath)`
105
+
106
+ Finds rectangular shapes (documents, tables, cards, forms).
107
+
108
+ ```ts
109
+ interface Rectangle {
110
+ topLeft: [number, number]; topRight: [number, number]
111
+ bottomLeft: [number, number]; bottomRight: [number, number]
112
+ confidence: number
113
+ }
114
+ ```
115
+
116
+ ---
117
+
118
+ ### `detectDocument(imagePath)`
119
+
120
+ Finds the boundary of a document in a photo (e.g. paper on a desk). Returns `null` if no document is found.
121
+
122
+ ```ts
123
+ interface DocumentBounds {
124
+ topLeft: [number, number]; topRight: [number, number]
125
+ bottomLeft: [number, number]; bottomRight: [number, number]
126
+ confidence: number
127
+ }
128
+ ```
129
+
130
+ ---
131
+
132
+ ### `classify(imagePath)`
133
+
134
+ Returns top image classification labels with confidence scores.
135
+
136
+ ```ts
137
+ interface Classification {
138
+ identifier: string // e.g. 'document', 'outdoor', 'animal'
139
+ confidence: number // 0–1
140
+ }
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Why macos-vision?
146
+
147
+ | | macos-vision | Tesseract.js | Cloud APIs |
148
+ |---|---|---|---|
149
+ | Offline | ✅ | ✅ | ❌ |
150
+ | No API key | ✅ | ✅ | ❌ |
151
+ | Native speed | ✅ | ❌ | — |
152
+ | Zero runtime deps | ✅ | ❌ | ❌ |
153
+ | OCR with bounding boxes | ✅ | ✅ | ✅ |
154
+ | Face detection | ✅ | ❌ | ✅ |
155
+ | Barcode / QR | ✅ | ❌ | ✅ |
156
+ | Document detection | ✅ | ❌ | ✅ |
157
+ | Image classification | ✅ | ❌ | ✅ |
158
+ | macOS only | ✅ | ❌ | ❌ |
159
+
160
+ Apple Vision is the same engine used by macOS Spotlight, Live Text, and Shortcuts — highly optimized and accurate.
161
+
162
+ ## License
163
+
164
+ MIT
Binary file
@@ -0,0 +1,85 @@
1
/**
 * A piece of recognized text together with its box.
 *
 * Position and size are fractions in the range 0–1 rather than pixels.
 */
export interface VisionBlock {
  /** The text that was recognized. */
  text: string;

  /** Horizontal position as a 0–1 fraction, measured from the left. */
  x: number;

  /** Vertical position as a 0–1 fraction, measured from the top. */
  y: number;

  /** Box width as a 0–1 fraction of the image. */
  width: number;

  /** Box height as a 0–1 fraction of the image. */
  height: number;
}
13
/** Options accepted by `ocr`. */
export interface OcrOptions {
  /**
   * Output shape: `'text'` for plain text (the default) or `'blocks'` for
   * structured blocks with coordinates.
   */
  format?: 'text' | 'blocks';
}
17
/**
 * Extracts text from an image as plain text.
 *
 * This overload applies when `options` is omitted or `format` is `'text'`.
 *
 * @param imagePath - Path to the image file.
 * @param options - Optional settings; `format` may be left out or set to `'text'`.
 * @returns The recognized text.
 */
export declare function ocr(imagePath: string, options?: {
  format?: 'text';
}): Promise<string>;
/**
 * Extracts text from an image as structured blocks with coordinates.
 *
 * @param imagePath - Path to the image file.
 * @param options - Must set `format` to `'blocks'`.
 * @returns One `VisionBlock` per recognized block.
 */
export declare function ocr(imagePath: string, options: {
  format: 'blocks';
}): Promise<VisionBlock[]>;
/**
 * Catch-all overload for callers that hold an `OcrOptions` value whose
 * `format` is not known at compile time. The two overloads above are tried
 * first, so calls with a literal `format` keep their precise return type.
 *
 * @param imagePath - Path to the image file.
 * @param options - Any `OcrOptions` value.
 * @returns Plain text or structured blocks, depending on `options.format`.
 */
export declare function ocr(imagePath: string, options: OcrOptions): Promise<string | VisionBlock[]>;
23
/** Box of one detected face plus the detection confidence. */
export interface Face {
  /** Horizontal position as a 0–1 fraction, measured from the left. */
  x: number;

  /** Vertical position as a 0–1 fraction, measured from the top. */
  y: number;

  /** Box width as a 0–1 fraction of the image. */
  width: number;

  /** Box height as a 0–1 fraction of the image. */
  height: number;

  /** How confident the detection is, from 0 to 1. */
  confidence: number;
}

/**
 * Detects faces in an image.
 *
 * @param imagePath - Path to the image file.
 * @returns The detected faces.
 */
export declare function detectFaces(imagePath: string): Promise<Face[]>;
36
/** A decoded barcode together with its box. */
export interface Barcode {
  /** Symbology identifier, for example `'org.iso.QRCode'`, `'org.gs1.EAN-13'` or `'org.iso.Code128'`. */
  type: string;

  /** The decoded payload. */
  value: string;

  /** Horizontal position as a 0–1 fraction, measured from the left. */
  x: number;

  /** Vertical position as a 0–1 fraction, measured from the top. */
  y: number;

  /** Box width as a 0–1 fraction of the image. */
  width: number;

  /** Box height as a 0–1 fraction of the image. */
  height: number;
}

/**
 * Detects and decodes barcodes in an image.
 *
 * @param imagePath - Path to the image file.
 * @returns The detected barcodes.
 */
export declare function detectBarcodes(imagePath: string): Promise<Barcode[]>;
51
/** Four corners of a detected rectangle plus the detection confidence. */
export interface Rectangle {
  /** Top-left corner; both coordinates are in the range 0–1. */
  topLeft: [x: number, y: number];

  /** Top-right corner; both coordinates are in the range 0–1. */
  topRight: [x: number, y: number];

  /** Bottom-left corner; both coordinates are in the range 0–1. */
  bottomLeft: [x: number, y: number];

  /** Bottom-right corner; both coordinates are in the range 0–1. */
  bottomRight: [x: number, y: number];

  /** How confident the detection is, from 0 to 1. */
  confidence: number;
}

/**
 * Detects rectangular shapes in an image.
 *
 * @param imagePath - Path to the image file.
 * @returns The detected rectangles.
 */
export declare function detectRectangles(imagePath: string): Promise<Rectangle[]>;
64
/** Four corners of a detected document plus the detection confidence. */
export interface DocumentBounds {
  /** Top-left corner; both coordinates are in the range 0–1. */
  topLeft: [x: number, y: number];

  /** Top-right corner; both coordinates are in the range 0–1. */
  topRight: [x: number, y: number];

  /** Bottom-left corner; both coordinates are in the range 0–1. */
  bottomLeft: [x: number, y: number];

  /** Bottom-right corner; both coordinates are in the range 0–1. */
  bottomRight: [x: number, y: number];

  /** How confident the detection is, from 0 to 1. */
  confidence: number;
}

/**
 * Looks for a document boundary in an image.
 *
 * @param imagePath - Path to the image file.
 * @returns The detected boundary, or `null` when no document is found.
 */
export declare function detectDocument(imagePath: string): Promise<DocumentBounds | null>;
78
/** One classification label with its score. */
export interface Classification {
  /** Category identifier, for example `'document'`, `'outdoor'` or `'animal'`. */
  identifier: string;

  /** Score for this category, from 0 to 1. */
  confidence: number;
}

/**
 * Classifies the content of an image.
 *
 * @param imagePath - Path to the image file.
 * @returns The top classifications, ordered from highest to lowest confidence.
 */
export declare function classify(imagePath: string): Promise<Classification[]>;
package/dist/index.js ADDED
@@ -0,0 +1,51 @@
1
+ import { execFile } from 'child_process';
2
+ import { promisify } from 'util';
3
+ import { resolve, dirname } from 'path';
4
+ import { fileURLToPath } from 'url';
5
/** Promise-returning wrapper around `child_process.execFile`. */
const execFileAsync = promisify(execFile);

/** Directory containing this module, derived from `import.meta.url`. */
const __dirname = dirname(fileURLToPath(import.meta.url));

/** Path of the `vision-helper` executable: `bin/` one level above this module's directory. */
const BIN_PATH = resolve(__dirname, '..', 'bin', 'vision-helper');

/**
 * Invokes the helper binary with a single flag followed by an image path.
 *
 * @param {string} flag - Flag passed as the first argument.
 * @param {string} imagePath - Image path; made absolute before it is passed on.
 * @returns {Promise<string>} The helper's stdout, exactly as captured.
 */
async function run(flag, imagePath) {
  const args = [flag, resolve(imagePath)];
  const result = await execFileAsync(BIN_PATH, args);
  return result.stdout;
}
12
/**
 * Extracts text from an image by invoking the helper binary.
 *
 * With `format: 'blocks'` the helper is called with `--json` and its output is
 * parsed and mapped from the short keys (`t`, `w`, `h`) to `text`, `width`
 * and `height`. For any other `format` the helper is called with the path
 * alone and its trimmed stdout is returned.
 *
 * @param {string} imagePath - Image path; made absolute before it is passed on.
 * @param {{ format?: 'text' | 'blocks' }} [options] - Output format selection.
 * @returns {Promise<string | Array<{text: string, x: number, y: number, width: number, height: number}>>}
 */
export async function ocr(imagePath, options = {}) {
  const target = resolve(imagePath);

  // Anything other than 'blocks' (including an omitted format) means plain text.
  if (options.format !== 'blocks') {
    const plain = await execFileAsync(BIN_PATH, [target]);
    return plain.stdout.trim();
  }

  const structured = await execFileAsync(BIN_PATH, ['--json', target]);
  const toBlock = ({ t, x, y, w, h }) => ({ text: t, x, y, width: w, height: h });
  return JSON.parse(structured.stdout).map(toBlock);
}
23
/**
 * Runs the helper with `--faces` and reshapes each parsed entry, renaming the
 * short `w`/`h` keys to `width`/`height`.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<{x: number, y: number, width: number, height: number, confidence: number}>>}
 */
export async function detectFaces(imagePath) {
  const output = await run('--faces', imagePath);
  const toFace = ({ x, y, w, h, confidence }) => ({ x, y, width: w, height: h, confidence });
  return JSON.parse(output).map(toFace);
}
27
/**
 * Runs the helper with `--barcodes` and reshapes each parsed entry, renaming
 * the short `w`/`h` keys to `width`/`height`.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<{type: string, value: string, x: number, y: number, width: number, height: number}>>}
 */
export async function detectBarcodes(imagePath) {
  const output = await run('--barcodes', imagePath);
  const toBarcode = ({ type, value, x, y, w, h }) => ({ type, value, x, y, width: w, height: h });
  return JSON.parse(output).map(toBarcode);
}
38
/**
 * Runs the helper with `--rectangles` and returns its parsed JSON output
 * without further reshaping.
 *
 * @param {string} imagePath - Path to the image file.
 */
export async function detectRectangles(imagePath) {
  const output = await run('--rectangles', imagePath);
  return JSON.parse(output);
}
42
/**
 * Runs the helper with `--document` and returns the detected document
 * boundary, or `null` if no document was found.
 *
 * The first entry of the parsed output is returned when the output has a
 * non-zero length.
 *
 * @param {string} imagePath - Path to the image file.
 */
export async function detectDocument(imagePath) {
  const candidates = JSON.parse(await run('--document', imagePath));
  if (candidates.length > 0) {
    return candidates[0];
  }
  return null;
}
47
/**
 * Runs the helper with `--classify` and returns the top image classifications,
 * sorted by confidence (highest first).
 *
 * The parsed JSON output is returned without further reshaping.
 *
 * @param {string} imagePath - Path to the image file.
 */
export async function classify(imagePath) {
  const output = await run('--classify', imagePath);
  return JSON.parse(output);
}
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "macos-vision",
3
+ "version": "0.1.0",
4
+ "description": "Apple Vision OCR & image analysis for Node.js — native, fast, offline, no API keys",
5
+ "author": "Adrian Wolczuk",
6
+ "license": "MIT","type": "module",
7
+ "main": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/woladi/macos-vision.git"
12
+ },
13
+ "scripts": {
14
+ "build-native": "node scripts/build-native.js",
15
+ "postinstall": "node scripts/build-native.js",
16
+ "build": "tsc",
17
+ "prepublishOnly": "npm run build",
18
+ "test": "vitest run"
19
+ },
20
+ "keywords": [
21
+ "ocr",
22
+ "macos",
23
+ "apple-vision",
24
+ "image",
25
+ "text-extraction",
26
+ "vision",
27
+ "native",
28
+ "offline",
29
+ "face-detection",
30
+ "barcode",
31
+ "qr-code",
32
+ "document-detection",
33
+ "image-classification"
34
+ ],
35
+ "os": [
36
+ "darwin"
37
+ ],
38
+ "engines": {
39
+ "node": ">=18.0.0"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.0.0",
43
+ "typescript": "^5.4.0",
44
+ "vitest": "^2.1.9"
45
+ }
46
+ }
@@ -0,0 +1,28 @@
1
// Compiles the Swift helper source into `bin/vision-helper`.
// Exits 0 without doing anything when the binary already exists, and exits 1
// with installation hints when compilation fails.
import { execFileSync } from 'child_process';
import { mkdirSync, existsSync } from 'fs';
import { fileURLToPath } from 'url';
import path from 'path';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');
const binDir = path.join(root, 'bin');
const binPath = path.join(binDir, 'vision-helper');
const swiftSrc = path.join(root, 'src', 'native', 'vision-helper.swift');

// A binary that is already present is reused as-is.
if (existsSync(binPath)) {
  process.exit(0);
}

// With `recursive: true` this succeeds whether or not the directory exists,
// so the return value does not need to be inspected.
mkdirSync(binDir, { recursive: true });

try {
  // Arguments go in as an array, so no shell ever parses the paths; quotes,
  // `$` or backticks in the install location cannot alter the command.
  execFileSync('swiftc', ['-O', swiftSrc, '-o', binPath], { stdio: 'inherit' });
  console.log('✅ macos-vision: native binary compiled successfully');
} catch (err) {
  console.error('❌ macos-vision: Swift compilation failed.');
  // Without a shell there is no "command not found" line on stderr, so report
  // a missing compiler explicitly.
  if (err?.code === 'ENOENT') {
    console.error('   swiftc was not found on PATH.');
  }
  console.error('   Make sure Xcode Command Line Tools are installed:');
  console.error('   xcode-select --install');
  process.exit(1);
}