macos-vision 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +164 -0
- package/bin/vision-helper +0 -0
- package/dist/index.d.ts +85 -0
- package/dist/index.js +51 -0
- package/package.json +46 -0
- package/scripts/build-native.js +28 -0
package/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# macos-vision
|
|
2
|
+
|
|
3
|
+
> Apple Vision for Node.js — native, fast, offline, no API keys required.
|
|
4
|
+
|
|
5
|
+
Uses macOS's built-in [Vision framework](https://developer.apple.com/documentation/vision) via a compiled Swift binary. Works completely offline. No cloud services, no API keys, no Python, zero runtime dependencies.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- macOS 12+
|
|
10
|
+
- Node.js 18+
|
|
11
|
+
- Xcode Command Line Tools
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
xcode-select --install
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
npm install macos-vision
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
The install step uses the bundled `bin/vision-helper` binary if it is already present; otherwise the native Swift binary is compiled automatically on install.
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```js
|
|
28
|
+
import { ocr, detectFaces, detectBarcodes, detectRectangles, detectDocument, classify } from 'macos-vision'
|
|
29
|
+
|
|
30
|
+
// OCR — plain text
|
|
31
|
+
const text = await ocr('photo.jpg')
|
|
32
|
+
|
|
33
|
+
// OCR — structured blocks with bounding boxes
|
|
34
|
+
const blocks = await ocr('photo.jpg', { format: 'blocks' })
|
|
35
|
+
|
|
36
|
+
// Detect faces
|
|
37
|
+
const faces = await detectFaces('photo.jpg')
|
|
38
|
+
|
|
39
|
+
// Detect barcodes and QR codes
|
|
40
|
+
const codes = await detectBarcodes('invoice.jpg')
|
|
41
|
+
|
|
42
|
+
// Detect rectangular shapes (tables, forms, cards)
|
|
43
|
+
const rects = await detectRectangles('document.jpg')
|
|
44
|
+
|
|
45
|
+
// Find document boundary in a photo
|
|
46
|
+
const doc = await detectDocument('photo.jpg') // DocumentBounds | null
|
|
47
|
+
|
|
48
|
+
// Classify image content
|
|
49
|
+
const labels = await classify('photo.jpg')
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## API
|
|
53
|
+
|
|
54
|
+
### `ocr(imagePath, options?)`
|
|
55
|
+
|
|
56
|
+
Extracts text from an image.
|
|
57
|
+
|
|
58
|
+
| Parameter | Type | Default | Description |
|
|
59
|
+
|-----------|------|---------|-------------|
|
|
60
|
+
| `imagePath` | `string` | — | Path to image (PNG, JPG, JPEG, WEBP) |
|
|
61
|
+
| `options.format` | `'text' \| 'blocks'` | `'text'` | Plain text or structured blocks with coordinates |
|
|
62
|
+
|
|
63
|
+
Returns `Promise<string>` or `Promise<VisionBlock[]>`.
|
|
64
|
+
|
|
65
|
+
```ts
|
|
66
|
+
interface VisionBlock {
|
|
67
|
+
text: string
|
|
68
|
+
x: number // 0–1 from left
|
|
69
|
+
y: number // 0–1 from top
|
|
70
|
+
width: number // 0–1
|
|
71
|
+
height: number // 0–1
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
### `detectFaces(imagePath)`
|
|
78
|
+
|
|
79
|
+
Detects human faces and returns their bounding boxes.
|
|
80
|
+
|
|
81
|
+
```ts
|
|
82
|
+
interface Face {
|
|
83
|
+
x: number; y: number; width: number; height: number
|
|
84
|
+
confidence: number // 0–1
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
### `detectBarcodes(imagePath)`
|
|
91
|
+
|
|
92
|
+
Detects barcodes and QR codes and decodes their payload.
|
|
93
|
+
|
|
94
|
+
```ts
|
|
95
|
+
interface Barcode {
|
|
96
|
+
type: string // e.g. 'org.iso.QRCode', 'org.gs1.EAN-13'
|
|
97
|
+
value: string // decoded content
|
|
98
|
+
x: number; y: number; width: number; height: number
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
### `detectRectangles(imagePath)`
|
|
105
|
+
|
|
106
|
+
Finds rectangular shapes (documents, tables, cards, forms).
|
|
107
|
+
|
|
108
|
+
```ts
|
|
109
|
+
interface Rectangle {
|
|
110
|
+
topLeft: [number, number]; topRight: [number, number]
|
|
111
|
+
bottomLeft: [number, number]; bottomRight: [number, number]
|
|
112
|
+
confidence: number
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
### `detectDocument(imagePath)`
|
|
119
|
+
|
|
120
|
+
Finds the boundary of a document in a photo (e.g. paper on a desk). Returns `null` if no document is found.
|
|
121
|
+
|
|
122
|
+
```ts
|
|
123
|
+
interface DocumentBounds {
|
|
124
|
+
topLeft: [number, number]; topRight: [number, number]
|
|
125
|
+
bottomLeft: [number, number]; bottomRight: [number, number]
|
|
126
|
+
confidence: number
|
|
127
|
+
}
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
### `classify(imagePath)`
|
|
133
|
+
|
|
134
|
+
Returns top image classification labels with confidence scores.
|
|
135
|
+
|
|
136
|
+
```ts
|
|
137
|
+
interface Classification {
|
|
138
|
+
identifier: string // e.g. 'document', 'outdoor', 'animal'
|
|
139
|
+
confidence: number // 0–1
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Why macos-vision?
|
|
146
|
+
|
|
147
|
+
| | macos-vision | Tesseract.js | Cloud APIs |
|
|
148
|
+
|---|---|---|---|
|
|
149
|
+
| Offline | ✅ | ✅ | ❌ |
|
|
150
|
+
| No API key | ✅ | ✅ | ❌ |
|
|
151
|
+
| Native speed | ✅ | ❌ | — |
|
|
152
|
+
| Zero runtime deps | ✅ | ❌ | ❌ |
|
|
153
|
+
| OCR with bounding boxes | ✅ | ✅ | ✅ |
|
|
154
|
+
| Face detection | ✅ | ❌ | ✅ |
|
|
155
|
+
| Barcode / QR | ✅ | ❌ | ✅ |
|
|
156
|
+
| Document detection | ✅ | ❌ | ✅ |
|
|
157
|
+
| Image classification | ✅ | ❌ | ✅ |
|
|
158
|
+
| macOS only | ✅ | ❌ | ❌ |
|
|
159
|
+
|
|
160
|
+
Apple Vision is the same engine used by macOS Spotlight, Live Text, and Shortcuts — highly optimized and accurate.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
Binary file
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
 * One piece of recognized text together with its bounding box.
 * All geometry is normalized to the 0–1 range relative to the image.
 */
export interface VisionBlock {
    /** The text that was recognized. */
    text: string;
    /** Left edge of the box, measured 0–1 from the left of the image. */
    x: number;
    /** Top edge of the box, measured 0–1 from the top of the image. */
    y: number;
    /** Box width as a 0–1 fraction of the image. */
    width: number;
    /** Box height as a 0–1 fraction of the image. */
    height: number;
}
|
|
13
|
+
/** Options accepted by `ocr`. */
export interface OcrOptions {
    /**
     * Output shape: `'text'` for plain text (the default) or `'blocks'`
     * for structured blocks that carry coordinates.
     */
    format?: 'text' | 'blocks';
}
|
|
17
|
+
/**
 * Extracts text from an image and resolves to it as a plain string.
 * This is the behavior when `options` is omitted or `format` is `'text'`.
 *
 * @param imagePath Path to the image file.
 * @param options Optional settings; `format` defaults to `'text'`.
 */
export declare function ocr(imagePath: string, options?: {
    format?: 'text';
}): Promise<string>;
/**
 * Extracts text from an image and resolves to structured blocks with
 * bounding boxes.
 *
 * @param imagePath Path to the image file.
 * @param options Must set `format` to `'blocks'`.
 */
export declare function ocr(imagePath: string, options: {
    format: 'blocks';
}): Promise<VisionBlock[]>;
/**
 * General form for callers whose `format` is only known at runtime
 * (for example an `OcrOptions` value built elsewhere). The result is a
 * string for `'text'` and an array of blocks for `'blocks'`.
 *
 * @param imagePath Path to the image file.
 * @param options Optional settings.
 */
export declare function ocr(imagePath: string, options?: OcrOptions): Promise<string | VisionBlock[]>;
|
|
23
|
+
/**
 * Bounding box and confidence for one detected face.
 * All geometry is normalized to the 0–1 range relative to the image.
 */
export interface Face {
    /** Left edge of the box, measured 0–1 from the left of the image. */
    x: number;
    /** Top edge of the box, measured 0–1 from the top of the image. */
    y: number;
    /** Box width as a 0–1 fraction of the image. */
    width: number;
    /** Box height as a 0–1 fraction of the image. */
    height: number;
    /** How confident the detection is, 0–1. */
    confidence: number;
}
|
|
35
|
+
/**
 * Detects human faces in an image.
 *
 * @param imagePath Path to the image file.
 * @returns One `Face` (bounding box plus confidence) per detection.
 */
export declare function detectFaces(imagePath: string): Promise<Face[]>;
|
|
36
|
+
/**
 * One decoded barcode or QR code and where it was found.
 * All geometry is normalized to the 0–1 range relative to the image.
 */
export interface Barcode {
    /** Symbology identifier, e.g. 'org.iso.QRCode', 'org.gs1.EAN-13', 'org.iso.Code128'. */
    type: string;
    /** The decoded payload. */
    value: string;
    /** Left edge of the box, measured 0–1 from the left of the image. */
    x: number;
    /** Top edge of the box, measured 0–1 from the top of the image. */
    y: number;
    /** Box width as a 0–1 fraction of the image. */
    width: number;
    /** Box height as a 0–1 fraction of the image. */
    height: number;
}
|
|
50
|
+
/**
 * Detects barcodes and QR codes in an image and decodes their payload.
 *
 * @param imagePath Path to the image file.
 * @returns One `Barcode` per detected code.
 */
export declare function detectBarcodes(imagePath: string): Promise<Barcode[]>;
|
|
51
|
+
/**
 * A detected rectangular shape described by its four corners.
 * Each corner is an `[x, y]` pair with both values in the 0–1 range.
 */
export interface Rectangle {
    /** `[x, y]` of the top-left corner, each 0–1. */
    topLeft: [number, number];
    /** `[x, y]` of the top-right corner, each 0–1. */
    topRight: [number, number];
    /** `[x, y]` of the bottom-left corner, each 0–1. */
    bottomLeft: [number, number];
    /** `[x, y]` of the bottom-right corner, each 0–1. */
    bottomRight: [number, number];
    /** How confident the detection is, 0–1. */
    confidence: number;
}
|
|
63
|
+
/**
 * Finds rectangular shapes (documents, tables, cards, forms) in an image.
 *
 * @param imagePath Path to the image file.
 * @returns One `Rectangle` per detected shape.
 */
export declare function detectRectangles(imagePath: string): Promise<Rectangle[]>;
|
|
64
|
+
/**
 * The outline of a document found in a photo, described by its four corners.
 * Each corner is an `[x, y]` pair with both values in the 0–1 range.
 */
export interface DocumentBounds {
    /** `[x, y]` of the top-left corner, each 0–1. */
    topLeft: [number, number];
    /** `[x, y]` of the top-right corner, each 0–1. */
    topRight: [number, number];
    /** `[x, y]` of the bottom-left corner, each 0–1. */
    bottomLeft: [number, number];
    /** `[x, y]` of the bottom-right corner, each 0–1. */
    bottomRight: [number, number];
    /** How confident the detection is, 0–1. */
    confidence: number;
}
|
|
76
|
+
/**
 * Looks for the boundary of a document in an image.
 *
 * @param imagePath Path to the image file.
 * @returns The detected document boundary, or `null` when no document is found.
 */
export declare function detectDocument(imagePath: string): Promise<DocumentBounds | null>;
|
|
78
|
+
/** A single image-classification label with its score. */
export interface Classification {
    /** Label for the category, e.g. 'document', 'outdoor', 'animal'. */
    identifier: string;
    /** Score for this label, 0–1. */
    confidence: number;
}
|
|
84
|
+
/**
 * Classifies the content of an image.
 *
 * @param imagePath Path to the image file.
 * @returns The top classifications, sorted by confidence with the highest first.
 */
export declare function classify(imagePath: string): Promise<Classification[]>;
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { execFile } from 'child_process';
|
|
2
|
+
import { promisify } from 'util';
|
|
3
|
+
import { resolve, dirname } from 'path';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
const execFileAsync = promisify(execFile);
|
|
6
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
const BIN_PATH = resolve(__dirname, '../bin/vision-helper');
|
|
8
|
+
/**
 * Invokes the vision-helper binary in one mode and hands back its raw stdout.
 *
 * @param {string} flag - Mode switch passed as the helper's first argument (e.g. '--faces').
 * @param {string} imagePath - Image location; resolved to an absolute path before the call.
 * @returns {Promise<string>} The helper's stdout, unmodified.
 */
async function run(flag, imagePath) {
  const helperArgs = [flag, resolve(imagePath)];
  const result = await execFileAsync(BIN_PATH, helperArgs);
  return result.stdout;
}
|
|
12
|
+
/**
 * Runs text recognition on an image through the vision-helper binary.
 *
 * With `format: 'blocks'` the helper is called with `--json` and its output is
 * parsed and mapped from the helper's short keys (`t`, `w`, `h`) to
 * `{ text, x, y, width, height }`. Any other format value yields the helper's
 * plain-text output with surrounding whitespace trimmed.
 *
 * @param {string} imagePath - Path to the image; resolved to an absolute path.
 * @param {{ format?: 'text' | 'blocks' } | null} [options] - Output settings; `format` defaults to 'text'.
 * @returns {Promise<string | Array<{ text: string, x: number, y: number, width: number, height: number }>>}
 *   Trimmed text, or an array of blocks when `format` is 'blocks'.
 */
export async function ocr(imagePath, options = {}) {
  // The `= {}` default only applies to `undefined`; fall back explicitly so a
  // plain-JS caller passing `null` does not crash on destructuring.
  const { format = 'text' } = options ?? {};
  const absPath = resolve(imagePath);

  if (format === 'blocks') {
    const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath]);
    return JSON.parse(stdout).map((b) => ({
      text: b.t,
      x: b.x,
      y: b.y,
      width: b.w,
      height: b.h,
    }));
  }

  const { stdout } = await execFileAsync(BIN_PATH, [absPath]);
  return stdout.trim();
}
|
|
23
|
+
/**
 * Detects faces by running the helper in `--faces` mode.
 *
 * The helper's JSON output is parsed and each entry's short `w`/`h` keys are
 * renamed to `width`/`height`.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<{ x: number, y: number, width: number, height: number, confidence: number }>>}
 */
export async function detectFaces(imagePath) {
  const output = await run('--faces', imagePath);
  const entries = JSON.parse(output);
  return entries.map((entry) => {
    const face = {
      x: entry.x,
      y: entry.y,
      width: entry.w,
      height: entry.h,
      confidence: entry.confidence,
    };
    return face;
  });
}
|
|
27
|
+
/**
 * Detects barcodes and QR codes by running the helper in `--barcodes` mode.
 *
 * The helper's JSON output is parsed and each entry's short `w`/`h` keys are
 * renamed to `width`/`height`; `type` and `value` are passed through.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<{ type: string, value: string, x: number, y: number, width: number, height: number }>>}
 */
export async function detectBarcodes(imagePath) {
  const output = await run('--barcodes', imagePath);
  const entries = JSON.parse(output);
  return entries.map((entry) => {
    const barcode = {
      type: entry.type,
      value: entry.value,
      x: entry.x,
      y: entry.y,
      width: entry.w,
      height: entry.h,
    };
    return barcode;
  });
}
|
|
38
|
+
/**
 * Finds rectangular shapes by running the helper in `--rectangles` mode.
 *
 * The helper's JSON output is parsed and returned without any remapping.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<object>>} The parsed helper output.
 */
export async function detectRectangles(imagePath) {
  const output = await run('--rectangles', imagePath);
  return JSON.parse(output);
}
|
|
42
|
+
/**
 * Looks for a document boundary by running the helper in `--document` mode.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<object | null>} The first entry of the parsed helper
 *   output, or `null` when the output is empty (no document found).
 */
export async function detectDocument(imagePath) {
  const output = await run('--document', imagePath);
  const candidates = JSON.parse(output);
  if (candidates.length > 0) {
    return candidates[0];
  }
  return null;
}
|
|
47
|
+
/**
 * Classifies image content by running the helper in `--classify` mode.
 *
 * The parsed helper output is returned as-is; the ordering (documented as
 * highest confidence first) comes from the helper, not from this function.
 *
 * @param {string} imagePath - Path to the image file.
 * @returns {Promise<Array<object>>} The parsed classification list.
 */
export async function classify(imagePath) {
  const output = await run('--classify', imagePath);
  return JSON.parse(output);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "macos-vision",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Apple Vision OCR & image analysis for Node.js — native, fast, offline, no API keys",
|
|
5
|
+
"author": "Adrian Wolczuk",
|
|
6
|
+
"license": "MIT","type": "module",
|
|
7
|
+
"main": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+https://github.com/woladi/macos-vision.git"
|
|
12
|
+
},
|
|
13
|
+
"scripts": {
|
|
14
|
+
"build-native": "node scripts/build-native.js",
|
|
15
|
+
"postinstall": "node scripts/build-native.js",
|
|
16
|
+
"build": "tsc",
|
|
17
|
+
"prepublishOnly": "npm run build",
|
|
18
|
+
"test": "vitest run"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"ocr",
|
|
22
|
+
"macos",
|
|
23
|
+
"apple-vision",
|
|
24
|
+
"image",
|
|
25
|
+
"text-extraction",
|
|
26
|
+
"vision",
|
|
27
|
+
"native",
|
|
28
|
+
"offline",
|
|
29
|
+
"face-detection",
|
|
30
|
+
"barcode",
|
|
31
|
+
"qr-code",
|
|
32
|
+
"document-detection",
|
|
33
|
+
"image-classification"
|
|
34
|
+
],
|
|
35
|
+
"os": [
|
|
36
|
+
"darwin"
|
|
37
|
+
],
|
|
38
|
+
"engines": {
|
|
39
|
+
"node": ">=18.0.0"
|
|
40
|
+
},
|
|
41
|
+
"devDependencies": {
|
|
42
|
+
"@types/node": "^20.0.0",
|
|
43
|
+
"typescript": "^5.4.0",
|
|
44
|
+
"vitest": "^2.1.9"
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// Postinstall build step: compiles the Swift vision helper into bin/vision-helper
// unless a binary is already present.
import { execFileSync } from 'child_process';
import { mkdirSync, existsSync } from 'fs';
import { fileURLToPath } from 'url';
import path from 'path';

// All paths are derived from this script's own location, so the build does not
// depend on the current working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');
const binDir = path.join(root, 'bin');
const binPath = path.join(binDir, 'vision-helper');
const swiftSrc = path.join(root, 'src', 'native', 'vision-helper.swift');

// A binary already exists, so there is nothing to build.
if (existsSync(binPath)) {
  process.exit(0);
}

// `recursive: true` makes this a no-op when the directory already exists.
mkdirSync(binDir, { recursive: true });

try {
  // Arguments go to swiftc as an array with no shell involved, so install paths
  // containing quotes, `$` or backticks cannot break or alter the command.
  execFileSync('swiftc', ['-O', swiftSrc, '-o', binPath], { stdio: 'inherit' });
  console.log('✅ macos-vision: native binary compiled successfully');
} catch {
  // Compiler diagnostics were already streamed through the inherited stdio.
  console.error('❌ macos-vision: Swift compilation failed.');
  console.error(' Make sure Xcode Command Line Tools are installed:');
  console.error(' xcode-select --install');
  process.exit(1);
}
|