macos-vision 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +13 -0
- package/bin/vision-helper +0 -0
- package/dist/cli.js +23 -15
- package/dist/index.d.ts +4 -0
- package/dist/index.js +17 -4
- package/package.json +1 -1
- package/scripts/build-native.js +1 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.0](https://github.com/woladi/macos-vision/compare/v0.1.4...v0.2.0) (2026-04-08)
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
* add confidence to VisionBlock and Barcode ([a87df27](https://github.com/woladi/macos-vision/commit/a87df275e51dec4b57fbff6e3bffc4220b96b4d7))
|
|
8
|
+
|
|
9
|
+
### Bug Fixes
|
|
10
|
+
|
|
11
|
+
* correct mkdirSync, CLI error on missing file, execFile timeout, README scope ([1cef2c7](https://github.com/woladi/macos-vision/commit/1cef2c7078430c9182fcd39792cf0c002833203f))
|
|
12
|
+
* replace try? with do/catch in Swift helper — surface Vision errors properly ([f287065](https://github.com/woladi/macos-vision/commit/f2870655225806070be3db462ea15923201fecbf))
|
|
13
|
+
|
|
3
14
|
## 0.1.4 (2026-04-08)
|
package/README.md
CHANGED
|
@@ -22,6 +22,19 @@ npm install macos-vision
|
|
|
22
22
|
|
|
23
23
|
The native Swift binary is compiled automatically on install.
|
|
24
24
|
|
|
25
|
+
## What this is (and isn't)
|
|
26
|
+
|
|
27
|
+
`macos-vision` gives you **raw Apple Vision results** — text, coordinates, bounding boxes, labels.
|
|
28
|
+
|
|
29
|
+
It is **not** a document pipeline. It does not:
|
|
30
|
+
- Convert PDFs or images to Markdown
|
|
31
|
+
- Understand document structure (headings, tables, paragraphs)
|
|
32
|
+
- Chain multiple detections into a final report
|
|
33
|
+
|
|
34
|
+
For those use cases, use the raw output as input to an LLM or a post-processing layer of your own.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
25
38
|
## CLI
|
|
26
39
|
|
|
27
40
|
```bash
|
package/bin/vision-helper
CHANGED
|
Binary file
|
package/dist/cli.js
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { resolve, dirname } from 'path';
|
|
3
|
-
import { fileURLToPath } from 'url';
|
|
2
|
+
import { resolve } from 'path';
|
|
4
3
|
import { ocr, detectFaces, detectBarcodes, detectRectangles, detectDocument, classify, } from './index.js';
|
|
5
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
4
|
const USAGE = `
|
|
7
5
|
Usage: vision-cli [options] <image>
|
|
8
6
|
|
|
@@ -28,9 +26,14 @@ if (rawArgs.includes('--help') || rawArgs.length === 0) {
|
|
|
28
26
|
console.log(USAGE);
|
|
29
27
|
process.exit(0);
|
|
30
28
|
}
|
|
31
|
-
const flags = new Set(rawArgs.filter(a => a.startsWith('--')));
|
|
32
|
-
const fileArgs = rawArgs.filter(a => !a.startsWith('--'));
|
|
33
|
-
|
|
29
|
+
const flags = new Set(rawArgs.filter((a) => a.startsWith('--')));
|
|
30
|
+
const fileArgs = rawArgs.filter((a) => !a.startsWith('--'));
|
|
31
|
+
if (!fileArgs[0]) {
|
|
32
|
+
console.error('Error: no image path provided.\n');
|
|
33
|
+
console.log(USAGE);
|
|
34
|
+
process.exit(1);
|
|
35
|
+
}
|
|
36
|
+
const imagePath = resolve(fileArgs[0]);
|
|
34
37
|
const runAll = flags.has('--all');
|
|
35
38
|
const runOcr = runAll || flags.has('--ocr');
|
|
36
39
|
const runBlocks = runAll || flags.has('--blocks');
|
|
@@ -40,9 +43,14 @@ const runRects = runAll || flags.has('--rectangles');
|
|
|
40
43
|
const runDoc = runAll || flags.has('--document');
|
|
41
44
|
const runClassify = runAll || flags.has('--classify');
|
|
42
45
|
// Default: OCR text when no feature flag is given
|
|
43
|
-
const anyFeatureFlag = runAll ||
|
|
44
|
-
flags.has('--
|
|
45
|
-
flags.has('--
|
|
46
|
+
const anyFeatureFlag = runAll ||
|
|
47
|
+
flags.has('--ocr') ||
|
|
48
|
+
flags.has('--blocks') ||
|
|
49
|
+
flags.has('--faces') ||
|
|
50
|
+
flags.has('--barcodes') ||
|
|
51
|
+
flags.has('--rectangles') ||
|
|
52
|
+
flags.has('--document') ||
|
|
53
|
+
flags.has('--classify');
|
|
46
54
|
const useDefault = !anyFeatureFlag;
|
|
47
55
|
async function main() {
|
|
48
56
|
try {
|
|
@@ -51,27 +59,27 @@ async function main() {
|
|
|
51
59
|
console.log(text);
|
|
52
60
|
}
|
|
53
61
|
if (runBlocks) {
|
|
54
|
-
const blocks = await ocr(imagePath, { format: 'blocks' });
|
|
62
|
+
const blocks = (await ocr(imagePath, { format: 'blocks' }));
|
|
55
63
|
console.log(JSON.stringify(blocks, null, 2));
|
|
56
64
|
}
|
|
57
65
|
if (runFaces) {
|
|
58
|
-
const faces = await detectFaces(imagePath);
|
|
66
|
+
const faces = (await detectFaces(imagePath));
|
|
59
67
|
console.log(JSON.stringify(faces, null, 2));
|
|
60
68
|
}
|
|
61
69
|
if (runBarcodes) {
|
|
62
|
-
const barcodes = await detectBarcodes(imagePath);
|
|
70
|
+
const barcodes = (await detectBarcodes(imagePath));
|
|
63
71
|
console.log(JSON.stringify(barcodes, null, 2));
|
|
64
72
|
}
|
|
65
73
|
if (runRects) {
|
|
66
|
-
const rectangles = await detectRectangles(imagePath);
|
|
74
|
+
const rectangles = (await detectRectangles(imagePath));
|
|
67
75
|
console.log(JSON.stringify(rectangles, null, 2));
|
|
68
76
|
}
|
|
69
77
|
if (runDoc) {
|
|
70
|
-
const doc = await detectDocument(imagePath);
|
|
78
|
+
const doc = (await detectDocument(imagePath));
|
|
71
79
|
console.log(JSON.stringify(doc, null, 2));
|
|
72
80
|
}
|
|
73
81
|
if (runClassify) {
|
|
74
|
-
const labels = await classify(imagePath);
|
|
82
|
+
const labels = (await classify(imagePath));
|
|
75
83
|
console.log(JSON.stringify(labels, null, 2));
|
|
76
84
|
}
|
|
77
85
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -9,6 +9,8 @@ export interface VisionBlock {
|
|
|
9
9
|
width: number;
|
|
10
10
|
/** Height, 0–1 relative to image */
|
|
11
11
|
height: number;
|
|
12
|
+
/** OCR transcription confidence, 0–1 */
|
|
13
|
+
confidence: number;
|
|
12
14
|
}
|
|
13
15
|
export interface OcrOptions {
|
|
14
16
|
/** Return plain text (default) or structured blocks with coordinates */
|
|
@@ -46,6 +48,8 @@ export interface Barcode {
|
|
|
46
48
|
width: number;
|
|
47
49
|
/** Height, 0–1 relative to image */
|
|
48
50
|
height: number;
|
|
51
|
+
/** Detection confidence, 0–1 */
|
|
52
|
+
confidence: number;
|
|
49
53
|
}
|
|
50
54
|
export declare function detectBarcodes(imagePath: string): Promise<Barcode[]>;
|
|
51
55
|
export interface Rectangle {
|
package/dist/index.js
CHANGED
|
@@ -5,19 +5,31 @@ import { fileURLToPath } from 'url';
|
|
|
5
5
|
const execFileAsync = promisify(execFile);
|
|
6
6
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
const BIN_PATH = resolve(__dirname, '../bin/vision-helper');
|
|
8
|
+
const BINARY_TIMEOUT_MS = 30_000;
|
|
8
9
|
async function run(flag, imagePath) {
|
|
9
|
-
const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)]);
|
|
10
|
+
const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)], {
|
|
11
|
+
timeout: BINARY_TIMEOUT_MS,
|
|
12
|
+
});
|
|
10
13
|
return stdout;
|
|
11
14
|
}
|
|
12
15
|
export async function ocr(imagePath, options = {}) {
|
|
13
16
|
const absPath = resolve(imagePath);
|
|
14
17
|
const { format = 'text' } = options;
|
|
15
18
|
if (format === 'blocks') {
|
|
16
|
-
const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath]);
|
|
19
|
+
const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath], {
|
|
20
|
+
timeout: BINARY_TIMEOUT_MS,
|
|
21
|
+
});
|
|
17
22
|
const raw = JSON.parse(stdout);
|
|
18
|
-
return raw.map((b) => ({ text: b.t, x: b.x, y: b.y, width: b.w, height: b.h }));
|
|
23
|
+
return raw.map((b) => ({
|
|
24
|
+
text: b.t,
|
|
25
|
+
x: b.x,
|
|
26
|
+
y: b.y,
|
|
27
|
+
width: b.w,
|
|
28
|
+
height: b.h,
|
|
29
|
+
confidence: b.confidence,
|
|
30
|
+
}));
|
|
19
31
|
}
|
|
20
|
-
const { stdout } = await execFileAsync(BIN_PATH, [absPath]);
|
|
32
|
+
const { stdout } = await execFileAsync(BIN_PATH, [absPath], { timeout: BINARY_TIMEOUT_MS });
|
|
21
33
|
return stdout.trim();
|
|
22
34
|
}
|
|
23
35
|
export async function detectFaces(imagePath) {
|
|
@@ -33,6 +45,7 @@ export async function detectBarcodes(imagePath) {
|
|
|
33
45
|
y: b.y,
|
|
34
46
|
width: b.w,
|
|
35
47
|
height: b.h,
|
|
48
|
+
confidence: b.confidence,
|
|
36
49
|
}));
|
|
37
50
|
}
|
|
38
51
|
export async function detectRectangles(imagePath) {
|
package/package.json
CHANGED
package/scripts/build-native.js
CHANGED
|
@@ -13,9 +13,7 @@ if (existsSync(binPath)) {
|
|
|
13
13
|
process.exit(0);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
// dir created
|
|
18
|
-
}
|
|
16
|
+
mkdirSync(binDir, { recursive: true });
|
|
19
17
|
|
|
20
18
|
try {
|
|
21
19
|
execSync(`swiftc -O "${swiftSrc}" -o "${binPath}"`, { stdio: 'inherit' });
|