macos-vision 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ #!/bin/sh
2
+ npx --no -- commitlint --edit $1
@@ -0,0 +1,3 @@
1
+ #!/bin/sh
2
+ npx lint-staged
3
+ npx tsc --noEmit
@@ -0,0 +1,4 @@
1
+ dist/
2
+ node_modules/
3
+ bin/
4
+ *.md
@@ -0,0 +1,7 @@
1
+ {
2
+ "semi": true,
3
+ "singleQuote": true,
4
+ "trailingComma": "es5",
5
+ "printWidth": 100,
6
+ "tabWidth": 2
7
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "$schema": "https://unpkg.com/release-it/schema/release-it.json",
3
+ "plugins": {
4
+ "@release-it/conventional-changelog": {
5
+ "preset": "conventionalcommits",
6
+ "infile": "CHANGELOG.md"
7
+ }
8
+ },
9
+ "git": {
10
+ "commitMessage": "chore(release): v${version}",
11
+ "tagName": "v${version}"
12
+ },
13
+ "github": {
14
+ "release": false
15
+ },
16
+ "npm": {
17
+ "publish": true,
18
+ "tag": "latest"
19
+ }
20
+ }
package/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # Changelog
2
+
3
+ ## [0.2.0](https://github.com/woladi/macos-vision/compare/v0.1.4...v0.2.0) (2026-04-08)
4
+
5
+ ### Features
6
+
7
+ * add confidence to VisionBlock and Barcode ([a87df27](https://github.com/woladi/macos-vision/commit/a87df275e51dec4b57fbff6e3bffc4220b96b4d7))
8
+
9
+ ### Bug Fixes
10
+
11
+ * correct mkdirSync, CLI error on missing file, execFile timeout, README scope ([1cef2c7](https://github.com/woladi/macos-vision/commit/1cef2c7078430c9182fcd39792cf0c002833203f))
12
+ * replace try? with do/catch in Swift helper — surface Vision errors properly ([f287065](https://github.com/woladi/macos-vision/commit/f2870655225806070be3db462ea15923201fecbf))
13
+
14
+ ## 0.1.4 (2026-04-08)
package/README.md CHANGED
@@ -22,6 +22,19 @@ npm install macos-vision
22
22
 
23
23
  The native Swift binary is compiled automatically on install.
24
24
 
25
+ ## What this is (and isn't)
26
+
27
+ `macos-vision` gives you **raw Apple Vision results** — text, coordinates, bounding boxes, labels.
28
+
29
+ It is **not** a document pipeline. It does not:
30
+ - Convert PDFs or images to Markdown
31
+ - Understand document structure (headings, tables, paragraphs)
32
+ - Chain multiple detections into a final report
33
+
34
+ For those use cases, use the raw output as input to an LLM or a post-processing layer of your own.
35
+
36
+ ---
37
+
25
38
  ## CLI
26
39
 
27
40
  ```bash
package/bin/vision-helper CHANGED
Binary file
@@ -0,0 +1 @@
1
+ export default { extends: ['@commitlint/config-conventional'] };
package/dist/cli.js CHANGED
@@ -1,8 +1,6 @@
1
1
  #!/usr/bin/env node
2
- import { resolve, dirname } from 'path';
3
- import { fileURLToPath } from 'url';
2
+ import { resolve } from 'path';
4
3
  import { ocr, detectFaces, detectBarcodes, detectRectangles, detectDocument, classify, } from './index.js';
5
- const __dirname = dirname(fileURLToPath(import.meta.url));
6
4
  const USAGE = `
7
5
  Usage: vision-cli [options] <image>
8
6
 
@@ -28,9 +26,14 @@ if (rawArgs.includes('--help') || rawArgs.length === 0) {
28
26
  console.log(USAGE);
29
27
  process.exit(0);
30
28
  }
31
- const flags = new Set(rawArgs.filter(a => a.startsWith('--')));
32
- const fileArgs = rawArgs.filter(a => !a.startsWith('--'));
33
- const imagePath = fileArgs[0] || resolve(__dirname, '../test/fixtures/sample.png');
29
+ const flags = new Set(rawArgs.filter((a) => a.startsWith('--')));
30
+ const fileArgs = rawArgs.filter((a) => !a.startsWith('--'));
31
+ if (!fileArgs[0]) {
32
+ console.error('Error: no image path provided.\n');
33
+ console.log(USAGE);
34
+ process.exit(1);
35
+ }
36
+ const imagePath = resolve(fileArgs[0]);
34
37
  const runAll = flags.has('--all');
35
38
  const runOcr = runAll || flags.has('--ocr');
36
39
  const runBlocks = runAll || flags.has('--blocks');
@@ -40,9 +43,14 @@ const runRects = runAll || flags.has('--rectangles');
40
43
  const runDoc = runAll || flags.has('--document');
41
44
  const runClassify = runAll || flags.has('--classify');
42
45
  // Default: OCR text when no feature flag is given
43
- const anyFeatureFlag = runAll || flags.has('--ocr') || flags.has('--blocks') ||
44
- flags.has('--faces') || flags.has('--barcodes') || flags.has('--rectangles') ||
45
- flags.has('--document') || flags.has('--classify');
46
+ const anyFeatureFlag = runAll ||
47
+ flags.has('--ocr') ||
48
+ flags.has('--blocks') ||
49
+ flags.has('--faces') ||
50
+ flags.has('--barcodes') ||
51
+ flags.has('--rectangles') ||
52
+ flags.has('--document') ||
53
+ flags.has('--classify');
46
54
  const useDefault = !anyFeatureFlag;
47
55
  async function main() {
48
56
  try {
@@ -51,27 +59,27 @@ async function main() {
51
59
  console.log(text);
52
60
  }
53
61
  if (runBlocks) {
54
- const blocks = await ocr(imagePath, { format: 'blocks' });
62
+ const blocks = (await ocr(imagePath, { format: 'blocks' }));
55
63
  console.log(JSON.stringify(blocks, null, 2));
56
64
  }
57
65
  if (runFaces) {
58
- const faces = await detectFaces(imagePath);
66
+ const faces = (await detectFaces(imagePath));
59
67
  console.log(JSON.stringify(faces, null, 2));
60
68
  }
61
69
  if (runBarcodes) {
62
- const barcodes = await detectBarcodes(imagePath);
70
+ const barcodes = (await detectBarcodes(imagePath));
63
71
  console.log(JSON.stringify(barcodes, null, 2));
64
72
  }
65
73
  if (runRects) {
66
- const rectangles = await detectRectangles(imagePath);
74
+ const rectangles = (await detectRectangles(imagePath));
67
75
  console.log(JSON.stringify(rectangles, null, 2));
68
76
  }
69
77
  if (runDoc) {
70
- const doc = await detectDocument(imagePath);
78
+ const doc = (await detectDocument(imagePath));
71
79
  console.log(JSON.stringify(doc, null, 2));
72
80
  }
73
81
  if (runClassify) {
74
- const labels = await classify(imagePath);
82
+ const labels = (await classify(imagePath));
75
83
  console.log(JSON.stringify(labels, null, 2));
76
84
  }
77
85
  }
package/dist/index.d.ts CHANGED
@@ -9,6 +9,8 @@ export interface VisionBlock {
9
9
  width: number;
10
10
  /** Height, 0–1 relative to image */
11
11
  height: number;
12
+ /** OCR transcription confidence, 0–1 */
13
+ confidence: number;
12
14
  }
13
15
  export interface OcrOptions {
14
16
  /** Return plain text (default) or structured blocks with coordinates */
@@ -46,6 +48,8 @@ export interface Barcode {
46
48
  width: number;
47
49
  /** Height, 0–1 relative to image */
48
50
  height: number;
51
+ /** Detection confidence, 0–1 */
52
+ confidence: number;
49
53
  }
50
54
  export declare function detectBarcodes(imagePath: string): Promise<Barcode[]>;
51
55
  export interface Rectangle {
package/dist/index.js CHANGED
@@ -5,19 +5,31 @@ import { fileURLToPath } from 'url';
5
5
  const execFileAsync = promisify(execFile);
6
6
  const __dirname = dirname(fileURLToPath(import.meta.url));
7
7
  const BIN_PATH = resolve(__dirname, '../bin/vision-helper');
8
+ const BINARY_TIMEOUT_MS = 30_000;
8
9
  async function run(flag, imagePath) {
9
- const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)]);
10
+ const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)], {
11
+ timeout: BINARY_TIMEOUT_MS,
12
+ });
10
13
  return stdout;
11
14
  }
12
15
  export async function ocr(imagePath, options = {}) {
13
16
  const absPath = resolve(imagePath);
14
17
  const { format = 'text' } = options;
15
18
  if (format === 'blocks') {
16
- const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath]);
19
+ const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath], {
20
+ timeout: BINARY_TIMEOUT_MS,
21
+ });
17
22
  const raw = JSON.parse(stdout);
18
- return raw.map((b) => ({ text: b.t, x: b.x, y: b.y, width: b.w, height: b.h }));
23
+ return raw.map((b) => ({
24
+ text: b.t,
25
+ x: b.x,
26
+ y: b.y,
27
+ width: b.w,
28
+ height: b.h,
29
+ confidence: b.confidence,
30
+ }));
19
31
  }
20
- const { stdout } = await execFileAsync(BIN_PATH, [absPath]);
32
+ const { stdout } = await execFileAsync(BIN_PATH, [absPath], { timeout: BINARY_TIMEOUT_MS });
21
33
  return stdout.trim();
22
34
  }
23
35
  export async function detectFaces(imagePath) {
@@ -33,6 +45,7 @@ export async function detectBarcodes(imagePath) {
33
45
  y: b.y,
34
46
  width: b.w,
35
47
  height: b.h,
48
+ confidence: b.confidence,
36
49
  }));
37
50
  }
38
51
  export async function detectRectangles(imagePath) {
@@ -0,0 +1,21 @@
1
+ import tseslint from 'typescript-eslint';
2
+ import prettier from 'eslint-config-prettier';
3
+
4
+ export default tseslint.config(
5
+ ...tseslint.configs.recommended,
6
+ prettier,
7
+ {
8
+ files: ['src/**/*.ts'],
9
+ languageOptions: {
10
+ parser: tseslint.parser,
11
+ parserOptions: {
12
+ project: true, // Looks for the nearest tsconfig.json
13
+ },
14
+ },
15
+ rules: {
16
+ '@typescript-eslint/no-explicit-any': 'warn',
17
+ '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
18
+ },
19
+ },
20
+ { ignores: ['dist/**', 'node_modules/**', 'bin/**'] }
21
+ );
package/package.json CHANGED
@@ -1,9 +1,10 @@
1
1
  {
2
2
  "name": "macos-vision",
3
- "version": "0.1.3",
3
+ "version": "0.2.0",
4
4
  "description": "Apple Vision OCR & image analysis for Node.js — native, fast, offline, no API keys",
5
5
  "author": "Adrian Wolczuk",
6
- "license": "MIT","type": "module",
6
+ "license": "MIT",
7
+ "type": "module",
7
8
  "main": "./dist/index.js",
8
9
  "types": "./dist/index.d.ts",
9
10
  "bin": {
@@ -12,13 +13,19 @@
12
13
  "repository": {
13
14
  "type": "git",
14
15
  "url": "git+https://github.com/woladi/macos-vision.git"
15
- },
16
+ },
16
17
  "scripts": {
17
18
  "build-native": "node scripts/build-native.js",
18
19
  "postinstall": "node scripts/build-native.js",
19
20
  "build": "tsc",
20
21
  "prepublishOnly": "npm run build",
21
- "test": "vitest run"
22
+ "test": "vitest run",
23
+ "prepare": "[ -d .git ] && husky || true",
24
+ "typecheck": "tsc --noEmit",
25
+ "lint": "eslint src/**/*.ts",
26
+ "format": "prettier --write src/**/*.ts",
27
+ "release": "release-it",
28
+ "release:beta": "release-it --preRelease=beta"
22
29
  },
23
30
  "keywords": [
24
31
  "ocr",
@@ -35,6 +42,12 @@
35
42
  "document-detection",
36
43
  "image-classification"
37
44
  ],
45
+ "lint-staged": {
46
+ "src/**/*.ts": [
47
+ "prettier --write",
48
+ "eslint --fix"
49
+ ]
50
+ },
38
51
  "os": [
39
52
  "darwin"
40
53
  ],
@@ -42,8 +55,18 @@
42
55
  "node": ">=18.0.0"
43
56
  },
44
57
  "devDependencies": {
58
+ "@commitlint/cli": "^20.5.0",
59
+ "@commitlint/config-conventional": "^20.5.0",
60
+ "@release-it/conventional-changelog": "^10.0.6",
45
61
  "@types/node": "^20.0.0",
62
+ "eslint": "^10.2.0",
63
+ "eslint-config-prettier": "^10.1.8",
64
+ "husky": "^9.1.7",
65
+ "lint-staged": "^16.4.0",
66
+ "prettier": "^3.8.1",
67
+ "release-it": "^19.2.4",
46
68
  "typescript": "^5.4.0",
69
+ "typescript-eslint": "^8.58.0",
47
70
  "vitest": "^2.1.9"
48
71
  }
49
72
  }
@@ -13,9 +13,7 @@ if (existsSync(binPath)) {
13
13
  process.exit(0);
14
14
  }
15
15
 
16
- if (!mkdirSync(binDir, { recursive: true }) === false) {
17
- // dir created
18
- }
16
+ mkdirSync(binDir, { recursive: true });
19
17
 
20
18
  try {
21
19
  execSync(`swiftc -O "${swiftSrc}" -o "${binPath}"`, { stdio: 'inherit' });