macos-vision 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,241 @@
1
+ import Vision
2
+ import AppKit
3
+ import Foundation
4
+
5
+ // ─── Result structs ──────────────────────────────────────────────────────────
6
+
7
+ struct OCRResult: Codable {
8
+ let t: String
9
+ let x: Double; let y: Double; let w: Double; let h: Double
10
+ let confidence: Float
11
+ }
12
+
13
+ struct FaceResult: Codable {
14
+ let x: Double; let y: Double; let w: Double; let h: Double
15
+ let confidence: Float
16
+ }
17
+
18
+ struct BarcodeResult: Codable {
19
+ let type: String
20
+ let value: String
21
+ let x: Double; let y: Double; let w: Double; let h: Double
22
+ let confidence: Float
23
+ }
24
+
25
+ struct RectangleResult: Codable {
26
+ let topLeft: [Double]; let topRight: [Double]
27
+ let bottomLeft: [Double]; let bottomRight: [Double]
28
+ let confidence: Float
29
+ }
30
+
31
+ struct DocumentResult: Codable {
32
+ let topLeft: [Double]; let topRight: [Double]
33
+ let bottomLeft: [Double]; let bottomRight: [Double]
34
+ let confidence: Float
35
+ }
36
+
37
+ struct ClassificationResult: Codable {
38
+ let identifier: String
39
+ let confidence: Float
40
+ }
41
+
42
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
43
+
44
+ // Vision: 0,0 = bottom-left. We flip Y so 0,0 = top-left (web standard).
45
+ func flipY(_ y: Double, _ h: Double) -> Double { 1.0 - y - h }
46
+
47
+ func pt(_ p: CGPoint) -> [Double] { [Double(p.x), 1.0 - Double(p.y)] }
48
+
49
+ func encodeJSON<T: Encodable>(_ value: T) -> String {
50
+ guard let data = try? JSONEncoder().encode(value),
51
+ let str = String(data: data, encoding: .utf8) else { return "[]" }
52
+ return str
53
+ }
54
+
55
+ // ─── Argument parsing ─────────────────────────────────────────────────────────
56
+
57
+ let args = CommandLine.arguments
58
+ let isJsonMode = args.contains("--json")
59
+ let isFaces = args.contains("--faces")
60
+ let isBarcodes = args.contains("--barcodes")
61
+ let isRectangles = args.contains("--rectangles")
62
+ let isDocument = args.contains("--document")
63
+ let isClassify = args.contains("--classify")
64
+
65
+ let fileArgs = args.dropFirst().filter { !$0.hasPrefix("--") } // argv[0] is the executable: skip it by position, not by name, so image paths containing "vision-helper" are kept
66
+
67
+ guard let imagePath = fileArgs.first else {
68
+ fputs("Usage: vision-helper [--json|--faces|--barcodes|--rectangles|--document|--classify] <path>\n", stderr) // usage error goes to stderr, like the other failure paths in this script
69
+ exit(1) // non-zero exit so a calling process never parses the usage text as a successful result
70
+ }
71
+
72
+ guard let image = NSImage(contentsOf: URL(fileURLWithPath: imagePath)),
73
+ let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
74
+ fputs("ERROR: Cannot open file: \(imagePath)\n", stderr)
75
+ exit(1)
76
+ }
77
+
78
+ let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
79
+
80
+ // ─── OCR (default + --json) ───────────────────────────────────────────────────
81
+
82
+ if isJsonMode || (!isFaces && !isBarcodes && !isRectangles && !isDocument && !isClassify) {
83
+ var ocrResults: [OCRResult] = []
84
+ var rawText = ""
85
+
86
+ let request = VNRecognizeTextRequest { (req, _) in
87
+ guard let obs = req.results as? [VNRecognizedTextObservation] else { return }
88
+ for o in obs {
89
+ guard let c = o.topCandidates(1).first else { continue }
90
+ let box = o.boundingBox
91
+ if isJsonMode {
92
+ ocrResults.append(OCRResult(
93
+ t: c.string,
94
+ x: Double(box.origin.x),
95
+ y: flipY(Double(box.origin.y), Double(box.size.height)),
96
+ w: Double(box.size.width),
97
+ h: Double(box.size.height),
98
+ confidence: c.confidence
99
+ ))
100
+ } else {
101
+ rawText += c.string + "\n"
102
+ }
103
+ }
104
+ }
105
+ request.recognitionLevel = .accurate
106
+
107
+ do {
108
+ try handler.perform([request])
109
+ } catch {
110
+ fputs("ERROR: Vision OCR failed: \(error.localizedDescription)\n", stderr)
111
+ exit(1)
112
+ }
113
+ print(isJsonMode ? encodeJSON(ocrResults) : rawText.trimmingCharacters(in: .whitespacesAndNewlines))
114
+ exit(0)
115
+ }
116
+
117
+ // ─── Faces ───────────────────────────────────────────────────────────────────
118
+
119
+ if isFaces {
120
+ var results: [FaceResult] = []
121
+ let request = VNDetectFaceRectanglesRequest { (req, _) in
122
+ guard let obs = req.results as? [VNFaceObservation] else { return }
123
+ for o in obs {
124
+ let box = o.boundingBox
125
+ results.append(FaceResult(
126
+ x: Double(box.origin.x),
127
+ y: flipY(Double(box.origin.y), Double(box.size.height)),
128
+ w: Double(box.size.width),
129
+ h: Double(box.size.height),
130
+ confidence: o.confidence
131
+ ))
132
+ }
133
+ }
134
+ do {
135
+ try handler.perform([request])
136
+ } catch {
137
+ fputs("ERROR: Vision face detection failed: \(error.localizedDescription)\n", stderr)
138
+ exit(1)
139
+ }
140
+ print(encodeJSON(results))
141
+ exit(0)
142
+ }
143
+
144
+ // ─── Barcodes ────────────────────────────────────────────────────────────────
145
+
146
+ if isBarcodes {
147
+ var results: [BarcodeResult] = []
148
+ let request = VNDetectBarcodesRequest { (req, _) in
149
+ guard let obs = req.results as? [VNBarcodeObservation] else { return }
150
+ for o in obs {
151
+ let box = o.boundingBox
152
+ results.append(BarcodeResult(
153
+ type: o.symbology.rawValue,
154
+ value: o.payloadStringValue ?? "",
155
+ x: Double(box.origin.x),
156
+ y: flipY(Double(box.origin.y), Double(box.size.height)),
157
+ w: Double(box.size.width),
158
+ h: Double(box.size.height),
159
+ confidence: o.confidence
160
+ ))
161
+ }
162
+ }
163
+ do {
164
+ try handler.perform([request])
165
+ } catch {
166
+ fputs("ERROR: Vision barcode detection failed: \(error.localizedDescription)\n", stderr)
167
+ exit(1)
168
+ }
169
+ print(encodeJSON(results))
170
+ exit(0)
171
+ }
172
+
173
+ // ─── Rectangles ──────────────────────────────────────────────────────────────
174
+
175
+ if isRectangles {
176
+ var results: [RectangleResult] = []
177
+ let request = VNDetectRectanglesRequest { (req, _) in
178
+ guard let obs = req.results as? [VNRectangleObservation] else { return }
179
+ for o in obs {
180
+ results.append(RectangleResult(
181
+ topLeft: pt(o.topLeft), topRight: pt(o.topRight),
182
+ bottomLeft: pt(o.bottomLeft), bottomRight: pt(o.bottomRight),
183
+ confidence: o.confidence
184
+ ))
185
+ }
186
+ }
187
+ request.maximumObservations = 0 // 0 lifts the default cap of one rectangle, so every detected rectangle is returned
188
+ do {
189
+ try handler.perform([request])
190
+ } catch {
191
+ fputs("ERROR: Vision rectangle detection failed: \(error.localizedDescription)\n", stderr)
192
+ exit(1)
193
+ }
194
+ print(encodeJSON(results))
195
+ exit(0)
196
+ }
197
+
198
+ // ─── Document ────────────────────────────────────────────────────────────────
199
+
200
+ if isDocument {
201
+ var results: [DocumentResult] = []
202
+ let request = VNDetectDocumentSegmentationRequest { (req, _) in
203
+ guard let obs = req.results as? [VNRectangleObservation] else { return }
204
+ for o in obs {
205
+ results.append(DocumentResult(
206
+ topLeft: pt(o.topLeft), topRight: pt(o.topRight),
207
+ bottomLeft: pt(o.bottomLeft), bottomRight: pt(o.bottomRight),
208
+ confidence: o.confidence
209
+ ))
210
+ }
211
+ }
212
+ do {
213
+ try handler.perform([request])
214
+ } catch {
215
+ fputs("ERROR: Vision document detection failed: \(error.localizedDescription)\n", stderr)
216
+ exit(1)
217
+ }
218
+ print(encodeJSON(results))
219
+ exit(0)
220
+ }
221
+
222
+ // ─── Classify ────────────────────────────────────────────────────────────────
223
+
224
+ if isClassify {
225
+ var results: [ClassificationResult] = []
226
+ let request = VNClassifyImageRequest { (req, _) in
227
+ guard let obs = req.results as? [VNClassificationObservation] else { return }
228
+ let top = obs.filter { $0.confidence > 0.01 }.prefix(10)
229
+ for o in top {
230
+ results.append(ClassificationResult(identifier: o.identifier, confidence: o.confidence))
231
+ }
232
+ }
233
+ do {
234
+ try handler.perform([request])
235
+ } catch {
236
+ fputs("ERROR: Vision classification failed: \(error.localizedDescription)\n", stderr)
237
+ exit(1)
238
+ }
239
+ print(encodeJSON(results))
240
+ exit(0)
241
+ }
package/.husky/commit-msg DELETED
@@ -1,2 +0,0 @@
1
- #!/bin/sh
2
- npx --no -- commitlint --edit $1
package/.husky/pre-commit DELETED
@@ -1,3 +0,0 @@
1
- #!/bin/sh
2
- npx lint-staged
3
- npx tsc --noEmit
package/.prettierignore DELETED
@@ -1,4 +0,0 @@
1
- dist/
2
- node_modules/
3
- bin/
4
- *.md
package/.prettierrc.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "semi": true,
3
- "singleQuote": true,
4
- "trailingComma": "es5",
5
- "printWidth": 100,
6
- "tabWidth": 2
7
- }
package/.release-it.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "$schema": "https://unpkg.com/release-it/schema/release-it.json",
3
- "plugins": {
4
- "@release-it/conventional-changelog": {
5
- "preset": "conventionalcommits",
6
- "infile": "CHANGELOG.md"
7
- }
8
- },
9
- "git": {
10
- "commitMessage": "chore(release): v${version}",
11
- "tagName": "v${version}"
12
- },
13
- "github": {
14
- "release": false
15
- },
16
- "npm": {
17
- "publish": true,
18
- "tag": "latest"
19
- }
20
- }
package/CHANGELOG.md DELETED
@@ -1,44 +0,0 @@
1
- # Changelog
2
-
3
- ## [1.2.0](https://github.com/woladi/macos-vision/compare/v1.1.0...v1.2.0) (2026-04-09)
4
-
5
- ### Features
6
-
7
- * replace sips with PDFKit-based pdf-helper binary for PDF rasterization ([4a223e2](https://github.com/woladi/macos-vision/commit/4a223e2de79571794d866452fd5e87b84590ff0d))
8
-
9
- ## [1.1.0](https://github.com/woladi/macos-vision/compare/v1.0.3...v1.1.0) (2026-04-09)
10
-
11
- ### Features
12
-
13
- * add PDF support via sips rasterization ([a48bf17](https://github.com/woladi/macos-vision/commit/a48bf17579a6df11aed6eadbde4fa5041ccaa981))
14
-
15
- ## [1.0.3](https://github.com/woladi/macos-vision/compare/v1.0.2...v1.0.3) (2026-04-08)
16
-
17
- ### Reverts
18
-
19
- * remove socket.ignore field — worsens supply chain risk score ([a1827ad](https://github.com/woladi/macos-vision/commit/a1827ad489220ebb7a2e8c85632945fe969438db))
20
-
21
- ## [1.0.2](https://github.com/woladi/macos-vision/compare/v1.0.1...v1.0.2) (2026-04-08)
22
-
23
- ## [1.0.1](https://github.com/woladi/macos-vision/compare/v0.3.1...v1.0.1) (2026-04-08)
24
-
25
- ## [0.3.1](https://github.com/woladi/macos-vision/compare/v0.3.0...v0.3.1) (2026-04-08)
26
-
27
- ## [0.3.0](https://github.com/woladi/macos-vision/compare/v0.2.0...v0.3.0) (2026-04-08)
28
-
29
- ### Features
30
-
31
- * add inferLayout() — unified reading-order LayoutBlock representation ([aec507e](https://github.com/woladi/macos-vision/commit/aec507eb7cf133ec1e56759c0945563a48d871ee))
32
-
33
- ## [0.2.0](https://github.com/woladi/macos-vision/compare/v0.1.4...v0.2.0) (2026-04-08)
34
-
35
- ### Features
36
-
37
- * add confidence to VisionBlock and Barcode ([a87df27](https://github.com/woladi/macos-vision/commit/a87df275e51dec4b57fbff6e3bffc4220b96b4d7))
38
-
39
- ### Bug Fixes
40
-
41
- * correct mkdirSync, CLI error on missing file, execFile timeout, README scope ([1cef2c7](https://github.com/woladi/macos-vision/commit/1cef2c7078430c9182fcd39792cf0c002833203f))
42
- * replace try? with do/catch in Swift helper — surface Vision errors properly ([f287065](https://github.com/woladi/macos-vision/commit/f2870655225806070be3db462ea15923201fecbf))
43
-
44
- ## 0.1.4 (2026-04-08)
@@ -1 +0,0 @@
1
- export default { extends: ['@commitlint/config-conventional'] };
package/debug.js DELETED
@@ -1,37 +0,0 @@
1
- #!/usr/bin/env node
2
- import {
3
- ocr,
4
- detectFaces,
5
- detectBarcodes,
6
- detectRectangles,
7
- detectDocument,
8
- classify,
9
- inferLayout,
10
- } from './dist/index.js';
11
-
12
- const imagePath = process.argv[2] || './test/fixtures/sample.png';
13
- console.log(`\n📸 Analyzing: ${imagePath}\n`);
14
-
15
- const [text, blocks, faces, barcodes, rects, doc, labels] = await Promise.all([
16
- ocr(imagePath),
17
- ocr(imagePath, { format: 'blocks' }),
18
- detectFaces(imagePath),
19
- detectBarcodes(imagePath),
20
- detectRectangles(imagePath),
21
- detectDocument(imagePath),
22
- classify(imagePath),
23
- ]);
24
-
25
- const sep = (title) => console.log('\n' + '─'.repeat(60) + '\n' + title + '\n');
26
-
27
- sep('📝 OCR text'); console.log(text);
28
- sep('📝 OCR blocks'); console.log(JSON.stringify(blocks, null, 2));
29
- sep('👤 Faces'); console.log(JSON.stringify(faces, null, 2));
30
- sep('🔲 Barcodes'); console.log(JSON.stringify(barcodes, null, 2));
31
- sep('📦 Rectangles'); console.log(JSON.stringify(rects, null, 2));
32
- sep('📄 Document'); console.log(JSON.stringify(doc, null, 2));
33
- sep('🏷️ Classification'); console.log(JSON.stringify(labels, null, 2));
34
-
35
- const layout = inferLayout({ textBlocks: blocks, faces, barcodes, rectangles: rects, document: doc });
36
- sep('🗂️ Layout (reading order)'); console.log(JSON.stringify(layout, null, 2));
37
- console.log('\n' + '─'.repeat(60) + '\n');
package/eslint.config.js DELETED
@@ -1,21 +0,0 @@
1
- import tseslint from 'typescript-eslint';
2
- import prettier from 'eslint-config-prettier';
3
-
4
- export default tseslint.config(
5
- ...tseslint.configs.recommended,
6
- prettier,
7
- {
8
- files: ['src/**/*.ts'],
9
- languageOptions: {
10
- parser: tseslint.parser,
11
- parserOptions: {
12
- project: true, // Szuka najbliższego tsconfig.json
13
- },
14
- },
15
- rules: {
16
- '@typescript-eslint/no-explicit-any': 'warn',
17
- '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
18
- },
19
- },
20
- { ignores: ['dist/**', 'node_modules/**', 'bin/**'] }
21
- );