macos-vision 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -103
- package/bin/pdf-helper +0 -0
- package/bin/vision-helper +0 -0
- package/dist/cli.js +131 -68
- package/dist/index.d.ts +23 -0
- package/dist/index.js +34 -40
- package/dist/markdown/chunker.d.ts +11 -0
- package/dist/markdown/chunker.js +39 -0
- package/dist/markdown/index.d.ts +61 -0
- package/dist/markdown/index.js +92 -0
- package/dist/markdown/ollama.d.ts +21 -0
- package/dist/markdown/ollama.js +50 -0
- package/dist/markdown/prompt.d.ts +35 -0
- package/dist/markdown/prompt.js +82 -0
- package/package.json +30 -5
- package/scripts/build-native.js +26 -11
- package/src/native/pdf-helper.swift +122 -0
- package/src/native/vision-helper.swift +241 -0
- package/.husky/commit-msg +0 -2
- package/.husky/pre-commit +0 -3
- package/.prettierignore +0 -4
- package/.prettierrc.json +0 -7
- package/.release-it.json +0 -20
- package/CHANGELOG.md +0 -38
- package/commitlint.config.js +0 -1
- package/debug.js +0 -37
- package/eslint.config.js +0 -21
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import PDFKit
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
// ─── Result struct ────────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/// One rasterized PDF page, emitted as an element of the JSON array printed on stdout.
struct PageResult: Codable {
    /// Zero-based index of the page within the source PDF.
    let page: Int

    /// Filesystem path of the PNG file written for this page.
    let path: String
}
|
|
11
|
+
|
|
12
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/// Reports a fatal error and terminates the process.
///
/// Writes `ERROR: <message>` followed by a newline to standard error, then
/// exits with status 1.
///
/// - Parameter message: Human-readable description of what went wrong.
func fail(_ message: String) -> Never {
    let line = "ERROR: " + message + "\n"
    fputs(line, stderr)
    exit(1)
}
|
|
18
|
+
|
|
19
|
+
/// Serializes `value` to a compact JSON string.
///
/// An encoding failure is reported on standard error and ends the process with
/// status 1. It is deliberately not masked as `"[]"`, because a caller reading
/// stdout could not tell that apart from a genuinely empty result.
///
/// - Parameter value: The value to encode.
/// - Returns: The JSON text for `value`.
func encodeJSON<T: Encodable>(_ value: T) -> String {
    do {
        let data = try JSONEncoder().encode(value)
        // JSONEncoder produces UTF-8, so decoding cannot fail.
        return String(decoding: data, as: UTF8.self)
    } catch {
        fputs("ERROR: Cannot encode result as JSON: \(error.localizedDescription)\n", stderr)
        exit(1)
    }
}
|
|
24
|
+
|
|
25
|
+
// ─── Argument parsing ─────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
// The only positional argument is the path of the PDF to rasterize.
let args = CommandLine.arguments
guard args.count > 1 else {
    fail("Usage: pdf-helper <path-to-pdf>")
}

let pdfPath = args[1]
let pdfURL = URL(fileURLWithPath: pdfPath)

// PDFDocument(url:) is a failable initializer; nil means the file could not be opened as a PDF.
guard let pdf = PDFDocument(url: pdfURL) else {
    fail("Cannot open PDF: \(pdfPath)")
}

// A document without pages would yield no output files, so it is reported as an error.
let pageCount = pdf.pageCount
guard pageCount >= 1 else {
    fail("PDF has no pages: \(pdfPath)")
}
|
|
43
|
+
|
|
44
|
+
// ─── Output directory: ~/.cache/macos-vision/{basename}-{uuid}/ ───────────────
|
|
45
|
+
|
|
46
|
+
// Each run gets its own UUID-suffixed directory under ~/.cache/macos-vision.
let basename = pdfURL.deletingPathExtension().lastPathComponent
let uuid = UUID().uuidString.lowercased()
let cacheBase = FileManager.default
    .homeDirectoryForCurrentUser
    .appendingPathComponent(".cache/macos-vision")
let outDir = cacheBase.appendingPathComponent(basename + "-" + uuid)

do {
    // Intermediate directories are created as needed (e.g. ~/.cache on first use).
    try FileManager.default.createDirectory(
        at: outDir,
        withIntermediateDirectories: true
    )
} catch {
    fail("Cannot create output directory \(outDir.path): \(error.localizedDescription)")
}
|
|
57
|
+
|
|
58
|
+
// ─── Rasterize each page at 300 DPI ──────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
// PDF points are 72 pt/inch. Scale factor for 300 DPI = 300/72 ≈ 4.167
|
|
61
|
+
let scale: CGFloat = 300.0 / 72.0

var results: [PageResult] = []
results.reserveCapacity(pageCount)

for pageIndex in 0..<pageCount {
    // Every iteration allocates a full-page bitmap plus its PNG encoding.
    // A command-line tool has no run loop to drain the autorelease pool, so
    // without an explicit pool those temporaries could stay alive until the
    // whole document has been processed. Draining per page keeps peak memory
    // bounded by a single page.
    autoreleasepool {
        guard let page = pdf.page(at: pageIndex) else {
            fail("Cannot access page \(pageIndex) of \(pdfPath)")
        }

        // Pixel size of the page at the target resolution.
        // NOTE(review): the bitmap is sized from the media box alone; confirm
        // that pages with a non-zero `rotation` are not clipped.
        let mediaBox = page.bounds(for: .mediaBox)
        let width = Int((mediaBox.width * scale).rounded())
        let height = Int((mediaBox.height * scale).rounded())

        // 8-bit RGBA, non-planar; passing 0 for bytesPerRow/bitsPerPixel lets
        // AppKit derive them from the other parameters.
        guard let bitmapRep = NSBitmapImageRep(
            bitmapDataPlanes: nil,
            pixelsWide: width,
            pixelsHigh: height,
            bitsPerSample: 8,
            samplesPerPixel: 4,
            hasAlpha: true,
            isPlanar: false,
            colorSpaceName: .calibratedRGB,
            bytesPerRow: 0,
            bitsPerPixel: 0
        ) else {
            fail("Cannot create bitmap for page \(pageIndex)")
        }

        guard let ctx = NSGraphicsContext(bitmapImageRep: bitmapRep) else {
            fail("Cannot create graphics context for page \(pageIndex)")
        }

        // Fill white background (PDFs are transparent by default)
        NSGraphicsContext.saveGraphicsState()
        NSGraphicsContext.current = ctx
        NSColor.white.setFill()
        NSRect(x: 0, y: 0, width: width, height: height).fill()

        // Scale from PDF points to pixels, then let PDFKit draw the page.
        ctx.cgContext.scaleBy(x: scale, y: scale)
        page.draw(with: .mediaBox, to: ctx.cgContext)
        NSGraphicsContext.restoreGraphicsState()

        guard let pngData = bitmapRep.representation(using: .png, properties: [:]) else {
            fail("Cannot encode page \(pageIndex) to PNG")
        }

        // File names use a 1-based, zero-padded page number:
        // <basename>-page-001.png, <basename>-page-002.png, …
        let filename = String(format: "%@-page-%03d.png", basename, pageIndex + 1)
        let outPath = outDir.appendingPathComponent(filename)

        do {
            try pngData.write(to: outPath)
        } catch {
            fail("Cannot write \(outPath.path): \(error.localizedDescription)")
        }

        // The JSON result keeps the 0-based page index.
        results.append(PageResult(page: pageIndex, path: outPath.path))
    }
}

// ─── Output JSON ──────────────────────────────────────────────────────────────

print(encodeJSON(results))
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import Vision
|
|
2
|
+
import AppKit
|
|
3
|
+
import Foundation
|
|
4
|
+
|
|
5
|
+
// ─── Result structs ──────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/// One recognized text observation, as emitted in `--json` mode.
struct OCRResult: Codable {
    /// Recognized text of the top candidate.
    let t: String

    /// Bounding-box origin and size; `y` has been flipped to a top-left origin.
    let x: Double
    let y: Double
    let w: Double
    let h: Double

    /// Confidence reported for the top candidate.
    let confidence: Float
}
|
|
12
|
+
|
|
13
|
+
/// One detected face rectangle.
struct FaceResult: Codable {
    /// Bounding-box origin and size; `y` has been flipped to a top-left origin.
    let x: Double
    let y: Double
    let w: Double
    let h: Double

    /// Confidence reported by the face observation.
    let confidence: Float
}
|
|
17
|
+
|
|
18
|
+
/// One detected barcode.
struct BarcodeResult: Codable {
    /// Raw value of the observation's symbology.
    let type: String

    /// Decoded payload string, or an empty string when none is available.
    let value: String

    /// Bounding-box origin and size; `y` has been flipped to a top-left origin.
    let x: Double
    let y: Double
    let w: Double
    let h: Double

    /// Confidence reported by the barcode observation.
    let confidence: Float
}
|
|
24
|
+
|
|
25
|
+
/// One detected rectangle, described by its four corners.
///
/// Each corner is an `[x, y]` pair with `y` flipped to a top-left origin.
struct RectangleResult: Codable {
    let topLeft: [Double]
    let topRight: [Double]
    let bottomLeft: [Double]
    let bottomRight: [Double]

    /// Confidence reported by the rectangle observation.
    let confidence: Float
}
|
|
30
|
+
|
|
31
|
+
/// One detected document region, described by its four corners.
///
/// Each corner is an `[x, y]` pair with `y` flipped to a top-left origin.
struct DocumentResult: Codable {
    let topLeft: [Double]
    let topRight: [Double]
    let bottomLeft: [Double]
    let bottomRight: [Double]

    /// Confidence reported by the document observation.
    let confidence: Float
}
|
|
36
|
+
|
|
37
|
+
/// One image-classification label.
struct ClassificationResult: Codable {
    /// Identifier of the classification observation.
    let identifier: String

    /// Confidence reported for this label.
    let confidence: Float
}
|
|
41
|
+
|
|
42
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
43
|
+
|
|
44
|
+
// Vision: 0,0 = bottom-left. We flip Y so 0,0 = top-left (web standard).
|
|
45
|
+
/// Converts the Y origin of a box from a bottom-left to a top-left coordinate system.
///
/// - Parameters:
///   - y: Y origin of the box measured from the bottom edge.
///   - h: Height of the box.
/// - Returns: Y origin of the same box measured from the top edge, i.e. `1 - y - h`.
func flipY(_ y: Double, _ h: Double) -> Double {
    return 1.0 - y - h
}
|
|
46
|
+
|
|
47
|
+
/// Converts a point to an `[x, y]` pair with the Y axis flipped to a top-left origin.
///
/// - Parameter p: Point measured from the bottom-left corner.
/// - Returns: `[x, 1 - y]`.
func pt(_ p: CGPoint) -> [Double] {
    let x = Double(p.x)
    let flippedY = 1.0 - Double(p.y)
    return [x, flippedY]
}
|
|
48
|
+
|
|
49
|
+
/// Serializes `value` to a compact JSON string.
///
/// An encoding failure (for example a non-finite floating-point value, which
/// `JSONEncoder` rejects by default) is reported on standard error and ends
/// the process with status 1. It is deliberately not masked as `"[]"`, because
/// a caller reading stdout could not tell that apart from "nothing detected".
///
/// - Parameter value: The value to encode.
/// - Returns: The JSON text for `value`.
func encodeJSON<T: Encodable>(_ value: T) -> String {
    do {
        let data = try JSONEncoder().encode(value)
        // JSONEncoder produces UTF-8, so decoding cannot fail.
        return String(decoding: data, as: UTF8.self)
    } catch {
        fputs("ERROR: Cannot encode result as JSON: \(error.localizedDescription)\n", stderr)
        exit(1)
    }
}
|
|
54
|
+
|
|
55
|
+
// ─── Argument parsing ─────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
let args = CommandLine.arguments

// Mode flags; they may appear anywhere on the command line.
let isJsonMode = args.contains("--json")
let isFaces = args.contains("--faces")
let isBarcodes = args.contains("--barcodes")
let isRectangles = args.contains("--rectangles")
let isDocument = args.contains("--document")
let isClassify = args.contains("--classify")

// Positional arguments are everything after argv[0] that is not a flag.
// The executable path is skipped by position rather than by matching its
// name, so an image whose path happens to contain "vision-helper" is not
// discarded.
let fileArgs = args.dropFirst().filter { !$0.hasPrefix("--") }

guard let imagePath = fileArgs.first else {
    // A missing path is a usage error: report it on stderr with a non-zero
    // status so a caller never mistakes the usage text for recognized output.
    fputs("Usage: vision-helper [--json|--faces|--barcodes|--rectangles|--document|--classify] <path>\n", stderr)
    exit(1)
}
|
|
71
|
+
|
|
72
|
+
// Load the image through AppKit and extract the CGImage that Vision will analyze.
guard
    let image = NSImage(contentsOf: URL(fileURLWithPath: imagePath)),
    let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil)
else {
    fputs("ERROR: Cannot open file: \(imagePath)\n", stderr)
    exit(1)
}

// A single handler is shared by whichever request mode runs below.
let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
|
|
79
|
+
|
|
80
|
+
// ─── OCR (default + --json) ───────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
// Text recognition runs when --json is given or when no detector flag was passed.
if isJsonMode || !(isFaces || isBarcodes || isRectangles || isDocument || isClassify) {
    var ocrResults: [OCRResult] = []
    var textLines: [String] = []

    let request = VNRecognizeTextRequest { finished, _ in
        guard let observations = finished.results as? [VNRecognizedTextObservation] else { return }
        for observation in observations {
            // Observations without any candidate string are skipped.
            guard let best = observation.topCandidates(1).first else { continue }

            if !isJsonMode {
                textLines.append(best.string)
                continue
            }

            let box = observation.boundingBox
            let boxHeight = Double(box.size.height)
            ocrResults.append(OCRResult(
                t: best.string,
                x: Double(box.origin.x),
                y: flipY(Double(box.origin.y), boxHeight),
                w: Double(box.size.width),
                h: boxHeight,
                confidence: best.confidence
            ))
        }
    }
    request.recognitionLevel = .accurate

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision OCR failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    // Plain-text mode prints one recognized line per row, trimmed at both ends.
    let output: String
    if isJsonMode {
        output = encodeJSON(ocrResults)
    } else {
        output = textLines
            .joined(separator: "\n")
            .trimmingCharacters(in: .whitespacesAndNewlines)
    }
    print(output)
    exit(0)
}
|
|
116
|
+
|
|
117
|
+
// ─── Faces ───────────────────────────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
// --faces: print every detected face rectangle as JSON.
if isFaces {
    var results: [FaceResult] = []

    let request = VNDetectFaceRectanglesRequest { finished, _ in
        guard let faces = finished.results as? [VNFaceObservation] else { return }
        results.append(contentsOf: faces.map { face in
            let box = face.boundingBox
            let boxHeight = Double(box.size.height)
            return FaceResult(
                x: Double(box.origin.x),
                y: flipY(Double(box.origin.y), boxHeight),
                w: Double(box.size.width),
                h: boxHeight,
                confidence: face.confidence
            )
        })
    }

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision face detection failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print(encodeJSON(results))
    exit(0)
}
|
|
143
|
+
|
|
144
|
+
// ─── Barcodes ────────────────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
// --barcodes: print every detected barcode as JSON.
if isBarcodes {
    var results: [BarcodeResult] = []

    let request = VNDetectBarcodesRequest { finished, _ in
        guard let barcodes = finished.results as? [VNBarcodeObservation] else { return }
        results.append(contentsOf: barcodes.map { barcode in
            let box = barcode.boundingBox
            let boxHeight = Double(box.size.height)
            return BarcodeResult(
                type: barcode.symbology.rawValue,
                // Barcodes without a string payload are reported with an empty value.
                value: barcode.payloadStringValue ?? "",
                x: Double(box.origin.x),
                y: flipY(Double(box.origin.y), boxHeight),
                w: Double(box.size.width),
                h: boxHeight,
                confidence: barcode.confidence
            )
        })
    }

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision barcode detection failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print(encodeJSON(results))
    exit(0)
}
|
|
172
|
+
|
|
173
|
+
// ─── Rectangles ──────────────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
// --rectangles: print the corners of every detected rectangle as JSON.
if isRectangles {
    var results: [RectangleResult] = []

    let request = VNDetectRectanglesRequest { finished, _ in
        guard let rectangles = finished.results as? [VNRectangleObservation] else { return }
        results.append(contentsOf: rectangles.map { rectangle in
            RectangleResult(
                topLeft: pt(rectangle.topLeft),
                topRight: pt(rectangle.topRight),
                bottomLeft: pt(rectangle.bottomLeft),
                bottomRight: pt(rectangle.bottomRight),
                confidence: rectangle.confidence
            )
        })
    }
    // Per the Vision documentation, 0 removes the cap on returned observations.
    request.maximumObservations = 0

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision rectangle detection failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print(encodeJSON(results))
    exit(0)
}
|
|
197
|
+
|
|
198
|
+
// ─── Document ────────────────────────────────────────────────────────────────
|
|
199
|
+
|
|
200
|
+
// --document: print the corners of every detected document region as JSON.
if isDocument {
    var results: [DocumentResult] = []

    let request = VNDetectDocumentSegmentationRequest { finished, _ in
        guard let documents = finished.results as? [VNRectangleObservation] else { return }
        results.append(contentsOf: documents.map { document in
            DocumentResult(
                topLeft: pt(document.topLeft),
                topRight: pt(document.topRight),
                bottomLeft: pt(document.bottomLeft),
                bottomRight: pt(document.bottomRight),
                confidence: document.confidence
            )
        })
    }

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision document detection failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print(encodeJSON(results))
    exit(0)
}
|
|
221
|
+
|
|
222
|
+
// ─── Classify ────────────────────────────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
// --classify: print up to ten labels whose confidence exceeds 0.01 as JSON.
if isClassify {
    var results: [ClassificationResult] = []

    let request = VNClassifyImageRequest { finished, _ in
        guard let labels = finished.results as? [VNClassificationObservation] else { return }
        // Keep the observation order; take the first ten that pass the threshold.
        let kept = labels.filter { $0.confidence > 0.01 }.prefix(10)
        results.append(contentsOf: kept.map { label in
            ClassificationResult(identifier: label.identifier, confidence: label.confidence)
        })
    }

    do {
        try handler.perform([request])
    } catch {
        fputs("ERROR: Vision classification failed: \(error.localizedDescription)\n", stderr)
        exit(1)
    }

    print(encodeJSON(results))
    exit(0)
}
|
package/.husky/commit-msg
DELETED
package/.husky/pre-commit
DELETED
package/.prettierignore
DELETED
package/.prettierrc.json
DELETED
package/.release-it.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "https://unpkg.com/release-it/schema/release-it.json",
|
|
3
|
-
"plugins": {
|
|
4
|
-
"@release-it/conventional-changelog": {
|
|
5
|
-
"preset": "conventionalcommits",
|
|
6
|
-
"infile": "CHANGELOG.md"
|
|
7
|
-
}
|
|
8
|
-
},
|
|
9
|
-
"git": {
|
|
10
|
-
"commitMessage": "chore(release): v${version}",
|
|
11
|
-
"tagName": "v${version}"
|
|
12
|
-
},
|
|
13
|
-
"github": {
|
|
14
|
-
"release": false
|
|
15
|
-
},
|
|
16
|
-
"npm": {
|
|
17
|
-
"publish": true,
|
|
18
|
-
"tag": "latest"
|
|
19
|
-
}
|
|
20
|
-
}
|
package/CHANGELOG.md
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# Changelog
|
|
2
|
-
|
|
3
|
-
## [1.1.0](https://github.com/woladi/macos-vision/compare/v1.0.3...v1.1.0) (2026-04-09)
|
|
4
|
-
|
|
5
|
-
### Features
|
|
6
|
-
|
|
7
|
-
* add PDF support via sips rasterization ([a48bf17](https://github.com/woladi/macos-vision/commit/a48bf17579a6df11aed6eadbde4fa5041ccaa981))
|
|
8
|
-
|
|
9
|
-
## [1.0.3](https://github.com/woladi/macos-vision/compare/v1.0.2...v1.0.3) (2026-04-08)
|
|
10
|
-
|
|
11
|
-
### Reverts
|
|
12
|
-
|
|
13
|
-
* remove socket.ignore field — worsens supply chain risk score ([a1827ad](https://github.com/woladi/macos-vision/commit/a1827ad489220ebb7a2e8c85632945fe969438db))
|
|
14
|
-
|
|
15
|
-
## [1.0.2](https://github.com/woladi/macos-vision/compare/v1.0.1...v1.0.2) (2026-04-08)
|
|
16
|
-
|
|
17
|
-
## [1.0.1](https://github.com/woladi/macos-vision/compare/v0.3.1...v1.0.1) (2026-04-08)
|
|
18
|
-
|
|
19
|
-
## [0.3.1](https://github.com/woladi/macos-vision/compare/v0.3.0...v0.3.1) (2026-04-08)
|
|
20
|
-
|
|
21
|
-
## [0.3.0](https://github.com/woladi/macos-vision/compare/v0.2.0...v0.3.0) (2026-04-08)
|
|
22
|
-
|
|
23
|
-
### Features
|
|
24
|
-
|
|
25
|
-
* add inferLayout() — unified reading-order LayoutBlock representation ([aec507e](https://github.com/woladi/macos-vision/commit/aec507eb7cf133ec1e56759c0945563a48d871ee))
|
|
26
|
-
|
|
27
|
-
## [0.2.0](https://github.com/woladi/macos-vision/compare/v0.1.4...v0.2.0) (2026-04-08)
|
|
28
|
-
|
|
29
|
-
### Features
|
|
30
|
-
|
|
31
|
-
* add confidence to VisionBlock and Barcode ([a87df27](https://github.com/woladi/macos-vision/commit/a87df275e51dec4b57fbff6e3bffc4220b96b4d7))
|
|
32
|
-
|
|
33
|
-
### Bug Fixes
|
|
34
|
-
|
|
35
|
-
* correct mkdirSync, CLI error on missing file, execFile timeout, README scope ([1cef2c7](https://github.com/woladi/macos-vision/commit/1cef2c7078430c9182fcd39792cf0c002833203f))
|
|
36
|
-
* replace try? with do/catch in Swift helper — surface Vision errors properly ([f287065](https://github.com/woladi/macos-vision/commit/f2870655225806070be3db462ea15923201fecbf))
|
|
37
|
-
|
|
38
|
-
## 0.1.4 (2026-04-08)
|
package/commitlint.config.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export default { extends: ['@commitlint/config-conventional'] };
|
package/debug.js
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import {
|
|
3
|
-
ocr,
|
|
4
|
-
detectFaces,
|
|
5
|
-
detectBarcodes,
|
|
6
|
-
detectRectangles,
|
|
7
|
-
detectDocument,
|
|
8
|
-
classify,
|
|
9
|
-
inferLayout,
|
|
10
|
-
} from './dist/index.js';
|
|
11
|
-
|
|
12
|
-
const imagePath = process.argv[2] || './test/fixtures/sample.png';
|
|
13
|
-
console.log(`\n📸 Analyzing: ${imagePath}\n`);
|
|
14
|
-
|
|
15
|
-
const [text, blocks, faces, barcodes, rects, doc, labels] = await Promise.all([
|
|
16
|
-
ocr(imagePath),
|
|
17
|
-
ocr(imagePath, { format: 'blocks' }),
|
|
18
|
-
detectFaces(imagePath),
|
|
19
|
-
detectBarcodes(imagePath),
|
|
20
|
-
detectRectangles(imagePath),
|
|
21
|
-
detectDocument(imagePath),
|
|
22
|
-
classify(imagePath),
|
|
23
|
-
]);
|
|
24
|
-
|
|
25
|
-
const sep = (title) => console.log('\n' + '─'.repeat(60) + '\n' + title + '\n');
|
|
26
|
-
|
|
27
|
-
sep('📝 OCR text'); console.log(text);
|
|
28
|
-
sep('📝 OCR blocks'); console.log(JSON.stringify(blocks, null, 2));
|
|
29
|
-
sep('👤 Faces'); console.log(JSON.stringify(faces, null, 2));
|
|
30
|
-
sep('🔲 Barcodes'); console.log(JSON.stringify(barcodes, null, 2));
|
|
31
|
-
sep('📦 Rectangles'); console.log(JSON.stringify(rects, null, 2));
|
|
32
|
-
sep('📄 Document'); console.log(JSON.stringify(doc, null, 2));
|
|
33
|
-
sep('🏷️ Classification'); console.log(JSON.stringify(labels, null, 2));
|
|
34
|
-
|
|
35
|
-
const layout = inferLayout({ textBlocks: blocks, faces, barcodes, rectangles: rects, document: doc });
|
|
36
|
-
sep('🗂️ Layout (reading order)'); console.log(JSON.stringify(layout, null, 2));
|
|
37
|
-
console.log('\n' + '─'.repeat(60) + '\n');
|
package/eslint.config.js
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import tseslint from 'typescript-eslint';
|
|
2
|
-
import prettier from 'eslint-config-prettier';
|
|
3
|
-
|
|
4
|
-
export default tseslint.config(
|
|
5
|
-
...tseslint.configs.recommended,
|
|
6
|
-
prettier,
|
|
7
|
-
{
|
|
8
|
-
files: ['src/**/*.ts'],
|
|
9
|
-
languageOptions: {
|
|
10
|
-
parser: tseslint.parser,
|
|
11
|
-
parserOptions: {
|
|
12
|
-
project: true, // Szuka najbliższego tsconfig.json
|
|
13
|
-
},
|
|
14
|
-
},
|
|
15
|
-
rules: {
|
|
16
|
-
'@typescript-eslint/no-explicit-any': 'warn',
|
|
17
|
-
'@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
|
|
18
|
-
},
|
|
19
|
-
},
|
|
20
|
-
{ ignores: ['dist/**', 'node_modules/**', 'bin/**'] }
|
|
21
|
-
);
|