macos-vision 1.0.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/bin/pdf-helper +0 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.js +66 -1
- package/dist/layout.d.ts +8 -0
- package/dist/layout.js +8 -0
- package/package.json +1 -1
- package/scripts/build-native.js +26 -11
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.0](https://github.com/woladi/macos-vision/compare/v1.1.0...v1.2.0) (2026-04-09)
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
* replace sips with PDFKit-based pdf-helper binary for PDF rasterization ([4a223e2](https://github.com/woladi/macos-vision/commit/4a223e2de79571794d866452fd5e87b84590ff0d))
|
|
8
|
+
|
|
9
|
+
## [1.1.0](https://github.com/woladi/macos-vision/compare/v1.0.3...v1.1.0) (2026-04-09)
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* add PDF support via sips rasterization ([a48bf17](https://github.com/woladi/macos-vision/commit/a48bf17579a6df11aed6eadbde4fa5041ccaa981))
|
|
14
|
+
|
|
3
15
|
## [1.0.3](https://github.com/woladi/macos-vision/compare/v1.0.2...v1.0.3) (2026-04-08)
|
|
4
16
|
|
|
5
17
|
### Reverts
|
package/bin/pdf-helper
ADDED
|
Binary file
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,24 @@
|
|
|
1
|
+
export interface PdfPage {
|
|
2
|
+
/** 0-based page index */
|
|
3
|
+
page: number;
|
|
4
|
+
/** Absolute path to the rasterized PNG file */
|
|
5
|
+
path: string;
|
|
6
|
+
}
|
|
7
|
+
export interface PdfRasterizeResult {
|
|
8
|
+
/** Pages in document order */
|
|
9
|
+
pages: PdfPage[];
|
|
10
|
+
/** Directory containing all rasterized PNGs */
|
|
11
|
+
cacheDir: string;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Rasterizes a PDF to 300 DPI PNG files using the native `pdf-helper` binary
|
|
15
|
+
* (PDFKit-based). Files are saved persistently to `~/.cache/macos-vision/`
|
|
16
|
+
* so they can be reused by downstream tools — **caller is responsible for cleanup**.
|
|
17
|
+
*
|
|
18
|
+
* @param pdfPath - Absolute or relative path to the PDF file.
|
|
19
|
+
* @returns An object with `pages` (sorted array of `{page, path}`) and `cacheDir`.
|
|
20
|
+
*/
|
|
21
|
+
export declare function rasterizePdf(pdfPath: string): Promise<PdfRasterizeResult>;
|
|
1
22
|
export interface VisionBlock {
|
|
2
23
|
/** Recognized text */
|
|
3
24
|
text: string;
|
|
@@ -11,6 +32,8 @@ export interface VisionBlock {
|
|
|
11
32
|
height: number;
|
|
12
33
|
/** OCR transcription confidence, 0–1 */
|
|
13
34
|
confidence: number;
|
|
35
|
+
/** 0-based page index. Present only when the source was a PDF. Absent for images. */
|
|
36
|
+
page?: number;
|
|
14
37
|
}
|
|
15
38
|
export interface OcrOptions {
|
|
16
39
|
/** Return plain text (default) or structured blocks with coordinates */
|
package/dist/index.js
CHANGED
|
@@ -1,20 +1,85 @@
|
|
|
1
1
|
import { execFile } from 'child_process';
|
|
2
2
|
import { promisify } from 'util';
|
|
3
|
-
import { resolve, dirname } from 'path';
|
|
3
|
+
import { resolve, dirname, extname, dirname as pathDirname } from 'path';
|
|
4
4
|
import { fileURLToPath } from 'url';
|
|
5
|
+
import { open } from 'fs/promises';
|
|
5
6
|
const execFileAsync = promisify(execFile);
|
|
6
7
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
8
|
const BIN_PATH = resolve(__dirname, '../bin/vision-helper');
|
|
9
|
+
const PDF_BIN_PATH = resolve(__dirname, '../bin/pdf-helper');
|
|
8
10
|
const BINARY_TIMEOUT_MS = 30_000;
|
|
11
|
+
const PDF_RASTERIZE_TIMEOUT_MS = 120_000;
|
|
9
12
|
async function run(flag, imagePath) {
|
|
10
13
|
const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)], {
|
|
11
14
|
timeout: BINARY_TIMEOUT_MS,
|
|
12
15
|
});
|
|
13
16
|
return stdout;
|
|
14
17
|
}
|
|
18
|
+
// ─── PDF helpers ─────────────────────────────────────────────────────────────
|
|
19
|
+
/**
|
|
20
|
+
* Returns true if the file at `filePath` is a PDF.
|
|
21
|
+
* Uses extension as a fast path; falls back to magic bytes (`%PDF`) for
|
|
22
|
+
* files whose extension does not match their actual content.
|
|
23
|
+
*/
|
|
24
|
+
async function isPdf(filePath) {
|
|
25
|
+
if (extname(filePath).toLowerCase() === '.pdf')
|
|
26
|
+
return true;
|
|
27
|
+
let fh;
|
|
28
|
+
try {
|
|
29
|
+
fh = await open(filePath, 'r');
|
|
30
|
+
const buf = Buffer.alloc(4);
|
|
31
|
+
await fh.read(buf, 0, 4, 0);
|
|
32
|
+
return buf[0] === 0x25 && buf[1] === 0x50 && buf[2] === 0x44 && buf[3] === 0x46;
|
|
33
|
+
}
|
|
34
|
+
finally {
|
|
35
|
+
await fh?.close();
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Rasterizes a PDF to 300 DPI PNG files using the native `pdf-helper` binary
|
|
40
|
+
* (PDFKit-based). Files are saved persistently to `~/.cache/macos-vision/`
|
|
41
|
+
* so they can be reused by downstream tools — **caller is responsible for cleanup**.
|
|
42
|
+
*
|
|
43
|
+
* @param pdfPath - Absolute or relative path to the PDF file.
|
|
44
|
+
* @returns An object with `pages` (sorted array of `{page, path}`) and `cacheDir`.
|
|
45
|
+
*/
|
|
46
|
+
export async function rasterizePdf(pdfPath) {
|
|
47
|
+
const absPath = resolve(pdfPath);
|
|
48
|
+
const { stdout } = await execFileAsync(PDF_BIN_PATH, [absPath], {
|
|
49
|
+
timeout: PDF_RASTERIZE_TIMEOUT_MS,
|
|
50
|
+
});
|
|
51
|
+
const pages = JSON.parse(stdout);
|
|
52
|
+
const cacheDir = pages.length > 0 ? pathDirname(pages[0].path) : '';
|
|
53
|
+
return { pages, cacheDir };
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Internal PDF OCR pipeline: rasterize via pdf-helper → OCR each page → merge.
|
|
57
|
+
* PNG files are NOT cleaned up — they persist in ~/.cache/macos-vision/.
|
|
58
|
+
*/
|
|
59
|
+
async function ocrPdf(pdfPath, format) {
|
|
60
|
+
const { pages } = await rasterizePdf(pdfPath);
|
|
61
|
+
if (format === 'blocks') {
|
|
62
|
+
const all = [];
|
|
63
|
+
for (const { page, path: pagePath } of pages) {
|
|
64
|
+
const blocks = (await ocr(pagePath, { format: 'blocks' }));
|
|
65
|
+
all.push(...blocks.map((b) => ({ ...b, page })));
|
|
66
|
+
}
|
|
67
|
+
return all;
|
|
68
|
+
}
|
|
69
|
+
const texts = [];
|
|
70
|
+
for (const { path: pagePath } of pages) {
|
|
71
|
+
texts.push((await ocr(pagePath)));
|
|
72
|
+
}
|
|
73
|
+
return texts.join('\n\n--- Page Break ---\n\n');
|
|
74
|
+
}
|
|
15
75
|
export async function ocr(imagePath, options = {}) {
|
|
16
76
|
const absPath = resolve(imagePath);
|
|
17
77
|
const { format = 'text' } = options;
|
|
78
|
+
// ── PDF fast-path: rasterize via pdf-helper, then OCR each page ──────────
|
|
79
|
+
if (await isPdf(absPath)) {
|
|
80
|
+
return ocrPdf(absPath, format);
|
|
81
|
+
}
|
|
82
|
+
// ── Existing image path (unchanged) ──────────────────────────────────────
|
|
18
83
|
if (format === 'blocks') {
|
|
19
84
|
const { stdout } = await execFileAsync(BIN_PATH, ['--json', absPath], {
|
|
20
85
|
timeout: BINARY_TIMEOUT_MS,
|
package/dist/layout.d.ts
CHANGED
|
@@ -85,6 +85,14 @@ export declare function sortBlocksByReadingOrder(blocks: LayoutBlock[]): LayoutB
|
|
|
85
85
|
* (`paragraphId`) using simple bounding-box heuristics. All other block types
|
|
86
86
|
* are placed into the sorted sequence by their top-left coordinate.
|
|
87
87
|
*
|
|
88
|
+
* **Multi-page PDFs**: `VisionBlock` items from PDF OCR carry an optional `page` field (0-based).
|
|
89
|
+
* Because all coordinates are page-local (0–1 relative to each page), mixing blocks from
|
|
90
|
+
* different pages produces meaningless geometry. Pre-filter by page before calling inferLayout:
|
|
91
|
+
* ```ts
|
|
92
|
+
* const pageBlocks = pdfBlocks.filter(b => b.page === 0);
|
|
93
|
+
* const layout = inferLayout({ textBlocks: pageBlocks });
|
|
94
|
+
* ```
|
|
95
|
+
*
|
|
88
96
|
* @example
|
|
89
97
|
* ```ts
|
|
90
98
|
* const blocks = await ocr('page.png', { format: 'blocks' });
|
package/dist/layout.js
CHANGED
|
@@ -114,6 +114,14 @@ export function sortBlocksByReadingOrder(blocks) {
|
|
|
114
114
|
* (`paragraphId`) using simple bounding-box heuristics. All other block types
|
|
115
115
|
* are placed into the sorted sequence by their top-left coordinate.
|
|
116
116
|
*
|
|
117
|
+
* **Multi-page PDFs**: `VisionBlock` items from PDF OCR carry an optional `page` field (0-based).
|
|
118
|
+
* Because all coordinates are page-local (0–1 relative to each page), mixing blocks from
|
|
119
|
+
* different pages produces meaningless geometry. Pre-filter by page before calling inferLayout:
|
|
120
|
+
* ```ts
|
|
121
|
+
* const pageBlocks = pdfBlocks.filter(b => b.page === 0);
|
|
122
|
+
* const layout = inferLayout({ textBlocks: pageBlocks });
|
|
123
|
+
* ```
|
|
124
|
+
*
|
|
117
125
|
* @example
|
|
118
126
|
* ```ts
|
|
119
127
|
* const blocks = await ocr('page.png', { format: 'blocks' });
|
package/package.json
CHANGED
package/scripts/build-native.js
CHANGED
|
@@ -6,21 +6,36 @@ import path from 'path';
|
|
|
6
6
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
const root = path.resolve(__dirname, '..');
|
|
8
8
|
const binDir = path.join(root, 'bin');
|
|
9
|
-
const binPath = path.join(binDir, 'vision-helper');
|
|
10
|
-
const swiftSrc = path.join(root, 'src', 'native', 'vision-helper.swift');
|
|
11
9
|
|
|
12
|
-
|
|
10
|
+
const binaries = [
|
|
11
|
+
{
|
|
12
|
+
src: path.join(root, 'src', 'native', 'vision-helper.swift'),
|
|
13
|
+
out: path.join(binDir, 'vision-helper'),
|
|
14
|
+
name: 'vision-helper',
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
src: path.join(root, 'src', 'native', 'pdf-helper.swift'),
|
|
18
|
+
out: path.join(binDir, 'pdf-helper'),
|
|
19
|
+
name: 'pdf-helper',
|
|
20
|
+
},
|
|
21
|
+
];
|
|
22
|
+
|
|
23
|
+
const allExist = binaries.every(({ out }) => existsSync(out));
|
|
24
|
+
if (allExist) {
|
|
13
25
|
process.exit(0);
|
|
14
26
|
}
|
|
15
27
|
|
|
16
28
|
mkdirSync(binDir, { recursive: true });
|
|
17
29
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
30
|
+
for (const { src, out, name } of binaries) {
|
|
31
|
+
if (existsSync(out)) continue;
|
|
32
|
+
try {
|
|
33
|
+
execSync(`swiftc -O "${src}" -o "${out}"`, { stdio: 'inherit' });
|
|
34
|
+
console.log(`✅ macos-vision: ${name} compiled successfully`);
|
|
35
|
+
} catch {
|
|
36
|
+
console.error(`❌ macos-vision: ${name} compilation failed.`);
|
|
37
|
+
console.error(' Make sure Xcode Command Line Tools are installed:');
|
|
38
|
+
console.error(' xcode-select --install');
|
|
39
|
+
process.exit(1);
|
|
40
|
+
}
|
|
26
41
|
}
|