macos-vision 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/bin/pdf-helper +0 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.js +29 -37
- package/package.json +1 -1
- package/scripts/build-native.js +26 -11
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.0](https://github.com/woladi/macos-vision/compare/v1.1.0...v1.2.0) (2026-04-09)
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
* replace sips with PDFKit-based pdf-helper binary for PDF rasterization ([4a223e2](https://github.com/woladi/macos-vision/commit/4a223e2de79571794d866452fd5e87b84590ff0d))
|
|
8
|
+
|
|
3
9
|
## [1.1.0](https://github.com/woladi/macos-vision/compare/v1.0.3...v1.1.0) (2026-04-09)
|
|
4
10
|
|
|
5
11
|
### Features
|
package/bin/pdf-helper
ADDED
|
Binary file
|
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,24 @@
|
|
|
1
|
+
export interface PdfPage {
|
|
2
|
+
/** 0-based page index */
|
|
3
|
+
page: number;
|
|
4
|
+
/** Absolute path to the rasterized PNG file */
|
|
5
|
+
path: string;
|
|
6
|
+
}
|
|
7
|
+
export interface PdfRasterizeResult {
|
|
8
|
+
/** Pages in document order */
|
|
9
|
+
pages: PdfPage[];
|
|
10
|
+
/** Directory containing all rasterized PNGs */
|
|
11
|
+
cacheDir: string;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Rasterizes a PDF to 300 DPI PNG files using the native `pdf-helper` binary
|
|
15
|
+
* (PDFKit-based). Files are saved persistently to `~/.cache/macos-vision/`
|
|
16
|
+
* so they can be reused by downstream tools — **caller is responsible for cleanup**.
|
|
17
|
+
*
|
|
18
|
+
* @param pdfPath - Absolute or relative path to the PDF file.
|
|
19
|
+
* @returns An object with `pages` (sorted array of `{page, path}`) and `cacheDir`.
|
|
20
|
+
*/
|
|
21
|
+
export declare function rasterizePdf(pdfPath: string): Promise<PdfRasterizeResult>;
|
|
1
22
|
export interface VisionBlock {
|
|
2
23
|
/** Recognized text */
|
|
3
24
|
text: string;
|
package/dist/index.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { execFile } from 'child_process';
|
|
2
2
|
import { promisify } from 'util';
|
|
3
|
-
import { resolve, dirname, extname,
|
|
3
|
+
import { resolve, dirname, extname, dirname as pathDirname } from 'path';
|
|
4
4
|
import { fileURLToPath } from 'url';
|
|
5
|
-
import {
|
|
6
|
-
import { open, mkdir, readdir, rm } from 'fs/promises';
|
|
5
|
+
import { open } from 'fs/promises';
|
|
7
6
|
const execFileAsync = promisify(execFile);
|
|
8
7
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
9
8
|
const BIN_PATH = resolve(__dirname, '../bin/vision-helper');
|
|
9
|
+
const PDF_BIN_PATH = resolve(__dirname, '../bin/pdf-helper');
|
|
10
10
|
const BINARY_TIMEOUT_MS = 30_000;
|
|
11
|
-
const
|
|
11
|
+
const PDF_RASTERIZE_TIMEOUT_MS = 120_000;
|
|
12
12
|
async function run(flag, imagePath) {
|
|
13
13
|
const { stdout } = await execFileAsync(BIN_PATH, [flag, resolve(imagePath)], {
|
|
14
14
|
timeout: BINARY_TIMEOUT_MS,
|
|
@@ -36,49 +36,41 @@ async function isPdf(filePath) {
|
|
|
36
36
|
}
|
|
37
37
|
}
|
|
38
38
|
/**
|
|
39
|
-
* Rasterizes a PDF to PNG files
|
|
40
|
-
*
|
|
39
|
+
* Rasterizes a PDF to 300 DPI PNG files using the native `pdf-helper` binary
|
|
40
|
+
* (PDFKit-based). Files are saved persistently to `~/.cache/macos-vision/`
|
|
41
|
+
* so they can be reused by downstream tools — **caller is responsible for cleanup**.
|
|
41
42
|
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
43
|
+
* @param pdfPath - Absolute or relative path to the PDF file.
|
|
44
|
+
* @returns An object with `pages` (sorted array of `{page, path}`) and `cacheDir`.
|
|
44
45
|
*/
|
|
45
|
-
async function rasterizePdf(pdfPath
|
|
46
|
-
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
pngs.sort((a, b) => {
|
|
50
|
-
const numA = parseInt(a.match(/-(\d+)\.png$/i)?.[1] ?? '0', 10);
|
|
51
|
-
const numB = parseInt(b.match(/-(\d+)\.png$/i)?.[1] ?? '0', 10);
|
|
52
|
-
return numA - numB;
|
|
46
|
+
export async function rasterizePdf(pdfPath) {
|
|
47
|
+
const absPath = resolve(pdfPath);
|
|
48
|
+
const { stdout } = await execFileAsync(PDF_BIN_PATH, [absPath], {
|
|
49
|
+
timeout: PDF_RASTERIZE_TIMEOUT_MS,
|
|
53
50
|
});
|
|
54
|
-
|
|
51
|
+
const pages = JSON.parse(stdout);
|
|
52
|
+
const cacheDir = pages.length > 0 ? pathDirname(pages[0].path) : '';
|
|
53
|
+
return { pages, cacheDir };
|
|
55
54
|
}
|
|
56
55
|
/**
|
|
57
|
-
*
|
|
58
|
-
*
|
|
56
|
+
* Internal PDF OCR pipeline: rasterize via pdf-helper → OCR each page → merge.
|
|
57
|
+
* PNG files are NOT cleaned up — they persist in ~/.cache/macos-vision/.
|
|
59
58
|
*/
|
|
60
59
|
async function ocrPdf(pdfPath, format) {
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
for (let i = 0; i < pages.length; i++) {
|
|
68
|
-
const blocks = (await ocr(pages[i], { format: 'blocks' }));
|
|
69
|
-
all.push(...blocks.map((b) => ({ ...b, page: i })));
|
|
70
|
-
}
|
|
71
|
-
return all;
|
|
72
|
-
}
|
|
73
|
-
const texts = [];
|
|
74
|
-
for (let i = 0; i < pages.length; i++) {
|
|
75
|
-
texts.push((await ocr(pages[i])));
|
|
60
|
+
const { pages } = await rasterizePdf(pdfPath);
|
|
61
|
+
if (format === 'blocks') {
|
|
62
|
+
const all = [];
|
|
63
|
+
for (const { page, path: pagePath } of pages) {
|
|
64
|
+
const blocks = (await ocr(pagePath, { format: 'blocks' }));
|
|
65
|
+
all.push(...blocks.map((b) => ({ ...b, page })));
|
|
76
66
|
}
|
|
77
|
-
return
|
|
67
|
+
return all;
|
|
78
68
|
}
|
|
79
|
-
|
|
80
|
-
|
|
69
|
+
const texts = [];
|
|
70
|
+
for (const { path: pagePath } of pages) {
|
|
71
|
+
texts.push((await ocr(pagePath)));
|
|
81
72
|
}
|
|
73
|
+
return texts.join('\n\n--- Page Break ---\n\n');
|
|
82
74
|
}
|
|
83
75
|
export async function ocr(imagePath, options = {}) {
|
|
84
76
|
const absPath = resolve(imagePath);
|
package/package.json
CHANGED
package/scripts/build-native.js
CHANGED
|
@@ -6,21 +6,36 @@ import path from 'path';
|
|
|
6
6
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
7
7
|
const root = path.resolve(__dirname, '..');
|
|
8
8
|
const binDir = path.join(root, 'bin');
|
|
9
|
-
const binPath = path.join(binDir, 'vision-helper');
|
|
10
|
-
const swiftSrc = path.join(root, 'src', 'native', 'vision-helper.swift');
|
|
11
9
|
|
|
12
|
-
|
|
10
|
+
const binaries = [
|
|
11
|
+
{
|
|
12
|
+
src: path.join(root, 'src', 'native', 'vision-helper.swift'),
|
|
13
|
+
out: path.join(binDir, 'vision-helper'),
|
|
14
|
+
name: 'vision-helper',
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
src: path.join(root, 'src', 'native', 'pdf-helper.swift'),
|
|
18
|
+
out: path.join(binDir, 'pdf-helper'),
|
|
19
|
+
name: 'pdf-helper',
|
|
20
|
+
},
|
|
21
|
+
];
|
|
22
|
+
|
|
23
|
+
const allExist = binaries.every(({ out }) => existsSync(out));
|
|
24
|
+
if (allExist) {
|
|
13
25
|
process.exit(0);
|
|
14
26
|
}
|
|
15
27
|
|
|
16
28
|
mkdirSync(binDir, { recursive: true });
|
|
17
29
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
30
|
+
for (const { src, out, name } of binaries) {
|
|
31
|
+
if (existsSync(out)) continue;
|
|
32
|
+
try {
|
|
33
|
+
execSync(`swiftc -O "${src}" -o "${out}"`, { stdio: 'inherit' });
|
|
34
|
+
console.log(`✅ macos-vision: ${name} compiled successfully`);
|
|
35
|
+
} catch {
|
|
36
|
+
console.error(`❌ macos-vision: ${name} compilation failed.`);
|
|
37
|
+
console.error(' Make sure Xcode Command Line Tools are installed:');
|
|
38
|
+
console.error(' xcode-select --install');
|
|
39
|
+
process.exit(1);
|
|
40
|
+
}
|
|
26
41
|
}
|