@albex/ocr 0.2.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/orchestrator.d.ts +13 -7
- package/dist/orchestrator.d.ts.map +1 -1
- package/dist/orchestrator.js +15 -17
- package/dist/orchestrator.js.map +1 -1
- package/package.json +5 -2
- package/src/orchestrator.ts +29 -27
package/dist/orchestrator.d.ts
CHANGED
|
@@ -18,13 +18,19 @@ import type { Lang } from './language-detector.js';
|
|
|
18
18
|
* peer dependency on `albex` is a type contract.
|
|
19
19
|
*/
|
|
20
20
|
export interface OcrCapableEngine {
|
|
21
|
-
/**
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
21
|
+
/** Read-only feature-detect: `if (engine.ocrImage)` tells the caller
|
|
22
|
+
* whether an OCR adapter is currently attached. Calling it forwards to
|
|
23
|
+
* the adapter's recognise. */
|
|
24
|
+
readonly ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
|
|
25
|
+
/** Install an OCR adapter via the engine's plugin API. Replaces the
|
|
26
|
+
* pre-0.5.0 pattern of mutating `ocrImage` / `ocrConfig` directly. */
|
|
27
|
+
attachOcr(adapter: {
|
|
28
|
+
recognize: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
|
|
29
|
+
options?: {
|
|
30
|
+
alwaysExtractEmbeddedImages?: boolean;
|
|
31
|
+
};
|
|
32
|
+
}): {
|
|
33
|
+
dispose: () => Promise<void>;
|
|
28
34
|
};
|
|
29
35
|
}
|
|
30
36
|
export interface OcrOptions {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAMnD;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;kCAE8B;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,mBAAmB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACjG;0EACsE;IACtE,SAAS,CAAC,OAAO,EAAE;QACjB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,mBAAmB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;QACxF,OAAO,CAAC,EAAE;YAAE,2BAA2B,CAAC,EAAE,OAAO,CAAA;SAAE,CAAC;KACrD,GAAG;QAAE,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;KAAE,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC5B;;;OAGG;IACH,eAAe,CAAC,EAAE,IAAI,CAAC;IACvB;;;;OAIG;IACH,OAAO,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;;;;;;;;OAYG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;CACvC;AAED,MAAM,WAAW,mBAAmB;IAClC;;;;OAIG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;IACZ;;;OAGG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,MAAM,WAAW,SAAS;IACxB,6DAA6D;IAC7D,OAAO,CAAC,KAAK,EAAE,SAAS,IAAI,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,8CAA8C;IAC9C,eAAe,IAAI,IAAI,EAAE,CAAC;IAC1B,wDAAwD;IACxD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,SAAS,CAAC,CAAC,SAAS,gBAAgB,EAClD,MAAM,EAAE,CAAC,EACT,IAAI,GAAE,UAAe,GACpB,SAAS,CAkDX;AA4CD,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAC3G,YAAY,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AACnD,YAAY,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACpE,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
|
package/dist/orchestrator.js
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import { OcrWorkerPool } from './ocr-worker.js';
|
|
15
15
|
import { SUPPORTED_LANGS } from './language-detector.js';
|
|
16
|
-
import { AlbexOcrError } from './errors.js';
|
|
17
16
|
/**
|
|
18
17
|
* Hook OCR into an Albex engine. Returns a handle that lets the caller
|
|
19
18
|
* preload languages, inspect state, and dispose cleanly.
|
|
@@ -34,9 +33,8 @@ import { AlbexOcrError } from './errors.js';
|
|
|
34
33
|
* ```
|
|
35
34
|
*/
|
|
36
35
|
export function enableOcr(engine, opts = {}) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
36
|
+
// No pre-check here — engine.attachOcr enforces the single-adapter
|
|
37
|
+
// contract itself and throws when called twice without dispose.
|
|
40
38
|
const enabledLangs = new Set(opts.languages ?? SUPPORTED_LANGS);
|
|
41
39
|
const defaultLang = opts.defaultLanguage ?? 'eng';
|
|
42
40
|
const pool = new OcrWorkerPool({
|
|
@@ -50,17 +48,18 @@ export function enableOcr(engine, opts = {}) {
|
|
|
50
48
|
.filter(l => enabledLangs.has(l))
|
|
51
49
|
.map(l => pool.recognize(EMPTY_PIXEL, l).catch(() => { })));
|
|
52
50
|
}
|
|
53
|
-
//
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
51
|
+
// Install through the engine's plugin API. The adapter owns the
|
|
52
|
+
// recognise function and any options the engine should honour
|
|
53
|
+
// (hybrid PDF mode etc.).
|
|
54
|
+
const engineHandle = engine.attachOcr({
|
|
55
|
+
recognize: async (image, recOpts) => {
|
|
56
|
+
const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
|
|
57
|
+
return pool.recognize(image, targetLang);
|
|
58
|
+
},
|
|
59
|
+
options: opts.alwaysExtractEmbeddedImages
|
|
60
|
+
? { alwaysExtractEmbeddedImages: true }
|
|
61
|
+
: undefined,
|
|
62
|
+
});
|
|
64
63
|
return {
|
|
65
64
|
async preload(langs) {
|
|
66
65
|
await Promise.all(langs
|
|
@@ -72,8 +71,7 @@ export function enableOcr(engine, opts = {}) {
|
|
|
72
71
|
},
|
|
73
72
|
async dispose() {
|
|
74
73
|
await pool.dispose();
|
|
75
|
-
|
|
76
|
-
delete engine.ocrConfig;
|
|
74
|
+
await engineHandle.dispose();
|
|
77
75
|
},
|
|
78
76
|
};
|
|
79
77
|
}
|
package/dist/orchestrator.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AA0FzD;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,SAAS,CACvB,MAAS,EACT,OAAmB,EAAE;IAErB,mEAAmE;IACnE,gEAAgE;IAEhE,MAAM,YAAY,GAAG,IAAI,GAAG,CAAO,IAAI,CAAC,SAAS,IAAI,eAAe,CAAC,CAAC;IACtE,MAAM,WAAW,GAAI,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC;IACnD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,QAAQ,EAAO,IAAI,CAAC,QAAQ;KAC7B,CAAC,CAAC;IAEH,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,KAAK,OAAO,CAAC,GAAG,CACd,IAAI,CAAC,OAAO;aACT,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAoE,CAAC,CAAC,CAAC,CAC9H,CAAC;IACJ,CAAC;IAED,gEAAgE;IAChE,8DAA8D;IAC9D,0BAA0B;IAC1B,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC;QACpC,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;YAClC,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,EAAE,IAAI,CAAC,2BAA2B;YACvC,CAAC,CAAC,EAAE,2BAA2B,EAAE,IAAI,EAAE;YACvC,CAAC,CAAC,SAAS;KACd,CAAC,CAAC;IAEH,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,KAAK;YACjB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK;iBACF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAiB,CAAC,CAAC,CAAC,CAC3E,CAAC;QACJ,CAAC;QACD,eAAe;YACb,OAAO,IAAI,CAAC,eAAe,EAAE,CAAC;QAChC,CAAC;QACD,KAAK,CAAC,OAAO;YACX,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrB,MAAM,YAAY,CAAC,OAAO,EAAE,CAAC;QAC/B,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,YAAY,CACnB,IAAqC,EACrC,OAA0B,EAC1B,QAAc;IAEd,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,WAAW,GAAG,CAAC,GAAG,EAAE;IACxB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC;QAC3B,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI;KACjB,CAAC,CAAC;IACH,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC,CAAC,EAAE,CAAC;AAEL,yEAAyE;AACzE,yEAAyE;AACzE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAG3G,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@albex/ocr",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "OCR module for Albex — Tesseract.js fast, lazy by language, zero-impact on the base bundle.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -23,8 +23,11 @@
|
|
|
23
23
|
"test": "vitest run",
|
|
24
24
|
"test:watch": "vitest"
|
|
25
25
|
},
|
|
26
|
+
"publishConfig": {
|
|
27
|
+
"access": "public"
|
|
28
|
+
},
|
|
26
29
|
"peerDependencies": {
|
|
27
|
-
"albex": "^0.
|
|
30
|
+
"albex": "^0.6.0"
|
|
28
31
|
},
|
|
29
32
|
"dependencies": {
|
|
30
33
|
"tesseract.js": "^5.1.0"
|
package/src/orchestrator.ts
CHANGED
|
@@ -16,19 +16,26 @@ import { OcrWorkerPool } from './ocr-worker.js';
|
|
|
16
16
|
import type { ImageLike, RecognitionResult } from './ocr-worker.js';
|
|
17
17
|
import { SUPPORTED_LANGS } from './language-detector.js';
|
|
18
18
|
import type { Lang } from './language-detector.js';
|
|
19
|
-
|
|
19
|
+
// AlbexOcrError is no longer used here — the engine.attachOcr contract
|
|
20
|
+
// throws on duplicate-attach so we don't pre-check. Keeping the import
|
|
21
|
+
// would trigger TS6133 (unused import). Errors module is still exported
|
|
22
|
+
// from the package barrel for users who construct their own.
|
|
20
23
|
|
|
21
24
|
/**
|
|
22
25
|
* The subset of `AlbexEngine` we need. Kept minimal so this package's only
|
|
23
26
|
* peer dependency on `albex` is a type contract.
|
|
24
27
|
*/
|
|
25
28
|
export interface OcrCapableEngine {
|
|
26
|
-
/**
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
/** Read-only feature-detect: `if (engine.ocrImage)` tells the caller
|
|
30
|
+
* whether an OCR adapter is currently attached. Calling it forwards to
|
|
31
|
+
* the adapter's recognise. */
|
|
32
|
+
readonly ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
|
|
33
|
+
/** Install an OCR adapter via the engine's plugin API. Replaces the
|
|
34
|
+
* pre-0.5.0 pattern of mutating `ocrImage` / `ocrConfig` directly. */
|
|
35
|
+
attachOcr(adapter: {
|
|
36
|
+
recognize: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
|
|
37
|
+
options?: { alwaysExtractEmbeddedImages?: boolean };
|
|
38
|
+
}): { dispose: () => Promise<void> };
|
|
32
39
|
}
|
|
33
40
|
|
|
34
41
|
export interface OcrOptions {
|
|
@@ -120,12 +127,8 @@ export function enableOcr<T extends OcrCapableEngine>(
|
|
|
120
127
|
engine: T,
|
|
121
128
|
opts: OcrOptions = {},
|
|
122
129
|
): OcrHandle {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
'ocr_already_enabled',
|
|
126
|
-
'enableOcr called on an engine that already has OCR attached. Call dispose() on the previous handle first.',
|
|
127
|
-
);
|
|
128
|
-
}
|
|
130
|
+
// No pre-check here — engine.attachOcr enforces the single-adapter
|
|
131
|
+
// contract itself and throws when called twice without dispose.
|
|
129
132
|
|
|
130
133
|
const enabledLangs = new Set<Lang>(opts.languages ?? SUPPORTED_LANGS);
|
|
131
134
|
const defaultLang = opts.defaultLanguage ?? 'eng';
|
|
@@ -144,18 +147,18 @@ export function enableOcr<T extends OcrCapableEngine>(
|
|
|
144
147
|
);
|
|
145
148
|
}
|
|
146
149
|
|
|
147
|
-
//
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
}
|
|
150
|
+
// Install through the engine's plugin API. The adapter owns the
|
|
151
|
+
// recognise function and any options the engine should honour
|
|
152
|
+
// (hybrid PDF mode etc.).
|
|
153
|
+
const engineHandle = engine.attachOcr({
|
|
154
|
+
recognize: async (image, recOpts) => {
|
|
155
|
+
const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
|
|
156
|
+
return pool.recognize(image, targetLang);
|
|
157
|
+
},
|
|
158
|
+
options: opts.alwaysExtractEmbeddedImages
|
|
159
|
+
? { alwaysExtractEmbeddedImages: true }
|
|
160
|
+
: undefined,
|
|
161
|
+
});
|
|
159
162
|
|
|
160
163
|
return {
|
|
161
164
|
async preload(langs) {
|
|
@@ -170,8 +173,7 @@ export function enableOcr<T extends OcrCapableEngine>(
|
|
|
170
173
|
},
|
|
171
174
|
async dispose() {
|
|
172
175
|
await pool.dispose();
|
|
173
|
-
|
|
174
|
-
delete engine.ocrConfig;
|
|
176
|
+
await engineHandle.dispose();
|
|
175
177
|
},
|
|
176
178
|
};
|
|
177
179
|
}
|