@albex/ocr 0.2.0 → 0.6.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -18,13 +18,19 @@ import type { Lang } from './language-detector.js';
18
18
  * peer dependency on `albex` is a type contract.
19
19
  */
20
20
  export interface OcrCapableEngine {
21
- /** Storage slot where the orchestrator parks its public ocrImage method. */
22
- ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
23
- /** Structural slot the engine reads to decide whether to invoke OCR on
24
- * embedded images of PDFs that ALSO have vector text. Set by `enableOcr`
25
- * when the `alwaysExtractEmbeddedImages` option is true. */
26
- ocrConfig?: {
27
- alwaysExtractEmbeddedImages?: boolean;
21
+ /** Read-only feature-detect: `if (engine.ocrImage)` tells the caller
22
+ * whether an OCR adapter is currently attached. Calling it forwards to
23
+ * the adapter's recognise. */
24
+ readonly ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
25
+ /** Install an OCR adapter via the engine's plugin API. Replaces the
26
+ * pre-0.5.0 pattern of mutating `ocrImage` / `ocrConfig` directly. */
27
+ attachOcr(adapter: {
28
+ recognize: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
29
+ options?: {
30
+ alwaysExtractEmbeddedImages?: boolean;
31
+ };
32
+ }): {
33
+ dispose: () => Promise<void>;
28
34
  };
29
35
  }
30
36
  export interface OcrOptions {
@@ -1 +1 @@
1
- {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAGnD;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B,4EAA4E;IAC5E,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,mBAAmB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACxF;;gEAE4D;IAC5D,SAAS,CAAC,EAAE;QAAE,2BAA2B,CAAC,EAAE,OAAO,CAAA;KAAE,CAAC;CACvD;AAED,MAAM,WAAW,UAAU;IACzB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC5B;;;OAGG;IACH,eAAe,CAAC,EAAE,IAAI,CAAC;IACvB;;;;OAIG;IACH,OAAO,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;;;;;;;;OAYG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;CACvC;AAED,MAAM,WAAW,mBAAmB;IAClC;;;;OAIG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;IACZ;;;OAGG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,MAAM,WAAW,SAAS;IACxB,6DAA6D;IAC7D,OAAO,CAAC,KAAK,EAAE,SAAS,IAAI,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,8CAA8C;IAC9C,eAAe,IAAI,IAAI,EAAE,CAAC;IAC1B,wDAAwD;IACxD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,SAAS,CAAC,CAAC,SAAS,gBAAgB,EAClD,MAAM,EAAE,CAAC,EACT,IAAI,GAAE,UAAe,GACpB,SAAS,CAuDX;AA4CD,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAC3G,YAAY,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AACnD,YAAY,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACpE,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
1
+ {"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpE,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AAMnD;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;kCAE8B;IAC9B,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,mBAAmB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACjG;0EACsE;IACtE,SAAS,CAAC,OAAO,EAAE;QACjB,SAAS,EAAE,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,CAAC,EAAE,mBAAmB,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;QACxF,OAAO,CAAC,EAAE;YAAE,2BAA2B,CAAC,EAAE,OAAO,CAAA;SAAE,CAAC;KACrD,GAAG;QAAE,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAA;KAAE,CAAC;CACtC;AAED,MAAM,WAAW,UAAU;IACzB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC5B;;;OAGG;IACH,eAAe,CAAC,EAAE,IAAI,CAAC;IACvB;;;;OAIG;IACH,OAAO,CAAC,EAAE,SAAS,IAAI,EAAE,CAAC;IAC1B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;;;;;;;;OAYG;IACH,2BAA2B,CAAC,EAAE,OAAO,CAAC;CACvC;AAED,MAAM,WAAW,mBAAmB;IAClC;;;;OAIG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;IACZ;;;OAGG;IACH,IAAI,CAAC,EAAE,IAAI,CAAC;CACb;AAED,MAAM,WAAW,SAAS;IACxB,6DAA6D;IAC7D,OAAO,CAAC,KAAK,EAAE,SAAS,IAAI,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/C,8CAA8C;IAC9C,eAAe,IAAI,IAAI,EAAE,CAAC;IAC1B,wDAAwD;IACxD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,SAAS,CAAC,CAAC,SAAS,gBAAgB,EAClD,MAAM,EAAE,CAAC,EACT,IAAI,GAAE,UAAe,GACpB,SAAS,CAkDX;AA4CD,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAC3G,YAAY,EAAE,IAAI,EAAE,MAAM,wBAAwB,CAAC;AACnD,YAAY,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACpE,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
@@ -13,7 +13,6 @@
13
13
  */
14
14
  import { OcrWorkerPool } from './ocr-worker.js';
15
15
  import { SUPPORTED_LANGS } from './language-detector.js';
16
- import { AlbexOcrError } from './errors.js';
17
16
  /**
18
17
  * Hook OCR into an Albex engine. Returns a handle that lets the caller
19
18
  * preload languages, inspect state, and dispose cleanly.
@@ -34,9 +33,8 @@ import { AlbexOcrError } from './errors.js';
34
33
  * ```
35
34
  */
36
35
  export function enableOcr(engine, opts = {}) {
37
- if (engine.ocrImage) {
38
- throw new AlbexOcrError('ocr_already_enabled', 'enableOcr called on an engine that already has OCR attached. Call dispose() on the previous handle first.');
39
- }
36
+ // No pre-check here — engine.attachOcr enforces the single-adapter
37
+ // contract itself and throws when called twice without dispose.
40
38
  const enabledLangs = new Set(opts.languages ?? SUPPORTED_LANGS);
41
39
  const defaultLang = opts.defaultLanguage ?? 'eng';
42
40
  const pool = new OcrWorkerPool({
@@ -50,17 +48,18 @@ export function enableOcr(engine, opts = {}) {
50
48
  .filter(l => enabledLangs.has(l))
51
49
  .map(l => pool.recognize(EMPTY_PIXEL, l).catch(() => { })));
52
50
  }
53
- // Attach the recognise method to the engine.
54
- engine.ocrImage = async (image, recOpts) => {
55
- const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
56
- return pool.recognize(image, targetLang);
57
- };
58
- // Hybrid-PDF flag. The engine reads this to decide whether to walk every
59
- // PDF's embedded images on top of the normal text extraction. Stored as
60
- // a separate slot so the structural contract with `albex` stays minimal.
61
- if (opts.alwaysExtractEmbeddedImages) {
62
- engine.ocrConfig = { alwaysExtractEmbeddedImages: true };
63
- }
51
+ // Install through the engine's plugin API. The adapter owns the
52
+ // recognise function and any options the engine should honour
53
+ // (hybrid PDF mode etc.).
54
+ const engineHandle = engine.attachOcr({
55
+ recognize: async (image, recOpts) => {
56
+ const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
57
+ return pool.recognize(image, targetLang);
58
+ },
59
+ options: opts.alwaysExtractEmbeddedImages
60
+ ? { alwaysExtractEmbeddedImages: true }
61
+ : undefined,
62
+ });
64
63
  return {
65
64
  async preload(langs) {
66
65
  await Promise.all(langs
@@ -72,8 +71,7 @@ export function enableOcr(engine, opts = {}) {
72
71
  },
73
72
  async dispose() {
74
73
  await pool.dispose();
75
- delete engine.ocrImage;
76
- delete engine.ocrConfig;
74
+ await engineHandle.dispose();
77
75
  },
78
76
  };
79
77
  }
@@ -1 +1 @@
1
- {"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEzD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAiF5C;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,SAAS,CACvB,MAAS,EACT,OAAmB,EAAE;IAErB,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACpB,MAAM,IAAI,aAAa,CACrB,qBAAqB,EACrB,2GAA2G,CAC5G,CAAC;IACJ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAO,IAAI,CAAC,SAAS,IAAI,eAAe,CAAC,CAAC;IACtE,MAAM,WAAW,GAAI,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC;IACnD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,QAAQ,EAAO,IAAI,CAAC,QAAQ;KAC7B,CAAC,CAAC;IAEH,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,KAAK,OAAO,CAAC,GAAG,CACd,IAAI,CAAC,OAAO;aACT,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAoE,CAAC,CAAC,CAAC,CAC9H,CAAC;IACJ,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,QAAQ,GAAG,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;QACzC,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;QACpE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IAC3C,CAAC,CAAC;IAEF,yEAAyE;IACzE,wEAAwE;IACxE,yEAAyE;IACzE,IAAI,IAAI,CAAC,2BAA2B,EAAE,CAAC;QACrC,MAAM,CAAC,SAAS,GAAG,EAAE,2BAA2B,EAAE,IAAI,EAAE,CAAC;IAC3D,CAAC;IAED,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,KAAK;YACjB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK;iBACF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAiB,CAAC,CAAC,CAAC,CAC3E,CAAC;QACJ,CAAC;QACD,eAAe;YACb,OAAO,IAAI,CAAC,eAAe,EAAE,CAAC;QAChC,CAAC;QACD,KAAK,CAAC,OAAO;YACX,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrB,OAAO,MAAM,CAAC,QAAQ,CAAC;YACvB,OAAO,MAAM,CAAC,SAAS,CAAC;QAC1B,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,YAAY,CACnB,IAAqC,EACrC,OAA0B,EAC1B,QAAc;IAEd,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GA
AG,CAAC,IAAI,CAAC,IAAI,CAAC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,WAAW,GAAG,CAAC,GAAG,EAAE;IACxB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC;QAC3B,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI;KACjB,CAAC,CAAC;IACH,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC,CAAC,EAAE,CAAC;AAEL,yEAAyE;AACzE,yEAAyE;AACzE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAG3G,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
1
+ {"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../src/orchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AA0FzD;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,UAAU,SAAS,CACvB,MAAS,EACT,OAAmB,EAAE;IAErB,mEAAmE;IACnE,gEAAgE;IAEhE,MAAM,YAAY,GAAG,IAAI,GAAG,CAAO,IAAI,CAAC,SAAS,IAAI,eAAe,CAAC,CAAC;IACtE,MAAM,WAAW,GAAI,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC;IACnD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC;QAC7B,aAAa,EAAE,IAAI,CAAC,aAAa;QACjC,QAAQ,EAAO,IAAI,CAAC,QAAQ;KAC7B,CAAC,CAAC;IAEH,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,KAAK,OAAO,CAAC,GAAG,CACd,IAAI,CAAC,OAAO;aACT,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAoE,CAAC,CAAC,CAAC,CAC9H,CAAC;IACJ,CAAC;IAED,gEAAgE;IAChE,8DAA8D;IAC9D,0BAA0B;IAC1B,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC;QACpC,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;YAClC,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;YACpE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,EAAE,IAAI,CAAC,2BAA2B;YACvC,CAAC,CAAC,EAAE,2BAA2B,EAAE,IAAI,EAAE;YACvC,CAAC,CAAC,SAAS;KACd,CAAC,CAAC;IAEH,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,KAAK;YACjB,MAAM,OAAO,CAAC,GAAG,CACf,KAAK;iBACF,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;iBAChC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAiB,CAAC,CAAC,CAAC,CAC3E,CAAC;QACJ,CAAC;QACD,eAAe;YACb,OAAO,IAAI,CAAC,eAAe,EAAE,CAAC;QAChC,CAAC;QACD,KAAK,CAAC,OAAO;YACX,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YACrB,MAAM,YAAY,CAAC,OAAO,EAAE,CAAC;QAC/B,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,YAAY,CACnB,IAAqC,EACrC,OAA0B,EAC1B,QAAc;IAEd,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,IAAI,IAAI,EAAE,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CA
AC;QAAI,OAAO,IAAI,CAAC,IAAI,CAAC;IAC7D,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,MAAM,WAAW,GAAG,CAAC,GAAG,EAAE;IACxB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC;QAC3B,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;QAC9C,IAAI,EAAE,IAAI,EAAE,IAAI;KACjB,CAAC,CAAC;IACH,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC,CAAC,EAAE,CAAC;AAEL,yEAAyE;AACzE,yEAAyE;AACzE,OAAO,EAAE,cAAc,EAAE,gBAAgB,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAG3G,OAAO,EACL,aAAa,EACb,iBAAiB,EACjB,qBAAqB,EACrB,wBAAwB,GACzB,MAAM,aAAa,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@albex/ocr",
3
- "version": "0.2.0",
3
+ "version": "0.6.0",
4
4
  "description": "OCR module for Albex — Tesseract.js fast, lazy by language, zero-impact on the base bundle.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -23,8 +23,11 @@
23
23
  "test": "vitest run",
24
24
  "test:watch": "vitest"
25
25
  },
26
+ "publishConfig": {
27
+ "access": "public"
28
+ },
26
29
  "peerDependencies": {
27
- "albex": "^0.3.0"
30
+ "albex": "^0.6.0"
28
31
  },
29
32
  "dependencies": {
30
33
  "tesseract.js": "^5.1.0"
@@ -16,19 +16,26 @@ import { OcrWorkerPool } from './ocr-worker.js';
16
16
  import type { ImageLike, RecognitionResult } from './ocr-worker.js';
17
17
  import { SUPPORTED_LANGS } from './language-detector.js';
18
18
  import type { Lang } from './language-detector.js';
19
- import { AlbexOcrError } from './errors.js';
19
+ // AlbexOcrError is no longer used here — the engine.attachOcr contract
20
+ // throws on duplicate-attach so we don't pre-check. Keeping the import
21
+ // would trigger TS6133 (unused import). Errors module is still exported
22
+ // from the package barrel for users who construct their own.
20
23
 
21
24
  /**
22
25
  * The subset of `AlbexEngine` we need. Kept minimal so this package's only
23
26
  * peer dependency on `albex` is a type contract.
24
27
  */
25
28
  export interface OcrCapableEngine {
26
- /** Storage slot where the orchestrator parks its public ocrImage method. */
27
- ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
28
- /** Structural slot the engine reads to decide whether to invoke OCR on
29
- * embedded images of PDFs that ALSO have vector text. Set by `enableOcr`
30
- * when the `alwaysExtractEmbeddedImages` option is true. */
31
- ocrConfig?: { alwaysExtractEmbeddedImages?: boolean };
29
+ /** Read-only feature-detect: `if (engine.ocrImage)` tells the caller
30
+ * whether an OCR adapter is currently attached. Calling it forwards to
31
+ * the adapter's recognise. */
32
+ readonly ocrImage?: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
33
+ /** Install an OCR adapter via the engine's plugin API. Replaces the
34
+ * pre-0.5.0 pattern of mutating `ocrImage` / `ocrConfig` directly. */
35
+ attachOcr(adapter: {
36
+ recognize: (image: ImageLike, opts?: OcrRecognizeOptions) => Promise<RecognitionResult>;
37
+ options?: { alwaysExtractEmbeddedImages?: boolean };
38
+ }): { dispose: () => Promise<void> };
32
39
  }
33
40
 
34
41
  export interface OcrOptions {
@@ -120,12 +127,8 @@ export function enableOcr<T extends OcrCapableEngine>(
120
127
  engine: T,
121
128
  opts: OcrOptions = {},
122
129
  ): OcrHandle {
123
- if (engine.ocrImage) {
124
- throw new AlbexOcrError(
125
- 'ocr_already_enabled',
126
- 'enableOcr called on an engine that already has OCR attached. Call dispose() on the previous handle first.',
127
- );
128
- }
130
+ // No pre-check here — engine.attachOcr enforces the single-adapter
131
+ // contract itself and throws when called twice without dispose.
129
132
 
130
133
  const enabledLangs = new Set<Lang>(opts.languages ?? SUPPORTED_LANGS);
131
134
  const defaultLang = opts.defaultLanguage ?? 'eng';
@@ -144,18 +147,18 @@ export function enableOcr<T extends OcrCapableEngine>(
144
147
  );
145
148
  }
146
149
 
147
- // Attach the recognise method to the engine.
148
- engine.ocrImage = async (image, recOpts) => {
149
- const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
150
- return pool.recognize(image, targetLang);
151
- };
152
-
153
- // Hybrid-PDF flag. The engine reads this to decide whether to walk every
154
- // PDF's embedded images on top of the normal text extraction. Stored as
155
- // a separate slot so the structural contract with `albex` stays minimal.
156
- if (opts.alwaysExtractEmbeddedImages) {
157
- engine.ocrConfig = { alwaysExtractEmbeddedImages: true };
158
- }
150
+ // Install through the engine's plugin API. The adapter owns the
151
+ // recognise function and any options the engine should honour
152
+ // (hybrid PDF mode etc.).
153
+ const engineHandle = engine.attachOcr({
154
+ recognize: async (image, recOpts) => {
155
+ const targetLang = pickLanguage(recOpts, enabledLangs, defaultLang);
156
+ return pool.recognize(image, targetLang);
157
+ },
158
+ options: opts.alwaysExtractEmbeddedImages
159
+ ? { alwaysExtractEmbeddedImages: true }
160
+ : undefined,
161
+ });
159
162
 
160
163
  return {
161
164
  async preload(langs) {
@@ -170,8 +173,7 @@ export function enableOcr<T extends OcrCapableEngine>(
170
173
  },
171
174
  async dispose() {
172
175
  await pool.dispose();
173
- delete engine.ocrImage;
174
- delete engine.ocrConfig;
176
+ await engineHandle.dispose();
175
177
  },
176
178
  };
177
179
  }