@llamaindex/liteparse 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -97,6 +97,9 @@ lit parse document.pdf --target-pages "1-5,10,15-20"
97
97
 
98
98
  # Parse without OCR
99
99
  lit parse document.pdf --no-ocr
100
+
101
+ # Parse a remote PDF
102
+ curl -sL https://example.com/report.pdf | lit parse -
100
103
  ```
101
104
 
102
105
  ### Batch Parsing
@@ -145,7 +148,7 @@ console.log(result.text);
145
148
 
146
149
  #### Buffer / Uint8Array Input
147
150
 
148
- You can pass raw bytes directly instead of a file path. PDF buffers are parsed with **zero disk I/O** — no temp files are written:
151
+ You can pass raw bytes directly instead of a file path, which is useful for remote files:
149
152
 
150
153
  ```typescript
151
154
  import { LiteParse } from '@llamaindex/liteparse';
@@ -97,6 +97,9 @@ export interface LiteParseConfig {
97
97
  * Calculate precise bounding boxes for each text line. Disable for faster
98
98
  * parsing when bounding boxes aren't needed.
99
99
  *
100
+ * @deprecated Controls the deprecated `boundingBoxes` output. Will be removed in v2.0.
101
+ * Text item coordinates (`x`, `y`, `width`, `height`) are always present regardless.
102
+ *
100
103
  * @defaultValue `true`
101
104
  */
102
105
  preciseBoundingBox: boolean;
@@ -226,6 +229,8 @@ export interface OcrData {
226
229
  * An axis-aligned bounding box defined by its top-left and bottom-right corners.
227
230
  *
228
231
  * All coordinates are in PDF points.
232
+ *
233
+ * @deprecated Use {@link TextItem} coordinates (`x`, `y`, `width`, `height`) instead. Will be removed in v2.0.
229
234
  */
230
235
  export interface BoundingBox {
231
236
  /** X coordinate of the top-left corner. */
@@ -251,9 +256,44 @@ export interface ParsedPage {
251
256
  text: string;
252
257
  /** Individual text elements extracted from the page. */
253
258
  textItems: TextItem[];
254
- /** Bounding boxes for text lines. Present when {@link LiteParseConfig.preciseBoundingBox} is enabled. */
259
+ /**
260
+ * @deprecated Use {@link TextItem} coordinates instead. Will be removed in v2.0.
261
+ * Present when {@link LiteParseConfig.preciseBoundingBox} is enabled.
262
+ */
255
263
  boundingBoxes?: BoundingBox[];
256
264
  }
265
+ /**
266
+ * A text element from the JSON output with position, size, and font metadata.
267
+ */
268
+ export interface JsonTextItem {
269
+ /** The text content of this item. */
270
+ text: string;
271
+ /** X coordinate of the top-left corner, in PDF points. */
272
+ x: number;
273
+ /** Y coordinate of the top-left corner, in PDF points. */
274
+ y: number;
275
+ /** Width of the text item in PDF points. */
276
+ width: number;
277
+ /** Height of the text item in PDF points. */
278
+ height: number;
279
+ /** Font name. */
280
+ fontName?: string;
281
+ /** Font size in PDF points. */
282
+ fontSize?: number;
283
+ }
284
+ /**
285
+ * Options for {@link searchItems}.
286
+ */
287
+ export interface SearchItemsOptions {
288
+ /** Find text items containing this phrase. Matches can span multiple adjacent items. */
289
+ phrase: string;
290
+ /**
291
+ * Whether the search should be case-sensitive.
292
+ *
293
+ * @defaultValue `false`
294
+ */
295
+ caseSensitive?: boolean;
296
+ }
257
297
  /**
258
298
  * Structured JSON representation of parsed document data.
259
299
  * Returned when {@link LiteParseConfig.outputFormat} is `"json"`.
@@ -270,16 +310,10 @@ export interface ParseResultJson {
270
310
  /** Full text content of the page. */
271
311
  text: string;
272
312
  /** Individual text elements with position and font metadata. */
273
- textItems: Array<{
274
- text: string;
275
- x: number;
276
- y: number;
277
- width: number;
278
- height: number;
279
- fontName?: string;
280
- fontSize?: number;
281
- }>;
282
- /** Bounding boxes for text lines. */
313
+ textItems: JsonTextItem[];
314
+ /**
315
+ * @deprecated Use `textItems` coordinates instead. Will be removed in v2.0.
316
+ */
283
317
  boundingBoxes: BoundingBox[];
284
318
  }>;
285
319
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/core/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C;;;;;;GAMG;AACH,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;AAE1D;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;OAKG;IACH,WAAW,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAE/B;;;;;OAKG;IACH,UAAU,EAAE,OAAO,CAAC;IAEpB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;;OAKG;IACH,UAAU,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;;OAKG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;OAIG;IACH,YAAY,EAAE,YAAY,CAAC;IAE3B;;;;;OAKG;IACH,kBAAkB,EAAE,OAAO,CAAC;IAE5B;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAE/B;;;;OAIG;IACH,kCAAkC,EAAE,OAAO,CAAC;IAE5C;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,WAAW,QAAQ;IACvB,qCAAqC;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IACd,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,CAAC,EAAE,MAAM,CAAC;IACV,kDAAkD;IAClD,CAAC,EAAE,MAAM,CAAC;IACV,uFAAuF;IACvF,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oEAAoE;IACpE,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,kDAAkD;IAClD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,kDAAkD;IAClD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,8EAA8E;IAC9E,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,6DAA6D;IAC7D,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,uEAAuE;IACvE,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,0FAA0F;IAC1F,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,sCAAsC;IACtC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,
EAAE,UAAU,CAAC;IACpB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAGlB,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,CAAC;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;;GAGG;AACH,MAAM,WAAW,OAAO;IACtB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAC1B,2CAA2C;IAC3C,EAAE,EAAE,MAAM,CAAC;IACX,2CAA2C;IAC3C,EAAE,EAAE,MAAM,CAAC;IACX,+CAA+C;IAC/C,EAAE,EAAE,MAAM,CAAC;IACX,+CAA+C;IAC/C,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,mEAAmE;IACnE,IAAI,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,yGAAyG;IACzG,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC;CAC/B;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,0BAA0B;IAC1B,KAAK,EAAE,KAAK,CAAC;QACX,6BAA6B;QAC7B,IAAI,EAAE,MAAM,CAAC;QACb,gCAAgC;QAChC,KAAK,EAAE,MAAM,CAAC;QACd,iCAAiC;QACjC,MAAM,EAAE,MAAM,CAAC;QACf,qCAAqC;QACrC,IAAI,EAAE,MAAM,CAAC;QACb,gEAAgE;QAChE,SAAS,EAAE,KAAK,CAAC;YACf,IAAI,EAAE,MAAM,CAAC;YACb,CAAC,EAAE,MAAM,CAAC;YACV,CAAC,EAAE,MAAM,CAAC;YACV,KAAK,EAAE,MAAM,CAAC;YACd,MAAM,EAAE,MAAM,CAAC;YACf,QAAQ,CAAC,EAAE,MAAM,CAAC;YAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;SACnB,CAAC,CAAC;QACH,qCAAqC;QACrC,aAAa,EAAE,WAAW,EAAE,CAAC;KAC9B,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,4BAA4B;IAC5B,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,2FAA2F;IAC3F,IAAI,CAA
C,EAAE,eAAe,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/core/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3C;;;;;;GAMG;AACH,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAC;AAE1D;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,eAAe;IAC9B;;;;;OAKG;IACH,WAAW,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAE/B;;;;;OAKG;IACH,UAAU,EAAE,OAAO,CAAC;IAEpB;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;;;OAKG;IACH,UAAU,EAAE,MAAM,CAAC;IAEnB;;;;OAIG;IACH,QAAQ,EAAE,MAAM,CAAC;IAEjB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;;OAKG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;OAIG;IACH,YAAY,EAAE,YAAY,CAAC;IAE3B;;;;;;;;OAQG;IACH,kBAAkB,EAAE,OAAO,CAAC;IAE5B;;;;OAIG;IACH,qBAAqB,EAAE,OAAO,CAAC;IAE/B;;;;OAIG;IACH,kCAAkC,EAAE,OAAO,CAAC;IAE5C;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,WAAW,QAAQ;IACvB,qCAAqC;IACrC,GAAG,EAAE,MAAM,CAAC;IACZ,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IACd,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,CAAC,EAAE,MAAM,CAAC;IACV,kDAAkD;IAClD,CAAC,EAAE,MAAM,CAAC;IACV,uFAAuF;IACvF,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oEAAoE;IACpE,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,kDAAkD;IAClD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,kDAAkD;IAClD,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,8EAA8E;IAC9E,MAAM,CAAC,EAAE,UAAU,CAAC;IACpB,6DAA6D;IAC7D,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,uEAAuE;IACvE,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,0FAA0F;IAC1F,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,sCAAsC;IACtC,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,sCAAsC;IACtC,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CA
AC,EAAE,UAAU,CAAC;IACpB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,OAAO,CAAC,EAAE,OAAO,CAAC;IAGlB,IAAI,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,CAAC;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,CAAC,CAAC,EAAE,MAAM,CAAC;CACZ;AAED;;;GAGG;AACH,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;;GAGG;AACH,MAAM,WAAW,OAAO;IACtB,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;IACV,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,2CAA2C;IAC3C,EAAE,EAAE,MAAM,CAAC;IACX,2CAA2C;IAC3C,EAAE,EAAE,MAAM,CAAC;IACX,+CAA+C;IAC/C,EAAE,EAAE,MAAM,CAAC;IACX,+CAA+C;IAC/C,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,gCAAgC;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,iCAAiC;IACjC,MAAM,EAAE,MAAM,CAAC;IACf,mEAAmE;IACnE,IAAI,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB;;;OAGG;IACH,aAAa,CAAC,EAAE,WAAW,EAAE,CAAC;CAC/B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,qCAAqC;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,0DAA0D;IAC1D,CAAC,EAAE,MAAM,CAAC;IACV,4CAA4C;IAC5C,KAAK,EAAE,MAAM,CAAC;IACd,6CAA6C;IAC7C,MAAM,EAAE,MAAM,CAAC;IACf,iBAAiB;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;;GAGG;AACH,MAAM,WAAW,eAAe;IAC9B,0BAA0B;IAC1B,KAAK,EAAE,KAAK,CAAC;QACX,6BAA6B;QAC7B,IAAI,EAAE,MAAM,CAAC;QACb,gCAAgC;QAChC,KAAK,EAAE,MAAM,CAAC;QACd,iCAAiC;QACjC,MAAM,EAAE,MAAM,CAAC;QACf,qCAAqC;QACrC,IAAI,EAAE,MAAM,CAAC;QACb
,gEAAgE;QAChE,SAAS,EAAE,YAAY,EAAE,CAAC;QAC1B;;WAEG;QACH,aAAa,EAAE,WAAW,EAAE,CAAC;KAC9B,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,4BAA4B;IAC5B,KAAK,EAAE,UAAU,EAAE,CAAC;IACpB,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,2FAA2F;IAC3F,IAAI,CAAC,EAAE,eAAe,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,8BAA8B;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,uDAAuD;IACvD,WAAW,EAAE,MAAM,CAAC;IACpB,qDAAqD;IACrD,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
@@ -1 +1 @@
1
- {"version":3,"file":"tesseract.d.ts","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAElE,qBAAa,eAAgB,YAAW,SAAS;IAC/C,IAAI,SAAe;IACnB,OAAO,CAAC,SAAS,CAAC,CAAY;IAC9B,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,eAAe,CAAC,CAAS;IACjC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,YAAY,CAAC,CAAS;gBAElB,WAAW,GAAE,MAAU,EAAE,YAAY,CAAC,EAAE,MAAM;IAMpD,UAAU,CAAC,QAAQ,GAAE,MAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA+EnD,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAmD5E,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAAE,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;IAmBxF,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAShC;;;OAGG;IACH,OAAO,CAAC,iBAAiB;CAuB1B"}
1
+ {"version":3,"file":"tesseract.d.ts","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAElE,qBAAa,eAAgB,YAAW,SAAS;IAC/C,IAAI,SAAe;IACnB,OAAO,CAAC,SAAS,CAAC,CAAY;IAC9B,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,eAAe,CAAC,CAAS;IACjC,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,YAAY,CAAC,CAAS;gBAElB,WAAW,GAAE,MAAU,EAAE,YAAY,CAAC,EAAE,MAAM;IAMpD,UAAU,CAAC,QAAQ,GAAE,MAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAmFnD,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAmD5E,cAAc,CAAC,MAAM,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAAE,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;IAmBxF,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC;IAShC;;;OAGG;IACH,OAAO,CAAC,iBAAiB;CAuB1B"}
@@ -26,6 +26,10 @@ export class TesseractEngine {
26
26
  workerOptions.cachePath = this.tessdataPath;
27
27
  workerOptions.gzip = false; // Pre-cached files are not gzipped
28
28
  }
29
+ workerOptions.errorHandler = () => {
30
+ // Let createWorker reject so LiteParse can convert the failure into
31
+ // an actionable initialization error instead of crashing the process.
32
+ };
29
33
  // Create worker pool
30
34
  for (let i = 0; i < this.concurrency; i++) {
31
35
  let worker;
@@ -1 +1 @@
1
- {"version":3,"file":"tesseract.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,eAAe,EAAqB,MAAM,cAAc,CAAC;AAGhF,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IACX,SAAS,CAAa;IACtB,OAAO,GAAa,EAAE,CAAC;IACvB,eAAe,CAAU;IACzB,WAAW,CAAS;IACpB,YAAY,CAAU;IAE9B,YAAY,cAAsB,CAAC,EAAE,YAAqB;QACxD,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uFAAuF;QACvF,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,SAAS,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAmB,KAAK;QACvC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,CAAC,wCAAwC;QAClD,CAAC;QAED,8DAA8D;QAC9D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,mBAAmB;QACnB,IAAI,CAAC,SAAS,GAAG,eAAe,EAAE,CAAC;QAEnC,kDAAkD;QAClD,MAAM,aAAa,GAA4B,EAAE,CAAC;QAClD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,aAAa,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC;YAC3C,aAAa,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC;YAC5C,aAAa,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,mCAAmC;QACjE,CAAC;QAED,qBAAqB;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,MAAc,CAAC;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,YAAY,CACzB,QAAQ,EACR,CAAC,EACD,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAClE,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uCAAuC;gBACvC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAEvE,kDAAkD;gBAClD,IACE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;oBAC7B,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EACnC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,mDAAmD,QAAQ,KAAK;wBAC9D,yDAAyD;wBACzD,wBAAwB;wBACxB,kEAAkE,QAAQ,gBAAgB;wBAC1F,mEAAmE;wBACnE,2CAA2C,EAC7C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,IACE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;oBAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAC5B,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EACpC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,+CAA+C,QAAQ,MAAM,OAAO,IAAI;wBACtE,UAAU,QAAQ,yDAAyD
;wBAC3E,0CAA0C,EAC5C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YACD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAsB,EAAE,OAAmB;QACzD,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACH,4CAA4C;YAC5C,8DAA8D;YAC9D,8EAA8E;YAC9E,MAAM,EACJ,IAAI,EAAE,EAAE,MAAM,EAAE,GACjB,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAE1E,wFAAwF;YACxF,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;gBACjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;oBAC/C,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;4BACpC,OAAO,CAAC,IAAI,CAAC;gCACX,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;gCACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;6BAC3E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,6BAA6B,KAAK,GAAG,EAAE,KAAK,CAAC,CAAC;YAC5D,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,MAA2B,EAAE,OAAmB;QACnE,kBAA
kB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,kEAAkE;QAClE,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC;QAEnE,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,SAAS;QACb,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,WAAW,GAA2B;YAC1C,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,SAAS;YACb,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,SAAS;YAClB,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;SACV,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC7C,OAAO,WAAW,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;CACF"}
1
+ {"version":3,"file":"tesseract.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,eAAe,EAAqB,MAAM,cAAc,CAAC;AAGhF,MAAM,OAAO,eAAe;IAC1B,IAAI,GAAG,WAAW,CAAC;IACX,SAAS,CAAa;IACtB,OAAO,GAAa,EAAE,CAAC;IACvB,eAAe,CAAU;IACzB,WAAW,CAAS;IACpB,YAAY,CAAU;IAE9B,YAAY,cAAsB,CAAC,EAAE,YAAqB;QACxD,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,uFAAuF;QACvF,IAAI,CAAC,YAAY,GAAG,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,SAAS,CAAC;IAC/E,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,WAAmB,KAAK;QACvC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,KAAK,QAAQ,EAAE,CAAC;YACxD,OAAO,CAAC,wCAAwC;QAClD,CAAC;QAED,8DAA8D;QAC9D,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QAEvB,mBAAmB;QACnB,IAAI,CAAC,SAAS,GAAG,eAAe,EAAE,CAAC;QAEnC,kDAAkD;QAClD,MAAM,aAAa,GAA4B,EAAE,CAAC;QAClD,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,aAAa,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC;YAC3C,aAAa,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC;YAC5C,aAAa,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,mCAAmC;QACjE,CAAC;QACD,aAAa,CAAC,YAAY,GAAG,GAAG,EAAE;YAChC,oEAAoE;YACpE,sEAAsE;QACxE,CAAC,CAAC;QAEF,qBAAqB;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,MAAc,CAAC;YACnB,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,YAAY,CACzB,QAAQ,EACR,CAAC,EACD,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAClE,CAAC;YACJ,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,uCAAuC;gBACvC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAEvE,kDAAkD;gBAClD,IACE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBACzB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAC3B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;oBAC7B,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EACnC,CAAC;oBACD,MAAM,IAAI,KAAK,CACb,mDAAmD,QAAQ,KAAK;wBAC9D,yDAAyD;wBACzD,wBAAwB;wBACxB,kEAAkE,QAAQ,gBAAgB;wBAC1F,mEAAmE;wBACnE,2CAA2C,EAC7C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,IACE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC;oBAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAC5B,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EACpC,CAAC
;oBACD,MAAM,IAAI,KAAK,CACb,+CAA+C,QAAQ,MAAM,OAAO,IAAI;wBACtE,UAAU,QAAQ,yDAAyD;wBAC3E,0CAA0C,EAC5C;wBACE,KAAK,EAAE,KAAK;qBACb,CACF,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YACvF,CAAC;YACD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;gBACvB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;YACtD,CAAC;YACD,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAsB,EAAE,OAAmB;QACzD,8EAA8E;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC;YACH,4CAA4C;YAC5C,8DAA8D;YAC9D,8EAA8E;YAC9E,MAAM,EACJ,IAAI,EAAE,EAAE,MAAM,EAAE,GACjB,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAE1E,wFAAwF;YACxF,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,MAAM,KAAK,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;gBACjC,KAAK,MAAM,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,EAAE,EAAE,CAAC;oBAC/C,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;wBACzC,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;4BACpC,OAAO,CAAC,IAAI,CAAC;gCACX,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;gCACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;6BAC3E,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,KAAK,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC;YAC7D,OAAO,CAAC,KAAK,CAAC,6BAA6B,KAAK,GAAG,EAAE,KAAK,CAAC,CAAC;YAC5D,OAAO,EAAE,
CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,MAA2B,EAAE,OAAmB;QACnE,kBAAkB;QAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CACrC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CACzE,CAAC;QAEF,iCAAiC;QACjC,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAEhC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;QACzD,CAAC;QAED,kEAAkE;QAClE,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC;QAEnE,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,SAAS;QACb,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC7B,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,SAAS,CAAC;IACnC,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,WAAW,GAA2B;YAC1C,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,SAAS;YACb,OAAO,EAAE,SAAS;YAClB,OAAO,EAAE,SAAS;YAClB,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;YACT,EAAE,EAAE,KAAK;SACV,CAAC;QAEF,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC7C,OAAO,WAAW,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC;IAC/C,CAAC;CACF"}
@@ -46,10 +46,16 @@ vi.mock("tesseract.js", async () => {
46
46
  const actual = await vi.importActual("tesseract.js");
47
47
  return {
48
48
  ...actual,
49
- createWorker: vi.fn(async (language, _num) => {
49
+ createWorker: vi.fn(
50
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
51
+ async (language, _num, options) => {
50
52
  if (language == "it" || language == "ita") {
51
53
  return;
52
54
  }
55
+ if (language == "offline" || language == "fetchfail") {
56
+ options?.errorHandler?.("TypeError: fetch failed");
57
+ throw new Error("TypeError: fetch failed");
58
+ }
53
59
  return mockTesseractWorker;
54
60
  }),
55
61
  };
@@ -67,6 +73,10 @@ describe("test Tesseract OCR (single image)", () => {
67
73
  expect(engine.name).toBe("tesseract");
68
74
  await expect(engine.recognize("cat.png", { language: "it" })).rejects.toThrow("Tesseract worker not initialized");
69
75
  });
76
+ it("test engine failure (fetch failed) returns actionable guidance", async () => {
77
+ const engine = new TesseractEngine();
78
+ await expect(engine.recognize("cat.png", { language: "offline" })).rejects.toThrow('Tesseract failed to download language data for "offline"');
79
+ });
70
80
  });
71
81
  describe("test OCR simple HTTP server (batch)", () => {
72
82
  it("test engine success", async () => {
@@ -1 +1 @@
1
- {"version":3,"file":"tesseract.test.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAElD,+EAA+E;AAC/E,MAAM,SAAS,GAAG;IAChB;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;KACvC;IACD;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE;KACzC;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG;IAC1B,IAAI,EAAE;QACJ,IAAI,EAAE,aAAa;QACnB,MAAM,EAAE;YACN;gBACE,UAAU,EAAE;oBACV;wBACE,KAAK,EAAE;4BACL;gCACE,KAAK,EAAE,SAAS;6BACjB;yBACF;qBACF;iBACF;aACF;SACF;QACD,UAAU,EAAE,EAAE;KACf;CACF,CAAC;AAEF,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,EAAE,IAAI,CAAC,IAAI;IACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;IACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;CAC3E,CAAC,CAAC,CAAC;AAEJ,MAAM,mBAAmB,GAAG;IAC1B,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;IAChC,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QAC1B,OAAO,mBAAmB,CAAC;IAC7B,CAAC,CAAC;CACH,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;IACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAgC,cAAc,CAAC,CAAC;IACpF,OAAO;QACL,GAAG,MAAM;QACT,YAAY,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,QAAgB,EAAE,IAAY,EAAE,EAAE;YAC3D,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC1C,OAAO;YACT,CAAC;YACD,OAAO,mBAAmB,CAAC;QAC7B,CAAC,CAAC;KACH,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,WAAW,CAAC,
CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC3E,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,qCAAqC,EAAE,GAAG,EAAE;IACnD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC7F,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"tesseract.test.js","sourceRoot":"","sources":["../../../../src/engines/ocr/tesseract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAElD,+EAA+E;AAC/E,MAAM,SAAS,GAAG;IAChB;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;KACvC;IACD;QACE,IAAI,EAAE,OAAO;QACb,UAAU,EAAE,EAAE;QACd,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,EAAE;KACzC;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAG;IAC1B,IAAI,EAAE;QACJ,IAAI,EAAE,aAAa;QACnB,MAAM,EAAE;YACN;gBACE,UAAU,EAAE;oBACV;wBACE,KAAK,EAAE;4BACL;gCACE,KAAK,EAAE,SAAS;6BACjB;yBACF;qBACF;iBACF;aACF;SACF;QACD,UAAU,EAAE,EAAE;KACf;CACF,CAAC;AAEF,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC3C,IAAI,EAAE,IAAI,CAAC,IAAI;IACf,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAK5D;IACD,UAAU,EAAE,IAAI,CAAC,UAAU,GAAG,GAAG,EAAE,uCAAuC;CAC3E,CAAC,CAAC,CAAC;AAEJ,MAAM,mBAAmB,GAAG;IAC1B,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,GAAE,CAAC,CAAC;IAChC,SAAS,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;QAC1B,OAAO,mBAAmB,CAAC;IAC7B,CAAC,CAAC;CACH,CAAC;AAEF,EAAE,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,IAAI,EAAE;IACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,YAAY,CAAgC,cAAc,CAAC,CAAC;IACpF,OAAO;QACL,GAAG,MAAM;QACT,YAAY,EAAE,EAAE,CAAC,EAAE;QACjB,8DAA8D;QAC9D,KAAK,EAAE,QAAgB,EAAE,IAAY,EAAE,OAA+C,EAAE,EAAE;YACxF,IAAI,QAAQ,IAAI,IAAI,IAAI,QAAQ,IAAI,KAAK,EAAE,CAAC;gBAC1C,OAAO;YACT,CAAC;YACD,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,IAAI,WAAW,EAAE,CAAC;gBACrD,OAAO,EAAE,YAAY,EAAE,CAAC,yBAAyB,CAAC,CAAC;gBACnD,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAC7C,CAAC;YACD,OAAO,mBAAmB,CAAC;QAC7B,CAAC,CACF;KACF,CAAC;AACJ,CAAC,CAAC,CAAC;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAA
C,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACrE,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC3E,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAC9E,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAChF,0DAA0D,CAC3D,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,qCAAqC,EAAE,GAAG,EAAE;IACnD,EAAE,CAAC,qBAAqB,EAAE,KAAK,IAAI,EAAE;QACnC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QACvF,MAAM,CAAC,MAAM,CAAC,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAC7F,kCAAkC,CACnC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
package/dist/src/lib.d.ts CHANGED
@@ -13,5 +13,6 @@
13
13
  * ```
14
14
  */
15
15
  export { LiteParse } from "./core/parser.js";
16
- export type { LiteParseConfig, LiteParseInput, OutputFormat, ParseResult, ParseResultJson, ParsedPage, BoundingBox, TextItem, ScreenshotResult, MarkupData, } from "./core/types.js";
16
+ export { searchItems } from "./processing/searchItems.js";
17
+ export type { LiteParseConfig, LiteParseInput, OutputFormat, ParseResult, ParseResultJson, ParsedPage, BoundingBox, TextItem, JsonTextItem, SearchItemsOptions, ScreenshotResult, MarkupData, } from "./core/types.js";
17
18
  //# sourceMappingURL=lib.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../../src/lib.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EACV,eAAe,EACf,cAAc,EACd,YAAY,EACZ,WAAW,EACX,eAAe,EACf,UAAU,EACV,WAAW,EACX,QAAQ,EACR,gBAAgB,EAChB,UAAU,GACX,MAAM,iBAAiB,CAAC"}
1
+ {"version":3,"file":"lib.d.ts","sourceRoot":"","sources":["../../src/lib.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAC1D,YAAY,EACV,eAAe,EACf,cAAc,EACd,YAAY,EACZ,WAAW,EACX,eAAe,EACf,UAAU,EACV,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kBAAkB,EAClB,gBAAgB,EAChB,UAAU,GACX,MAAM,iBAAiB,CAAC"}
package/dist/src/lib.js CHANGED
@@ -13,4 +13,5 @@
13
13
  * ```
14
14
  */
15
15
  export { LiteParse } from "./core/parser.js";
16
+ export { searchItems } from "./processing/searchItems.js";
16
17
  //# sourceMappingURL=lib.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"lib.js","sourceRoot":"","sources":["../../src/lib.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"lib.js","sourceRoot":"","sources":["../../src/lib.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC"}
@@ -0,0 +1,26 @@
1
+ import { JsonTextItem, SearchItemsOptions } from "../core/types.js";
2
+ /**
3
+ * Search text items for matches, returning synthetic merged items for each match.
4
+ *
5
+ * For phrase searches, consecutive text items are concatenated and searched.
6
+ * When a phrase spans multiple items, the result is a single merged item with
7
+ * combined bounding box and the matched text. Font metadata is taken from the
8
+ * first matched item.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import { LiteParse, searchItems } from "@llamaindex/liteparse";
13
+ *
14
+ * const parser = new LiteParse({ outputFormat: "json" });
15
+ * const result = await parser.parse("report.pdf");
16
+ *
17
+ * for (const page of result.json.pages) {
18
+ * const matches = searchItems(page.textItems, { phrase: "0°C to 70°C" });
19
+ * for (const match of matches) {
20
+ * console.log(`Found "${match.text}" at (${match.x}, ${match.y})`);
21
+ * }
22
+ * }
23
+ * ```
24
+ */
25
+ export declare function searchItems(items: JsonTextItem[], options: SearchItemsOptions): JsonTextItem[];
26
+ //# sourceMappingURL=searchItems.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searchItems.d.ts","sourceRoot":"","sources":["../../../src/processing/searchItems.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAEpE;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,YAAY,EAAE,EAAE,OAAO,EAAE,kBAAkB,GAAG,YAAY,EAAE,CAuD9F"}
@@ -0,0 +1,78 @@
1
/**
 * Search text items for a phrase, returning one synthetic merged item per match.
 *
 * Consecutive items are joined with a single space and the joined text is
 * searched for `options.phrase`. When the phrase spans several items, the
 * result is a single item whose box is the union of the boxes of every item
 * that contributes to the match (whole items, not just the matched substring).
 * Font metadata is taken from the first matched item.
 *
 * Notes on the contract:
 * - `text` of each result is `options.phrase` exactly as supplied by the
 *   caller, not the casing found in the source items.
 * - Matching is case-insensitive unless `options.caseSensitive` is `true`.
 * - After a match, scanning resumes at the item following the last matched
 *   item, so at most one match is reported per run of matched items.
 * - An empty phrase yields no matches.
 *
 * @param items - Text items of a single page, in reading order.
 * @param options - Search options (`phrase`, optional `caseSensitive`).
 * @returns Merged items, one per match, in the order they were found.
 *
 * @example
 * ```typescript
 * import { LiteParse, searchItems } from "@llamaindex/liteparse";
 *
 * const parser = new LiteParse({ outputFormat: "json" });
 * const result = await parser.parse("report.pdf");
 *
 * for (const page of result.json.pages) {
 *   const matches = searchItems(page.textItems, { phrase: "0°C to 70°C" });
 *   for (const match of matches) {
 *     console.log(`Found "${match.text}" at (${match.x}, ${match.y})`);
 *   }
 * }
 * ```
 */
export function searchItems(items, options) {
    const results = [];
    const caseSensitive = options.caseSensitive ?? false;
    const normalize = caseSensitive ? (s) => s : (s) => s.toLowerCase();
    const q = normalize(options.phrase);
    // An empty query is a substring of every string; reporting every item as a
    // "match" would be meaningless, so return no results instead.
    if (q.length === 0) {
        return results;
    }
    let start = 0;
    while (start < items.length) {
        // Length of the window's first item in the same (normalized) space as `q`.
        const firstLength = normalize(items[start].text).length;
        let combined = "";
        let found = false;
        for (let end = start; end < items.length; end++) {
            combined += (end > start ? " " : "") + items[end].text;
            const haystack = normalize(combined);
            if (haystack.includes(q)) {
                // Narrow from the left: drop leading items that aren't part of the match
                let narrowed = combined;
                let s = start;
                while (s < end) {
                    // +1 skips the single-space separator that followed items[s]
                    const without = narrowed.slice(items[s].text.length + 1);
                    if (!normalize(without).includes(q)) {
                        break;
                    }
                    narrowed = without;
                    s++;
                }
                // Merge bounding boxes of the matched items
                const matched = items.slice(s, end + 1);
                let x = Infinity;
                let y = Infinity;
                let x2 = -Infinity;
                let y2 = -Infinity;
                for (const m of matched) {
                    x = Math.min(x, m.x);
                    y = Math.min(y, m.y);
                    x2 = Math.max(x2, m.x + m.width);
                    y2 = Math.max(y2, m.y + m.height);
                }
                results.push({
                    text: options.phrase,
                    x,
                    y,
                    width: x2 - x,
                    height: y2 - y,
                    fontName: matched[0].fontName,
                    fontSize: matched[0].fontSize,
                });
                // Advance past the match to avoid duplicates
                start = end + 1;
                found = true;
                break;
            }
            // Stop expanding once no match that begins inside the first item (or at
            // the separator right after it) can still appear: such a match ends no
            // later than `firstLength + q.length` characters into the window.
            // Matches that begin in later items are found when `start` advances.
            // Measuring from the window start instead would wrongly give up on a
            // phrase that begins at the tail of a long first item.
            if (haystack.length >= firstLength + q.length) {
                break;
            }
        }
        if (!found) {
            start++;
        }
    }
    return results;
}
78
+ //# sourceMappingURL=searchItems.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searchItems.js","sourceRoot":"","sources":["../../../src/processing/searchItems.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,WAAW,CAAC,KAAqB,EAAE,OAA2B;IAC5E,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,KAAK,CAAC;IACrD,MAAM,SAAS,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACpF,MAAM,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAEpC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,OAAO,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QAC5B,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,IAAI,KAAK,GAAG,KAAK,CAAC;QAClB,KAAK,IAAI,GAAG,GAAG,KAAK,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAChD,QAAQ,IAAI,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;YACvD,IAAI,SAAS,CAAC,QAAQ,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpC,yEAAyE;gBACzE,IAAI,QAAQ,GAAG,QAAQ,CAAC;gBACxB,IAAI,CAAC,GAAG,KAAK,CAAC;gBACd,OAAO,CAAC,GAAG,GAAG,EAAE,CAAC;oBACf,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACzD,IAAI,SAAS,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnC,QAAQ,GAAG,OAAO,CAAC;wBACnB,CAAC,EAAE,CAAC;oBACN,CAAC;yBAAM,CAAC;wBACN,MAAM;oBACR,CAAC;gBACH,CAAC;gBAED,4CAA4C;gBAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;gBACxC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC/C,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC1D,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBAE3D,OAAO,CAAC,IAAI,CAAC;oBACX,IAAI,EAAE,OAAO,CAAC,MAAM;oBACpB,CAAC;oBACD,CAAC;oBACD,KAAK,EAAE,EAAE,GAA
G,CAAC;oBACb,MAAM,EAAE,EAAE,GAAG,CAAC;oBACd,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ;oBAC7B,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ;iBAC9B,CAAC,CAAC;gBAEH,6CAA6C;gBAC7C,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;gBAChB,KAAK,GAAG,IAAI,CAAC;gBACb,MAAM;YACR,CAAC;YACD,4EAA4E;YAC5E,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,MAAM;QAC5C,CAAC;QACD,IAAI,CAAC,KAAK;YAAE,KAAK,EAAE,CAAC;IACtB,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=searchItems.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searchItems.test.d.ts","sourceRoot":"","sources":["../../../src/processing/searchItems.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,55 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { searchItems } from "./searchItems";
3
+ function item(text, x, y, width, height = 12) {
4
+ return { text, x, y, width, height };
5
+ }
6
+ describe("searchItems", () => {
7
+ it("matches a phrase within a single item", () => {
8
+ const items = [item("hello world", 10, 20, 100)];
9
+ const results = searchItems(items, { phrase: "hello world" });
10
+ expect(results).toHaveLength(1);
11
+ expect(results[0].text).toBe("hello world");
12
+ expect(results[0].x).toBe(10);
13
+ expect(results[0].width).toBe(100);
14
+ });
15
+ it("matches a phrase spanning multiple items", () => {
16
+ const items = [item("0°C", 10, 50, 30), item("to", 45, 50, 15), item("70°C", 65, 50, 35)];
17
+ const results = searchItems(items, { phrase: "0°C to 70°C" });
18
+ expect(results).toHaveLength(1);
19
+ expect(results[0].text).toBe("0°C to 70°C");
20
+ expect(results[0].x).toBe(10);
21
+ expect(results[0].width).toBe(90); // 65 + 35 - 10
22
+ });
23
+ it("narrows match and does not include unrelated leading items", () => {
24
+ const items = [item("Operating", 10, 50, 70), item("0°C to 70°C", 85, 50, 90)];
25
+ const results = searchItems(items, { phrase: "0°C to 70°C" });
26
+ expect(results).toHaveLength(1);
27
+ expect(results[0].x).toBe(85);
28
+ expect(results[0].width).toBe(90);
29
+ });
30
+ it("is case-insensitive by default", () => {
31
+ const items = [item("Revenue Grew", 10, 20, 100)];
32
+ const results = searchItems(items, { phrase: "revenue grew" });
33
+ expect(results).toHaveLength(1);
34
+ expect(results[0].text).toBe("revenue grew");
35
+ });
36
+ it("respects caseSensitive option", () => {
37
+ const items = [item("pH Level", 10, 20, 80)];
38
+ expect(searchItems(items, { phrase: "pH", caseSensitive: true })).toHaveLength(1);
39
+ expect(searchItems(items, { phrase: "ph", caseSensitive: true })).toHaveLength(0);
40
+ expect(searchItems(items, { phrase: "PH", caseSensitive: true })).toHaveLength(0);
41
+ });
42
+ it("returns empty array when no match", () => {
43
+ const items = [item("hello", 10, 20, 50)];
44
+ const results = searchItems(items, { phrase: "goodbye" });
45
+ expect(results).toHaveLength(0);
46
+ });
47
+ it("merges bounding boxes vertically for wrapped phrases", () => {
48
+ const items = [item("temperature", 10, 50, 80, 12), item("range", 10, 65, 40, 12)];
49
+ const results = searchItems(items, { phrase: "temperature range" });
50
+ expect(results).toHaveLength(1);
51
+ expect(results[0].y).toBe(50);
52
+ expect(results[0].height).toBe(27); // 65 + 12 - 50
53
+ });
54
+ });
55
+ //# sourceMappingURL=searchItems.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"searchItems.test.js","sourceRoot":"","sources":["../../../src/processing/searchItems.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAG5C,SAAS,IAAI,CAAC,IAAY,EAAE,CAAS,EAAE,CAAS,EAAE,KAAa,EAAE,MAAM,GAAG,EAAE;IAC1E,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AACvC,CAAC;AAED,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC5C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC1F,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC5C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;QACpE,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,aAAa,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC/E,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC
,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE,EAAE,EAAE,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;QAClD,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAC/D,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACvC,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7C,MAAM,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAClF,MAAM,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAClF,MAAM,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACpF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC1C,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;QAC1D,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,aAAa,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,EAAE,IAAI,CAAC,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QACnF,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,mBAAmB,EAAE,CAAC,CAAC;QACpE,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,eAAe;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@llamaindex/liteparse",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Open-source PDF parsing with spatial text extraction and OCR processing",
5
5
  "type": "module",
6
6
  "main": "./dist/src/lib.js",