@lexmata/micropdf 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/LICENSE +191 -0
  2. package/README.md +985 -0
  3. package/binding.gyp +73 -0
  4. package/dist/annot.d.ts +458 -0
  5. package/dist/annot.d.ts.map +1 -0
  6. package/dist/annot.js +697 -0
  7. package/dist/annot.js.map +1 -0
  8. package/dist/archive.d.ts +128 -0
  9. package/dist/archive.d.ts.map +1 -0
  10. package/dist/archive.js +268 -0
  11. package/dist/archive.js.map +1 -0
  12. package/dist/buffer.d.ts +572 -0
  13. package/dist/buffer.d.ts.map +1 -0
  14. package/dist/buffer.js +971 -0
  15. package/dist/buffer.js.map +1 -0
  16. package/dist/colorspace.d.ts +287 -0
  17. package/dist/colorspace.d.ts.map +1 -0
  18. package/dist/colorspace.js +542 -0
  19. package/dist/colorspace.js.map +1 -0
  20. package/dist/context.d.ts +184 -0
  21. package/dist/context.d.ts.map +1 -0
  22. package/dist/context.js +320 -0
  23. package/dist/context.js.map +1 -0
  24. package/dist/cookie.d.ts +164 -0
  25. package/dist/cookie.d.ts.map +1 -0
  26. package/dist/cookie.js +306 -0
  27. package/dist/cookie.js.map +1 -0
  28. package/dist/device.d.ts +169 -0
  29. package/dist/device.d.ts.map +1 -0
  30. package/dist/device.js +350 -0
  31. package/dist/device.js.map +1 -0
  32. package/dist/display-list.d.ts +202 -0
  33. package/dist/display-list.d.ts.map +1 -0
  34. package/dist/display-list.js +410 -0
  35. package/dist/display-list.js.map +1 -0
  36. package/dist/document.d.ts +637 -0
  37. package/dist/document.d.ts.map +1 -0
  38. package/dist/document.js +902 -0
  39. package/dist/document.js.map +1 -0
  40. package/dist/easy.d.ts +423 -0
  41. package/dist/easy.d.ts.map +1 -0
  42. package/dist/easy.js +644 -0
  43. package/dist/easy.js.map +1 -0
  44. package/dist/enhanced.d.ts +226 -0
  45. package/dist/enhanced.d.ts.map +1 -0
  46. package/dist/enhanced.js +368 -0
  47. package/dist/enhanced.js.map +1 -0
  48. package/dist/filter.d.ts +51 -0
  49. package/dist/filter.d.ts.map +1 -0
  50. package/dist/filter.js +381 -0
  51. package/dist/filter.js.map +1 -0
  52. package/dist/font.d.ts +222 -0
  53. package/dist/font.d.ts.map +1 -0
  54. package/dist/font.js +381 -0
  55. package/dist/font.js.map +1 -0
  56. package/dist/form.d.ts +214 -0
  57. package/dist/form.d.ts.map +1 -0
  58. package/dist/form.js +497 -0
  59. package/dist/form.js.map +1 -0
  60. package/dist/geometry.d.ts +469 -0
  61. package/dist/geometry.d.ts.map +1 -0
  62. package/dist/geometry.js +780 -0
  63. package/dist/geometry.js.map +1 -0
  64. package/dist/image.d.ts +172 -0
  65. package/dist/image.d.ts.map +1 -0
  66. package/dist/image.js +348 -0
  67. package/dist/image.js.map +1 -0
  68. package/dist/index.d.ts +171 -0
  69. package/dist/index.d.ts.map +1 -0
  70. package/dist/index.js +339 -0
  71. package/dist/index.js.map +1 -0
  72. package/dist/link.d.ts +168 -0
  73. package/dist/link.d.ts.map +1 -0
  74. package/dist/link.js +343 -0
  75. package/dist/link.js.map +1 -0
  76. package/dist/micropdf.d.ts +40 -0
  77. package/dist/micropdf.d.ts.map +1 -0
  78. package/dist/micropdf.js +45 -0
  79. package/dist/micropdf.js.map +1 -0
  80. package/dist/nanopdf.d.ts +40 -0
  81. package/dist/nanopdf.d.ts.map +1 -0
  82. package/dist/nanopdf.js +45 -0
  83. package/dist/nanopdf.js.map +1 -0
  84. package/dist/native.d.ts +242 -0
  85. package/dist/native.d.ts.map +1 -0
  86. package/dist/native.js +509 -0
  87. package/dist/native.js.map +1 -0
  88. package/dist/output.d.ts +166 -0
  89. package/dist/output.d.ts.map +1 -0
  90. package/dist/output.js +365 -0
  91. package/dist/output.js.map +1 -0
  92. package/dist/path.d.ts +420 -0
  93. package/dist/path.d.ts.map +1 -0
  94. package/dist/path.js +687 -0
  95. package/dist/path.js.map +1 -0
  96. package/dist/pdf/object.d.ts +489 -0
  97. package/dist/pdf/object.d.ts.map +1 -0
  98. package/dist/pdf/object.js +1045 -0
  99. package/dist/pdf/object.js.map +1 -0
  100. package/dist/pixmap.d.ts +315 -0
  101. package/dist/pixmap.d.ts.map +1 -0
  102. package/dist/pixmap.js +590 -0
  103. package/dist/pixmap.js.map +1 -0
  104. package/dist/profiler.d.ts +159 -0
  105. package/dist/profiler.d.ts.map +1 -0
  106. package/dist/profiler.js +380 -0
  107. package/dist/profiler.js.map +1 -0
  108. package/dist/render-options.d.ts +227 -0
  109. package/dist/render-options.d.ts.map +1 -0
  110. package/dist/render-options.js +130 -0
  111. package/dist/render-options.js.map +1 -0
  112. package/dist/resource-tracking.d.ts +332 -0
  113. package/dist/resource-tracking.d.ts.map +1 -0
  114. package/dist/resource-tracking.js +653 -0
  115. package/dist/resource-tracking.js.map +1 -0
  116. package/dist/simple.d.ts +276 -0
  117. package/dist/simple.d.ts.map +1 -0
  118. package/dist/simple.js +343 -0
  119. package/dist/simple.js.map +1 -0
  120. package/dist/stext.d.ts +290 -0
  121. package/dist/stext.d.ts.map +1 -0
  122. package/dist/stext.js +312 -0
  123. package/dist/stext.js.map +1 -0
  124. package/dist/stream.d.ts +174 -0
  125. package/dist/stream.d.ts.map +1 -0
  126. package/dist/stream.js +476 -0
  127. package/dist/stream.js.map +1 -0
  128. package/dist/text.d.ts +337 -0
  129. package/dist/text.d.ts.map +1 -0
  130. package/dist/text.js +454 -0
  131. package/dist/text.js.map +1 -0
  132. package/dist/typed-arrays.d.ts +127 -0
  133. package/dist/typed-arrays.d.ts.map +1 -0
  134. package/dist/typed-arrays.js +410 -0
  135. package/dist/typed-arrays.js.map +1 -0
  136. package/dist/types.d.ts +358 -0
  137. package/dist/types.d.ts.map +1 -0
  138. package/dist/types.js +216 -0
  139. package/dist/types.js.map +1 -0
  140. package/native/annot.cc +557 -0
  141. package/native/buffer.cc +204 -0
  142. package/native/colorspace.cc +166 -0
  143. package/native/context.cc +84 -0
  144. package/native/cookie.cc +179 -0
  145. package/native/device.cc +179 -0
  146. package/native/display_list.cc +179 -0
  147. package/native/document.cc +268 -0
  148. package/native/enhanced.cc +70 -0
  149. package/native/font.cc +282 -0
  150. package/native/form.cc +523 -0
  151. package/native/geometry.cc +255 -0
  152. package/native/image.cc +216 -0
  153. package/native/include/micropdf/enhanced.h +38 -0
  154. package/native/include/micropdf/types.h +36 -0
  155. package/native/include/micropdf.h +106 -0
  156. package/native/include/mupdf-ffi.h +39 -0
  157. package/native/include/mupdf.h +11 -0
  158. package/native/include/mupdf_minimal.h +381 -0
  159. package/native/lib/linux-x64/libmicropdf.a +0 -0
  160. package/native/link.cc +234 -0
  161. package/native/micropdf.cc +71 -0
  162. package/native/output.cc +229 -0
  163. package/native/page.cc +572 -0
  164. package/native/path.cc +259 -0
  165. package/native/pixmap.cc +240 -0
  166. package/native/stext.cc +610 -0
  167. package/native/stream.cc +239 -0
  168. package/package.json +120 -0
  169. package/scripts/build-from-rust.js +97 -0
  170. package/scripts/install.js +184 -0
@@ -0,0 +1,290 @@
1
+ /**
2
+ * Structured Text Extraction
3
+ *
4
+ * Provides layout-aware text extraction from PDF pages.
5
+ * Organizes text into a hierarchy: Page → Block → Line → Char
6
+ */
7
+ import type { Page } from './document.js';
8
+ import { Rect } from './geometry.js';
9
+ /**
10
+ * Quad - four-corner bounding box for rotated text
11
+ */
12
+ export interface Quad {
13
+ /** Upper-left corner */
14
+ ul: {
15
+ x: number;
16
+ y: number;
17
+ };
18
+ /** Upper-right corner */
19
+ ur: {
20
+ x: number;
21
+ y: number;
22
+ };
23
+ /** Lower-left corner */
24
+ ll: {
25
+ x: number;
26
+ y: number;
27
+ };
28
+ /** Lower-right corner */
29
+ lr: {
30
+ x: number;
31
+ y: number;
32
+ };
33
+ }
34
+ /**
35
+ * Text block type
36
+ */
37
+ export declare enum STextBlockType {
38
+ /** Regular text block */
39
+ Text = 0,
40
+ /** Image block */
41
+ Image = 1,
42
+ /** List item */
43
+ List = 2,
44
+ /** Table cell */
45
+ Table = 3
46
+ }
47
+ /**
48
+ * Writing mode for text lines
49
+ */
50
+ export declare enum WritingMode {
51
+ /** Horizontal left-to-right */
52
+ HorizontalLtr = 0,
53
+ /** Horizontal right-to-left */
54
+ HorizontalRtl = 1,
55
+ /** Vertical top-to-bottom */
56
+ VerticalTtb = 2,
57
+ /** Vertical bottom-to-top */
58
+ VerticalBtt = 3
59
+ }
60
+ /**
61
+ * Structured text character
62
+ */
63
+ export interface STextCharData {
64
+ /** Unicode character */
65
+ c: string;
66
+ /** Character quad (4 corners) */
67
+ quad: Quad;
68
+ /** Font size */
69
+ size: number;
70
+ /** Font name */
71
+ fontName: string;
72
+ }
73
+ /**
74
+ * Structured text line
75
+ */
76
+ export interface STextLineData {
77
+ /** Writing mode */
78
+ wmode: WritingMode;
79
+ /** Bounding box */
80
+ bbox: Rect;
81
+ /** Baseline coordinate */
82
+ baseline: number;
83
+ /** Text direction */
84
+ dir: {
85
+ x: number;
86
+ y: number;
87
+ };
88
+ /** Characters in this line */
89
+ chars: STextCharData[];
90
+ }
91
+ /**
92
+ * Structured text block
93
+ */
94
+ export interface STextBlockData {
95
+ /** Block type */
96
+ blockType: STextBlockType;
97
+ /** Bounding box */
98
+ bbox: Rect;
99
+ /** Lines in this block */
100
+ lines: STextLineData[];
101
+ }
102
+ /**
103
+ * Structured Text Page
104
+ *
105
+ * Represents extracted text from a PDF page with layout information.
106
+ * Provides hierarchical access to blocks, lines, and characters.
107
+ *
108
+ * @example
109
+ * ```typescript
110
+ * const doc = Document.open('document.pdf');
111
+ * const page = doc.loadPage(0);
112
+ * const stext = STextPage.fromPage(page);
113
+ *
114
+ * // Get all text
115
+ * console.log(stext.getText());
116
+ *
117
+ * // Search for text
118
+ * const hits = stext.search('keyword');
119
+ * console.log(`Found ${hits.length} matches`);
120
+ *
121
+ * stext.drop();
122
+ * page.drop();
123
+ * doc.close();
124
+ * ```
125
+ */
126
+ export declare class STextPage {
127
+ private _ctx;
128
+ private _handle;
129
+ private _dropped;
130
+ private constructor();
131
+ /**
132
+ * Create a structured text page from a document page
133
+ *
134
+ * @param page - The page to extract text from
135
+ * @returns A new STextPage instance
136
+ *
137
+ * @example
138
+ * ```typescript
139
+ * const stext = STextPage.fromPage(page);
140
+ * ```
141
+ */
142
+ static fromPage(page: Page): STextPage;
143
+ /**
144
+ * Get all text as a single string
145
+ *
146
+ * @returns Plain text content
147
+ *
148
+ * @example
149
+ * ```typescript
150
+ * const text = stext.getText();
151
+ * console.log(text);
152
+ * ```
153
+ */
154
+ getText(): string;
155
+ /**
156
+ * Search for text in the page
157
+ *
158
+ * @param needle - The text to search for
159
+ * @param maxHits - Maximum number of hits to return (default: 500)
160
+ * @returns Array of quads (bounding boxes) for matches
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const hits = stext.search('important');
165
+ * for (const hit of hits) {
166
+ * console.log('Found at:', hit);
167
+ * }
168
+ * ```
169
+ */
170
+ search(needle: string, maxHits?: number): Quad[];
171
+ /**
172
+ * Get the page bounds
173
+ *
174
+ * @returns Rectangle representing the page bounds
175
+ *
176
+ * @example
177
+ * ```typescript
178
+ * const bounds = stext.getBounds();
179
+ * console.log(`Page size: ${bounds.width} x ${bounds.height}`);
180
+ * ```
181
+ */
182
+ getBounds(): Rect;
183
+ /**
184
+ * Drop (free) the structured text page
185
+ *
186
+ * Must be called when done to free resources.
187
+ *
188
+ * @example
189
+ * ```typescript
190
+ * const stext = STextPage.fromPage(page);
191
+ * // ... use stext ...
192
+ * stext.drop();
193
+ * ```
194
+ */
195
+ drop(): void;
196
+ /**
197
+ * Check if the structured text page has been dropped
198
+ *
199
+ * @returns true if dropped, false otherwise
200
+ */
201
+ isDropped(): boolean;
202
+ /**
203
+ * Get blocks from the page
204
+ *
205
+ * Returns the hierarchical structure of text blocks, lines, and characters.
206
+ * Note: Native block/line/char access is not implemented yet. For now this returns a simplified structure: a single text block synthesized from getText() and getBounds().
207
+ *
208
+ * @returns Array of text blocks
209
+ *
210
+ * @example
211
+ * ```typescript
212
+ * const blocks = stext.getBlocks();
213
+ * for (const block of blocks) {
214
+ * console.log(`Block type: ${block.blockType}`);
215
+ * for (const line of block.lines) {
216
+ * console.log(` Line: ${line.chars.map(c => c.c).join('')}`);
217
+ * }
218
+ * }
219
+ * ```
220
+ */
221
+ getBlocks(): STextBlockData[];
222
+ /**
223
+ * Get the number of blocks on the page
224
+ *
225
+ * @returns Number of blocks
226
+ *
227
+ * @example
228
+ * ```typescript
229
+ * const count = stext.blockCount();
230
+ * console.log(`Page has ${count} blocks`);
231
+ * ```
232
+ */
233
+ blockCount(): number;
234
+ /**
235
+ * Get the number of characters on the page
236
+ *
237
+ * @returns Total character count (the length of the string returned by getText(), line breaks included)
238
+ *
239
+ * @example
240
+ * ```typescript
241
+ * const count = stext.charCount();
242
+ * console.log(`Page has ${count} characters`);
243
+ * ```
244
+ */
245
+ charCount(): number;
246
+ /**
247
+ * Get blocks of a specific type
248
+ *
249
+ * @param blockType - The block type to filter by
250
+ * @returns Array of blocks matching the type
251
+ *
252
+ * @example
253
+ * ```typescript
254
+ * const textBlocks = stext.getBlocksOfType(STextBlockType.Text);
255
+ * const imageBlocks = stext.getBlocksOfType(STextBlockType.Image);
256
+ * ```
257
+ */
258
+ getBlocksOfType(blockType: STextBlockType): STextBlockData[];
259
+ /**
260
+ * Get the native handle (for advanced use)
261
+ *
262
+ * @internal
263
+ */
264
+ get handle(): {
265
+ _handle: number;
266
+ };
267
+ private _checkDropped;
268
+ }
269
+ /**
270
+ * Convert a quad to a rectangle (axis-aligned bounding box)
271
+ *
272
+ * @param quad - The quad to convert
273
+ * @returns Rectangle enclosing the quad
274
+ *
275
+ * @example
276
+ * ```typescript
277
+ * const hits = stext.search('text');
278
+ * const rect = quadToRect(hits[0]);
279
+ * ```
280
+ */
281
+ export declare function quadToRect(quad: Quad): Rect;
282
+ /**
283
+ * Check if two quads overlap (compares their axis-aligned bounding boxes)
284
+ *
285
+ * @param q1 - First quad
286
+ * @param q2 - Second quad
287
+ * @returns true if quads overlap, false otherwise
288
+ */
289
+ export declare function quadsOverlap(q1: Quad, q2: Quad): boolean;
290
+ //# sourceMappingURL=stext.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stext.d.ts","sourceRoot":"","sources":["../src/stext.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AAC1C,OAAO,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AAGrC;;GAEG;AACH,MAAM,WAAW,IAAI;IACnB,wBAAwB;IACxB,EAAE,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7B,yBAAyB;IACzB,EAAE,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7B,wBAAwB;IACxB,EAAE,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7B,yBAAyB;IACzB,EAAE,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC9B;AAED;;GAEG;AACH,oBAAY,cAAc;IACxB,yBAAyB;IACzB,IAAI,IAAI;IACR,kBAAkB;IAClB,KAAK,IAAI;IACT,gBAAgB;IAChB,IAAI,IAAI;IACR,iBAAiB;IACjB,KAAK,IAAI;CACV;AAED;;GAEG;AACH,oBAAY,WAAW;IACrB,+BAA+B;IAC/B,aAAa,IAAI;IACjB,+BAA+B;IAC/B,aAAa,IAAI;IACjB,6BAA6B;IAC7B,WAAW,IAAI;IACf,6BAA6B;IAC7B,WAAW,IAAI;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,wBAAwB;IACxB,CAAC,EAAE,MAAM,CAAC;IACV,iCAAiC;IACjC,IAAI,EAAE,IAAI,CAAC;IACX,gBAAgB;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,gBAAgB;IAChB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,mBAAmB;IACnB,KAAK,EAAE,WAAW,CAAC;IACnB,mBAAmB;IACnB,IAAI,EAAE,IAAI,CAAC;IACX,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,qBAAqB;IACrB,GAAG,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9B,8BAA8B;IAC9B,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,iBAAiB;IACjB,SAAS,EAAE,cAAc,CAAC;IAC1B,mBAAmB;IACnB,IAAI,EAAE,IAAI,CAAC;IACX,0BAA0B;IAC1B,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,SAAS;IACpB,OAAO,CAAC,IAAI,CAAsB;IAClC,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,QAAQ,CAAS;IAEzB,OAAO;IAKP;;;;;;;;;;OAUG;IACH,MAAM,CAAC,QAAQ,CAAC,IAAI,EAAE,IAAI,GAAG,SAAS;IAWtC;;;;;;;;;;OAUG;IACH,OAAO,IAAI,MAAM;IAKjB;;;;;;;;;;;;;;OAcG;IACH,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,SAAM,GAAG,IAAI,EAAE;IAK7C;;;;;;;;;;OAUG;IACH,SAAS,IAAI,IAAI;IAMjB;;;;;;;;;;;OAWG;IACH,IAAI,IAAI,IAAI;IAOZ;;;;OAIG;IACH,SAAS,IAAI,OAAO;IAIpB;;;;;;;;;;;;;;;;;;OAkBG
;IACH,SAAS,IAAI,cAAc,EAAE;IA2C7B;;;;;;;;;;OAUG;IACH,UAAU,IAAI,MAAM;IAKpB;;;;;;;;;;OAUG;IACH,SAAS,IAAI,MAAM;IAKnB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,SAAS,EAAE,cAAc,GAAG,cAAc,EAAE;IAK5D;;;;OAIG;IACH,IAAI,MAAM,IAAI;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,CAEhC;IAED,OAAO,CAAC,aAAa;CAKtB;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI,CAO3C;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,GAAG,OAAO,CAIxD"}
package/dist/stext.js ADDED
@@ -0,0 +1,312 @@
1
+ /**
2
+ * Structured Text Extraction
3
+ *
4
+ * Provides layout-aware text extraction from PDF pages.
5
+ * Organizes text into a hierarchy: Page → Block → Line → Char
6
+ */
7
+ import { Rect } from './geometry.js';
8
+ import { native_addon } from './native.js';
9
/**
 * Text block type
 *
 * Numeric enum with a reverse mapping (number → name):
 *   Text  = 0  regular text block
 *   Image = 1  image block
 *   List  = 2  list item
 *   Table = 3  table cell
 */
export var STextBlockType;
(function (target) {
  // The array index of each name is its numeric enum value.
  const memberNames = ['Text', 'Image', 'List', 'Table'];
  memberNames.forEach((memberName, memberValue) => {
    target[memberName] = memberValue; // forward lookup: name → number
    target[memberValue] = memberName; // reverse lookup: number → name
  });
})(STextBlockType || (STextBlockType = {}));
23
/**
 * Writing mode for text lines
 *
 * Numeric enum with a reverse mapping (number → name):
 *   HorizontalLtr = 0  horizontal, left-to-right
 *   HorizontalRtl = 1  horizontal, right-to-left
 *   VerticalTtb   = 2  vertical, top-to-bottom
 *   VerticalBtt   = 3  vertical, bottom-to-top
 */
export var WritingMode;
(function (target) {
  // The array index of each name is its numeric enum value.
  const memberNames = ['HorizontalLtr', 'HorizontalRtl', 'VerticalTtb', 'VerticalBtt'];
  memberNames.forEach((memberName, memberValue) => {
    target[memberName] = memberValue; // forward lookup: name → number
    target[memberValue] = memberName; // reverse lookup: number → name
  });
})(WritingMode || (WritingMode = {}));
37
/**
 * Structured Text Page
 *
 * Wraps a native structured-text handle created from a document page and
 * exposes plain-text extraction, text search, page bounds, and a
 * block → line → char hierarchy. The hierarchy is currently synthesized
 * from the plain text (see {@link STextPage.getBlocks}).
 *
 * @example
 * ```typescript
 * const doc = Document.open('document.pdf');
 * const page = doc.loadPage(0);
 * const stext = STextPage.fromPage(page);
 *
 * // Get all text
 * console.log(stext.getText());
 *
 * // Search for text
 * const hits = stext.search('keyword');
 * console.log(`Found ${hits.length} matches`);
 *
 * stext.drop();
 * page.drop();
 * doc.close();
 * ```
 */
export class STextPage {
  _ctx;
  _handle;
  _dropped = false;

  /**
   * Use {@link STextPage.fromPage} instead of calling this directly.
   *
   * @param ctx - Native context; a bigint is wrapped as `{ _handle: Number(ctx) }`, anything else is stored as-is
   * @param handle - Native structured-text handle; normalized the same way as `ctx`
   */
  constructor(ctx, handle) {
    this._ctx = typeof ctx === 'bigint' ? { _handle: Number(ctx) } : ctx;
    this._handle = typeof handle === 'bigint' ? { _handle: Number(handle) } : handle;
  }

  /**
   * Create a structured text page from a document page
   *
   * Reads the page's internal `_ctx` and `_handle` fields and passes them to
   * the native `newSTextPage` call.
   *
   * @param page - The page to extract text from
   * @returns A new STextPage instance
   *
   * @example
   * ```typescript
   * const stext = STextPage.fromPage(page);
   * ```
   */
  static fromPage(page) {
    // Access internal page state for FFI call
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const pageAny = page;
    const ctx = pageAny._ctx;
    const pageHandle = pageAny._handle;
    const handle = native_addon.newSTextPage(ctx, pageHandle);
    return new STextPage(ctx, handle);
  }

  /**
   * Get all text as a single string
   *
   * @returns Plain text content as returned by the native addon
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const text = stext.getText();
   * console.log(text);
   * ```
   */
  getText() {
    this._checkDropped();
    return native_addon.getSTextAsText(this._ctx, this._handle);
  }

  /**
   * Search for text in the page
   *
   * @param needle - The text to search for
   * @param maxHits - Maximum number of hits to return (default: 500)
   * @returns Array of quads (bounding boxes) for matches
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const hits = stext.search('important');
   * for (const hit of hits) {
   *   console.log('Found at:', hit);
   * }
   * ```
   */
  search(needle, maxHits = 500) {
    this._checkDropped();
    return native_addon.searchSTextPage(this._ctx, this._handle, needle, maxHits);
  }

  /**
   * Get the page bounds
   *
   * @returns Rectangle built from the native `x0`/`y0`/`x1`/`y1` values
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const bounds = stext.getBounds();
   * console.log(`Page size: ${bounds.width} x ${bounds.height}`);
   * ```
   */
  getBounds() {
    this._checkDropped();
    const rect = native_addon.getSTextPageBounds(this._ctx, this._handle);
    return new Rect(rect.x0, rect.y0, rect.x1, rect.y1);
  }

  /**
   * Drop (free) the structured text page
   *
   * Must be called when done to free resources. Calling it again after a
   * successful drop is a no-op.
   *
   * @example
   * ```typescript
   * const stext = STextPage.fromPage(page);
   * // ... use stext ...
   * stext.drop();
   * ```
   */
  drop() {
    if (!this._dropped) {
      native_addon.dropSTextPage(this._ctx, this._handle);
      this._dropped = true;
    }
  }

  /**
   * Check if the structured text page has been dropped
   *
   * @returns true if dropped, false otherwise
   */
  isDropped() {
    return this._dropped;
  }

  /**
   * Get blocks from the page
   *
   * Native block/line/char access is not implemented yet, so the result is
   * synthesized: one Text block covering the page bounds, one line per
   * '\n'-separated line of getText(), and one char entry per code point.
   * Geometry is a placeholder: the page height is divided evenly between the
   * lines, each char is 10 units wide, and size/fontName are fixed to
   * 12/'Unknown'.
   *
   * @returns Array of text blocks (currently always exactly one)
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const blocks = stext.getBlocks();
   * for (const block of blocks) {
   *   console.log(`Block type: ${block.blockType}`);
   *   for (const line of block.lines) {
   *     console.log(`  Line: ${line.chars.map(c => c.c).join('')}`);
   *   }
   * }
   * ```
   */
  getBlocks() {
    this._checkDropped();
    // TODO: Implement native FFI for block/line/char access
    // For now, provide a simplified version using getText()
    const text = this.getText();
    const bounds = this.getBounds();
    // Split once and derive the per-line height once: both are identical for
    // every line, so recomputing them inside the callback was quadratic work.
    const rawLines = text.split('\n');
    const lineHeight = bounds.height / Math.max(1, rawLines.length);
    // Create a single text block with the full text
    const lines = rawLines.map((lineText, lineIndex) => {
      const lineY = bounds.y0 + lineIndex * lineHeight;
      // Spread iterates by code point, so surrogate pairs stay one char entry.
      const chars = [...lineText].map((char, charIndex) => ({
        c: char,
        quad: {
          ul: { x: bounds.x0 + charIndex * 10, y: lineY },
          ur: { x: bounds.x0 + (charIndex + 1) * 10, y: lineY },
          ll: { x: bounds.x0 + charIndex * 10, y: lineY + lineHeight },
          lr: { x: bounds.x0 + (charIndex + 1) * 10, y: lineY + lineHeight }
        },
        size: 12,
        fontName: 'Unknown'
      }));
      return {
        wmode: WritingMode.HorizontalLtr,
        bbox: new Rect(bounds.x0, lineY, bounds.x1, lineY + lineHeight),
        baseline: lineY + lineHeight * 0.8,
        dir: { x: 1, y: 0 },
        chars
      };
    });
    return [
      {
        blockType: STextBlockType.Text,
        bbox: bounds,
        lines
      }
    ];
  }

  /**
   * Get the number of blocks on the page
   *
   * @returns Number of blocks (length of getBlocks())
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const count = stext.blockCount();
   * console.log(`Page has ${count} blocks`);
   * ```
   */
  blockCount() {
    this._checkDropped();
    return this.getBlocks().length;
  }

  /**
   * Get the number of characters on the page
   *
   * This is the `length` of the getText() string, so line breaks are included
   * and the count is in UTF-16 code units.
   *
   * @returns Total character count
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const count = stext.charCount();
   * console.log(`Page has ${count} characters`);
   * ```
   */
  charCount() {
    this._checkDropped();
    return this.getText().length;
  }

  /**
   * Get blocks of a specific type
   *
   * @param blockType - The block type to filter by
   * @returns Array of blocks from getBlocks() whose blockType matches
   * @throws Error if the page has been dropped
   *
   * @example
   * ```typescript
   * const textBlocks = stext.getBlocksOfType(STextBlockType.Text);
   * const imageBlocks = stext.getBlocksOfType(STextBlockType.Image);
   * ```
   */
  getBlocksOfType(blockType) {
    this._checkDropped();
    return this.getBlocks().filter((block) => block.blockType === blockType);
  }

  /**
   * Get the native handle (for advanced use)
   *
   * @internal
   */
  get handle() {
    return this._handle;
  }

  /**
   * Guard used by every accessor: throws once drop() has released the handle.
   */
  _checkDropped() {
    if (this._dropped) {
      throw new Error('STextPage has been dropped');
    }
  }
}
281
/**
 * Convert a quad to a rectangle (axis-aligned bounding box)
 *
 * Takes the minimum and maximum x and y over the quad's four corners
 * (ul, ur, ll, lr) and builds a Rect from them.
 *
 * @param quad - The quad to convert
 * @returns Rectangle enclosing the quad
 *
 * @example
 * ```typescript
 * const hits = stext.search('text');
 * const rect = quadToRect(hits[0]);
 * ```
 */
export function quadToRect(quad) {
  const corners = [quad.ul, quad.ur, quad.ll, quad.lr];
  const xs = corners.map((corner) => corner.x);
  const ys = corners.map((corner) => corner.y);
  return new Rect(Math.min(...xs), Math.min(...ys), Math.max(...xs), Math.max(...ys));
}
300
/**
 * Check if two quads overlap
 *
 * Both quads are first reduced to their axis-aligned bounding boxes with
 * quadToRect, and those rectangles are intersected. The answer is true
 * whenever `Rect.intersect` returns anything other than `null`.
 * NOTE(review): this assumes `Rect.intersect` signals "no overlap" with
 * `null` — confirm against geometry.js.
 *
 * @param q1 - First quad
 * @param q2 - Second quad
 * @returns true if the quads' bounding boxes overlap, false otherwise
 */
export function quadsOverlap(q1, q2) {
  const overlap = quadToRect(q1).intersect(quadToRect(q2));
  return overlap !== null;
}
312
+ //# sourceMappingURL=stext.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stext.js","sourceRoot":"","sources":["../src/stext.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AACrC,OAAO,EAAE,YAAY,EAAuC,MAAM,aAAa,CAAC;AAgBhF;;GAEG;AACH,MAAM,CAAN,IAAY,cASX;AATD,WAAY,cAAc;IACxB,yBAAyB;IACzB,mDAAQ,CAAA;IACR,kBAAkB;IAClB,qDAAS,CAAA;IACT,gBAAgB;IAChB,mDAAQ,CAAA;IACR,iBAAiB;IACjB,qDAAS,CAAA;AACX,CAAC,EATW,cAAc,KAAd,cAAc,QASzB;AAED;;GAEG;AACH,MAAM,CAAN,IAAY,WASX;AATD,WAAY,WAAW;IACrB,+BAA+B;IAC/B,+DAAiB,CAAA;IACjB,+BAA+B;IAC/B,+DAAiB,CAAA;IACjB,6BAA6B;IAC7B,2DAAe,CAAA;IACf,6BAA6B;IAC7B,2DAAe,CAAA;AACjB,CAAC,EATW,WAAW,KAAX,WAAW,QAStB;AA4CD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,SAAS;IACZ,IAAI,CAAsB;IAC1B,OAAO,CAAsB;IAC7B,QAAQ,GAAG,KAAK,CAAC;IAEzB,YAAoB,GAA2B,EAAE,MAAoC;QACnF,IAAI,CAAC,IAAI,GAAG,OAAO,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QACrE,IAAI,CAAC,OAAO,GAAG,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;IACnF,CAAC;IAED;;;;;;;;;;OAUG;IACH,MAAM,CAAC,QAAQ,CAAC,IAAU;QACxB,0CAA0C;QAC1C,8DAA8D;QAC9D,MAAM,OAAO,GAAG,IAAW,CAAC;QAC5B,MAAM,GAAG,GAAG,OAAO,CAAC,IAAqB,CAAC;QAC1C,MAAM,UAAU,GAAG,OAAO,CAAC,OAAqB,CAAC;QAEjD,MAAM,MAAM,GAAG,YAAY,CAAC,YAAY,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAC1D,OAAO,IAAI,SAAS,CAAC,GAAU,EAAE,MAAa,CAAC,CAAC;IAClD,CAAC;IAED;;;;;;;;;;OAUG;IACH,OAAO;QACL,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,OAAO,YAAY,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IAC9D,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,MAAM,CAAC,MAAc,EAAE,OAAO,GAAG,GAAG;QAClC,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,OAAO,YAAY,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAChF,CAAC;IAED;;;;;;;;;;OAUG;IACH,SAAS;QACP,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,MAAM,IAAI,GAAG,YAAY,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACtE,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC;IAED;;;;;;;;;;;OAWG;IACH,IAAI;QACF,IAAI,CAAC,IAA
I,CAAC,QAAQ,EAAE,CAAC;YACnB,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;YACpD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,SAAS;QACP,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACH,SAAS;QACP,IAAI,CAAC,aAAa,EAAE,CAAC;QAErB,wDAAwD;QACxD,wDAAwD;QACxD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEhC,gDAAgD;QAChD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,SAAS,EAAE,EAAE;YACzD,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC;YACxE,MAAM,KAAK,GAAG,MAAM,CAAC,EAAE,GAAG,SAAS,GAAG,UAAU,CAAC;YAEjD,MAAM,KAAK,GAAoB,CAAC,GAAG,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC;gBACrE,CAAC,EAAE,IAAI;gBACP,IAAI,EAAE;oBACJ,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,CAAC,EAAE,GAAG,SAAS,GAAG,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE;oBAC/C,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE;oBACrD,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,CAAC,EAAE,GAAG,SAAS,GAAG,EAAE,EAAE,CAAC,EAAE,KAAK,GAAG,UAAU,EAAE;oBAC5D,EAAE,EAAE,EAAE,CAAC,EAAE,MAAM,CAAC,EAAE,GAAG,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,KAAK,GAAG,UAAU,EAAE;iBACnE;gBACD,IAAI,EAAE,EAAE;gBACR,QAAQ,EAAE,SAAS;aACpB,CAAC,CAAC,CAAC;YAEJ,OAAO;gBACL,KAAK,EAAE,WAAW,CAAC,aAAa;gBAChC,IAAI,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,EAAE,EAAE,KAAK,GAAG,UAAU,CAAC;gBAC/D,QAAQ,EAAE,KAAK,GAAG,UAAU,GAAG,GAAG;gBAClC,GAAG,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE;gBACnB,KAAK;aACN,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,OAAO;YACL;gBACE,SAAS,EAAE,cAAc,CAAC,IAAI;gBAC9B,IAAI,EAAE,MAAM;gBACZ,KAAK;aACN;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACH,UAAU;QACR,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC,MAAM,CAAC;IACjC,CAAC;IAED;;;;;;;;;;OAUG;IACH,SAAS;QACP,IAAI,CAAC,aAAa,EAAE,CAAC;QACrB,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC,MAAM,CAAC;IAC/B,CAAC;IAED;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,SAAyB;QACvC,IAAI,
CAAC,aAAa,EAAE,CAAC;QACrB,OAAO,IAAI,CAAC,SAAS,EAAE,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;IAC3E,CAAC;IAED;;;;OAIG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAEO,aAAa;QACnB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;CACF;AAED;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,UAAU,CAAC,IAAU;IACnC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAElE,OAAO,IAAI,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;AAC1C,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,EAAQ,EAAE,EAAQ;IAC7C,MAAM,EAAE,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;IAC1B,MAAM,EAAE,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;IAC1B,OAAO,EAAE,CAAC,SAAS,CAAC,EAAE,CAAC,KAAK,IAAI,CAAC;AACnC,CAAC"}