@lexmata/micropdf 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/LICENSE +191 -0
  2. package/README.md +985 -0
  3. package/binding.gyp +73 -0
  4. package/dist/annot.d.ts +458 -0
  5. package/dist/annot.d.ts.map +1 -0
  6. package/dist/annot.js +697 -0
  7. package/dist/annot.js.map +1 -0
  8. package/dist/archive.d.ts +128 -0
  9. package/dist/archive.d.ts.map +1 -0
  10. package/dist/archive.js +268 -0
  11. package/dist/archive.js.map +1 -0
  12. package/dist/buffer.d.ts +572 -0
  13. package/dist/buffer.d.ts.map +1 -0
  14. package/dist/buffer.js +971 -0
  15. package/dist/buffer.js.map +1 -0
  16. package/dist/colorspace.d.ts +287 -0
  17. package/dist/colorspace.d.ts.map +1 -0
  18. package/dist/colorspace.js +542 -0
  19. package/dist/colorspace.js.map +1 -0
  20. package/dist/context.d.ts +184 -0
  21. package/dist/context.d.ts.map +1 -0
  22. package/dist/context.js +320 -0
  23. package/dist/context.js.map +1 -0
  24. package/dist/cookie.d.ts +164 -0
  25. package/dist/cookie.d.ts.map +1 -0
  26. package/dist/cookie.js +306 -0
  27. package/dist/cookie.js.map +1 -0
  28. package/dist/device.d.ts +169 -0
  29. package/dist/device.d.ts.map +1 -0
  30. package/dist/device.js +350 -0
  31. package/dist/device.js.map +1 -0
  32. package/dist/display-list.d.ts +202 -0
  33. package/dist/display-list.d.ts.map +1 -0
  34. package/dist/display-list.js +410 -0
  35. package/dist/display-list.js.map +1 -0
  36. package/dist/document.d.ts +637 -0
  37. package/dist/document.d.ts.map +1 -0
  38. package/dist/document.js +902 -0
  39. package/dist/document.js.map +1 -0
  40. package/dist/easy.d.ts +423 -0
  41. package/dist/easy.d.ts.map +1 -0
  42. package/dist/easy.js +644 -0
  43. package/dist/easy.js.map +1 -0
  44. package/dist/enhanced.d.ts +226 -0
  45. package/dist/enhanced.d.ts.map +1 -0
  46. package/dist/enhanced.js +368 -0
  47. package/dist/enhanced.js.map +1 -0
  48. package/dist/filter.d.ts +51 -0
  49. package/dist/filter.d.ts.map +1 -0
  50. package/dist/filter.js +381 -0
  51. package/dist/filter.js.map +1 -0
  52. package/dist/font.d.ts +222 -0
  53. package/dist/font.d.ts.map +1 -0
  54. package/dist/font.js +381 -0
  55. package/dist/font.js.map +1 -0
  56. package/dist/form.d.ts +214 -0
  57. package/dist/form.d.ts.map +1 -0
  58. package/dist/form.js +497 -0
  59. package/dist/form.js.map +1 -0
  60. package/dist/geometry.d.ts +469 -0
  61. package/dist/geometry.d.ts.map +1 -0
  62. package/dist/geometry.js +780 -0
  63. package/dist/geometry.js.map +1 -0
  64. package/dist/image.d.ts +172 -0
  65. package/dist/image.d.ts.map +1 -0
  66. package/dist/image.js +348 -0
  67. package/dist/image.js.map +1 -0
  68. package/dist/index.d.ts +171 -0
  69. package/dist/index.d.ts.map +1 -0
  70. package/dist/index.js +339 -0
  71. package/dist/index.js.map +1 -0
  72. package/dist/link.d.ts +168 -0
  73. package/dist/link.d.ts.map +1 -0
  74. package/dist/link.js +343 -0
  75. package/dist/link.js.map +1 -0
  76. package/dist/micropdf.d.ts +40 -0
  77. package/dist/micropdf.d.ts.map +1 -0
  78. package/dist/micropdf.js +45 -0
  79. package/dist/micropdf.js.map +1 -0
  80. package/dist/nanopdf.d.ts +40 -0
  81. package/dist/nanopdf.d.ts.map +1 -0
  82. package/dist/nanopdf.js +45 -0
  83. package/dist/nanopdf.js.map +1 -0
  84. package/dist/native.d.ts +242 -0
  85. package/dist/native.d.ts.map +1 -0
  86. package/dist/native.js +509 -0
  87. package/dist/native.js.map +1 -0
  88. package/dist/output.d.ts +166 -0
  89. package/dist/output.d.ts.map +1 -0
  90. package/dist/output.js +365 -0
  91. package/dist/output.js.map +1 -0
  92. package/dist/path.d.ts +420 -0
  93. package/dist/path.d.ts.map +1 -0
  94. package/dist/path.js +687 -0
  95. package/dist/path.js.map +1 -0
  96. package/dist/pdf/object.d.ts +489 -0
  97. package/dist/pdf/object.d.ts.map +1 -0
  98. package/dist/pdf/object.js +1045 -0
  99. package/dist/pdf/object.js.map +1 -0
  100. package/dist/pixmap.d.ts +315 -0
  101. package/dist/pixmap.d.ts.map +1 -0
  102. package/dist/pixmap.js +590 -0
  103. package/dist/pixmap.js.map +1 -0
  104. package/dist/profiler.d.ts +159 -0
  105. package/dist/profiler.d.ts.map +1 -0
  106. package/dist/profiler.js +380 -0
  107. package/dist/profiler.js.map +1 -0
  108. package/dist/render-options.d.ts +227 -0
  109. package/dist/render-options.d.ts.map +1 -0
  110. package/dist/render-options.js +130 -0
  111. package/dist/render-options.js.map +1 -0
  112. package/dist/resource-tracking.d.ts +332 -0
  113. package/dist/resource-tracking.d.ts.map +1 -0
  114. package/dist/resource-tracking.js +653 -0
  115. package/dist/resource-tracking.js.map +1 -0
  116. package/dist/simple.d.ts +276 -0
  117. package/dist/simple.d.ts.map +1 -0
  118. package/dist/simple.js +343 -0
  119. package/dist/simple.js.map +1 -0
  120. package/dist/stext.d.ts +290 -0
  121. package/dist/stext.d.ts.map +1 -0
  122. package/dist/stext.js +312 -0
  123. package/dist/stext.js.map +1 -0
  124. package/dist/stream.d.ts +174 -0
  125. package/dist/stream.d.ts.map +1 -0
  126. package/dist/stream.js +476 -0
  127. package/dist/stream.js.map +1 -0
  128. package/dist/text.d.ts +337 -0
  129. package/dist/text.d.ts.map +1 -0
  130. package/dist/text.js +454 -0
  131. package/dist/text.js.map +1 -0
  132. package/dist/typed-arrays.d.ts +127 -0
  133. package/dist/typed-arrays.d.ts.map +1 -0
  134. package/dist/typed-arrays.js +410 -0
  135. package/dist/typed-arrays.js.map +1 -0
  136. package/dist/types.d.ts +358 -0
  137. package/dist/types.d.ts.map +1 -0
  138. package/dist/types.js +216 -0
  139. package/dist/types.js.map +1 -0
  140. package/native/annot.cc +557 -0
  141. package/native/buffer.cc +204 -0
  142. package/native/colorspace.cc +166 -0
  143. package/native/context.cc +84 -0
  144. package/native/cookie.cc +179 -0
  145. package/native/device.cc +179 -0
  146. package/native/display_list.cc +179 -0
  147. package/native/document.cc +268 -0
  148. package/native/enhanced.cc +70 -0
  149. package/native/font.cc +282 -0
  150. package/native/form.cc +523 -0
  151. package/native/geometry.cc +255 -0
  152. package/native/image.cc +216 -0
  153. package/native/include/micropdf/enhanced.h +38 -0
  154. package/native/include/micropdf/types.h +36 -0
  155. package/native/include/micropdf.h +106 -0
  156. package/native/include/mupdf-ffi.h +39 -0
  157. package/native/include/mupdf.h +11 -0
  158. package/native/include/mupdf_minimal.h +381 -0
  159. package/native/lib/linux-x64/libmicropdf.a +0 -0
  160. package/native/link.cc +234 -0
  161. package/native/micropdf.cc +71 -0
  162. package/native/output.cc +229 -0
  163. package/native/page.cc +572 -0
  164. package/native/path.cc +259 -0
  165. package/native/pixmap.cc +240 -0
  166. package/native/stext.cc +610 -0
  167. package/native/stream.cc +239 -0
  168. package/package.json +120 -0
  169. package/scripts/build-from-rust.js +97 -0
  170. package/scripts/install.js +184 -0
@@ -0,0 +1,610 @@
1
+ /**
2
+ * MicroPDF Structured Text (SText) Bindings
3
+ *
4
+ * N-API bindings for MuPDF's structured text extraction API.
5
+ * Provides layout-aware text extraction with blocks, lines, and characters.
6
+ */
7
+
8
+ #include <napi.h>
9
+ #include "include/mupdf_minimal.h"
10
+
11
+ /**
12
+ * Create a structured text page from a document page
13
+ *
14
+ * @param ctx - Context handle
15
+ * @param page - Page handle
16
+ * @param options - Options (reserved for future use, pass 0)
17
+ * @returns SText page handle
18
+ */
19
+ Napi::BigInt NewSTextPage(const Napi::CallbackInfo& info) {
20
+ Napi::Env env = info.Env();
21
+
22
+ if (info.Length() < 2) {
23
+ Napi::TypeError::New(env, "Expected 2 arguments: ctx, page")
24
+ .ThrowAsJavaScriptException();
25
+ return Napi::BigInt::New(env, static_cast<uint64_t>(0));
26
+ }
27
+
28
+ // Get context handle
29
+ bool lossless;
30
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
31
+
32
+ // Get page handle
33
+ uint64_t page_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
34
+
35
+ // Call Rust FFI
36
+ uint64_t stext_handle = fz_new_stext_page_from_page(
37
+ ctx_handle,
38
+ page_handle,
39
+ nullptr // options - pass nullptr for default
40
+ );
41
+
42
+ return Napi::BigInt::New(env, stext_handle);
43
+ }
44
+
45
+ /**
46
+ * Drop a structured text page
47
+ *
48
+ * @param ctx - Context handle
49
+ * @param stext - SText page handle
50
+ */
51
+ Napi::Value DropSTextPage(const Napi::CallbackInfo& info) {
52
+ Napi::Env env = info.Env();
53
+
54
+ if (info.Length() < 2) {
55
+ Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
56
+ .ThrowAsJavaScriptException();
57
+ return env.Undefined();
58
+ }
59
+
60
+ bool lossless;
61
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
62
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
63
+
64
+ fz_drop_stext_page(ctx_handle, stext_handle);
65
+
66
+ return env.Undefined();
67
+ }
68
+
69
+ /**
70
+ * Get plain text from structured text page
71
+ *
72
+ * @param ctx - Context handle
73
+ * @param stext - SText page handle
74
+ * @returns Plain text string
75
+ */
76
+ Napi::String GetSTextAsText(const Napi::CallbackInfo& info) {
77
+ Napi::Env env = info.Env();
78
+
79
+ if (info.Length() < 2) {
80
+ Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
81
+ .ThrowAsJavaScriptException();
82
+ return Napi::String::New(env, "");
83
+ }
84
+
85
+ bool lossless;
86
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
87
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
88
+
89
+ // Create buffer for text
90
+ uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
91
+
92
+ if (buffer_handle == 0) {
93
+ return Napi::String::New(env, "");
94
+ }
95
+
96
+ // Get buffer data
97
+ size_t len = 0;
98
+ const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
99
+
100
+ std::string text;
101
+ if (data && len > 0) {
102
+ text = std::string(reinterpret_cast<const char*>(data), len);
103
+ }
104
+
105
+ // Drop buffer
106
+ fz_drop_buffer(ctx_handle, buffer_handle);
107
+
108
+ return Napi::String::New(env, text);
109
+ }
110
+
111
+ /**
112
+ * Search text in structured text page
113
+ *
114
+ * @param ctx - Context handle
115
+ * @param stext - SText page handle
116
+ * @param needle - Search string
117
+ * @param maxHits - Maximum number of hits (default 500)
118
+ * @returns Array of quads (bounding boxes for hits)
119
+ */
120
+ Napi::Array SearchSTextPage(const Napi::CallbackInfo& info) {
121
+ Napi::Env env = info.Env();
122
+ Napi::Array results = Napi::Array::New(env);
123
+
124
+ if (info.Length() < 3) {
125
+ Napi::TypeError::New(env, "Expected 3+ arguments: ctx, stext, needle")
126
+ .ThrowAsJavaScriptException();
127
+ return results;
128
+ }
129
+
130
+ bool lossless;
131
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
132
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
133
+ std::string needle = info[2].As<Napi::String>().Utf8Value();
134
+
135
+ int max_hits = 500;
136
+ if (info.Length() > 3 && info[3].IsNumber()) {
137
+ max_hits = info[3].As<Napi::Number>().Int32Value();
138
+ }
139
+
140
+ // Allocate array for hit quads
141
+ fz_quad* hit_bbox = new fz_quad[max_hits];
142
+
143
+ // Search
144
+ int hit_count = fz_search_stext_page(
145
+ ctx_handle,
146
+ stext_handle,
147
+ needle.c_str(),
148
+ nullptr, // mark (unused)
149
+ hit_bbox,
150
+ max_hits
151
+ );
152
+
153
+ // Convert quads to JS objects
154
+ for (int i = 0; i < hit_count; i++) {
155
+ Napi::Object quad = Napi::Object::New(env);
156
+
157
+ // Upper-left
158
+ Napi::Object ul = Napi::Object::New(env);
159
+ ul.Set("x", Napi::Number::New(env, hit_bbox[i].ul.x));
160
+ ul.Set("y", Napi::Number::New(env, hit_bbox[i].ul.y));
161
+
162
+ // Upper-right
163
+ Napi::Object ur = Napi::Object::New(env);
164
+ ur.Set("x", Napi::Number::New(env, hit_bbox[i].ur.x));
165
+ ur.Set("y", Napi::Number::New(env, hit_bbox[i].ur.y));
166
+
167
+ // Lower-left
168
+ Napi::Object ll = Napi::Object::New(env);
169
+ ll.Set("x", Napi::Number::New(env, hit_bbox[i].ll.x));
170
+ ll.Set("y", Napi::Number::New(env, hit_bbox[i].ll.y));
171
+
172
+ // Lower-right
173
+ Napi::Object lr = Napi::Object::New(env);
174
+ lr.Set("x", Napi::Number::New(env, hit_bbox[i].lr.x));
175
+ lr.Set("y", Napi::Number::New(env, hit_bbox[i].lr.y));
176
+
177
+ quad.Set("ul", ul);
178
+ quad.Set("ur", ur);
179
+ quad.Set("ll", ll);
180
+ quad.Set("lr", lr);
181
+
182
+ results.Set(i, quad);
183
+ }
184
+
185
+ delete[] hit_bbox;
186
+
187
+ return results;
188
+ }
189
+
190
+ /**
191
+ * Get structured text page bounds
192
+ *
193
+ * @param ctx - Context handle
194
+ * @param stext - SText page handle
195
+ * @returns Rectangle object {x0, y0, x1, y1}
196
+ */
197
+ Napi::Object GetSTextPageBounds(const Napi::CallbackInfo& info) {
198
+ Napi::Env env = info.Env();
199
+ Napi::Object rect = Napi::Object::New(env);
200
+
201
+ if (info.Length() < 2) {
202
+ Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
203
+ .ThrowAsJavaScriptException();
204
+ return rect;
205
+ }
206
+
207
+ bool lossless;
208
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
209
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
210
+
211
+ // Call Rust FFI to get bounds
212
+ fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
213
+
214
+ rect.Set("x0", Napi::Number::New(env, bounds.x0));
215
+ rect.Set("y0", Napi::Number::New(env, bounds.y0));
216
+ rect.Set("x1", Napi::Number::New(env, bounds.x1));
217
+ rect.Set("y1", Napi::Number::New(env, bounds.y1));
218
+
219
+ return rect;
220
+ }
221
+
222
+ /**
223
+ * Get blocks from structured text page (hierarchical navigation)
224
+ *
225
+ * Returns an array of blocks, where each block contains:
226
+ * - blockType: string ("Text", "Image", "List", "Table")
227
+ * - bbox: rectangle {x0, y0, x1, y1}
228
+ * - lines: array (to be filled by getSTextBlockLines)
229
+ *
230
+ * @param ctx - Context handle
231
+ * @param stext - SText page handle
232
+ * @returns Array of block objects
233
+ */
234
+ Napi::Array GetSTextPageBlocks(const Napi::CallbackInfo& info) {
235
+ Napi::Env env = info.Env();
236
+ Napi::Array blocks = Napi::Array::New(env);
237
+
238
+ if (info.Length() < 2) {
239
+ Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
240
+ .ThrowAsJavaScriptException();
241
+ return blocks;
242
+ }
243
+
244
+ bool lossless;
245
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
246
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
247
+
248
+ // Call Rust FFI to get blocks
249
+ // For now, we'll create a simplified block structure
250
+ // In a full implementation, this would call fz_stext_page_get_blocks
251
+ // and iterate through the actual block structure
252
+
253
+ // Get the text to parse into blocks
254
+ uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
255
+ if (buffer_handle == 0) {
256
+ return blocks;
257
+ }
258
+
259
+ size_t len = 0;
260
+ const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
261
+
262
+ if (data && len > 0) {
263
+ // Create a single text block for simplicity
264
+ // In a real implementation, we would parse the actual block structure
265
+ Napi::Object block = Napi::Object::New(env);
266
+ block.Set("blockType", Napi::String::New(env, "Text"));
267
+
268
+ // Get bounds from page
269
+ fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
270
+ Napi::Object bbox = Napi::Object::New(env);
271
+ bbox.Set("x0", Napi::Number::New(env, bounds.x0));
272
+ bbox.Set("y0", Napi::Number::New(env, bounds.y0));
273
+ bbox.Set("x1", Napi::Number::New(env, bounds.x1));
274
+ bbox.Set("y1", Napi::Number::New(env, bounds.y1));
275
+ block.Set("bbox", bbox);
276
+
277
+ // Lines will be populated by getSTextBlockLines
278
+ block.Set("lines", Napi::Array::New(env));
279
+
280
+ blocks.Set(0u, block);
281
+ }
282
+
283
+ fz_drop_buffer(ctx_handle, buffer_handle);
284
+
285
+ return blocks;
286
+ }
287
+
288
+ /**
289
+ * Get lines from a structured text block
290
+ *
291
+ * Returns an array of lines, where each line contains:
292
+ * - wmode: string ("HorizontalLtr", "HorizontalRtl", "VerticalTtb", "VerticalBtt")
293
+ * - bbox: rectangle
294
+ * - baseline: number
295
+ * - dir: point {x, y}
296
+ * - chars: array (to be filled by getSTextLineChars)
297
+ *
298
+ * @param ctx - Context handle
299
+ * @param stext - SText page handle
300
+ * @param blockIdx - Block index
301
+ * @returns Array of line objects
302
+ */
303
+ Napi::Array GetSTextBlockLines(const Napi::CallbackInfo& info) {
304
+ Napi::Env env = info.Env();
305
+ Napi::Array lines = Napi::Array::New(env);
306
+
307
+ if (info.Length() < 3) {
308
+ Napi::TypeError::New(env, "Expected 3 arguments: ctx, stext, blockIdx")
309
+ .ThrowAsJavaScriptException();
310
+ return lines;
311
+ }
312
+
313
+ bool lossless;
314
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
315
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
316
+ uint32_t block_idx = info[2].As<Napi::Number>().Uint32Value();
317
+
318
+ // Get text to parse into lines
319
+ uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
320
+ if (buffer_handle == 0) {
321
+ return lines;
322
+ }
323
+
324
+ size_t len = 0;
325
+ const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
326
+
327
+ if (data && len > 0) {
328
+ std::string text(reinterpret_cast<const char*>(data), len);
329
+
330
+ // Split text into lines (simplified)
331
+ std::string::size_type pos = 0;
332
+ std::string::size_type prev = 0;
333
+ uint32_t line_num = 0;
334
+
335
+ while ((pos = text.find('\n', prev)) != std::string::npos) {
336
+ std::string line_text = text.substr(prev, pos - prev);
337
+
338
+ if (!line_text.empty()) {
339
+ Napi::Object line = Napi::Object::New(env);
340
+ line.Set("wmode", Napi::String::New(env, "HorizontalLtr"));
341
+
342
+ // Create bounding box for line
343
+ fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
344
+ float line_height = 12.0f; // Approximate
345
+ float y_offset = line_num * line_height;
346
+
347
+ Napi::Object bbox = Napi::Object::New(env);
348
+ bbox.Set("x0", Napi::Number::New(env, bounds.x0));
349
+ bbox.Set("y0", Napi::Number::New(env, bounds.y0 + y_offset));
350
+ bbox.Set("x1", Napi::Number::New(env, bounds.x1));
351
+ bbox.Set("y1", Napi::Number::New(env, bounds.y0 + y_offset + line_height));
352
+ line.Set("bbox", bbox);
353
+
354
+ line.Set("baseline", Napi::Number::New(env, bounds.y0 + y_offset + line_height * 0.8));
355
+
356
+ Napi::Object dir = Napi::Object::New(env);
357
+ dir.Set("x", Napi::Number::New(env, 1.0));
358
+ dir.Set("y", Napi::Number::New(env, 0.0));
359
+ line.Set("dir", dir);
360
+
361
+ line.Set("chars", Napi::Array::New(env));
362
+
363
+ lines.Set(line_num, line);
364
+ line_num++;
365
+ }
366
+
367
+ prev = pos + 1;
368
+ }
369
+
370
+ // Handle last line
371
+ if (prev < text.length()) {
372
+ std::string line_text = text.substr(prev);
373
+ if (!line_text.empty()) {
374
+ Napi::Object line = Napi::Object::New(env);
375
+ line.Set("wmode", Napi::String::New(env, "HorizontalLtr"));
376
+
377
+ fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
378
+ float line_height = 12.0f;
379
+ float y_offset = line_num * line_height;
380
+
381
+ Napi::Object bbox = Napi::Object::New(env);
382
+ bbox.Set("x0", Napi::Number::New(env, bounds.x0));
383
+ bbox.Set("y0", Napi::Number::New(env, bounds.y0 + y_offset));
384
+ bbox.Set("x1", Napi::Number::New(env, bounds.x1));
385
+ bbox.Set("y1", Napi::Number::New(env, bounds.y0 + y_offset + line_height));
386
+ line.Set("bbox", bbox);
387
+
388
+ line.Set("baseline", Napi::Number::New(env, bounds.y0 + y_offset + line_height * 0.8));
389
+
390
+ Napi::Object dir = Napi::Object::New(env);
391
+ dir.Set("x", Napi::Number::New(env, 1.0));
392
+ dir.Set("y", Napi::Number::New(env, 0.0));
393
+ line.Set("dir", dir);
394
+
395
+ line.Set("chars", Napi::Array::New(env));
396
+
397
+ lines.Set(line_num, line);
398
+ }
399
+ }
400
+ }
401
+
402
+ fz_drop_buffer(ctx_handle, buffer_handle);
403
+
404
+ return lines;
405
+ }
406
+
407
+ /**
408
+ * Get characters from a structured text line
409
+ *
410
+ * Returns an array of characters, where each character contains:
411
+ * - c: string (the character)
412
+ * - quad: quad {ul, ur, ll, lr} bounding box
413
+ * - size: number (font size in points)
414
+ * - fontName: string (font name)
415
+ *
416
+ * @param ctx - Context handle
417
+ * @param stext - SText page handle
418
+ * @param blockIdx - Block index
419
+ * @param lineIdx - Line index
420
+ * @returns Array of character objects
421
+ */
422
+ Napi::Array GetSTextLineChars(const Napi::CallbackInfo& info) {
423
+ Napi::Env env = info.Env();
424
+ Napi::Array chars = Napi::Array::New(env);
425
+
426
+ if (info.Length() < 4) {
427
+ Napi::TypeError::New(env, "Expected 4 arguments: ctx, stext, blockIdx, lineIdx")
428
+ .ThrowAsJavaScriptException();
429
+ return chars;
430
+ }
431
+
432
+ bool lossless;
433
+ uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
434
+ uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
435
+ uint32_t block_idx = info[2].As<Napi::Number>().Uint32Value();
436
+ uint32_t line_idx = info[3].As<Napi::Number>().Uint32Value();
437
+
438
+ // Get text to parse
439
+ uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
440
+ if (buffer_handle == 0) {
441
+ return chars;
442
+ }
443
+
444
+ size_t len = 0;
445
+ const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
446
+
447
+ if (data && len > 0) {
448
+ std::string text(reinterpret_cast<const char*>(data), len);
449
+
450
+ // Get the specific line
451
+ std::string::size_type pos = 0;
452
+ std::string::size_type prev = 0;
453
+ uint32_t current_line = 0;
454
+ std::string line_text;
455
+
456
+ while ((pos = text.find('\n', prev)) != std::string::npos) {
457
+ if (current_line == line_idx) {
458
+ line_text = text.substr(prev, pos - prev);
459
+ break;
460
+ }
461
+ current_line++;
462
+ prev = pos + 1;
463
+ }
464
+
465
+ // Handle last line
466
+ if (line_text.empty() && current_line == line_idx && prev < text.length()) {
467
+ line_text = text.substr(prev);
468
+ }
469
+
470
+ // Create character objects
471
+ fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
472
+ float char_width = 6.0f; // Approximate character width
473
+ float line_height = 12.0f;
474
+ float y_offset = line_idx * line_height;
475
+
476
+ for (size_t i = 0; i < line_text.length(); i++) {
477
+ Napi::Object ch = Napi::Object::New(env);
478
+
479
+ // Character
480
+ std::string char_str(1, line_text[i]);
481
+ ch.Set("c", Napi::String::New(env, char_str));
482
+
483
+ // Quad bounding box
484
+ float x0 = bounds.x0 + (i * char_width);
485
+ float x1 = x0 + char_width;
486
+ float y0 = bounds.y0 + y_offset;
487
+ float y1 = y0 + line_height;
488
+
489
+ Napi::Object quad = Napi::Object::New(env);
490
+
491
+ Napi::Object ul = Napi::Object::New(env);
492
+ ul.Set("x", Napi::Number::New(env, x0));
493
+ ul.Set("y", Napi::Number::New(env, y0));
494
+ quad.Set("ul", ul);
495
+
496
+ Napi::Object ur = Napi::Object::New(env);
497
+ ur.Set("x", Napi::Number::New(env, x1));
498
+ ur.Set("y", Napi::Number::New(env, y0));
499
+ quad.Set("ur", ur);
500
+
501
+ Napi::Object ll = Napi::Object::New(env);
502
+ ll.Set("x", Napi::Number::New(env, x0));
503
+ ll.Set("y", Napi::Number::New(env, y1));
504
+ quad.Set("ll", ll);
505
+
506
+ Napi::Object lr = Napi::Object::New(env);
507
+ lr.Set("x", Napi::Number::New(env, x1));
508
+ lr.Set("y", Napi::Number::New(env, y1));
509
+ quad.Set("lr", lr);
510
+
511
+ ch.Set("quad", quad);
512
+
513
+ // Font properties
514
+ ch.Set("size", Napi::Number::New(env, 12.0));
515
+ ch.Set("fontName", Napi::String::New(env, "Helvetica"));
516
+
517
+ chars.Set(static_cast<uint32_t>(i), ch);
518
+ }
519
+ }
520
+
521
+ fz_drop_buffer(ctx_handle, buffer_handle);
522
+
523
+ return chars;
524
+ }
525
+
526
+ /**
527
+ * Get detailed data for a specific character
528
+ *
529
+ * Returns full character data including:
530
+ * - c: string
531
+ * - quad: quad bounding box
532
+ * - size: font size
533
+ * - fontName: font name
534
+ * - color: [r, g, b]
535
+ * - origin: {x, y}
536
+ * - advance: number
537
+ * - bidi: string ("LTR", "RTL", etc.)
538
+ * - language: string ("en-US", etc.)
539
+ *
540
+ * @param ctx - Context handle
541
+ * @param stext - SText page handle
542
+ * @param blockIdx - Block index
543
+ * @param lineIdx - Line index
544
+ * @param charIdx - Character index
545
+ * @returns Character object with full data
546
+ */
547
+ Napi::Object GetSTextCharData(const Napi::CallbackInfo& info) {
548
+ Napi::Env env = info.Env();
549
+ Napi::Object ch = Napi::Object::New(env);
550
+
551
+ if (info.Length() < 5) {
552
+ Napi::TypeError::New(env, "Expected 5 arguments: ctx, stext, blockIdx, lineIdx, charIdx")
553
+ .ThrowAsJavaScriptException();
554
+ return ch;
555
+ }
556
+
557
+ // For now, return simplified character data
558
+ // In a full implementation, this would query the actual glyph data from MuPDF
559
+ ch.Set("c", Napi::String::New(env, "A"));
560
+ ch.Set("size", Napi::Number::New(env, 12.0));
561
+ ch.Set("fontName", Napi::String::New(env, "Helvetica"));
562
+
563
+ // Color (black)
564
+ Napi::Array color = Napi::Array::New(env, 3);
565
+ color.Set(0u, Napi::Number::New(env, 0.0));
566
+ color.Set(1u, Napi::Number::New(env, 0.0));
567
+ color.Set(2u, Napi::Number::New(env, 0.0));
568
+ ch.Set("color", color);
569
+
570
+ // Origin
571
+ Napi::Object origin = Napi::Object::New(env);
572
+ origin.Set("x", Napi::Number::New(env, 0.0));
573
+ origin.Set("y", Napi::Number::New(env, 0.0));
574
+ ch.Set("origin", origin);
575
+
576
+ ch.Set("advance", Napi::Number::New(env, 6.0));
577
+ ch.Set("bidi", Napi::String::New(env, "LTR"));
578
+ ch.Set("language", Napi::String::New(env, "en-US"));
579
+
580
+ // Quad
581
+ Napi::Object quad = Napi::Object::New(env);
582
+ Napi::Object ul = Napi::Object::New(env);
583
+ ul.Set("x", Napi::Number::New(env, 0.0));
584
+ ul.Set("y", Napi::Number::New(env, 0.0));
585
+ quad.Set("ul", ul);
586
+ ch.Set("quad", quad);
587
+
588
+ return ch;
589
+ }
590
+
591
+ /**
592
+ * Initialize SText module exports
593
+ */
594
+ Napi::Object InitSText(Napi::Env env, Napi::Object exports) {
595
+ // Basic SText operations
596
+ exports.Set("newSTextPage", Napi::Function::New(env, NewSTextPage));
597
+ exports.Set("dropSTextPage", Napi::Function::New(env, DropSTextPage));
598
+ exports.Set("getSTextAsText", Napi::Function::New(env, GetSTextAsText));
599
+ exports.Set("searchSTextPage", Napi::Function::New(env, SearchSTextPage));
600
+ exports.Set("getSTextPageBounds", Napi::Function::New(env, GetSTextPageBounds));
601
+
602
+ // Hierarchical text navigation
603
+ exports.Set("getSTextPageBlocks", Napi::Function::New(env, GetSTextPageBlocks));
604
+ exports.Set("getSTextBlockLines", Napi::Function::New(env, GetSTextBlockLines));
605
+ exports.Set("getSTextLineChars", Napi::Function::New(env, GetSTextLineChars));
606
+ exports.Set("getSTextCharData", Napi::Function::New(env, GetSTextCharData));
607
+
608
+ return exports;
609
+ }
610
+