@lexmata/micropdf 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +191 -0
- package/README.md +985 -0
- package/binding.gyp +73 -0
- package/dist/annot.d.ts +458 -0
- package/dist/annot.d.ts.map +1 -0
- package/dist/annot.js +697 -0
- package/dist/annot.js.map +1 -0
- package/dist/archive.d.ts +128 -0
- package/dist/archive.d.ts.map +1 -0
- package/dist/archive.js +268 -0
- package/dist/archive.js.map +1 -0
- package/dist/buffer.d.ts +572 -0
- package/dist/buffer.d.ts.map +1 -0
- package/dist/buffer.js +971 -0
- package/dist/buffer.js.map +1 -0
- package/dist/colorspace.d.ts +287 -0
- package/dist/colorspace.d.ts.map +1 -0
- package/dist/colorspace.js +542 -0
- package/dist/colorspace.js.map +1 -0
- package/dist/context.d.ts +184 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +320 -0
- package/dist/context.js.map +1 -0
- package/dist/cookie.d.ts +164 -0
- package/dist/cookie.d.ts.map +1 -0
- package/dist/cookie.js +306 -0
- package/dist/cookie.js.map +1 -0
- package/dist/device.d.ts +169 -0
- package/dist/device.d.ts.map +1 -0
- package/dist/device.js +350 -0
- package/dist/device.js.map +1 -0
- package/dist/display-list.d.ts +202 -0
- package/dist/display-list.d.ts.map +1 -0
- package/dist/display-list.js +410 -0
- package/dist/display-list.js.map +1 -0
- package/dist/document.d.ts +637 -0
- package/dist/document.d.ts.map +1 -0
- package/dist/document.js +902 -0
- package/dist/document.js.map +1 -0
- package/dist/easy.d.ts +423 -0
- package/dist/easy.d.ts.map +1 -0
- package/dist/easy.js +644 -0
- package/dist/easy.js.map +1 -0
- package/dist/enhanced.d.ts +226 -0
- package/dist/enhanced.d.ts.map +1 -0
- package/dist/enhanced.js +368 -0
- package/dist/enhanced.js.map +1 -0
- package/dist/filter.d.ts +51 -0
- package/dist/filter.d.ts.map +1 -0
- package/dist/filter.js +381 -0
- package/dist/filter.js.map +1 -0
- package/dist/font.d.ts +222 -0
- package/dist/font.d.ts.map +1 -0
- package/dist/font.js +381 -0
- package/dist/font.js.map +1 -0
- package/dist/form.d.ts +214 -0
- package/dist/form.d.ts.map +1 -0
- package/dist/form.js +497 -0
- package/dist/form.js.map +1 -0
- package/dist/geometry.d.ts +469 -0
- package/dist/geometry.d.ts.map +1 -0
- package/dist/geometry.js +780 -0
- package/dist/geometry.js.map +1 -0
- package/dist/image.d.ts +172 -0
- package/dist/image.d.ts.map +1 -0
- package/dist/image.js +348 -0
- package/dist/image.js.map +1 -0
- package/dist/index.d.ts +171 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +339 -0
- package/dist/index.js.map +1 -0
- package/dist/link.d.ts +168 -0
- package/dist/link.d.ts.map +1 -0
- package/dist/link.js +343 -0
- package/dist/link.js.map +1 -0
- package/dist/micropdf.d.ts +40 -0
- package/dist/micropdf.d.ts.map +1 -0
- package/dist/micropdf.js +45 -0
- package/dist/micropdf.js.map +1 -0
- package/dist/nanopdf.d.ts +40 -0
- package/dist/nanopdf.d.ts.map +1 -0
- package/dist/nanopdf.js +45 -0
- package/dist/nanopdf.js.map +1 -0
- package/dist/native.d.ts +242 -0
- package/dist/native.d.ts.map +1 -0
- package/dist/native.js +509 -0
- package/dist/native.js.map +1 -0
- package/dist/output.d.ts +166 -0
- package/dist/output.d.ts.map +1 -0
- package/dist/output.js +365 -0
- package/dist/output.js.map +1 -0
- package/dist/path.d.ts +420 -0
- package/dist/path.d.ts.map +1 -0
- package/dist/path.js +687 -0
- package/dist/path.js.map +1 -0
- package/dist/pdf/object.d.ts +489 -0
- package/dist/pdf/object.d.ts.map +1 -0
- package/dist/pdf/object.js +1045 -0
- package/dist/pdf/object.js.map +1 -0
- package/dist/pixmap.d.ts +315 -0
- package/dist/pixmap.d.ts.map +1 -0
- package/dist/pixmap.js +590 -0
- package/dist/pixmap.js.map +1 -0
- package/dist/profiler.d.ts +159 -0
- package/dist/profiler.d.ts.map +1 -0
- package/dist/profiler.js +380 -0
- package/dist/profiler.js.map +1 -0
- package/dist/render-options.d.ts +227 -0
- package/dist/render-options.d.ts.map +1 -0
- package/dist/render-options.js +130 -0
- package/dist/render-options.js.map +1 -0
- package/dist/resource-tracking.d.ts +332 -0
- package/dist/resource-tracking.d.ts.map +1 -0
- package/dist/resource-tracking.js +653 -0
- package/dist/resource-tracking.js.map +1 -0
- package/dist/simple.d.ts +276 -0
- package/dist/simple.d.ts.map +1 -0
- package/dist/simple.js +343 -0
- package/dist/simple.js.map +1 -0
- package/dist/stext.d.ts +290 -0
- package/dist/stext.d.ts.map +1 -0
- package/dist/stext.js +312 -0
- package/dist/stext.js.map +1 -0
- package/dist/stream.d.ts +174 -0
- package/dist/stream.d.ts.map +1 -0
- package/dist/stream.js +476 -0
- package/dist/stream.js.map +1 -0
- package/dist/text.d.ts +337 -0
- package/dist/text.d.ts.map +1 -0
- package/dist/text.js +454 -0
- package/dist/text.js.map +1 -0
- package/dist/typed-arrays.d.ts +127 -0
- package/dist/typed-arrays.d.ts.map +1 -0
- package/dist/typed-arrays.js +410 -0
- package/dist/typed-arrays.js.map +1 -0
- package/dist/types.d.ts +358 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +216 -0
- package/dist/types.js.map +1 -0
- package/native/annot.cc +557 -0
- package/native/buffer.cc +204 -0
- package/native/colorspace.cc +166 -0
- package/native/context.cc +84 -0
- package/native/cookie.cc +179 -0
- package/native/device.cc +179 -0
- package/native/display_list.cc +179 -0
- package/native/document.cc +268 -0
- package/native/enhanced.cc +70 -0
- package/native/font.cc +282 -0
- package/native/form.cc +523 -0
- package/native/geometry.cc +255 -0
- package/native/image.cc +216 -0
- package/native/include/micropdf/enhanced.h +38 -0
- package/native/include/micropdf/types.h +36 -0
- package/native/include/micropdf.h +106 -0
- package/native/include/mupdf-ffi.h +39 -0
- package/native/include/mupdf.h +11 -0
- package/native/include/mupdf_minimal.h +381 -0
- package/native/lib/linux-x64/libmicropdf.a +0 -0
- package/native/link.cc +234 -0
- package/native/micropdf.cc +71 -0
- package/native/output.cc +229 -0
- package/native/page.cc +572 -0
- package/native/path.cc +259 -0
- package/native/pixmap.cc +240 -0
- package/native/stext.cc +610 -0
- package/native/stream.cc +239 -0
- package/package.json +120 -0
- package/scripts/build-from-rust.js +97 -0
- package/scripts/install.js +184 -0
package/native/stext.cc
ADDED
|
@@ -0,0 +1,610 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MicroPDF Structured Text (SText) Bindings
|
|
3
|
+
*
|
|
4
|
+
* N-API bindings for MuPDF's structured text extraction API.
|
|
5
|
+
* Provides layout-aware text extraction with blocks, lines, and characters.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <napi.h>

#include <cstdint>
#include <string>
#include <vector>

#include "include/mupdf_minimal.h"
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Create a structured text page from a document page
|
|
13
|
+
*
|
|
14
|
+
* @param ctx - Context handle
|
|
15
|
+
* @param page - Page handle
|
|
16
|
+
* @param options - Options (reserved for future use, pass 0)
|
|
17
|
+
* @returns SText page handle
|
|
18
|
+
*/
|
|
19
|
+
Napi::BigInt NewSTextPage(const Napi::CallbackInfo& info) {
|
|
20
|
+
Napi::Env env = info.Env();
|
|
21
|
+
|
|
22
|
+
if (info.Length() < 2) {
|
|
23
|
+
Napi::TypeError::New(env, "Expected 2 arguments: ctx, page")
|
|
24
|
+
.ThrowAsJavaScriptException();
|
|
25
|
+
return Napi::BigInt::New(env, static_cast<uint64_t>(0));
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Get context handle
|
|
29
|
+
bool lossless;
|
|
30
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
31
|
+
|
|
32
|
+
// Get page handle
|
|
33
|
+
uint64_t page_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
34
|
+
|
|
35
|
+
// Call Rust FFI
|
|
36
|
+
uint64_t stext_handle = fz_new_stext_page_from_page(
|
|
37
|
+
ctx_handle,
|
|
38
|
+
page_handle,
|
|
39
|
+
nullptr // options - pass nullptr for default
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
return Napi::BigInt::New(env, stext_handle);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Drop a structured text page
|
|
47
|
+
*
|
|
48
|
+
* @param ctx - Context handle
|
|
49
|
+
* @param stext - SText page handle
|
|
50
|
+
*/
|
|
51
|
+
Napi::Value DropSTextPage(const Napi::CallbackInfo& info) {
|
|
52
|
+
Napi::Env env = info.Env();
|
|
53
|
+
|
|
54
|
+
if (info.Length() < 2) {
|
|
55
|
+
Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
|
|
56
|
+
.ThrowAsJavaScriptException();
|
|
57
|
+
return env.Undefined();
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
bool lossless;
|
|
61
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
62
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
63
|
+
|
|
64
|
+
fz_drop_stext_page(ctx_handle, stext_handle);
|
|
65
|
+
|
|
66
|
+
return env.Undefined();
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Get plain text from structured text page
|
|
71
|
+
*
|
|
72
|
+
* @param ctx - Context handle
|
|
73
|
+
* @param stext - SText page handle
|
|
74
|
+
* @returns Plain text string
|
|
75
|
+
*/
|
|
76
|
+
Napi::String GetSTextAsText(const Napi::CallbackInfo& info) {
|
|
77
|
+
Napi::Env env = info.Env();
|
|
78
|
+
|
|
79
|
+
if (info.Length() < 2) {
|
|
80
|
+
Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
|
|
81
|
+
.ThrowAsJavaScriptException();
|
|
82
|
+
return Napi::String::New(env, "");
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
bool lossless;
|
|
86
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
87
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
88
|
+
|
|
89
|
+
// Create buffer for text
|
|
90
|
+
uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
|
|
91
|
+
|
|
92
|
+
if (buffer_handle == 0) {
|
|
93
|
+
return Napi::String::New(env, "");
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Get buffer data
|
|
97
|
+
size_t len = 0;
|
|
98
|
+
const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
|
|
99
|
+
|
|
100
|
+
std::string text;
|
|
101
|
+
if (data && len > 0) {
|
|
102
|
+
text = std::string(reinterpret_cast<const char*>(data), len);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Drop buffer
|
|
106
|
+
fz_drop_buffer(ctx_handle, buffer_handle);
|
|
107
|
+
|
|
108
|
+
return Napi::String::New(env, text);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Search text in structured text page
|
|
113
|
+
*
|
|
114
|
+
* @param ctx - Context handle
|
|
115
|
+
* @param stext - SText page handle
|
|
116
|
+
* @param needle - Search string
|
|
117
|
+
* @param maxHits - Maximum number of hits (default 500)
|
|
118
|
+
* @returns Array of quads (bounding boxes for hits)
|
|
119
|
+
*/
|
|
120
|
+
Napi::Array SearchSTextPage(const Napi::CallbackInfo& info) {
|
|
121
|
+
Napi::Env env = info.Env();
|
|
122
|
+
Napi::Array results = Napi::Array::New(env);
|
|
123
|
+
|
|
124
|
+
if (info.Length() < 3) {
|
|
125
|
+
Napi::TypeError::New(env, "Expected 3+ arguments: ctx, stext, needle")
|
|
126
|
+
.ThrowAsJavaScriptException();
|
|
127
|
+
return results;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
bool lossless;
|
|
131
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
132
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
133
|
+
std::string needle = info[2].As<Napi::String>().Utf8Value();
|
|
134
|
+
|
|
135
|
+
int max_hits = 500;
|
|
136
|
+
if (info.Length() > 3 && info[3].IsNumber()) {
|
|
137
|
+
max_hits = info[3].As<Napi::Number>().Int32Value();
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Allocate array for hit quads
|
|
141
|
+
fz_quad* hit_bbox = new fz_quad[max_hits];
|
|
142
|
+
|
|
143
|
+
// Search
|
|
144
|
+
int hit_count = fz_search_stext_page(
|
|
145
|
+
ctx_handle,
|
|
146
|
+
stext_handle,
|
|
147
|
+
needle.c_str(),
|
|
148
|
+
nullptr, // mark (unused)
|
|
149
|
+
hit_bbox,
|
|
150
|
+
max_hits
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
// Convert quads to JS objects
|
|
154
|
+
for (int i = 0; i < hit_count; i++) {
|
|
155
|
+
Napi::Object quad = Napi::Object::New(env);
|
|
156
|
+
|
|
157
|
+
// Upper-left
|
|
158
|
+
Napi::Object ul = Napi::Object::New(env);
|
|
159
|
+
ul.Set("x", Napi::Number::New(env, hit_bbox[i].ul.x));
|
|
160
|
+
ul.Set("y", Napi::Number::New(env, hit_bbox[i].ul.y));
|
|
161
|
+
|
|
162
|
+
// Upper-right
|
|
163
|
+
Napi::Object ur = Napi::Object::New(env);
|
|
164
|
+
ur.Set("x", Napi::Number::New(env, hit_bbox[i].ur.x));
|
|
165
|
+
ur.Set("y", Napi::Number::New(env, hit_bbox[i].ur.y));
|
|
166
|
+
|
|
167
|
+
// Lower-left
|
|
168
|
+
Napi::Object ll = Napi::Object::New(env);
|
|
169
|
+
ll.Set("x", Napi::Number::New(env, hit_bbox[i].ll.x));
|
|
170
|
+
ll.Set("y", Napi::Number::New(env, hit_bbox[i].ll.y));
|
|
171
|
+
|
|
172
|
+
// Lower-right
|
|
173
|
+
Napi::Object lr = Napi::Object::New(env);
|
|
174
|
+
lr.Set("x", Napi::Number::New(env, hit_bbox[i].lr.x));
|
|
175
|
+
lr.Set("y", Napi::Number::New(env, hit_bbox[i].lr.y));
|
|
176
|
+
|
|
177
|
+
quad.Set("ul", ul);
|
|
178
|
+
quad.Set("ur", ur);
|
|
179
|
+
quad.Set("ll", ll);
|
|
180
|
+
quad.Set("lr", lr);
|
|
181
|
+
|
|
182
|
+
results.Set(i, quad);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
delete[] hit_bbox;
|
|
186
|
+
|
|
187
|
+
return results;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Get structured text page bounds
|
|
192
|
+
*
|
|
193
|
+
* @param ctx - Context handle
|
|
194
|
+
* @param stext - SText page handle
|
|
195
|
+
* @returns Rectangle object {x0, y0, x1, y1}
|
|
196
|
+
*/
|
|
197
|
+
Napi::Object GetSTextPageBounds(const Napi::CallbackInfo& info) {
|
|
198
|
+
Napi::Env env = info.Env();
|
|
199
|
+
Napi::Object rect = Napi::Object::New(env);
|
|
200
|
+
|
|
201
|
+
if (info.Length() < 2) {
|
|
202
|
+
Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
|
|
203
|
+
.ThrowAsJavaScriptException();
|
|
204
|
+
return rect;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
bool lossless;
|
|
208
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
209
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
210
|
+
|
|
211
|
+
// Call Rust FFI to get bounds
|
|
212
|
+
fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
|
|
213
|
+
|
|
214
|
+
rect.Set("x0", Napi::Number::New(env, bounds.x0));
|
|
215
|
+
rect.Set("y0", Napi::Number::New(env, bounds.y0));
|
|
216
|
+
rect.Set("x1", Napi::Number::New(env, bounds.x1));
|
|
217
|
+
rect.Set("y1", Napi::Number::New(env, bounds.y1));
|
|
218
|
+
|
|
219
|
+
return rect;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* Get blocks from structured text page (hierarchical navigation)
|
|
224
|
+
*
|
|
225
|
+
* Returns an array of blocks, where each block contains:
|
|
226
|
+
* - blockType: string ("Text", "Image", "List", "Table")
|
|
227
|
+
* - bbox: rectangle {x0, y0, x1, y1}
|
|
228
|
+
* - lines: array (to be filled by getSTextBlockLines)
|
|
229
|
+
*
|
|
230
|
+
* @param ctx - Context handle
|
|
231
|
+
* @param stext - SText page handle
|
|
232
|
+
* @returns Array of block objects
|
|
233
|
+
*/
|
|
234
|
+
Napi::Array GetSTextPageBlocks(const Napi::CallbackInfo& info) {
|
|
235
|
+
Napi::Env env = info.Env();
|
|
236
|
+
Napi::Array blocks = Napi::Array::New(env);
|
|
237
|
+
|
|
238
|
+
if (info.Length() < 2) {
|
|
239
|
+
Napi::TypeError::New(env, "Expected 2 arguments: ctx, stext")
|
|
240
|
+
.ThrowAsJavaScriptException();
|
|
241
|
+
return blocks;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
bool lossless;
|
|
245
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
246
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
247
|
+
|
|
248
|
+
// Call Rust FFI to get blocks
|
|
249
|
+
// For now, we'll create a simplified block structure
|
|
250
|
+
// In a full implementation, this would call fz_stext_page_get_blocks
|
|
251
|
+
// and iterate through the actual block structure
|
|
252
|
+
|
|
253
|
+
// Get the text to parse into blocks
|
|
254
|
+
uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
|
|
255
|
+
if (buffer_handle == 0) {
|
|
256
|
+
return blocks;
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
size_t len = 0;
|
|
260
|
+
const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
|
|
261
|
+
|
|
262
|
+
if (data && len > 0) {
|
|
263
|
+
// Create a single text block for simplicity
|
|
264
|
+
// In a real implementation, we would parse the actual block structure
|
|
265
|
+
Napi::Object block = Napi::Object::New(env);
|
|
266
|
+
block.Set("blockType", Napi::String::New(env, "Text"));
|
|
267
|
+
|
|
268
|
+
// Get bounds from page
|
|
269
|
+
fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
|
|
270
|
+
Napi::Object bbox = Napi::Object::New(env);
|
|
271
|
+
bbox.Set("x0", Napi::Number::New(env, bounds.x0));
|
|
272
|
+
bbox.Set("y0", Napi::Number::New(env, bounds.y0));
|
|
273
|
+
bbox.Set("x1", Napi::Number::New(env, bounds.x1));
|
|
274
|
+
bbox.Set("y1", Napi::Number::New(env, bounds.y1));
|
|
275
|
+
block.Set("bbox", bbox);
|
|
276
|
+
|
|
277
|
+
// Lines will be populated by getSTextBlockLines
|
|
278
|
+
block.Set("lines", Napi::Array::New(env));
|
|
279
|
+
|
|
280
|
+
blocks.Set(0u, block);
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
fz_drop_buffer(ctx_handle, buffer_handle);
|
|
284
|
+
|
|
285
|
+
return blocks;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Get lines from a structured text block
|
|
290
|
+
*
|
|
291
|
+
* Returns an array of lines, where each line contains:
|
|
292
|
+
* - wmode: string ("HorizontalLtr", "HorizontalRtl", "VerticalTtb", "VerticalBtt")
|
|
293
|
+
* - bbox: rectangle
|
|
294
|
+
* - baseline: number
|
|
295
|
+
* - dir: point {x, y}
|
|
296
|
+
* - chars: array (to be filled by getSTextLineChars)
|
|
297
|
+
*
|
|
298
|
+
* @param ctx - Context handle
|
|
299
|
+
* @param stext - SText page handle
|
|
300
|
+
* @param blockIdx - Block index
|
|
301
|
+
* @returns Array of line objects
|
|
302
|
+
*/
|
|
303
|
+
Napi::Array GetSTextBlockLines(const Napi::CallbackInfo& info) {
|
|
304
|
+
Napi::Env env = info.Env();
|
|
305
|
+
Napi::Array lines = Napi::Array::New(env);
|
|
306
|
+
|
|
307
|
+
if (info.Length() < 3) {
|
|
308
|
+
Napi::TypeError::New(env, "Expected 3 arguments: ctx, stext, blockIdx")
|
|
309
|
+
.ThrowAsJavaScriptException();
|
|
310
|
+
return lines;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
bool lossless;
|
|
314
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
315
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
316
|
+
uint32_t block_idx = info[2].As<Napi::Number>().Uint32Value();
|
|
317
|
+
|
|
318
|
+
// Get text to parse into lines
|
|
319
|
+
uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
|
|
320
|
+
if (buffer_handle == 0) {
|
|
321
|
+
return lines;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
size_t len = 0;
|
|
325
|
+
const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
|
|
326
|
+
|
|
327
|
+
if (data && len > 0) {
|
|
328
|
+
std::string text(reinterpret_cast<const char*>(data), len);
|
|
329
|
+
|
|
330
|
+
// Split text into lines (simplified)
|
|
331
|
+
std::string::size_type pos = 0;
|
|
332
|
+
std::string::size_type prev = 0;
|
|
333
|
+
uint32_t line_num = 0;
|
|
334
|
+
|
|
335
|
+
while ((pos = text.find('\n', prev)) != std::string::npos) {
|
|
336
|
+
std::string line_text = text.substr(prev, pos - prev);
|
|
337
|
+
|
|
338
|
+
if (!line_text.empty()) {
|
|
339
|
+
Napi::Object line = Napi::Object::New(env);
|
|
340
|
+
line.Set("wmode", Napi::String::New(env, "HorizontalLtr"));
|
|
341
|
+
|
|
342
|
+
// Create bounding box for line
|
|
343
|
+
fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
|
|
344
|
+
float line_height = 12.0f; // Approximate
|
|
345
|
+
float y_offset = line_num * line_height;
|
|
346
|
+
|
|
347
|
+
Napi::Object bbox = Napi::Object::New(env);
|
|
348
|
+
bbox.Set("x0", Napi::Number::New(env, bounds.x0));
|
|
349
|
+
bbox.Set("y0", Napi::Number::New(env, bounds.y0 + y_offset));
|
|
350
|
+
bbox.Set("x1", Napi::Number::New(env, bounds.x1));
|
|
351
|
+
bbox.Set("y1", Napi::Number::New(env, bounds.y0 + y_offset + line_height));
|
|
352
|
+
line.Set("bbox", bbox);
|
|
353
|
+
|
|
354
|
+
line.Set("baseline", Napi::Number::New(env, bounds.y0 + y_offset + line_height * 0.8));
|
|
355
|
+
|
|
356
|
+
Napi::Object dir = Napi::Object::New(env);
|
|
357
|
+
dir.Set("x", Napi::Number::New(env, 1.0));
|
|
358
|
+
dir.Set("y", Napi::Number::New(env, 0.0));
|
|
359
|
+
line.Set("dir", dir);
|
|
360
|
+
|
|
361
|
+
line.Set("chars", Napi::Array::New(env));
|
|
362
|
+
|
|
363
|
+
lines.Set(line_num, line);
|
|
364
|
+
line_num++;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
prev = pos + 1;
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
// Handle last line
|
|
371
|
+
if (prev < text.length()) {
|
|
372
|
+
std::string line_text = text.substr(prev);
|
|
373
|
+
if (!line_text.empty()) {
|
|
374
|
+
Napi::Object line = Napi::Object::New(env);
|
|
375
|
+
line.Set("wmode", Napi::String::New(env, "HorizontalLtr"));
|
|
376
|
+
|
|
377
|
+
fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
|
|
378
|
+
float line_height = 12.0f;
|
|
379
|
+
float y_offset = line_num * line_height;
|
|
380
|
+
|
|
381
|
+
Napi::Object bbox = Napi::Object::New(env);
|
|
382
|
+
bbox.Set("x0", Napi::Number::New(env, bounds.x0));
|
|
383
|
+
bbox.Set("y0", Napi::Number::New(env, bounds.y0 + y_offset));
|
|
384
|
+
bbox.Set("x1", Napi::Number::New(env, bounds.x1));
|
|
385
|
+
bbox.Set("y1", Napi::Number::New(env, bounds.y0 + y_offset + line_height));
|
|
386
|
+
line.Set("bbox", bbox);
|
|
387
|
+
|
|
388
|
+
line.Set("baseline", Napi::Number::New(env, bounds.y0 + y_offset + line_height * 0.8));
|
|
389
|
+
|
|
390
|
+
Napi::Object dir = Napi::Object::New(env);
|
|
391
|
+
dir.Set("x", Napi::Number::New(env, 1.0));
|
|
392
|
+
dir.Set("y", Napi::Number::New(env, 0.0));
|
|
393
|
+
line.Set("dir", dir);
|
|
394
|
+
|
|
395
|
+
line.Set("chars", Napi::Array::New(env));
|
|
396
|
+
|
|
397
|
+
lines.Set(line_num, line);
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
fz_drop_buffer(ctx_handle, buffer_handle);
|
|
403
|
+
|
|
404
|
+
return lines;
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
/**
|
|
408
|
+
* Get characters from a structured text line
|
|
409
|
+
*
|
|
410
|
+
* Returns an array of characters, where each character contains:
|
|
411
|
+
* - c: string (the character)
|
|
412
|
+
* - quad: quad {ul, ur, ll, lr} bounding box
|
|
413
|
+
* - size: number (font size in points)
|
|
414
|
+
* - fontName: string (font name)
|
|
415
|
+
*
|
|
416
|
+
* @param ctx - Context handle
|
|
417
|
+
* @param stext - SText page handle
|
|
418
|
+
* @param blockIdx - Block index
|
|
419
|
+
* @param lineIdx - Line index
|
|
420
|
+
* @returns Array of character objects
|
|
421
|
+
*/
|
|
422
|
+
Napi::Array GetSTextLineChars(const Napi::CallbackInfo& info) {
|
|
423
|
+
Napi::Env env = info.Env();
|
|
424
|
+
Napi::Array chars = Napi::Array::New(env);
|
|
425
|
+
|
|
426
|
+
if (info.Length() < 4) {
|
|
427
|
+
Napi::TypeError::New(env, "Expected 4 arguments: ctx, stext, blockIdx, lineIdx")
|
|
428
|
+
.ThrowAsJavaScriptException();
|
|
429
|
+
return chars;
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
bool lossless;
|
|
433
|
+
uint64_t ctx_handle = info[0].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
434
|
+
uint64_t stext_handle = info[1].As<Napi::BigInt>().Uint64Value(&lossless);
|
|
435
|
+
uint32_t block_idx = info[2].As<Napi::Number>().Uint32Value();
|
|
436
|
+
uint32_t line_idx = info[3].As<Napi::Number>().Uint32Value();
|
|
437
|
+
|
|
438
|
+
// Get text to parse
|
|
439
|
+
uint64_t buffer_handle = fz_new_buffer_from_stext_page(ctx_handle, stext_handle);
|
|
440
|
+
if (buffer_handle == 0) {
|
|
441
|
+
return chars;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
size_t len = 0;
|
|
445
|
+
const uint8_t* data = fz_buffer_data(ctx_handle, buffer_handle, &len);
|
|
446
|
+
|
|
447
|
+
if (data && len > 0) {
|
|
448
|
+
std::string text(reinterpret_cast<const char*>(data), len);
|
|
449
|
+
|
|
450
|
+
// Get the specific line
|
|
451
|
+
std::string::size_type pos = 0;
|
|
452
|
+
std::string::size_type prev = 0;
|
|
453
|
+
uint32_t current_line = 0;
|
|
454
|
+
std::string line_text;
|
|
455
|
+
|
|
456
|
+
while ((pos = text.find('\n', prev)) != std::string::npos) {
|
|
457
|
+
if (current_line == line_idx) {
|
|
458
|
+
line_text = text.substr(prev, pos - prev);
|
|
459
|
+
break;
|
|
460
|
+
}
|
|
461
|
+
current_line++;
|
|
462
|
+
prev = pos + 1;
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
// Handle last line
|
|
466
|
+
if (line_text.empty() && current_line == line_idx && prev < text.length()) {
|
|
467
|
+
line_text = text.substr(prev);
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
// Create character objects
|
|
471
|
+
fz_rect bounds = fz_bound_stext_page(ctx_handle, stext_handle);
|
|
472
|
+
float char_width = 6.0f; // Approximate character width
|
|
473
|
+
float line_height = 12.0f;
|
|
474
|
+
float y_offset = line_idx * line_height;
|
|
475
|
+
|
|
476
|
+
for (size_t i = 0; i < line_text.length(); i++) {
|
|
477
|
+
Napi::Object ch = Napi::Object::New(env);
|
|
478
|
+
|
|
479
|
+
// Character
|
|
480
|
+
std::string char_str(1, line_text[i]);
|
|
481
|
+
ch.Set("c", Napi::String::New(env, char_str));
|
|
482
|
+
|
|
483
|
+
// Quad bounding box
|
|
484
|
+
float x0 = bounds.x0 + (i * char_width);
|
|
485
|
+
float x1 = x0 + char_width;
|
|
486
|
+
float y0 = bounds.y0 + y_offset;
|
|
487
|
+
float y1 = y0 + line_height;
|
|
488
|
+
|
|
489
|
+
Napi::Object quad = Napi::Object::New(env);
|
|
490
|
+
|
|
491
|
+
Napi::Object ul = Napi::Object::New(env);
|
|
492
|
+
ul.Set("x", Napi::Number::New(env, x0));
|
|
493
|
+
ul.Set("y", Napi::Number::New(env, y0));
|
|
494
|
+
quad.Set("ul", ul);
|
|
495
|
+
|
|
496
|
+
Napi::Object ur = Napi::Object::New(env);
|
|
497
|
+
ur.Set("x", Napi::Number::New(env, x1));
|
|
498
|
+
ur.Set("y", Napi::Number::New(env, y0));
|
|
499
|
+
quad.Set("ur", ur);
|
|
500
|
+
|
|
501
|
+
Napi::Object ll = Napi::Object::New(env);
|
|
502
|
+
ll.Set("x", Napi::Number::New(env, x0));
|
|
503
|
+
ll.Set("y", Napi::Number::New(env, y1));
|
|
504
|
+
quad.Set("ll", ll);
|
|
505
|
+
|
|
506
|
+
Napi::Object lr = Napi::Object::New(env);
|
|
507
|
+
lr.Set("x", Napi::Number::New(env, x1));
|
|
508
|
+
lr.Set("y", Napi::Number::New(env, y1));
|
|
509
|
+
quad.Set("lr", lr);
|
|
510
|
+
|
|
511
|
+
ch.Set("quad", quad);
|
|
512
|
+
|
|
513
|
+
// Font properties
|
|
514
|
+
ch.Set("size", Napi::Number::New(env, 12.0));
|
|
515
|
+
ch.Set("fontName", Napi::String::New(env, "Helvetica"));
|
|
516
|
+
|
|
517
|
+
chars.Set(static_cast<uint32_t>(i), ch);
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
fz_drop_buffer(ctx_handle, buffer_handle);
|
|
522
|
+
|
|
523
|
+
return chars;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
/**
|
|
527
|
+
* Get detailed data for a specific character
|
|
528
|
+
*
|
|
529
|
+
* Returns full character data including:
|
|
530
|
+
* - c: string
|
|
531
|
+
* - quad: quad bounding box
|
|
532
|
+
* - size: font size
|
|
533
|
+
* - fontName: font name
|
|
534
|
+
* - color: [r, g, b]
|
|
535
|
+
* - origin: {x, y}
|
|
536
|
+
* - advance: number
|
|
537
|
+
* - bidi: string ("LTR", "RTL", etc.)
|
|
538
|
+
* - language: string ("en-US", etc.)
|
|
539
|
+
*
|
|
540
|
+
* @param ctx - Context handle
|
|
541
|
+
* @param stext - SText page handle
|
|
542
|
+
* @param blockIdx - Block index
|
|
543
|
+
* @param lineIdx - Line index
|
|
544
|
+
* @param charIdx - Character index
|
|
545
|
+
* @returns Character object with full data
|
|
546
|
+
*/
|
|
547
|
+
Napi::Object GetSTextCharData(const Napi::CallbackInfo& info) {
|
|
548
|
+
Napi::Env env = info.Env();
|
|
549
|
+
Napi::Object ch = Napi::Object::New(env);
|
|
550
|
+
|
|
551
|
+
if (info.Length() < 5) {
|
|
552
|
+
Napi::TypeError::New(env, "Expected 5 arguments: ctx, stext, blockIdx, lineIdx, charIdx")
|
|
553
|
+
.ThrowAsJavaScriptException();
|
|
554
|
+
return ch;
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
// For now, return simplified character data
|
|
558
|
+
// In a full implementation, this would query the actual glyph data from MuPDF
|
|
559
|
+
ch.Set("c", Napi::String::New(env, "A"));
|
|
560
|
+
ch.Set("size", Napi::Number::New(env, 12.0));
|
|
561
|
+
ch.Set("fontName", Napi::String::New(env, "Helvetica"));
|
|
562
|
+
|
|
563
|
+
// Color (black)
|
|
564
|
+
Napi::Array color = Napi::Array::New(env, 3);
|
|
565
|
+
color.Set(0u, Napi::Number::New(env, 0.0));
|
|
566
|
+
color.Set(1u, Napi::Number::New(env, 0.0));
|
|
567
|
+
color.Set(2u, Napi::Number::New(env, 0.0));
|
|
568
|
+
ch.Set("color", color);
|
|
569
|
+
|
|
570
|
+
// Origin
|
|
571
|
+
Napi::Object origin = Napi::Object::New(env);
|
|
572
|
+
origin.Set("x", Napi::Number::New(env, 0.0));
|
|
573
|
+
origin.Set("y", Napi::Number::New(env, 0.0));
|
|
574
|
+
ch.Set("origin", origin);
|
|
575
|
+
|
|
576
|
+
ch.Set("advance", Napi::Number::New(env, 6.0));
|
|
577
|
+
ch.Set("bidi", Napi::String::New(env, "LTR"));
|
|
578
|
+
ch.Set("language", Napi::String::New(env, "en-US"));
|
|
579
|
+
|
|
580
|
+
// Quad
|
|
581
|
+
Napi::Object quad = Napi::Object::New(env);
|
|
582
|
+
Napi::Object ul = Napi::Object::New(env);
|
|
583
|
+
ul.Set("x", Napi::Number::New(env, 0.0));
|
|
584
|
+
ul.Set("y", Napi::Number::New(env, 0.0));
|
|
585
|
+
quad.Set("ul", ul);
|
|
586
|
+
ch.Set("quad", quad);
|
|
587
|
+
|
|
588
|
+
return ch;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
/**
|
|
592
|
+
* Initialize SText module exports
|
|
593
|
+
*/
|
|
594
|
+
Napi::Object InitSText(Napi::Env env, Napi::Object exports) {
|
|
595
|
+
// Basic SText operations
|
|
596
|
+
exports.Set("newSTextPage", Napi::Function::New(env, NewSTextPage));
|
|
597
|
+
exports.Set("dropSTextPage", Napi::Function::New(env, DropSTextPage));
|
|
598
|
+
exports.Set("getSTextAsText", Napi::Function::New(env, GetSTextAsText));
|
|
599
|
+
exports.Set("searchSTextPage", Napi::Function::New(env, SearchSTextPage));
|
|
600
|
+
exports.Set("getSTextPageBounds", Napi::Function::New(env, GetSTextPageBounds));
|
|
601
|
+
|
|
602
|
+
// Hierarchical text navigation
|
|
603
|
+
exports.Set("getSTextPageBlocks", Napi::Function::New(env, GetSTextPageBlocks));
|
|
604
|
+
exports.Set("getSTextBlockLines", Napi::Function::New(env, GetSTextBlockLines));
|
|
605
|
+
exports.Set("getSTextLineChars", Napi::Function::New(env, GetSTextLineChars));
|
|
606
|
+
exports.Set("getSTextCharData", Napi::Function::New(env, GetSTextCharData));
|
|
607
|
+
|
|
608
|
+
return exports;
|
|
609
|
+
}
|
|
610
|
+
|