unhwp-wasm 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 iyulab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # @iyulab/unhwp
2
+
3
+ WebAssembly bindings for [unhwp](https://github.com/iyulab/unhwp) — HWP/HWPX Korean document extraction.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install @iyulab/unhwp
9
+ ```
10
+
11
+ ## Usage (ES Module / browser)
12
+
13
+ ```js
14
+ import init, { parse } from '@iyulab/unhwp';
15
+
16
+ await init();
17
+
18
+ const response = await fetch('document.hwp');
19
+ const data = new Uint8Array(await response.arrayBuffer());
20
+ const doc = parse(data);
21
+
22
+ console.log(doc.toMarkdown());
23
+ console.log(doc.toText());
24
+ console.log(doc.sectionCount(), doc.paragraphCount());
25
+ ```
26
+
27
+ ## API
28
+
29
+ ### `parse(data: Uint8Array): HwpDocument`
30
+
31
+ HWP 또는 HWPX 파일 바이트를 파싱합니다. 파싱 실패 시 오류를 던집니다.
32
+
33
+ ### `HwpDocument`
34
+
35
+ | Method | Returns | Description |
36
+ |--------|---------|-------------|
37
+ | `toMarkdown()` | `string` | Markdown 렌더링 |
38
+ | `toText()` | `string` | 평문 텍스트 |
39
+ | `toJson()` | `string` | 구조화된 JSON |
40
+ | `sectionCount()` | `number` | 섹션 수 |
41
+ | `paragraphCount()` | `number` | 단락 수 |
42
+
43
+ ### `ParseOptions`
44
+
45
+ ```js
46
+ import { parseWithOptions, ParseOptions } from '@iyulab/unhwp';
47
+
48
+ const opts = new ParseOptions().lenient().textOnly();
49
+ const doc = parseWithOptions(data, opts);
50
+ ```
51
+
52
+ | Method | Description |
53
+ |--------|-------------|
54
+ | `lenient()` | 잘못된 섹션을 건너뛰고 파싱 계속 |
55
+ | `textOnly()` | 텍스트만 추출 (이미지 제외, 빠른 처리) |
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "unhwp-wasm",
3
+ "type": "module",
4
+ "description": "HWP/HWPX extraction to Markdown, text, and JSON (WebAssembly)",
5
+ "version": "0.4.0",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/iyulab/unhwp"
10
+ },
11
+ "files": [
12
+ "unhwp_wasm_bg.wasm",
13
+ "unhwp_wasm.js",
14
+ "unhwp_wasm.d.ts"
15
+ ],
16
+ "main": "unhwp_wasm.js",
17
+ "types": "unhwp_wasm.d.ts",
18
+ "sideEffects": [
19
+ "./snippets/*"
20
+ ],
21
+ "keywords": [
22
+ "hwp",
23
+ "hwpx",
24
+ "korean",
25
+ "wasm",
26
+ "webassembly"
27
+ ]
28
+ }
@@ -0,0 +1,76 @@
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+
4
/**
 * Handle to a parsed HWP/HWPX document.
 *
 * The constructor is private: obtain an instance from `HwpDocument.fromBytes`,
 * `parse` or `parseWithOptions`.
 */
export class HwpDocument {
  private constructor();

  /** Parses raw document bytes into a new document handle. */
  static fromBytes(data: Uint8Array): HwpDocument;

  /** Renders the document as Markdown. */
  toMarkdown(): string;
  /** Renders the document as plain text. */
  toText(): string;
  /** Renders the document as a structured JSON string. */
  toJson(): string;

  /** Number of sections in the document. */
  sectionCount(): number;
  /** Number of paragraphs in the document. */
  paragraphCount(): number;

  /** Releases the WebAssembly-side object behind this handle. */
  free(): void;
  /** Same operation as `free()`, exposed under `Symbol.dispose`. */
  [Symbol.dispose](): void;
}
15
+
16
/**
 * Builder for parser options, passed to `parseWithOptions`.
 *
 * Each builder method returns a new `ParseOptions` handle and invalidates the
 * handle it was called on, so always continue with the returned value
 * (for example `new ParseOptions().lenient().textOnly()`).
 */
export class ParseOptions {
  /** Creates a fresh set of options. */
  constructor();

  /** Skip malformed sections and keep parsing. */
  lenient(): ParseOptions;
  /** Extract text only (images are left out). */
  textOnly(): ParseOptions;

  /** Releases the WebAssembly-side object behind this handle. */
  free(): void;
  /** Same operation as `free()`, exposed under `Symbol.dispose`. */
  [Symbol.dispose](): void;
}
23
+
24
/**
 * Parses HWP or HWPX file bytes.
 *
 * @param data - Raw bytes of the document.
 * @returns A handle to the parsed document.
 * @throws When the bytes cannot be parsed.
 */
export function parse(data: Uint8Array): HwpDocument;

/**
 * Parses HWP or HWPX file bytes with the given options. Options are applied
 * where supported.
 *
 * Note: full option integration requires `parse_reader_with_options` in the
 * unhwp core.
 *
 * @param data - Raw bytes of the document.
 * @param opts - Options built with `ParseOptions`.
 * @returns A handle to the parsed document.
 * @throws When the bytes cannot be parsed.
 */
export function parseWithOptions(data: Uint8Array, opts: ParseOptions): HwpDocument;
31
+
32
/** Anything the asynchronous initializer accepts as the source of the wasm module. */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;

/**
 * Raw exports of the instantiated WebAssembly module, as returned by the
 * initializers. These are low-level entry points used by the JavaScript glue.
 */
export interface InitOutput {
  readonly memory: WebAssembly.Memory;

  // HwpDocument
  readonly __wbg_hwpdocument_free: (a: number, b: number) => void;
  readonly hwpdocument_fromBytes: (a: number, b: number) => [number, number, number];
  readonly hwpdocument_toMarkdown: (a: number) => [number, number, number, number];
  readonly hwpdocument_toJson: (a: number) => [number, number, number, number];
  readonly hwpdocument_toText: (a: number) => [number, number];
  readonly hwpdocument_sectionCount: (a: number) => number;
  readonly hwpdocument_paragraphCount: (a: number) => number;

  // ParseOptions
  readonly __wbg_parseoptions_free: (a: number, b: number) => void;
  readonly parseoptions_new: () => number;
  readonly parseoptions_lenient: (a: number) => number;
  readonly parseoptions_textOnly: (a: number) => number;

  // Free functions
  readonly parse: (a: number, b: number) => [number, number, number];
  readonly parseWithOptions: (a: number, b: number, c: number) => [number, number, number];

  // Runtime support
  readonly __wbindgen_externrefs: WebAssembly.Table;
  readonly __externref_table_dealloc: (a: number) => void;
  readonly __wbindgen_malloc: (a: number, b: number) => number;
  readonly __wbindgen_free: (a: number, b: number, c: number) => void;
  readonly __wbindgen_start: () => void;
}
55
+
56
/** Module sources that can be instantiated synchronously. */
export type SyncInitInput = BufferSource | WebAssembly.Module;

/**
 * Synchronously instantiates the given `module`, which is either the wasm
 * bytes or an already compiled `WebAssembly.Module`.
 *
 * @param module - Preferably `{ module: SyncInitInput }`; passing a bare
 *   `SyncInitInput` is deprecated.
 * @returns The exports of the instantiated module.
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
67
+
68
/**
 * Asynchronously instantiates the wasm module.
 *
 * When `module_or_path` is a `RequestInfo` or `URL` a request is made for it;
 * any other input is handed to `WebAssembly.instantiate` directly.
 *
 * @param module_or_path - Preferably
 *   `{ module_or_path: InitInput | Promise<InitInput> }`; passing a bare
 *   `InitInput` is deprecated.
 * @returns A promise for the exports of the instantiated module.
 */
export default function __wbg_init(
  module_or_path?:
    | { module_or_path: InitInput | Promise<InitInput> }
    | InitInput
    | Promise<InitInput>,
): Promise<InitOutput>;
package/unhwp_wasm.js ADDED
@@ -0,0 +1,352 @@
1
+ let wasm;
2
+
3
/**
 * Guards that `instance` was created from `klass`.
 *
 * @param {*} instance Value to check.
 * @param {Function} klass Expected constructor.
 * @throws {Error} When `instance` is not an instance of `klass`.
 */
function _assertClass(instance, klass) {
    const matches = instance instanceof klass;
    if (matches) {
        return;
    }
    throw new Error(`expected instance of ${klass.name}`);
}
8
+
9
+ function getStringFromWasm0(ptr, len) {
10
+ ptr = ptr >>> 0;
11
+ return decodeText(ptr, len);
12
+ }
13
+
14
+ let cachedUint8ArrayMemory0 = null;
15
+ function getUint8ArrayMemory0() {
16
+ if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
17
+ cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
18
+ }
19
+ return cachedUint8ArrayMemory0;
20
+ }
21
+
22
+ function passArray8ToWasm0(arg, malloc) {
23
+ const ptr = malloc(arg.length * 1, 1) >>> 0;
24
+ getUint8ArrayMemory0().set(arg, ptr / 1);
25
+ WASM_VECTOR_LEN = arg.length;
26
+ return ptr;
27
+ }
28
+
29
+ function takeFromExternrefTable0(idx) {
30
+ const value = wasm.__wbindgen_externrefs.get(idx);
31
+ wasm.__externref_table_dealloc(idx);
32
+ return value;
33
+ }
34
+
35
// Shared strict UTF-8 decoder (throws on malformed input, keeps a BOM).
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();

// Once this many bytes have gone through one decoder, a new decoder is created.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
// Bytes decoded by the current decoder instance.
let numBytesDecoded = 0;

/**
 * Decodes `len` bytes of wasm memory starting at `ptr` as UTF-8.
 *
 * @param {number} ptr Start offset into wasm memory.
 * @param {number} len Number of bytes to decode.
 * @returns {string}
 */
function decodeText(ptr, len) {
    const runningTotal = numBytesDecoded + len;
    numBytesDecoded = runningTotal;
    if (runningTotal >= MAX_SAFARI_DECODE_BYTES) {
        // Start over with a fresh decoder and count only the current request.
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        numBytesDecoded = len;
    }
    const bytes = getUint8ArrayMemory0().subarray(ptr, ptr + len);
    return cachedTextDecoder.decode(bytes);
}
48
+
49
+ let WASM_VECTOR_LEN = 0;
50
+
51
/**
 * Creates the finalization hook for one wrapper class.
 *
 * Where `FinalizationRegistry` is unavailable a no-op stand-in with the same
 * `register` / `unregister` methods is returned.
 *
 * @param {(ptr: number) => void} release Called with the unsigned pointer of a collected wrapper.
 */
function makeFinalization(release) {
    if (typeof FinalizationRegistry === 'undefined') {
        return { register: () => {}, unregister: () => {} };
    }
    return new FinalizationRegistry(ptr => release(ptr >>> 0));
}

const HwpDocumentFinalization = makeFinalization(ptr => wasm.__wbg_hwpdocument_free(ptr, 1));

const ParseOptionsFinalization = makeFinalization(ptr => wasm.__wbg_parseoptions_free(ptr, 1));
58
+
59
+ export class HwpDocument {
60
+ static __wrap(ptr) {
61
+ ptr = ptr >>> 0;
62
+ const obj = Object.create(HwpDocument.prototype);
63
+ obj.__wbg_ptr = ptr;
64
+ HwpDocumentFinalization.register(obj, obj.__wbg_ptr, obj);
65
+ return obj;
66
+ }
67
+ __destroy_into_raw() {
68
+ const ptr = this.__wbg_ptr;
69
+ this.__wbg_ptr = 0;
70
+ HwpDocumentFinalization.unregister(this);
71
+ return ptr;
72
+ }
73
+ free() {
74
+ const ptr = this.__destroy_into_raw();
75
+ wasm.__wbg_hwpdocument_free(ptr, 0);
76
+ }
77
+ /**
78
+ * @param {Uint8Array} data
79
+ * @returns {HwpDocument}
80
+ */
81
+ static fromBytes(data) {
82
+ const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
83
+ const len0 = WASM_VECTOR_LEN;
84
+ const ret = wasm.hwpdocument_fromBytes(ptr0, len0);
85
+ if (ret[2]) {
86
+ throw takeFromExternrefTable0(ret[1]);
87
+ }
88
+ return HwpDocument.__wrap(ret[0]);
89
+ }
90
+ /**
91
+ * @returns {string}
92
+ */
93
+ toMarkdown() {
94
+ let deferred2_0;
95
+ let deferred2_1;
96
+ try {
97
+ const ret = wasm.hwpdocument_toMarkdown(this.__wbg_ptr);
98
+ var ptr1 = ret[0];
99
+ var len1 = ret[1];
100
+ if (ret[3]) {
101
+ ptr1 = 0; len1 = 0;
102
+ throw takeFromExternrefTable0(ret[2]);
103
+ }
104
+ deferred2_0 = ptr1;
105
+ deferred2_1 = len1;
106
+ return getStringFromWasm0(ptr1, len1);
107
+ } finally {
108
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
109
+ }
110
+ }
111
+ /**
112
+ * @returns {number}
113
+ */
114
+ sectionCount() {
115
+ const ret = wasm.hwpdocument_sectionCount(this.__wbg_ptr);
116
+ return ret >>> 0;
117
+ }
118
+ /**
119
+ * @returns {number}
120
+ */
121
+ paragraphCount() {
122
+ const ret = wasm.hwpdocument_paragraphCount(this.__wbg_ptr);
123
+ return ret >>> 0;
124
+ }
125
+ /**
126
+ * @returns {string}
127
+ */
128
+ toJson() {
129
+ let deferred2_0;
130
+ let deferred2_1;
131
+ try {
132
+ const ret = wasm.hwpdocument_toJson(this.__wbg_ptr);
133
+ var ptr1 = ret[0];
134
+ var len1 = ret[1];
135
+ if (ret[3]) {
136
+ ptr1 = 0; len1 = 0;
137
+ throw takeFromExternrefTable0(ret[2]);
138
+ }
139
+ deferred2_0 = ptr1;
140
+ deferred2_1 = len1;
141
+ return getStringFromWasm0(ptr1, len1);
142
+ } finally {
143
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
144
+ }
145
+ }
146
+ /**
147
+ * @returns {string}
148
+ */
149
+ toText() {
150
+ let deferred1_0;
151
+ let deferred1_1;
152
+ try {
153
+ const ret = wasm.hwpdocument_toText(this.__wbg_ptr);
154
+ deferred1_0 = ret[0];
155
+ deferred1_1 = ret[1];
156
+ return getStringFromWasm0(ret[0], ret[1]);
157
+ } finally {
158
+ wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
159
+ }
160
+ }
161
+ }
162
+ if (Symbol.dispose) HwpDocument.prototype[Symbol.dispose] = HwpDocument.prototype.free;
163
+
164
+ export class ParseOptions {
165
+ static __wrap(ptr) {
166
+ ptr = ptr >>> 0;
167
+ const obj = Object.create(ParseOptions.prototype);
168
+ obj.__wbg_ptr = ptr;
169
+ ParseOptionsFinalization.register(obj, obj.__wbg_ptr, obj);
170
+ return obj;
171
+ }
172
+ __destroy_into_raw() {
173
+ const ptr = this.__wbg_ptr;
174
+ this.__wbg_ptr = 0;
175
+ ParseOptionsFinalization.unregister(this);
176
+ return ptr;
177
+ }
178
+ free() {
179
+ const ptr = this.__destroy_into_raw();
180
+ wasm.__wbg_parseoptions_free(ptr, 0);
181
+ }
182
+ constructor() {
183
+ const ret = wasm.parseoptions_new();
184
+ this.__wbg_ptr = ret >>> 0;
185
+ ParseOptionsFinalization.register(this, this.__wbg_ptr, this);
186
+ return this;
187
+ }
188
+ /**
189
+ * @returns {ParseOptions}
190
+ */
191
+ lenient() {
192
+ const ptr = this.__destroy_into_raw();
193
+ const ret = wasm.parseoptions_lenient(ptr);
194
+ return ParseOptions.__wrap(ret);
195
+ }
196
+ /**
197
+ * @returns {ParseOptions}
198
+ */
199
+ textOnly() {
200
+ const ptr = this.__destroy_into_raw();
201
+ const ret = wasm.parseoptions_textOnly(ptr);
202
+ return ParseOptions.__wrap(ret);
203
+ }
204
+ }
205
+ if (Symbol.dispose) ParseOptions.prototype[Symbol.dispose] = ParseOptions.prototype.free;
206
+
207
+ /**
208
+ * @param {Uint8Array} data
209
+ * @returns {HwpDocument}
210
+ */
211
+ export function parse(data) {
212
+ const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
213
+ const len0 = WASM_VECTOR_LEN;
214
+ const ret = wasm.parse(ptr0, len0);
215
+ if (ret[2]) {
216
+ throw takeFromExternrefTable0(ret[1]);
217
+ }
218
+ return HwpDocument.__wrap(ret[0]);
219
+ }
220
+
221
+ /**
222
+ * Parse with options — options are applied where supported.
223
+ * Note: full option integration requires parse_reader_with_options in unhwp core.
224
+ * @param {Uint8Array} data
225
+ * @param {ParseOptions} opts
226
+ * @returns {HwpDocument}
227
+ */
228
+ export function parseWithOptions(data, opts) {
229
+ const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
230
+ const len0 = WASM_VECTOR_LEN;
231
+ _assertClass(opts, ParseOptions);
232
+ const ret = wasm.parseWithOptions(ptr0, len0, opts.__wbg_ptr);
233
+ if (ret[2]) {
234
+ throw takeFromExternrefTable0(ret[1]);
235
+ }
236
+ return HwpDocument.__wrap(ret[0]);
237
+ }
238
+
239
+ const EXPECTED_RESPONSE_TYPES = new Set(['basic', 'cors', 'default']);
240
+
241
+ async function __wbg_load(module, imports) {
242
+ if (typeof Response === 'function' && module instanceof Response) {
243
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
244
+ try {
245
+ return await WebAssembly.instantiateStreaming(module, imports);
246
+ } catch (e) {
247
+ const validResponse = module.ok && EXPECTED_RESPONSE_TYPES.has(module.type);
248
+
249
+ if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') {
250
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
251
+
252
+ } else {
253
+ throw e;
254
+ }
255
+ }
256
+ }
257
+
258
+ const bytes = await module.arrayBuffer();
259
+ return await WebAssembly.instantiate(bytes, imports);
260
+ } else {
261
+ const instance = await WebAssembly.instantiate(module, imports);
262
+
263
+ if (instance instanceof WebAssembly.Instance) {
264
+ return { instance, module };
265
+ } else {
266
+ return instance;
267
+ }
268
+ }
269
+ }
270
+
271
+ function __wbg_get_imports() {
272
+ const imports = {};
273
+ imports.wbg = {};
274
+ imports.wbg.__wbg___wbindgen_throw_dd24417ed36fc46e = function(arg0, arg1) {
275
+ throw new Error(getStringFromWasm0(arg0, arg1));
276
+ };
277
+ imports.wbg.__wbindgen_cast_2241b6af4c4b2941 = function(arg0, arg1) {
278
+ // Cast intrinsic for `Ref(String) -> Externref`.
279
+ const ret = getStringFromWasm0(arg0, arg1);
280
+ return ret;
281
+ };
282
+ imports.wbg.__wbindgen_init_externref_table = function() {
283
+ const table = wasm.__wbindgen_externrefs;
284
+ const offset = table.grow(4);
285
+ table.set(0, undefined);
286
+ table.set(offset + 0, undefined);
287
+ table.set(offset + 1, null);
288
+ table.set(offset + 2, true);
289
+ table.set(offset + 3, false);
290
+ };
291
+
292
+ return imports;
293
+ }
294
+
295
+ function __wbg_finalize_init(instance, module) {
296
+ wasm = instance.exports;
297
+ __wbg_init.__wbindgen_wasm_module = module;
298
+ cachedUint8ArrayMemory0 = null;
299
+
300
+
301
+ wasm.__wbindgen_start();
302
+ return wasm;
303
+ }
304
+
305
+ function initSync(module) {
306
+ if (wasm !== undefined) return wasm;
307
+
308
+
309
+ if (typeof module !== 'undefined') {
310
+ if (Object.getPrototypeOf(module) === Object.prototype) {
311
+ ({module} = module)
312
+ } else {
313
+ console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
314
+ }
315
+ }
316
+
317
+ const imports = __wbg_get_imports();
318
+ if (!(module instanceof WebAssembly.Module)) {
319
+ module = new WebAssembly.Module(module);
320
+ }
321
+ const instance = new WebAssembly.Instance(module, imports);
322
+ return __wbg_finalize_init(instance, module);
323
+ }
324
+
325
/**
 * Asynchronously loads and instantiates the wasm module.
 *
 * Returns the existing exports right away when the module is already
 * initialised. A plain object argument is read as `{ module_or_path }`; any
 * other defined argument is used directly and triggers a deprecation warning.
 * With no source given, `unhwp_wasm_bg.wasm` next to this file is used.
 * Strings, `Request`s and `URL`s are fetched; everything else is passed on to
 * the loader as is.
 *
 * @param {*} module_or_path Options object, or (deprecated) the source itself.
 * @returns {Promise<object>} The instance's exports.
 */
async function __wbg_init(module_or_path) {
    if (wasm !== undefined) {
        return wasm;
    }

    if (typeof module_or_path !== 'undefined') {
        const isOptionsObject = Object.getPrototypeOf(module_or_path) === Object.prototype;
        if (isOptionsObject) {
            module_or_path = module_or_path.module_or_path;
        } else {
            console.warn('using deprecated parameters for the initialization function; pass a single object instead')
        }
    }

    if (typeof module_or_path === 'undefined') {
        module_or_path = new URL('unhwp_wasm_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    const needsFetch =
        typeof module_or_path === 'string' ||
        (typeof Request === 'function' && module_or_path instanceof Request) ||
        (typeof URL === 'function' && module_or_path instanceof URL);
    if (needsFetch) {
        module_or_path = fetch(module_or_path);
    }

    const loaded = await __wbg_load(await module_or_path, imports);

    return __wbg_finalize_init(loaded.instance, loaded.module);
}
350
+
351
+ export { initSync };
352
+ export default __wbg_init;
Binary file