hwp2md 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,687 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from "commander";
3
+ import { readFile, writeFile } from "node:fs/promises";
4
+ import * as CFB from "cfb";
5
+ import { inflateRaw } from "pako";
6
+ import "fflate";
7
+ import "fast-xml-parser";
8
+
9
+ //#region src/utils/compression.ts
10
/**
 * Inflate a raw DEFLATE stream (no zlib header or trailer).
 * Equivalent to Python: zlib.decompress(data, -15)
 *
 * HWP files store their compressed streams as raw deflate data, which is
 * why the zlib-wrapped inflate variant cannot be used here.
 *
 * @param data - Compressed data
 * @returns Decompressed data
 * @throws Error prefixed with "Failed to decompress data:"; the original
 *         failure is attached as `cause`
 */
function decompressRaw(data) {
  try {
    return inflateRaw(data);
  } catch (error) {
    // The thrown value is not guaranteed to be an Error instance
    // (NOTE(review): pako appears to throw plain message strings - confirm
    // against the pinned version), so never rely on `.message` existing.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to decompress data: ${reason}`, { cause: error });
  }
}
27
+
28
+ //#endregion
29
+ //#region src/parser.ts
30
+ /**
31
+ * HWP 5.0 File Parser
32
+ * Parses OLE Compound File format HWP files
33
+ */
34
/**
 * HWP File Parser
 * Reads HWP 5.0 documents, which are stored in the OLE Compound File
 * format, and exposes the parsed FileHeader plus the (optionally
 * decompressed) content of individual streams.
 */
class HWPFile {
  /** Parsed compound-file container; null until open() succeeds. */
  cfb = null;
  /** Result of parseFileHeader(); null while the file is not open. */
  _fileHeader = null;
  /** Mirrors the "compressed" bit of the FileHeader properties. */
  _isCompressed = false;

  /**
   * Create HWPFile from raw data
   * @param data - Raw HWP file data
   */
  constructor(data) {
    this.data = data;
  }

  /**
   * Create HWPFile from file path (Node.js only)
   * @param path - Path to HWP file
   */
  static async fromFile(path) {
    // Imported lazily so the class itself can load where node:fs is absent.
    const fs = await import("node:fs/promises");
    const contents = await fs.readFile(path);
    return new HWPFile(new Uint8Array(contents));
  }

  /**
   * Create HWPFile from ArrayBuffer
   * @param data - ArrayBuffer data
   */
  static fromArrayBuffer(data) {
    return new HWPFile(new Uint8Array(data));
  }

  /**
   * Create HWPFile from Uint8Array
   * @param data - Uint8Array data
   */
  static fromUint8Array(data) {
    return new HWPFile(data);
  }

  /**
   * Normalize stream content to a Uint8Array.
   * The DataView/TextDecoder based parsing needs a typed array, but the
   * container is read from a plain number array when Buffer is unavailable,
   * so content may not be one. Buffer instances pass through untouched.
   * @param content - Entry content as stored in the container
   */
  static #toBytes(content) {
    return content instanceof Uint8Array ? content : Uint8Array.from(content ?? []);
  }

  /**
   * Open and parse HWP file
   * @throws Error for HWP 3.0 input, an unreadable container, or an
   *         invalid FileHeader; the instance is left closed on failure
   */
  open() {
    const bytes = this.data instanceof Uint8Array ? this.data : new Uint8Array(this.data);
    if (bytes.length >= 30) {
      const magic = new TextDecoder("utf-8").decode(bytes.slice(0, 30)).replace(/\0+$/, "");
      if (magic.startsWith("HWP Document File V3")) throw new Error("HWP 3.0 format (HWP 97, 2002, etc.) is not supported. Please use HWP 5.0 or later format. You can convert HWP 3.0 files to HWP 5.0 format using Hancom Office.");
    }
    let container;
    try {
      container = typeof Buffer !== "undefined"
        ? CFB.read(Buffer.from(bytes), { type: "buffer" })
        : CFB.read(Array.from(bytes), { type: "array" });
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to parse HWP file: ${reason}`, { cause: error });
    }
    this.cfb = container;
    try {
      this._fileHeader = this.parseFileHeader();
    } catch (error) {
      // Do not leave a half-open instance behind when the header is invalid.
      this.close();
      throw error;
    }
    this._isCompressed = this._fileHeader.isCompressed;
  }

  /**
   * Close HWP file and release resources
   */
  close() {
    this.cfb = null;
    this._fileHeader = null;
    // Reset as well so a closed instance never reports stale state.
    this._isCompressed = false;
  }

  /**
   * Get file header information
   */
  get fileHeader() {
    return this._fileHeader;
  }

  /**
   * Check if file is compressed
   */
  get isCompressed() {
    return this._isCompressed;
  }

  /**
   * Parse FileHeader stream (256 bytes fixed)
   * @returns signature, dotted version string, compression/encryption
   *          flags and the raw property bits
   * @throws Error when the file is not open, the stream is missing, has
   *         the wrong size, or carries an unexpected signature
   */
  parseFileHeader() {
    if (!this.cfb) throw new Error("HWP file not opened");
    const entry = CFB.find(this.cfb, "FileHeader");
    if (!entry) throw new Error("FileHeader not found in HWP file");
    const data = HWPFile.#toBytes(entry.content);
    if (data.length !== 256) throw new Error(`Invalid FileHeader size: ${data.length} (expected 256)`);
    const signature = new TextDecoder("utf-8").decode(data.slice(0, 32)).replace(/\0+$/, "");
    if (!signature.startsWith("HWP Document File")) throw new Error(`Invalid HWP signature: ${signature}`);
    const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
    // Little-endian dword at offset 32: one byte per version component,
    // most significant byte first (major.minor.patch.rev).
    const versionRaw = view.getUint32(32, true);
    const version = [24, 16, 8, 0].map((shift) => versionRaw >> shift & 255).join(".");
    const properties = view.getUint32(36, true);
    return {
      signature,
      version,
      isCompressed: Boolean(properties & 1),
      isEncrypted: Boolean(properties & 2),
      rawProperties: properties
    };
  }

  /**
   * Read and decompress stream
   * @param streamPath - Stream path (e.g., 'DocInfo', 'BodyText/Section0')
   * @returns Stream bytes (inflated when the file is flagged as compressed),
   *          or null if the file is not open or the stream doesn't exist
   * @throws Error when decompression fails
   */
  readStream(streamPath) {
    if (!this.cfb) return null;
    const entry = CFB.find(this.cfb, streamPath);
    if (!entry || entry.content == null) return null;
    const raw = HWPFile.#toBytes(entry.content);
    if (!this._isCompressed) return raw;
    try {
      return decompressRaw(raw);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to decompress ${streamPath}: ${reason}`, { cause: error });
    }
  }

  /**
   * List all streams in HWP file
   * @returns One array of path components per stream,
   *          e.g. ['BodyText', 'Section0']
   */
  listStreams() {
    if (!this.cfb) return [];
    const { FileIndex, FullPaths } = this.cfb;
    // Entry names carry no directory information; the hierarchy lives in the
    // parallel FullPaths array, whose values start with the root entry's path.
    const rootIndex = FileIndex.findIndex((entry) => entry.type === 5);
    const rootPrefix = rootIndex >= 0 && typeof FullPaths?.[rootIndex] === "string" ? FullPaths[rootIndex] : "";
    const streams = [];
    FileIndex.forEach((entry, index) => {
      if (entry.type !== 2) return;
      const fullPath = FullPaths?.[index];
      let location = entry.name;
      if (typeof fullPath === "string") {
        location = fullPath.startsWith(rootPrefix) ? fullPath.slice(rootPrefix.length) : fullPath;
      }
      const parts = location.split("/").filter(Boolean);
      if (parts.length > 0) streams.push(parts);
    });
    return streams;
  }

  /**
   * Get file information
   * @returns Summary object, or an empty object when the file is not open
   */
  getFileInfo() {
    if (!this._fileHeader) return {};
    const { signature, version, isEncrypted } = this._fileHeader;
    return {
      signature,
      version,
      compressed: this._isCompressed,
      encrypted: isEncrypted,
      streams: this.listStreams()
    };
  }

  /**
   * Get number of sections in BodyText
   * Counts consecutive SectionN streams starting at 0.
   */
  getSectionCount() {
    if (!this.cfb) return 0;
    let count = 0;
    while (CFB.find(this.cfb, `BodyText/Section${count}`) || CFB.find(this.cfb, `Section${count}`)) count++;
    return count;
  }

  /**
   * Read section data
   * @param sectionIndex - Section index (0-based)
   * @returns Decompressed section data, or null when the section is missing
   */
  readSection(sectionIndex) {
    return this.readStream(`BodyText/Section${sectionIndex}`) || this.readStream(`Section${sectionIndex}`);
  }
}
204
+
205
+ //#endregion
206
+ //#region src/record.ts
207
/**
 * Record tag IDs, all expressed as offsets from HWPTAG_BEGIN.
 * Only PARA_HEADER, PARA_TEXT, CTRL_HEADER, LIST_HEADER and TABLE are
 * referenced by the parsers in this file; the others are declared but unused.
 */
const HWPTAG_BEGIN = 0x10;

// Paragraph-level records.
const HWPTAG_PARA_HEADER = HWPTAG_BEGIN + 50;
const HWPTAG_PARA_TEXT = HWPTAG_BEGIN + 51;
const HWPTAG_PARA_CHAR_SHAPE = HWPTAG_BEGIN + 52;
const HWPTAG_PARA_LINE_SEG = HWPTAG_BEGIN + 53;
const HWPTAG_PARA_RANGE_TAG = HWPTAG_BEGIN + 54;

// Control and list records.
const HWPTAG_CTRL_HEADER = HWPTAG_BEGIN + 55;
const HWPTAG_LIST_HEADER = HWPTAG_BEGIN + 56;

// Page-related records.
const HWPTAG_PAGE_DEF = HWPTAG_BEGIN + 57;
const HWPTAG_FOOTNOTE_SHAPE = HWPTAG_BEGIN + 58;
const HWPTAG_PAGE_BORDER_FILL = HWPTAG_BEGIN + 59;

// Shape and table records.
const HWPTAG_SHAPE_COMPONENT = HWPTAG_BEGIN + 60;
const HWPTAG_TABLE = HWPTAG_BEGIN + 61;
const HWPTAG_SHAPE_COMPONENT_LINE = HWPTAG_BEGIN + 62;

const HWPTAG_CTRL_DATA = HWPTAG_BEGIN + 71;
222
/**
 * HWP Record Reader
 * Reads binary records sequentially from HWP stream data.
 *
 * Each record starts with a little-endian 32-bit header:
 * bits 0-9 tag ID, bits 10-19 nesting level, bits 20-31 payload size.
 * A size field of 4095 means the real size follows as another 32-bit value.
 */
class RecordReader {
  data;
  offset = 0;

  /**
   * @param data - Stream bytes (Uint8Array or Buffer)
   */
  constructor(data) {
    this.data = data;
  }

  /**
   * Check if there are more records to read
   */
  hasMore() {
    return this.offset < this.data.length;
  }

  /**
   * Decode the record header at the current offset without moving it.
   * @returns tagId, level, size and the offset of the payload, or null
   *          when the header itself is cut off
   */
  #decodeHeader() {
    const total = this.data.length;
    if (this.offset + 4 > total) return null;
    const view = new DataView(this.data.buffer, this.data.byteOffset, this.data.byteLength);
    const word = view.getUint32(this.offset, true);
    let size = word >>> 20 & 4095;
    let dataOffset = this.offset + 4;
    if (size === 4095) {
      if (dataOffset + 4 > total) return null;
      size = view.getUint32(dataOffset, true);
      dataOffset += 4;
    }
    return {
      tagId: word & 1023,
      level: word >>> 10 & 1023,
      size,
      dataOffset
    };
  }

  /**
   * Read next record
   * @returns Next record, or null if no complete record remains. After a
   *          null result the reader is exhausted (hasMore() is false).
   */
  readRecord() {
    const header = this.#decodeHeader();
    if (!header || header.dataOffset + header.size > this.data.length) {
      // A truncated tail can never become readable. Consume it, otherwise
      // every `while (hasMore())` loop driving this reader spins forever.
      this.offset = this.data.length;
      return null;
    }
    const { tagId, level, size, dataOffset } = header;
    const end = dataOffset + size;
    const recordData = this.data.slice(dataOffset, end);
    this.offset = end;
    return {
      tagId,
      level,
      data: recordData,
      size
    };
  }

  /**
   * Peek at next record header without consuming it
   * @returns Header info with tagId, level, size, or null when fewer bytes
   *          than a header remain. The payload is not checked for completeness.
   */
  peekRecordHeader() {
    const header = this.#decodeHeader();
    if (!header) return null;
    const { tagId, level, size } = header;
    return {
      tagId,
      level,
      size
    };
  }

  /**
   * Read all records (considering hierarchy)
   * @param parentLevel - Stop when reaching this level or below
   * @returns All records read before the stop condition or end of data
   */
  readAllRecords(parentLevel) {
    const records = [];
    while (this.hasMore()) {
      const header = this.peekRecordHeader();
      if (!header) break;
      if (parentLevel !== void 0 && header.level <= parentLevel) break;
      const record = this.readRecord();
      if (!record) break;
      records.push(record);
    }
    return records;
  }

  /**
   * Get current position
   */
  get position() {
    return this.offset;
  }

  /**
   * Get remaining bytes
   */
  get remaining() {
    return this.data.length - this.offset;
  }
}
314
+
315
+ //#endregion
316
+ //#region src/table.ts
317
+ /**
318
+ * HWP Table Parser
319
+ * Parses table records and converts to Markdown
320
+ */
321
/**
 * Decode the leading fields of a TABLE record.
 * Layout read here (little-endian): uint32 property bits at offset 0,
 * uint16 row count at offset 4, uint16 column count at offset 6.
 * @param data - TABLE record data
 * @returns Table properties with properties, rows, cols
 * @throws Error when the record holds fewer than 8 bytes
 */
function parseTableProperties(data) {
  const { length } = data;
  if (length < 8) throw new Error(`Invalid TABLE record size: ${length}`);
  const fields = new DataView(data.buffer, data.byteOffset, data.byteLength);
  const properties = fields.getUint32(0, true);
  const rows = fields.getUint16(4, true);
  const cols = fields.getUint16(6, true);
  return { properties, rows, cols };
}
335
/**
 * Work out where a cell sits from its LIST_HEADER record.
 * Records of at least 16 bytes carry uint16 col/row/colspan/rowspan at
 * offsets 8/10/12/14; shorter ones fall back to row-major placement.
 * @param listData - LIST_HEADER record data
 * @param cellIndex - Number of cells parsed before this one
 * @param cols - Column count of the table
 */
function readCellPosition(listData, cellIndex, cols) {
  if (listData.length < 16) {
    return {
      row: Math.floor(cellIndex / cols),
      col: cellIndex % cols,
      rowspan: 1,
      colspan: 1
    };
  }
  const view = new DataView(listData.buffer, listData.byteOffset, listData.byteLength);
  return {
    row: view.getUint16(10, true),
    col: view.getUint16(8, true),
    // A span of 0 is treated as "not merged".
    rowspan: view.getUint16(14, true) || 1,
    colspan: view.getUint16(12, true) || 1
  };
}

/**
 * Collect the trimmed, non-empty paragraph texts of one cell.
 * Stops at the next LIST_HEADER (next cell), at a record below level 2
 * (end of the table), or when the reader cannot deliver a record.
 * @param reader - Record reader positioned just after the cell's LIST_HEADER
 */
function readCellTextParts(reader) {
  const parts = [];
  while (reader.hasMore()) {
    const header = reader.peekRecordHeader();
    if (!header || header.tagId === HWPTAG_LIST_HEADER || header.level < 2) break;
    if (header.tagId !== HWPTAG_PARA_HEADER) {
      // Skip unrelated records; a null read means truncated data, and
      // continuing would loop forever on the same record.
      if (!reader.readRecord()) break;
      continue;
    }
    const paraRecord = reader.readRecord();
    if (!paraRecord) break;
    if (paraRecord.data.length < 4) continue;
    const paraView = new DataView(paraRecord.data.buffer, paraRecord.data.byteOffset, paraRecord.data.byteLength);
    const nchars = paraView.getUint32(0, true) & 2147483647;
    if (nchars === 0 || !reader.hasMore()) continue;
    const next = reader.peekRecordHeader();
    if (!next || next.tagId !== HWPTAG_PARA_TEXT) continue;
    const textRecord = reader.readRecord();
    if (!textRecord) break;
    const text = processControlChars(parseParaText(textRecord.data, nchars)).text.trim();
    if (text) parts.push(text);
  }
  return parts;
}

/**
 * Parse table from TABLE record and subsequent records
 * @param tableRecordData - TABLE record data
 * @param reader - Record reader for reading cell data
 * @param lineBreakStyle - How to handle line breaks in cells: "br" joins
 *        with "<br>", anything else joins with a space
 * @returns Parsed table with rows, cols and the cells found (at most rows * cols)
 * @throws Error when the TABLE record is too short
 */
function parseTable(tableRecordData, reader, lineBreakStyle = "space") {
  const { rows, cols } = parseTableProperties(tableRecordData);
  const cellLimit = rows * cols;
  const separator = lineBreakStyle === "br" ? "<br>" : " ";
  const cells = [];
  while (reader.hasMore() && cells.length < cellLimit) {
    const header = reader.peekRecordHeader();
    if (!header || header.level < 2) break;
    if (header.tagId !== HWPTAG_LIST_HEADER) {
      // Same guard as in readCellTextParts: never spin on an unreadable record.
      if (!reader.readRecord()) break;
      continue;
    }
    const listRecord = reader.readRecord();
    if (!listRecord) break;
    const position = readCellPosition(listRecord.data, cells.length, cols);
    const text = readCellTextParts(reader).join(separator).replace(/\n/g, separator);
    cells.push({ ...position, text });
  }
  return {
    rows,
    cols,
    cells
  };
}
419
/**
 * Convert table to Markdown
 *
 * The output starts with an empty header row and a separator row, followed
 * by one line per table row. Cells outside the rows x cols grid are ignored.
 *
 * @param table - Table object with rows, cols and cells
 * @param mergeStrategy - 'repeat' (default) copies a merged cell's text into
 *        every grid position it spans; any other value fills only its
 *        top-left position
 * @returns Markdown table, or "[Empty Table]" when there are no rows or columns
 */
function tableToMarkdown(table, mergeStrategy = "repeat") {
  const { rows, cols, cells } = table;
  // Checked first so no grid is allocated for a table that renders nothing.
  if (rows === 0 || cols === 0) return "[Empty Table]";
  const matrix = Array.from({ length: rows }, () => Array(cols).fill(""));
  for (const cell of cells) {
    // An unescaped pipe would be read as a column separator and corrupt the row.
    const text = String(cell.text ?? "").replace(/\|/g, "\\|");
    if (mergeStrategy !== "repeat") {
      if (cell.row < rows && cell.col < cols) matrix[cell.row][cell.col] = text;
      continue;
    }
    const lastRow = Math.min(cell.row + cell.rowspan, rows);
    const lastCol = Math.min(cell.col + cell.colspan, cols);
    for (let r = cell.row; r < lastRow; r++) {
      for (let c = cell.col; c < lastCol; c++) matrix[r][c] = text;
    }
  }
  const toLine = (values) => "| " + values.join(" | ") + " |";
  return [
    toLine(Array(cols).fill("")),
    toLine(Array(cols).fill("---")),
    ...matrix.map(toLine)
  ].join("\n");
}
438
+
439
+ //#endregion
440
+ //#region src/paragraph.ts
441
+ /**
442
+ * HWP Paragraph Parser
443
+ * Parses paragraph records and extracts text
444
+ */
445
/**
 * Decode the fixed fields at the start of a PARA_HEADER record.
 * All multi-byte fields are little-endian. The top bit of the first dword
 * is masked off, leaving the text count in the lower 31 bits.
 * @param data - PARA_HEADER record data
 * @returns Paragraph header information
 * @throws Error when the record holds fewer than 22 bytes
 */
function parseParaHeader(data) {
  const MIN_SIZE = 22;
  const TEXT_COUNT_MASK = 2147483647;
  if (data.length < MIN_SIZE) {
    throw new Error(`Invalid PARA_HEADER size: ${data.length}`);
  }
  const fields = new DataView(data.buffer, data.byteOffset, data.byteLength);
  const textCount = fields.getUint32(0, true) & TEXT_COUNT_MASK;
  const controlMask = fields.getUint32(4, true);
  const paraShapeId = fields.getUint16(8, true);
  const styleId = fields.getUint8(10);
  const columnType = fields.getUint8(11);
  const charShapeCount = fields.getUint16(12, true);
  return { textCount, controlMask, paraShapeId, styleId, columnType, charShapeCount };
}
462
/**
 * Decode the text of a PARA_TEXT record.
 *
 * PARA_TEXT is a sequence of UTF-16LE code units in which control codes
 * (values below 32) may appear anywhere, not only at the start:
 * - "char" controls (0, 10, 13, 24-31) occupy a single code unit;
 * - every other control occupies 8 code units (16 bytes): the control code,
 *   6 units of payload, and the control code repeated.
 *
 * For each 16-byte control only the leading control code is kept, so
 * callers can still detect it; the payload is skipped and therefore never
 * decoded as text.
 *
 * @param data - PARA_TEXT record data
 * @param _nchars - Number of WCHAR characters declared by the paragraph
 *        header (currently unused)
 * @returns Decoded text, control codes included
 */
function parseParaText(data, _nchars) {
  const CONTROL_BLOCK_BYTES = 16;
  const isCharControl = (code) => code === 0 || code === 10 || code === 13 || code >= 24;
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
  const kept = new Uint8Array(data.length);
  let keptLength = 0;
  let offset = 0;
  while (offset + 2 <= data.length) {
    const code = view.getUint16(offset, true);
    kept[keptLength++] = data[offset];
    kept[keptLength++] = data[offset + 1];
    // Jump over the payload of multi-unit controls; everything else is one unit.
    offset += code < 32 && !isCharControl(code) ? CONTROL_BLOCK_BYTES : 2;
  }
  if (keptLength === 0) return "";
  return new TextDecoder("utf-16le").decode(kept.subarray(0, keptLength));
}
502
/**
 * Process control characters in text
 *
 * Characters with a code below 32 are HWP control codes. Those that stand
 * for visible content are translated; all others are removed:
 * - 10 (line break) and 13 (paragraph break) become "\n"
 * - 9 (tab) becomes "\t"
 * - 24 (hyphen) becomes "-"
 * - 30 and 31 (non-breaking / fixed-width blank) become a space, so the
 *   words around them are not glued together
 * - 11 (drawing object / table) sets the hasTable flag
 *
 * @param text - Raw text with control characters
 * @returns Processed text and hasTable flag
 */
function processControlChars(text) {
  const REPLACEMENTS = new Map([
    [9, "\t"],
    [10, "\n"],
    [13, "\n"],
    [24, "-"],
    [30, " "],
    [31, " "]
  ]);
  const TABLE_CONTROL = 11;
  let hasTable = false;
  let output = "";
  for (const char of text) {
    const code = char.codePointAt(0) ?? 0;
    if (code >= 32) {
      output += char;
    } else if (code === TABLE_CONTROL) {
      hasTable = true;
    } else {
      output += REPLACEMENTS.get(code) ?? "";
    }
  }
  return {
    text: output,
    hasTable
  };
}
523
/**
 * Paragraph Parser
 * Parses paragraphs from HWP record stream. Tables found among a
 * paragraph's records are rendered to Markdown and merged into its text.
 */
class ParagraphParser {
  /**
   * @param reader - Record reader positioned at the start of a section
   * @param options - Parser options; tableLineBreakStyle defaults to
   *        "space". The caller's object is copied, never modified.
   */
  constructor(reader, options = {}) {
    this.reader = reader;
    this.options = {
      ...options,
      tableLineBreakStyle: options.tableLineBreakStyle || "space"
    };
  }

  /**
   * Skip forward to the next PARA_HEADER record.
   * @returns That record, or null when none can be read
   */
  #nextParaHeaderRecord() {
    while (this.reader.hasMore()) {
      const record = this.reader.readRecord();
      // A null read means the remaining data is unusable; retrying would
      // loop forever on the same bytes.
      if (!record) return null;
      if (record.tagId === HWPTAG_PARA_HEADER) return record;
    }
    return null;
  }

  /**
   * Read the PARA_TEXT record if it directly follows the header.
   * @param textCount - Character count taken from the paragraph header
   * @returns Processed text, or "" when no text record follows
   */
  #readParagraphText(textCount) {
    const next = this.reader.peekRecordHeader();
    if (!next || next.tagId !== HWPTAG_PARA_TEXT) return "";
    const record = this.reader.readRecord();
    return record ? processControlChars(parseParaText(record.data, textCount)).text : "";
  }

  /**
   * Consume the paragraph's remaining records and render every table found
   * (a CTRL_HEADER immediately followed by a TABLE record).
   * Stops at the next PARA_HEADER, at a level-0 record, or at unreadable data.
   * @returns Markdown for each table, in document order
   */
  #collectTables() {
    const tables = [];
    while (this.reader.hasMore()) {
      const next = this.reader.peekRecordHeader();
      if (!next || next.tagId === HWPTAG_PARA_HEADER || next.level === 0) break;
      if (!this.reader.readRecord()) break;
      if (next.tagId !== HWPTAG_CTRL_HEADER) continue;
      const following = this.reader.peekRecordHeader();
      if (!following || following.tagId !== HWPTAG_TABLE) continue;
      const tableRecord = this.reader.readRecord();
      if (!tableRecord) break;
      try {
        tables.push(tableToMarkdown(parseTable(tableRecord.data, this.reader, this.options.tableLineBreakStyle)));
      } catch (error) {
        console.warn("Warning: Failed to parse table:", error);
        tables.push("[TABLE - Parse Error]");
      }
    }
    return tables;
  }

  /**
   * Parse next paragraph
   * @returns Parsed paragraph ({ text, header }), or null if no more paragraphs
   * @throws Error when the paragraph header record is malformed
   */
  parseParagraph() {
    const headerRecord = this.#nextParaHeaderRecord();
    if (!headerRecord) return null;
    const header = parseParaHeader(headerRecord.data);
    let text = header.textCount > 0 ? this.#readParagraphText(header.textCount) : "";
    const tables = this.#collectTables();
    if (tables.length > 0) {
      const rendered = tables.join("\n\n");
      text = text.trimEnd();
      // A few non-ASCII/whitespace characters next to a table are treated as
      // leftover control data rather than real text, and replaced by the table.
      const isPlaceholder = text.length > 0 && text.length < 5 && [...text].every((c) => {
        return (c.codePointAt(0) ?? 0) > 127 || /\s/.test(c);
      });
      if (isPlaceholder || !text) text = rendered;
      else text = `${text}\n\n${rendered}`;
    }
    return {
      text,
      header
    };
  }

  /**
   * Parse all paragraphs in section
   * @returns All paragraphs
   */
  parseAllParagraphs() {
    const paragraphs = [];
    while (this.reader.hasMore()) {
      const para = this.parseParagraph();
      if (!para) break;
      paragraphs.push(para);
    }
    return paragraphs;
  }
}
605
+
606
+ //#endregion
607
+ //#region src/converter.ts
608
/**
 * Join paragraph texts into a Markdown document.
 *
 * Each text is trimmed. Empty texts are dropped, and so are texts shorter
 * than 5 UTF-16 units that do not start with "|" and consist solely of
 * characters above code point 127. The rest are separated by blank lines.
 *
 * @param paragraphs - List of paragraphs
 * @returns Markdown text
 */
function paragraphsToMarkdown(paragraphs) {
  const isNonAscii = (ch) => (ch.codePointAt(0) ?? 0) > 127;
  const isStrayFragment = (text) => text.length < 5 && !text.startsWith("|") && [...text].every(isNonAscii);
  return paragraphs
    .map((para) => para.text.trim())
    .filter((text) => text !== "" && !isStrayFragment(text))
    .join("\n\n");
}
623
/**
 * Convert HWP file to Markdown
 *
 * Reads every section reported by the file, parses its paragraphs and
 * renders them as one Markdown document. Sections whose data cannot be
 * read are skipped.
 *
 * @param hwp - Opened HWP file
 * @param options - Conversion options; tableLineBreakStyle defaults to
 *        "space". The caller's object is left untouched.
 * @returns Markdown content
 */
function convertHwpToMarkdown(hwp, options = {}) {
  // Work on a copy: filling in defaults must not leak into (or fail on a
  // frozen) caller-owned object.
  const parserOptions = {
    ...options,
    tableLineBreakStyle: options.tableLineBreakStyle || "space"
  };
  const allParagraphs = [];
  const sectionCount = hwp.getSectionCount();
  for (let i = 0; i < sectionCount; i++) {
    const sectionData = hwp.readSection(i);
    if (!sectionData) continue;
    const parser = new ParagraphParser(new RecordReader(sectionData), parserOptions);
    // Appended one by one; spreading a very large array into push() can
    // exceed the engine's argument limit.
    for (const paragraph of parser.parseAllParagraphs()) allParagraphs.push(paragraph);
  }
  return paragraphsToMarkdown(allParagraphs);
}
641
+
642
+ //#endregion
643
+ //#region src/cli/index.ts
644
+ /**
645
+ * HWP2MD CLI
646
+ * Command-line interface for HWP to Markdown conversion
647
+ */
648
const program = new Command();
// Kept in step with the published package version (1.0.0).
program.name("hwp2md").description("HWP to Markdown converter").version("1.0.0");

/**
 * Read an HWP file from disk and open it.
 * @param path - Path to the HWP file
 * @returns Opened HWPFile; the caller is responsible for close()
 */
async function openHwpFile(path) {
  const hwp = new HWPFile(new Uint8Array(await readFile(path)));
  hwp.open();
  return hwp;
}

/**
 * Derive the default Markdown path for an input file.
 * A trailing ".hwp" (any case) is replaced by ".md". Any other name gets
 * ".md" appended, so the result can never equal the input path and the
 * source document is never overwritten.
 * @param input - Path of the HWP file being converted
 */
function deriveOutputPath(input) {
  const hwpExtension = /\.hwp$/i;
  return hwpExtension.test(input) ? input.replace(hwpExtension, ".md") : `${input}.md`;
}

/**
 * Print the error in the CLI's format and exit with status 1.
 * @param error - Value caught by a command handler
 */
function failWith(error) {
  console.error(`Error: ${error.message}`);
  process.exit(1);
}

program.command("info <file>").description("Display HWP file information").action(async (file) => {
  try {
    const hwp = await openHwpFile(file);
    try {
      const header = hwp.fileHeader;
      console.log(`File: ${file}`);
      console.log(`Signature: ${header?.signature}`);
      console.log(`Version: ${header?.version}`);
      console.log(`Compressed: ${hwp.isCompressed}`);
      console.log(`Encrypted: ${header?.isEncrypted}`);
      console.log(`Sections: ${hwp.getSectionCount()}`);
    } finally {
      hwp.close();
    }
  } catch (error) {
    failWith(error);
  }
});

program.command("convert <input> [output]").description("Convert HWP file to Markdown").option("--table-line-breaks <style>", "Line break style in table cells: space or br", "space").action(async (input, output, options) => {
  const outputPath = output ?? deriveOutputPath(input);
  try {
    console.log(`Converting ${input} to ${outputPath}...`);
    const hwp = await openHwpFile(input);
    try {
      const markdown = convertHwpToMarkdown(hwp, { tableLineBreakStyle: options.tableLineBreaks });
      await writeFile(outputPath, markdown, "utf-8");
      console.log(`Conversion completed: ${outputPath}`);
    } finally {
      // Released even when conversion or writing fails.
      hwp.close();
    }
  } catch (error) {
    failWith(error);
  }
});

program.parse();
684
+
685
+ //#endregion
686
+ export { };
687
+ //# sourceMappingURL=index.mjs.map