kordoc 0.1.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,20 @@
1
1
  #!/usr/bin/env node
2
- var __defProp = Object.defineProperty;
3
- var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
4
- get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
5
- }) : x)(function(x) {
6
- if (typeof require !== "undefined") return require.apply(this, arguments);
7
- throw Error('Dynamic require of "' + x + '" is not supported');
8
- });
9
- var __export = (target, all) => {
10
- for (var name in all)
11
- __defProp(target, name, { get: all[name], enumerable: true });
12
- };
13
2
 
14
3
  // src/detect.ts
4
+ function magicBytes(buffer) {
5
+ return new Uint8Array(buffer, 0, Math.min(4, buffer.byteLength));
6
+ }
15
7
  function isHwpxFile(buffer) {
16
- const bytes = new Uint8Array(buffer.slice(0, 4));
17
- return bytes[0] === 80 && bytes[1] === 75 && bytes[2] === 3 && bytes[3] === 4;
8
+ const b = magicBytes(buffer);
9
+ return b[0] === 80 && b[1] === 75 && b[2] === 3 && b[3] === 4;
18
10
  }
19
11
  function isOldHwpFile(buffer) {
20
- const bytes = new Uint8Array(buffer.slice(0, 4));
21
- return bytes[0] === 208 && bytes[1] === 207 && bytes[2] === 17 && bytes[3] === 224;
12
+ const b = magicBytes(buffer);
13
+ return b[0] === 208 && b[1] === 207 && b[2] === 17 && b[3] === 224;
22
14
  }
23
15
  function isPdfFile(buffer) {
24
- const bytes = new Uint8Array(buffer.slice(0, 4));
25
- return bytes[0] === 37 && bytes[1] === 80 && bytes[2] === 68 && bytes[3] === 70;
16
+ const b = magicBytes(buffer);
17
+ return b[0] === 37 && b[1] === 80 && b[2] === 68 && b[3] === 70;
26
18
  }
27
19
  function detectFormat(buffer) {
28
20
  if (isHwpxFile(buffer)) return "hwpx";
@@ -31,29 +23,40 @@ function detectFormat(buffer) {
31
23
  return "unknown";
32
24
  }
33
25
 
26
+ // src/utils.ts
27
+ var VERSION = true ? "0.2.2" : "0.0.0-dev";
28
+ function toArrayBuffer(buf) {
29
+ return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
30
+ }
31
+
34
32
  // src/hwpx/parser.ts
35
33
  import JSZip from "jszip";
34
+ import { inflateRawSync } from "zlib";
36
35
  import { DOMParser } from "@xmldom/xmldom";
37
36
 
38
37
  // src/table/builder.ts
38
+ var MAX_COLS = 200;
39
+ var MAX_ROWS = 1e4;
39
40
  function buildTable(rows) {
41
+ if (rows.length > MAX_ROWS) rows = rows.slice(0, MAX_ROWS);
40
42
  const numRows = rows.length;
41
- const tempOccupied = Array.from({ length: numRows }, () => Array(100).fill(false));
43
+ const tempOccupied = /* @__PURE__ */ new Set();
42
44
  let maxCols = 0;
43
45
  for (let rowIdx = 0; rowIdx < numRows; rowIdx++) {
44
46
  let colIdx = 0;
45
47
  for (const cell of rows[rowIdx]) {
46
- while (colIdx < 100 && tempOccupied[rowIdx][colIdx]) colIdx++;
47
- if (colIdx >= 100) break;
48
+ while (colIdx < MAX_COLS && tempOccupied.has(rowIdx * MAX_COLS + colIdx)) colIdx++;
49
+ if (colIdx >= MAX_COLS) break;
48
50
  for (let r = rowIdx; r < Math.min(rowIdx + cell.rowSpan, numRows); r++) {
49
- for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, 100); c++) {
50
- tempOccupied[r][c] = true;
51
+ for (let c = colIdx; c < Math.min(colIdx + cell.colSpan, MAX_COLS); c++) {
52
+ tempOccupied.add(r * MAX_COLS + c);
51
53
  }
52
54
  }
53
55
  colIdx += cell.colSpan;
54
56
  if (colIdx > maxCols) maxCols = colIdx;
55
57
  }
56
58
  }
59
+ tempOccupied.clear();
57
60
  if (maxCols === 0) return { rows: 0, cols: 0, cells: [], hasHeader: false };
58
61
  const grid = Array.from(
59
62
  { length: numRows },
@@ -146,13 +149,9 @@ function tableToMarkdown(table) {
146
149
  }
147
150
  }
148
151
  const uniqueRows = [];
149
- const seen = /* @__PURE__ */ new Set();
150
152
  for (const row of display) {
151
- const key = row.join("||");
152
- if (!seen.has(key)) {
153
- seen.add(key);
154
- uniqueRows.push(row);
155
- }
153
+ const isEmptyPlaceholder = row.every((cell) => cell === "");
154
+ if (!isEmptyPlaceholder) uniqueRows.push(row);
156
155
  }
157
156
  if (uniqueRows.length === 0) return "";
158
157
  const md = [];
@@ -165,6 +164,14 @@ function tableToMarkdown(table) {
165
164
  }
166
165
 
167
166
  // src/hwpx/parser.ts
167
+ var MAX_DECOMPRESS_SIZE = 100 * 1024 * 1024;
168
+ var MAX_ZIP_ENTRIES = 500;
169
+ function clampSpan(val, max) {
170
+ return Math.max(1, Math.min(val, max));
171
+ }
172
+ function stripDtd(xml) {
173
+ return xml.replace(/<!DOCTYPE\s[^[>]*(\[[\s\S]*?\])?\s*>/gi, "");
174
+ }
168
175
  async function parseHwpxDocument(buffer) {
169
176
  let zip;
170
177
  try {
@@ -174,11 +181,14 @@ async function parseHwpxDocument(buffer) {
174
181
  }
175
182
  const sectionPaths = await resolveSectionPaths(zip);
176
183
  if (sectionPaths.length === 0) throw new Error("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
184
+ let totalDecompressed = 0;
177
185
  const blocks = [];
178
186
  for (const path of sectionPaths) {
179
187
  const file = zip.file(path);
180
188
  if (!file) continue;
181
189
  const xml = await file.async("text");
190
+ totalDecompressed += xml.length * 2;
191
+ if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error("ZIP \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (ZIP bomb \uC758\uC2EC)");
182
192
  blocks.push(...parseSectionXml(xml));
183
193
  }
184
194
  return blocksToMarkdown(blocks);
@@ -188,15 +198,31 @@ function extractFromBrokenZip(buffer) {
188
198
  const view = new DataView(buffer);
189
199
  let pos = 0;
190
200
  const texts = [];
201
+ let totalDecompressed = 0;
202
+ let entryCount = 0;
191
203
  while (pos < data.length - 30) {
192
204
  if (data[pos] !== 80 || data[pos + 1] !== 75 || data[pos + 2] !== 3 || data[pos + 3] !== 4) break;
205
+ if (++entryCount > MAX_ZIP_ENTRIES) break;
193
206
  const method = view.getUint16(pos + 8, true);
194
207
  const compSize = view.getUint32(pos + 18, true);
195
208
  const nameLen = view.getUint16(pos + 26, true);
196
209
  const extraLen = view.getUint16(pos + 28, true);
210
+ if (nameLen > 1024 || extraLen > 65535) {
211
+ pos += 30 + nameLen + extraLen;
212
+ continue;
213
+ }
214
+ const fileStart = pos + 30 + nameLen + extraLen;
215
+ if (fileStart + compSize > data.length) break;
216
+ if (compSize === 0 && method !== 0) {
217
+ pos = fileStart;
218
+ continue;
219
+ }
197
220
  const nameBytes = data.slice(pos + 30, pos + 30 + nameLen);
198
221
  const name = new TextDecoder().decode(nameBytes);
199
- const fileStart = pos + 30 + nameLen + extraLen;
222
+ if (name.includes("..") || name.startsWith("/")) {
223
+ pos = fileStart + compSize;
224
+ continue;
225
+ }
200
226
  const fileData = data.slice(fileStart, fileStart + compSize);
201
227
  pos = fileStart + compSize;
202
228
  if (!name.toLowerCase().includes("section") || !name.endsWith(".xml")) continue;
@@ -205,11 +231,13 @@ function extractFromBrokenZip(buffer) {
205
231
  if (method === 0) {
206
232
  content = new TextDecoder().decode(fileData);
207
233
  } else if (method === 8) {
208
- const { inflateRawSync: inflateRawSync2 } = __require("zlib");
209
- content = new TextDecoder().decode(inflateRawSync2(Buffer.from(fileData)));
234
+ const decompressed = inflateRawSync(Buffer.from(fileData), { maxOutputLength: MAX_DECOMPRESS_SIZE });
235
+ content = new TextDecoder().decode(decompressed);
210
236
  } else {
211
237
  continue;
212
238
  }
239
+ totalDecompressed += content.length * 2;
240
+ if (totalDecompressed > MAX_DECOMPRESS_SIZE) throw new Error("\uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC");
213
241
  const sectionText = blocksToMarkdown(parseSectionXml(content));
214
242
  if (sectionText) texts.push(sectionText);
215
243
  } catch {
@@ -233,7 +261,7 @@ async function resolveSectionPaths(zip) {
233
261
  }
234
262
  function parseSectionPathsFromManifest(xml) {
235
263
  const parser = new DOMParser();
236
- const doc = parser.parseFromString(xml, "text/xml");
264
+ const doc = parser.parseFromString(stripDtd(xml), "text/xml");
237
265
  const items = doc.getElementsByTagName("opf:item");
238
266
  const spine = doc.getElementsByTagName("opf:itemref");
239
267
  const isSectionId = (id) => /^s/i.test(id) || id.toLowerCase().includes("section");
@@ -260,7 +288,7 @@ function parseSectionPathsFromManifest(xml) {
260
288
  }
261
289
  function parseSectionXml(xml) {
262
290
  const parser = new DOMParser();
263
- const doc = parser.parseFromString(xml, "text/xml");
291
+ const doc = parser.parseFromString(stripDtd(xml), "text/xml");
264
292
  if (!doc.documentElement) return [];
265
293
  const blocks = [];
266
294
  walkSection(doc.documentElement, blocks, null, []);
@@ -318,8 +346,8 @@ function walkSection(node, blocks, tableCtx, tableStack) {
318
346
  if (tableCtx?.cell) {
319
347
  const cs = parseInt(el.getAttribute("colSpan") || "1", 10);
320
348
  const rs = parseInt(el.getAttribute("rowSpan") || "1", 10);
321
- if (cs > 0) tableCtx.cell.colSpan = cs;
322
- if (rs > 0) tableCtx.cell.rowSpan = rs;
349
+ tableCtx.cell.colSpan = clampSpan(cs, MAX_COLS);
350
+ tableCtx.cell.rowSpan = clampSpan(rs, MAX_ROWS);
323
351
  }
324
352
  break;
325
353
  case "p": {
@@ -381,7 +409,7 @@ function extractParagraphText(para) {
381
409
  }
382
410
 
383
411
  // src/hwp5/record.ts
384
- import { inflateRawSync, inflateSync } from "zlib";
412
+ import { inflateRawSync as inflateRawSync2, inflateSync } from "zlib";
385
413
  var TAG_PARA_HEADER = 66;
386
414
  var TAG_PARA_TEXT = 67;
387
415
  var TAG_CTRL_HEADER = 71;
@@ -416,16 +444,19 @@ function readRecords(data) {
416
444
  }
417
445
  return records;
418
446
  }
447
+ var MAX_DECOMPRESS_SIZE2 = 100 * 1024 * 1024;
419
448
  function decompressStream(data) {
449
+ const opts = { maxOutputLength: MAX_DECOMPRESS_SIZE2 };
420
450
  if (data.length >= 2 && data[0] === 120) {
421
451
  try {
422
- return inflateSync(data);
452
+ return inflateSync(data, opts);
423
453
  } catch {
424
454
  }
425
455
  }
426
- return inflateRawSync(data);
456
+ return inflateRawSync2(data, opts);
427
457
  }
428
458
  function parseFileHeader(data) {
459
+ if (data.length < 40) throw new Error("FileHeader\uAC00 \uB108\uBB34 \uC9E7\uC2B5\uB2C8\uB2E4 (\uCD5C\uC18C 40\uBC14\uC774\uD2B8)");
429
460
  const sig = data.subarray(0, 32).toString("utf8").replace(/\0+$/, "");
430
461
  return {
431
462
  signature: sig,
@@ -461,6 +492,15 @@ function extractText(data) {
461
492
  const isInline = ch >= 4 && ch <= 9 || ch >= 19 && ch <= 20;
462
493
  if ((isExt || isInline) && i + 14 <= data.length) i += 14;
463
494
  } else if (ch >= 32) {
495
+ if (ch >= 55296 && ch <= 56319 && i + 1 < data.length) {
496
+ const lo = data.readUInt16LE(i);
497
+ if (lo >= 56320 && lo <= 57343) {
498
+ i += 2;
499
+ const codePoint = (ch - 55296 << 10) + (lo - 56320) + 65536;
500
+ result += String.fromCodePoint(codePoint);
501
+ break;
502
+ }
503
+ }
464
504
  result += String.fromCharCode(ch);
465
505
  }
466
506
  break;
@@ -473,6 +513,8 @@ function extractText(data) {
473
513
  import { createRequire } from "module";
474
514
  var require2 = createRequire(import.meta.url);
475
515
  var CFB = require2("cfb");
516
+ var MAX_SECTIONS = 100;
517
+ var MAX_TOTAL_DECOMPRESS = 100 * 1024 * 1024;
476
518
  function parseHwp5Document(buffer) {
477
519
  const cfb = CFB.parse(buffer);
478
520
  const headerEntry = CFB.find(cfb, "/FileHeader");
@@ -485,8 +527,11 @@ function parseHwp5Document(buffer) {
485
527
  const sections = findSections(cfb);
486
528
  if (sections.length === 0) throw new Error("\uC139\uC158 \uC2A4\uD2B8\uB9BC\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
487
529
  const blocks = [];
530
+ let totalDecompressed = 0;
488
531
  for (const sectionData of sections) {
489
532
  const data = compressed ? decompressStream(Buffer.from(sectionData)) : Buffer.from(sectionData);
533
+ totalDecompressed += data.length;
534
+ if (totalDecompressed > MAX_TOTAL_DECOMPRESS) throw new Error("\uCD1D \uC555\uCD95 \uD574\uC81C \uD06C\uAE30 \uCD08\uACFC (decompression bomb \uC758\uC2EC)");
490
535
  const records = readRecords(data);
491
536
  blocks.push(...parseSection(records));
492
537
  }
@@ -494,7 +539,7 @@ function parseHwp5Document(buffer) {
494
539
  }
495
540
  function findSections(cfb) {
496
541
  const sections = [];
497
- for (let i = 0; ; i++) {
542
+ for (let i = 0; i < MAX_SECTIONS; i++) {
498
543
  const entry = CFB.find(cfb, `/BodyText/Section${i}`);
499
544
  if (!entry?.content) break;
500
545
  sections.push({ idx: i, content: Buffer.from(entry.content) });
@@ -585,20 +630,29 @@ function parseTableBlock(records, startIdx) {
585
630
  return { table: buildTable(cellRows), nextIdx: i };
586
631
  }
587
632
  function parseCellBlock(records, startIdx, tableLevel) {
588
- const cellLevel = records[startIdx].level;
633
+ const rec = records[startIdx];
634
+ const cellLevel = rec.level;
589
635
  const texts = [];
636
+ let colSpan = 1;
637
+ let rowSpan = 1;
638
+ if (rec.data.length >= 14) {
639
+ const cs = rec.data.readUInt16LE(10);
640
+ const rs = rec.data.readUInt16LE(12);
641
+ if (cs > 0) colSpan = Math.min(cs, MAX_COLS);
642
+ if (rs > 0) rowSpan = Math.min(rs, MAX_ROWS);
643
+ }
590
644
  let i = startIdx + 1;
591
645
  while (i < records.length) {
592
- const rec = records[i];
593
- if (rec.tagId === TAG_LIST_HEADER && rec.level <= cellLevel) break;
594
- if (rec.level <= tableLevel && (rec.tagId === TAG_PARA_HEADER || rec.tagId === TAG_CTRL_HEADER)) break;
595
- if (rec.tagId === TAG_PARA_TEXT) {
596
- const t = extractText(rec.data).trim();
646
+ const r = records[i];
647
+ if (r.tagId === TAG_LIST_HEADER && r.level <= cellLevel) break;
648
+ if (r.level <= tableLevel && (r.tagId === TAG_PARA_HEADER || r.tagId === TAG_CTRL_HEADER)) break;
649
+ if (r.tagId === TAG_PARA_TEXT) {
650
+ const t = extractText(r.data).trim();
597
651
  if (t) texts.push(t);
598
652
  }
599
653
  i++;
600
654
  }
601
- return { cell: { text: texts.join("\n"), colSpan: 1, rowSpan: 1 }, nextIdx: i };
655
+ return { cell: { text: texts.join("\n"), colSpan, rowSpan }, nextIdx: i };
602
656
  }
603
657
  function arrangeCells(rows, cols, cells) {
604
658
  const grid = Array.from({ length: rows }, () => Array(cols).fill(null));
@@ -627,13 +681,18 @@ var pdfjsModule = null;
627
681
  async function loadPdfjs() {
628
682
  if (pdfjsModule) return pdfjsModule;
629
683
  try {
630
- pdfjsModule = await import("pdfjs-dist/legacy/build/pdf.mjs");
684
+ const mod = await import("pdfjs-dist/legacy/build/pdf.mjs");
631
685
  const req = createRequire2(import.meta.url);
632
686
  const workerPath = req.resolve("pdfjs-dist/legacy/build/pdf.worker.mjs");
633
- pdfjsModule.GlobalWorkerOptions.workerSrc = pathToFileURL(workerPath).href;
634
- return pdfjsModule;
635
- } catch {
636
- return null;
687
+ mod.GlobalWorkerOptions.workerSrc = pathToFileURL(workerPath).href;
688
+ pdfjsModule = mod;
689
+ return mod;
690
+ } catch (err) {
691
+ const msg = err instanceof Error ? err.message : String(err);
692
+ if (msg.includes("Cannot find") || msg.includes("MODULE_NOT_FOUND")) {
693
+ return null;
694
+ }
695
+ throw new Error(`pdfjs-dist \uB85C\uB529 \uC2E4\uD328: ${msg}`);
637
696
  }
638
697
  }
639
698
  async function parsePdfDocument(buffer) {
@@ -646,15 +705,14 @@ async function parsePdfDocument(buffer) {
646
705
  error: "pdfjs-dist\uAC00 \uC124\uCE58\uB418\uC9C0 \uC54A\uC558\uC2B5\uB2C8\uB2E4. npm install pdfjs-dist"
647
706
  };
648
707
  }
708
+ const data = new Uint8Array(buffer);
709
+ const doc = await pdfjs.getDocument({
710
+ data,
711
+ useSystemFonts: true,
712
+ disableFontFace: true,
713
+ isEvalSupported: false
714
+ }).promise;
649
715
  try {
650
- const data = new Uint8Array(buffer);
651
- const doc = await pdfjs.getDocument({
652
- data,
653
- useSystemFonts: true,
654
- disableFontFace: true,
655
- isEvalSupported: false,
656
- workerSrc: ""
657
- }).promise;
658
716
  const pageCount = doc.numPages;
659
717
  if (pageCount === 0) {
660
718
  return { success: false, fileType: "pdf", pageCount: 0, error: "PDF\uC5D0 \uD398\uC774\uC9C0\uAC00 \uC5C6\uC2B5\uB2C8\uB2E4." };
@@ -689,20 +747,14 @@ async function parsePdfDocument(buffer) {
689
747
  }
690
748
  markdown = reconstructTables(markdown);
691
749
  return { success: true, fileType: "pdf", markdown, pageCount, isImageBased: false };
692
- } catch (err) {
693
- return {
694
- success: false,
695
- fileType: "pdf",
696
- pageCount: 0,
697
- error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328"
698
- };
750
+ } finally {
751
+ await doc.destroy().catch(() => {
752
+ });
699
753
  }
700
754
  }
701
755
  function groupTextItemsByLine(items) {
702
756
  if (items.length === 0) return [];
703
- const textItems = items.filter(
704
- (item) => typeof item.str === "string" && item.str.trim() !== ""
705
- );
757
+ const textItems = items.filter((item) => typeof item.str === "string" && item.str.trim() !== "");
706
758
  if (textItems.length === 0) return [];
707
759
  textItems.sort((a, b) => {
708
760
  const yDiff = b.transform[5] - a.transform[5];
@@ -760,8 +812,9 @@ function reconstructTables(text) {
760
812
  function formatAsMarkdownTable(rows) {
761
813
  const maxCols = Math.max(...rows.map((r) => r.length));
762
814
  const normalized = rows.map((r) => {
763
- while (r.length < maxCols) r.push("");
764
- return r;
815
+ const copy = [...r];
816
+ while (copy.length < maxCols) copy.push("");
817
+ return copy;
765
818
  });
766
819
  const lines = [];
767
820
  lines.push("| " + normalized[0].join(" | ") + " |");
@@ -774,6 +827,9 @@ function formatAsMarkdownTable(rows) {
774
827
 
775
828
  // src/index.ts
776
829
  async function parse(buffer) {
830
+ if (!buffer || buffer.byteLength === 0) {
831
+ return { success: false, fileType: "unknown", error: "\uBE48 \uBC84\uD37C\uC774\uAC70\uB098 \uC720\uD6A8\uD558\uC9C0 \uC54A\uC740 \uC785\uB825\uC785\uB2C8\uB2E4." };
832
+ }
777
833
  const format = detectFormat(buffer);
778
834
  switch (format) {
779
835
  case "hwpx":
@@ -803,11 +859,16 @@ async function parseHwp(buffer) {
803
859
  }
804
860
  }
805
861
  async function parsePdf(buffer) {
806
- return parsePdfDocument(buffer);
862
+ try {
863
+ return await parsePdfDocument(buffer);
864
+ } catch (err) {
865
+ return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328" };
866
+ }
807
867
  }
808
868
 
809
869
  export {
810
- __export,
811
870
  detectFormat,
871
+ VERSION,
872
+ toArrayBuffer,
812
873
  parse
813
874
  };
package/dist/cli.js CHANGED
@@ -1,21 +1,31 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
+ VERSION,
3
4
  detectFormat,
4
- parse
5
- } from "./chunk-P2BZDRLZ.js";
5
+ parse,
6
+ toArrayBuffer
7
+ } from "./chunk-KZMWHK72.js";
6
8
 
7
9
  // src/cli.ts
8
- import { readFileSync, writeFileSync } from "fs";
10
+ import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
9
11
  import { basename, resolve } from "path";
10
12
  import { Command } from "commander";
11
13
  var program = new Command();
12
- program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\uACA0\uB2E4 \u2014 HWP, HWPX, PDF \u2192 Markdown").version("0.1.0").argument("<files...>", "\uBCC0\uD658\uD560 \uD30C\uC77C \uACBD\uB85C (HWP, HWPX, PDF)").option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (files, opts) => {
14
+ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\uACA0\uB2E4 \u2014 HWP, HWPX, PDF \u2192 Markdown").version(VERSION).argument("<files...>", "\uBCC0\uD658\uD560 \uD30C\uC77C \uACBD\uB85C (HWP, HWPX, PDF)").option("-o, --output <path>", "\uCD9C\uB825 \uD30C\uC77C \uACBD\uB85C (\uB2E8\uC77C \uD30C\uC77C \uC2DC)").option("-d, --out-dir <dir>", "\uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC (\uB2E4\uC911 \uD30C\uC77C \uC2DC)").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown (\uAE30\uBCF8) \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (files, opts) => {
13
15
  for (const filePath of files) {
14
16
  const absPath = resolve(filePath);
15
17
  const fileName = basename(absPath);
16
18
  try {
19
+ const fileSize = statSync(absPath).size;
20
+ if (fileSize > 500 * 1024 * 1024) {
21
+ process.stderr.write(`
22
+ [kordoc] SKIP: ${fileName} \u2014 \uD30C\uC77C\uC774 \uB108\uBB34 \uD07D\uB2C8\uB2E4 (${(fileSize / 1024 / 1024).toFixed(1)}MB)
23
+ `);
24
+ process.exitCode = 1;
25
+ continue;
26
+ }
17
27
  const buffer = readFileSync(absPath);
18
- const arrayBuffer = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
28
+ const arrayBuffer = toArrayBuffer(buffer);
19
29
  const format = detectFormat(arrayBuffer);
20
30
  if (!opts.silent) {
21
31
  process.stderr.write(`[kordoc] ${fileName} (${format}) ...`);
@@ -31,13 +41,15 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
31
41
  }
32
42
  if (!opts.silent) process.stderr.write(` OK
33
43
  `);
34
- const output = opts.format === "json" ? JSON.stringify(result, null, 2) : result.markdown || "";
44
+ const output = opts.format === "json" ? JSON.stringify(result, null, 2) : result.markdown;
35
45
  if (opts.output && files.length === 1) {
36
46
  writeFileSync(opts.output, output, "utf-8");
37
47
  if (!opts.silent) process.stderr.write(` \u2192 ${opts.output}
38
48
  `);
39
49
  } else if (opts.outDir) {
40
- const outPath = resolve(opts.outDir, fileName.replace(/\.[^.]+$/, ".md"));
50
+ mkdirSync(opts.outDir, { recursive: true });
51
+ const outExt = opts.format === "json" ? ".json" : ".md";
52
+ const outPath = resolve(opts.outDir, fileName.replace(/\.[^.]+$/, outExt));
41
53
  writeFileSync(outPath, output, "utf-8");
42
54
  if (!opts.silent) process.stderr.write(` \u2192 ${outPath}
43
55
  `);