kordoc 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,12 +2,12 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-XJYM2AUA.js";
5
+ } from "./chunk-4UH6ABAY.js";
6
6
  import {
7
7
  VERSION,
8
8
  toArrayBuffer
9
- } from "./chunk-EVWOJ4T5.js";
10
- import "./chunk-MOL7MDBG.js";
9
+ } from "./chunk-25TXW6EP.js";
10
+ import "./chunk-3TBUDJDE.js";
11
11
 
12
12
  // src/cli.ts
13
13
  import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
92
92
  process.stdout.write(output + "\n");
93
93
  }
94
94
  } catch (err) {
95
- const { sanitizeError } = await import("./utils-6JEIFBCJ.js");
95
+ const { sanitizeError } = await import("./utils-BTZ4WSYX.js");
96
96
  process.stderr.write(`
97
97
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
98
98
  `);
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
101
101
  }
102
102
  });
103
103
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
104
- const { watchDirectory } = await import("./watch-BCPDLGOE.js");
104
+ const { watchDirectory } = await import("./watch-QD3PDNXQ.js");
105
105
  await watchDirectory({
106
106
  dir,
107
107
  outDir: opts.outDir,
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\nimport { basename, resolve } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat } from \"./index.js\"\nimport type { ParseOptions } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\nprogram\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (files: string[], opts) => {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장 (--out-dir 또는 --output 시)\n const saveImages = (dir: string) => {\n if (!result.images?.length) return\n const imgDir = resolve(dir, \"images\")\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output, \"..\"))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(opts.outDir)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\r\n\r\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\r\nimport { basename, resolve } from \"path\"\r\nimport { Command } from \"commander\"\r\nimport { parse, detectFormat } from \"./index.js\"\r\nimport type { ParseOptions } from \"./types.js\"\r\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\r\n\r\nconst program = new Command()\r\n\r\nprogram\r\n .name(\"kordoc\")\r\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\r\n .version(VERSION)\r\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\r\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\r\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\r\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\r\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (files: string[], opts) => {\r\n const validFormats = [\"markdown\", \"json\"]\r\n if (!validFormats.includes(opts.format)) {\r\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\r\n process.exit(1)\r\n }\r\n for (let fi = 0; fi < files.length; fi++) {\r\n const filePath = files[fi]\r\n const absPath = resolve(filePath)\r\n const fileName = basename(absPath)\r\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\r\n\r\n try {\r\n const fileSize = statSync(absPath).size\r\n if (fileSize > 500 * 1024 * 1024) {\r\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n const buffer = readFileSync(absPath)\r\n const arrayBuffer = toArrayBuffer(buffer)\r\n const format = detectFormat(arrayBuffer)\r\n\r\n if (!opts.silent) {\r\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\r\n }\r\n\r\n const parseOptions: ParseOptions = {}\r\n if (opts.pages) parseOptions.pages = opts.pages as string\r\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\r\n if (!opts.silent) {\r\n parseOptions.onProgress = (current: number, total: number) => {\r\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\r\n }\r\n }\r\n const result = await parse(arrayBuffer, parseOptions)\r\n\r\n if (!result.success) {\r\n process.stderr.write(` FAIL\\n`)\r\n process.stderr.write(` → ${result.error}\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n\r\n if (!opts.silent) process.stderr.write(` OK\\n`)\r\n\r\n let markdown = result.markdown\r\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\r\n if (opts.outDir && result.images?.length) {\r\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\r\n }\r\n const output = opts.format === \"json\"\r\n ? JSON.stringify(result, null, 2)\r\n : markdown\r\n\r\n // 이미지 저장 (--out-dir 또는 --output 시)\r\n const saveImages = (dir: string) => {\r\n if (!result.images?.length) return\r\n const imgDir = resolve(dir, \"images\")\r\n mkdirSync(imgDir, { recursive: true })\r\n for (const img of result.images) {\r\n writeFileSync(resolve(imgDir, img.filename), img.data)\r\n }\r\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\r\n }\r\n\r\n if (opts.output && files.length === 1) {\r\n writeFileSync(opts.output, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\r\n saveImages(resolve(opts.output, \"..\"))\r\n } else if (opts.outDir) {\r\n mkdirSync(opts.outDir, { recursive: true })\r\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\r\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\r\n writeFileSync(outPath, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\r\n saveImages(opts.outDir)\r\n } else {\r\n process.stdout.write(output + \"\\n\")\r\n }\r\n } catch (err) {\r\n const { sanitizeError } = await import(\"./utils.js\")\r\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\r\n process.exitCode = 1\r\n }\r\n }\r\n })\r\n\r\nprogram\r\n .command(\"watch <dir>\")\r\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\r\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\r\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\r\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (dir: string, opts) => {\r\n const { watchDirectory } = await import(\"./watch.js\")\r\n await watchDirectory({\r\n dir,\r\n outDir: opts.outDir,\r\n webhook: opts.webhook,\r\n format: opts.format,\r\n pages: opts.pages,\r\n silent: opts.silent,\r\n })\r\n })\r\n\r\nprogram.parse()\r\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
package/dist/index.cjs CHANGED
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
184
  // src/utils.ts
185
- var VERSION = true ? "2.0.2" : "0.0.0-dev";
185
+ var VERSION = true ? "2.0.3" : "0.0.0-dev";
186
186
  function toArrayBuffer(buf) {
187
187
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
188
188
  return buf.buffer;
@@ -371,6 +371,47 @@ function sanitizeText(text) {
371
371
  }
372
372
  return result;
373
373
  }
374
+ function flattenLayoutTables(blocks) {
375
+ const result = [];
376
+ for (const block of blocks) {
377
+ if (block.type !== "table" || !block.table) {
378
+ result.push(block);
379
+ continue;
380
+ }
381
+ const { rows: numRows, cols: numCols, cells } = block.table;
382
+ if (numRows === 1 && numCols === 1) {
383
+ result.push(block);
384
+ continue;
385
+ }
386
+ if (numRows <= 3) {
387
+ let totalNewlines = 0;
388
+ let totalTextLen = 0;
389
+ for (let r = 0; r < numRows; r++) {
390
+ for (let c = 0; c < numCols; c++) {
391
+ const t = cells[r]?.[c]?.text || "";
392
+ totalNewlines += (t.match(/\n/g) || []).length;
393
+ totalTextLen += t.length;
394
+ }
395
+ }
396
+ if (totalNewlines > 5 || numRows <= 2 && totalTextLen > 300) {
397
+ for (let r = 0; r < numRows; r++) {
398
+ for (let c = 0; c < numCols; c++) {
399
+ const cellText = cells[r]?.[c]?.text?.trim();
400
+ if (!cellText) continue;
401
+ for (const line of cellText.split("\n")) {
402
+ const trimmed = line.trim();
403
+ if (!trimmed) continue;
404
+ result.push({ type: "paragraph", text: trimmed, pageNumber: block.pageNumber });
405
+ }
406
+ }
407
+ }
408
+ continue;
409
+ }
410
+ }
411
+ result.push(block);
412
+ }
413
+ return result;
414
+ }
374
415
  function blocksToMarkdown(blocks) {
375
416
  const lines = [];
376
417
  for (let i = 0; i < blocks.length; i++) {
@@ -1269,8 +1310,9 @@ var TAG_CHAR_SHAPE = 68;
1269
1310
  var TAG_CTRL_HEADER = 71;
1270
1311
  var TAG_LIST_HEADER = 72;
1271
1312
  var TAG_TABLE = 77;
1272
- var TAG_DOC_CHAR_SHAPE = 55;
1273
- var TAG_DOC_STYLE = 58;
1313
+ var TAG_DOC_CHAR_SHAPE = 21;
1314
+ var TAG_DOC_PARA_SHAPE = 25;
1315
+ var TAG_DOC_STYLE = 26;
1274
1316
  var CHAR_LINE = 0;
1275
1317
  var CHAR_SECTION_BREAK = 10;
1276
1318
  var CHAR_PARA = 13;
@@ -1326,8 +1368,14 @@ function parseFileHeader(data) {
1326
1368
  }
1327
1369
  function parseDocInfo(records) {
1328
1370
  const charShapes = [];
1371
+ const paraShapes = [];
1329
1372
  const styles = [];
1330
1373
  for (const rec of records) {
1374
+ if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
1375
+ const flags = rec.data.readUInt32LE(0);
1376
+ const outlineLevel = flags >> 25 & 7;
1377
+ paraShapes.push({ outlineLevel });
1378
+ }
1331
1379
  if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
1332
1380
  if (rec.data.length >= 50) {
1333
1381
  const fontSize = rec.data.readUInt32LE(42);
@@ -1367,7 +1415,7 @@ function parseDocInfo(records) {
1367
1415
  }
1368
1416
  }
1369
1417
  }
1370
- return { charShapes, styles };
1418
+ return { charShapes, paraShapes, styles };
1371
1419
  }
1372
1420
  function extractText(data) {
1373
1421
  let result = "";
@@ -2379,12 +2427,13 @@ function parseHwp5Document(buffer, options) {
2379
2427
  }
2380
2428
  }
2381
2429
  const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
2430
+ const flatBlocks = flattenLayoutTables(blocks);
2382
2431
  if (docInfo) {
2383
- detectHwp5Headings(blocks, docInfo);
2432
+ detectHwp5Headings(flatBlocks, docInfo);
2384
2433
  }
2385
- const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2386
- const markdown = blocksToMarkdown(blocks);
2387
- return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2434
+ const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2435
+ const markdown = blocksToMarkdown(flatBlocks);
2436
+ return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2388
2437
  }
2389
2438
  function parseDocInfoStream(cfb, compressed) {
2390
2439
  try {
@@ -2435,16 +2484,21 @@ function detectHwp5Headings(blocks, docInfo) {
2435
2484
  }
2436
2485
  if (baseFontSize <= 0) return;
2437
2486
  for (const block of blocks) {
2438
- if (block.type !== "paragraph" || !block.text || !block.style?.fontSize) continue;
2487
+ if (block.type === "heading") continue;
2488
+ if (block.type !== "paragraph" || !block.text) continue;
2439
2489
  const text = block.text.trim();
2440
2490
  if (text.length === 0 || text.length > 200) continue;
2441
2491
  if (/^\d+$/.test(text)) continue;
2442
- const ratio = block.style.fontSize / baseFontSize;
2443
2492
  let level = 0;
2444
- if (ratio >= HEADING_RATIO_H1) level = 1;
2445
- else if (ratio >= HEADING_RATIO_H2) level = 2;
2446
- else if (ratio >= HEADING_RATIO_H3) level = 3;
2447
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
2493
+ if (block.style?.fontSize && baseFontSize > 0) {
2494
+ const ratio = block.style.fontSize / baseFontSize;
2495
+ if (ratio >= HEADING_RATIO_H1) level = 1;
2496
+ else if (ratio >= HEADING_RATIO_H2) level = 2;
2497
+ else if (ratio >= HEADING_RATIO_H3) level = 3;
2498
+ }
2499
+ if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
2500
+ if (level === 0) level = 2;
2501
+ } else if (/^제\d+(조의?\d*)\s*[\((]/.test(text) && text.length <= 80) {
2448
2502
  if (level === 0) level = 3;
2449
2503
  }
2450
2504
  if (level > 0) {
@@ -2676,13 +2730,20 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2676
2730
  while (i < records.length) {
2677
2731
  const rec = records[i];
2678
2732
  if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
2679
- const { paragraph, tables, nextIdx, charShapeIds } = parseParagraphWithTables(records, i);
2733
+ const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
2680
2734
  if (paragraph) {
2681
2735
  const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
2682
2736
  if (docInfo && charShapeIds.length > 0) {
2683
2737
  const style = resolveCharStyle(charShapeIds, docInfo);
2684
2738
  if (style) block.style = style;
2685
2739
  }
2740
+ if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
2741
+ const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
2742
+ if (ol >= 1 && ol <= 6) {
2743
+ block.type = "heading";
2744
+ block.level = ol;
2745
+ }
2746
+ }
2686
2747
  blocks.push(block);
2687
2748
  }
2688
2749
  for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
@@ -2802,6 +2863,8 @@ function parseParagraphWithTables(records, startIdx) {
2802
2863
  let text = "";
2803
2864
  const tables = [];
2804
2865
  const charShapeIds = [];
2866
+ const paraHeaderData = records[startIdx].data;
2867
+ const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
2805
2868
  let i = startIdx + 1;
2806
2869
  while (i < records.length) {
2807
2870
  const rec = records[i];
@@ -2826,7 +2889,7 @@ function parseParagraphWithTables(records, startIdx) {
2826
2889
  i++;
2827
2890
  }
2828
2891
  const trimmed = text.trim();
2829
- return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds };
2892
+ return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
2830
2893
  }
2831
2894
  function parseTableBlock(records, startIdx) {
2832
2895
  const tableLevel = records[startIdx].level;