kordoc 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,12 +2,12 @@
2
2
  import {
3
3
  detectFormat,
4
4
  parse
5
- } from "./chunk-JJ65GKUH.js";
5
+ } from "./chunk-4UH6ABAY.js";
6
6
  import {
7
7
  VERSION,
8
8
  toArrayBuffer
9
- } from "./chunk-L4OFASDS.js";
10
- import "./chunk-MOL7MDBG.js";
9
+ } from "./chunk-25TXW6EP.js";
10
+ import "./chunk-3TBUDJDE.js";
11
11
 
12
12
  // src/cli.ts
13
13
  import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
92
92
  process.stdout.write(output + "\n");
93
93
  }
94
94
  } catch (err) {
95
- const { sanitizeError } = await import("./utils-4HVKHULU.js");
95
+ const { sanitizeError } = await import("./utils-BTZ4WSYX.js");
96
96
  process.stderr.write(`
97
97
  [kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
98
98
  `);
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
101
101
  }
102
102
  });
103
103
  program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
104
- const { watchDirectory } = await import("./watch-RNZ3KESY.js");
104
+ const { watchDirectory } = await import("./watch-QD3PDNXQ.js");
105
105
  await watchDirectory({
106
106
  dir,
107
107
  outDir: opts.outDir,
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\nimport { basename, resolve } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat } from \"./index.js\"\nimport type { ParseOptions } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\nprogram\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (files: string[], opts) => {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\n }\n const output = opts.format === \"json\"\n ? JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장 (--out-dir 또는 --output 시)\n const saveImages = (dir: string) => {\n if (!result.images?.length) return\n const imgDir = resolve(dir, \"images\")\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output, \"..\"))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(opts.outDir)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n })\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\r\n\r\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\r\nimport { basename, resolve } from \"path\"\r\nimport { Command } from \"commander\"\r\nimport { parse, detectFormat } from \"./index.js\"\r\nimport type { ParseOptions } from \"./types.js\"\r\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\r\n\r\nconst program = new Command()\r\n\r\nprogram\r\n .name(\"kordoc\")\r\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\r\n .version(VERSION)\r\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\r\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\r\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\r\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\r\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (files: string[], opts) => {\r\n const validFormats = [\"markdown\", \"json\"]\r\n if (!validFormats.includes(opts.format)) {\r\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\r\n process.exit(1)\r\n }\r\n for (let fi = 0; fi < files.length; fi++) {\r\n const filePath = files[fi]\r\n const absPath = resolve(filePath)\r\n const fileName = basename(absPath)\r\n const filePrefix = files.length > 1 ? `[${fi + 1}/${files.length}] ` : \"\"\r\n\r\n try {\r\n const fileSize = statSync(absPath).size\r\n if (fileSize > 500 * 1024 * 1024) {\r\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n const buffer = readFileSync(absPath)\r\n const arrayBuffer = toArrayBuffer(buffer)\r\n const format = detectFormat(arrayBuffer)\r\n\r\n if (!opts.silent) {\r\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\r\n }\r\n\r\n const parseOptions: ParseOptions = {}\r\n if (opts.pages) parseOptions.pages = opts.pages as string\r\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\r\n if (!opts.silent) {\r\n parseOptions.onProgress = (current: number, total: number) => {\r\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\r\n }\r\n }\r\n const result = await parse(arrayBuffer, parseOptions)\r\n\r\n if (!result.success) {\r\n process.stderr.write(` FAIL\\n`)\r\n process.stderr.write(` → ${result.error}\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n\r\n if (!opts.silent) process.stderr.write(` OK\\n`)\r\n\r\n let markdown = result.markdown\r\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\r\n if (opts.outDir && result.images?.length) {\r\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"![image](images/image_\")\r\n }\r\n const output = opts.format === \"json\"\r\n ? JSON.stringify(result, null, 2)\r\n : markdown\r\n\r\n // 이미지 저장 (--out-dir 또는 --output 시)\r\n const saveImages = (dir: string) => {\r\n if (!result.images?.length) return\r\n const imgDir = resolve(dir, \"images\")\r\n mkdirSync(imgDir, { recursive: true })\r\n for (const img of result.images) {\r\n writeFileSync(resolve(imgDir, img.filename), img.data)\r\n }\r\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\r\n }\r\n\r\n if (opts.output && files.length === 1) {\r\n writeFileSync(opts.output, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\r\n saveImages(resolve(opts.output, \"..\"))\r\n } else if (opts.outDir) {\r\n mkdirSync(opts.outDir, { recursive: true })\r\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\r\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\r\n writeFileSync(outPath, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\r\n saveImages(opts.outDir)\r\n } else {\r\n process.stdout.write(output + \"\\n\")\r\n }\r\n } catch (err) {\r\n const { sanitizeError } = await import(\"./utils.js\")\r\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\r\n process.exitCode = 1\r\n }\r\n }\r\n })\r\n\r\nprogram\r\n .command(\"watch <dir>\")\r\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\r\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\r\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\r\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (dir: string, opts) => {\r\n const { watchDirectory } = await import(\"./watch.js\")\r\n await watchDirectory({\r\n dir,\r\n outDir: opts.outDir,\r\n webhook: opts.webhook,\r\n format: opts.format,\r\n pages: opts.pages,\r\n silent: opts.silent,\r\n })\r\n })\r\n\r\nprogram.parse()\r\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
package/dist/index.cjs CHANGED
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
182
182
  var import_xmldom = require("@xmldom/xmldom");
183
183
 
184
184
  // src/utils.ts
185
- var VERSION = true ? "2.0.1" : "0.0.0-dev";
185
+ var VERSION = true ? "2.0.3" : "0.0.0-dev";
186
186
  function toArrayBuffer(buf) {
187
187
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
188
188
  return buf.buffer;
@@ -371,6 +371,47 @@ function sanitizeText(text) {
371
371
  }
372
372
  return result;
373
373
  }
374
+ function flattenLayoutTables(blocks) {
375
+ const result = [];
376
+ for (const block of blocks) {
377
+ if (block.type !== "table" || !block.table) {
378
+ result.push(block);
379
+ continue;
380
+ }
381
+ const { rows: numRows, cols: numCols, cells } = block.table;
382
+ if (numRows === 1 && numCols === 1) {
383
+ result.push(block);
384
+ continue;
385
+ }
386
+ if (numRows <= 3) {
387
+ let totalNewlines = 0;
388
+ let totalTextLen = 0;
389
+ for (let r = 0; r < numRows; r++) {
390
+ for (let c = 0; c < numCols; c++) {
391
+ const t = cells[r]?.[c]?.text || "";
392
+ totalNewlines += (t.match(/\n/g) || []).length;
393
+ totalTextLen += t.length;
394
+ }
395
+ }
396
+ if (totalNewlines > 5 || numRows <= 2 && totalTextLen > 300) {
397
+ for (let r = 0; r < numRows; r++) {
398
+ for (let c = 0; c < numCols; c++) {
399
+ const cellText = cells[r]?.[c]?.text?.trim();
400
+ if (!cellText) continue;
401
+ for (const line of cellText.split("\n")) {
402
+ const trimmed = line.trim();
403
+ if (!trimmed) continue;
404
+ result.push({ type: "paragraph", text: trimmed, pageNumber: block.pageNumber });
405
+ }
406
+ }
407
+ }
408
+ continue;
409
+ }
410
+ }
411
+ result.push(block);
412
+ }
413
+ return result;
414
+ }
374
415
  function blocksToMarkdown(blocks) {
375
416
  const lines = [];
376
417
  for (let i = 0; i < blocks.length; i++) {
@@ -432,8 +473,11 @@ function blocksToMarkdown(blocks) {
432
473
  if (lines.length > 0 && lines[lines.length - 1] !== "") {
433
474
  lines.push("");
434
475
  }
435
- lines.push(tableToMarkdown(block.table));
436
- lines.push("");
476
+ const tableMd = tableToMarkdown(block.table);
477
+ if (tableMd) {
478
+ lines.push(tableMd);
479
+ lines.push("");
480
+ }
437
481
  }
438
482
  }
439
483
  return lines.join("\n").trim();
@@ -443,6 +487,7 @@ function tableToMarkdown(table) {
443
487
  const { cells, rows: numRows, cols: numCols } = table;
444
488
  if (numRows === 1 && numCols === 1) {
445
489
  const content = sanitizeText(cells[0][0].text);
490
+ if (!content) return "";
446
491
  return content.split(/\n/).map((line) => {
447
492
  const trimmed = line.trim();
448
493
  if (!trimmed) return "";
@@ -479,9 +524,9 @@ function tableToMarkdown(table) {
479
524
  const row = display[r];
480
525
  const isEmptyPlaceholder = row.every((cell) => cell === "");
481
526
  if (isEmptyPlaceholder) continue;
482
- const hasSkippedCols = row.some((cell, c) => cell === "" && skip.has(`${r},${c}`));
483
527
  const nonEmptyCols = row.filter((cell) => cell !== "");
484
- if (!hasSkippedCols && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
528
+ const hasSkipInRow = row.some((_, c) => skip.has(`${r},${c}`));
529
+ if (!hasSkipInRow && nonEmptyCols.length === 1 && row[0] !== "" && row.slice(1).every((c) => c === "")) {
485
530
  pendingFirstCol = row[0];
486
531
  continue;
487
532
  }
@@ -896,7 +941,8 @@ function detectHwpxHeadings(blocks, styleMap) {
896
941
  else if (ratio >= HEADING_RATIO_H2) level = 2;
897
942
  else if (ratio >= HEADING_RATIO_H3) level = 3;
898
943
  }
899
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
944
+ const compactText = text.replace(/\s+/g, "");
945
+ if (/^제\d+[조장절편]/.test(compactText) && text.length <= 50) {
900
946
  if (level === 0) level = 3;
901
947
  }
902
948
  if (level > 0) {
@@ -948,9 +994,14 @@ function walkSection(node, blocks, tableCtx, tableStack, styleMap, warnings, sec
948
994
  if (newTable.rows.length > 0) {
949
995
  if (tableStack.length > 0) {
950
996
  const parentTable = tableStack.pop();
951
- const nestedText = convertTableToText(newTable.rows);
952
- if (parentTable.cell) {
953
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
997
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
998
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
999
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
1000
+ } else {
1001
+ const nestedText = convertTableToText(newTable.rows);
1002
+ if (parentTable.cell) {
1003
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1004
+ }
954
1005
  }
955
1006
  tableCtx = parentTable;
956
1007
  } else {
@@ -1050,9 +1101,14 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1050
1101
  if (newTable.rows.length > 0) {
1051
1102
  if (tableStack.length > 0) {
1052
1103
  const parentTable = tableStack.pop();
1053
- const nestedText = convertTableToText(newTable.rows);
1054
- if (parentTable.cell) {
1055
- parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1104
+ const nestedCols = Math.max(...newTable.rows.map((r) => r.length));
1105
+ if (newTable.rows.length >= 3 && nestedCols >= 2) {
1106
+ blocks.push({ type: "table", table: buildTable(newTable.rows), pageNumber: sectionNum });
1107
+ } else {
1108
+ const nestedText = convertTableToText(newTable.rows);
1109
+ if (parentTable.cell) {
1110
+ parentTable.cell.text += (parentTable.cell.text ? "\n" : "") + nestedText;
1111
+ }
1056
1112
  }
1057
1113
  tableCtx = parentTable;
1058
1114
  } else {
@@ -1063,13 +1119,20 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1063
1119
  tableCtx = tableStack.length > 0 ? tableStack.pop() : null;
1064
1120
  }
1065
1121
  } else if (localTag === "pic" || localTag === "shape" || localTag === "drawingObject") {
1066
- const imgRef = extractImageRef(el);
1067
- if (imgRef) {
1068
- blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1069
- } else if (warnings && sectionNum) {
1070
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1122
+ const drawTextChild = findDescendant(el, "drawText");
1123
+ if (drawTextChild) {
1124
+ extractDrawTextBlocks(drawTextChild, blocks, styleMap, sectionNum);
1125
+ } else {
1126
+ const imgRef = extractImageRef(el);
1127
+ if (imgRef) {
1128
+ blocks.push({ type: "image", text: imgRef, pageNumber: sectionNum });
1129
+ } else if (warnings && sectionNum) {
1130
+ warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC694\uC18C: ${localTag}`, code: "SKIPPED_IMAGE" });
1131
+ }
1071
1132
  }
1072
- } else if (localTag === "r" || localTag === "run" || localTag === "ctrl") {
1133
+ } else if (localTag === "drawText") {
1134
+ extractDrawTextBlocks(el, blocks, styleMap, sectionNum);
1135
+ } else if (localTag === "r" || localTag === "run" || localTag === "ctrl" || localTag === "rect" || localTag === "ellipse" || localTag === "polygon" || localTag === "line" || localTag === "arc" || localTag === "curve" || localTag === "connectLine" || localTag === "container") {
1073
1136
  walkChildren(el, d + 1);
1074
1137
  }
1075
1138
  }
@@ -1077,6 +1140,40 @@ function walkParagraphChildren(node, blocks, tableCtx, tableStack, styleMap, war
1077
1140
  walkChildren(node, depth);
1078
1141
  return tableCtx;
1079
1142
  }
1143
+ function findDescendant(node, targetTag, depth = 0) {
1144
+ if (depth > 5) return null;
1145
+ const children = node.childNodes;
1146
+ if (!children) return null;
1147
+ for (let i = 0; i < children.length; i++) {
1148
+ const child = children[i];
1149
+ if (child.nodeType !== 1) continue;
1150
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1151
+ if (tag === targetTag) return child;
1152
+ const found = findDescendant(child, targetTag, depth + 1);
1153
+ if (found) return found;
1154
+ }
1155
+ return null;
1156
+ }
1157
+ function extractDrawTextBlocks(drawTextNode, blocks, styleMap, sectionNum) {
1158
+ const children = drawTextNode.childNodes;
1159
+ if (!children) return;
1160
+ for (let i = 0; i < children.length; i++) {
1161
+ const child = children[i];
1162
+ if (child.nodeType !== 1) continue;
1163
+ const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1164
+ if (tag === "subList" || tag === "p" || tag === "para") {
1165
+ if (tag === "subList") {
1166
+ extractDrawTextBlocks(child, blocks, styleMap, sectionNum);
1167
+ } else {
1168
+ const info = extractParagraphInfo(child, styleMap);
1169
+ const text = info.text.trim();
1170
+ if (text) {
1171
+ blocks.push({ type: "paragraph", text, style: info.style ?? void 0, pageNumber: sectionNum });
1172
+ }
1173
+ }
1174
+ }
1175
+ }
1176
+ }
1080
1177
  function extractParagraphInfo(para, styleMap) {
1081
1178
  let text = "";
1082
1179
  let href;
@@ -1095,11 +1192,18 @@ function extractParagraphInfo(para, styleMap) {
1095
1192
  const tag = (child.tagName || child.localName || "").replace(/^[^:]+:/, "");
1096
1193
  switch (tag) {
1097
1194
  case "t":
1098
- text += child.textContent || "";
1195
+ walk(child);
1099
1196
  break;
1100
- case "tab":
1101
- text += " ";
1197
+ // 자식 순회 (tab 등 하위 요소 처리)
1198
+ case "tab": {
1199
+ const leader = child.getAttribute("leader");
1200
+ if (leader && leader !== "0") {
1201
+ text += "";
1202
+ } else {
1203
+ text += " ";
1204
+ }
1102
1205
  break;
1206
+ }
1103
1207
  case "br":
1104
1208
  if ((child.getAttribute("type") || "line") === "line") text += "\n";
1105
1209
  break;
@@ -1166,6 +1270,8 @@ function extractParagraphInfo(para, styleMap) {
1166
1270
  }
1167
1271
  };
1168
1272
  walk(para);
1273
+ const leaderIdx = text.indexOf("");
1274
+ if (leaderIdx >= 0) text = text.substring(0, leaderIdx);
1169
1275
  let cleanText = text.replace(/[ \t]+/g, " ").trim();
1170
1276
  if (/^그림입니다\.?\s*원본\s*그림의\s*(이름|크기)/.test(cleanText)) cleanText = "";
1171
1277
  cleanText = cleanText.replace(/그림입니다\.?\s*원본\s*그림의\s*(이름|크기)[^\n]*(\n[^\n]*원본\s*그림의\s*(이름|크기)[^\n]*)*/g, "").trim();
@@ -1204,8 +1310,9 @@ var TAG_CHAR_SHAPE = 68;
1204
1310
  var TAG_CTRL_HEADER = 71;
1205
1311
  var TAG_LIST_HEADER = 72;
1206
1312
  var TAG_TABLE = 77;
1207
- var TAG_DOC_CHAR_SHAPE = 55;
1208
- var TAG_DOC_STYLE = 58;
1313
+ var TAG_DOC_CHAR_SHAPE = 21;
1314
+ var TAG_DOC_PARA_SHAPE = 25;
1315
+ var TAG_DOC_STYLE = 26;
1209
1316
  var CHAR_LINE = 0;
1210
1317
  var CHAR_SECTION_BREAK = 10;
1211
1318
  var CHAR_PARA = 13;
@@ -1261,8 +1368,14 @@ function parseFileHeader(data) {
1261
1368
  }
1262
1369
  function parseDocInfo(records) {
1263
1370
  const charShapes = [];
1371
+ const paraShapes = [];
1264
1372
  const styles = [];
1265
1373
  for (const rec of records) {
1374
+ if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
1375
+ const flags = rec.data.readUInt32LE(0);
1376
+ const outlineLevel = flags >> 25 & 7;
1377
+ paraShapes.push({ outlineLevel });
1378
+ }
1266
1379
  if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
1267
1380
  if (rec.data.length >= 50) {
1268
1381
  const fontSize = rec.data.readUInt32LE(42);
@@ -1302,7 +1415,7 @@ function parseDocInfo(records) {
1302
1415
  }
1303
1416
  }
1304
1417
  }
1305
- return { charShapes, styles };
1418
+ return { charShapes, paraShapes, styles };
1306
1419
  }
1307
1420
  function extractText(data) {
1308
1421
  let result = "";
@@ -2314,12 +2427,13 @@ function parseHwp5Document(buffer, options) {
2314
2427
  }
2315
2428
  }
2316
2429
  const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
2430
+ const flatBlocks = flattenLayoutTables(blocks);
2317
2431
  if (docInfo) {
2318
- detectHwp5Headings(blocks, docInfo);
2432
+ detectHwp5Headings(flatBlocks, docInfo);
2319
2433
  }
2320
- const outline = blocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2321
- const markdown = blocksToMarkdown(blocks);
2322
- return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2434
+ const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
2435
+ const markdown = blocksToMarkdown(flatBlocks);
2436
+ return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
2323
2437
  }
2324
2438
  function parseDocInfoStream(cfb, compressed) {
2325
2439
  try {
@@ -2370,16 +2484,21 @@ function detectHwp5Headings(blocks, docInfo) {
2370
2484
  }
2371
2485
  if (baseFontSize <= 0) return;
2372
2486
  for (const block of blocks) {
2373
- if (block.type !== "paragraph" || !block.text || !block.style?.fontSize) continue;
2487
+ if (block.type === "heading") continue;
2488
+ if (block.type !== "paragraph" || !block.text) continue;
2374
2489
  const text = block.text.trim();
2375
2490
  if (text.length === 0 || text.length > 200) continue;
2376
2491
  if (/^\d+$/.test(text)) continue;
2377
- const ratio = block.style.fontSize / baseFontSize;
2378
2492
  let level = 0;
2379
- if (ratio >= HEADING_RATIO_H1) level = 1;
2380
- else if (ratio >= HEADING_RATIO_H2) level = 2;
2381
- else if (ratio >= HEADING_RATIO_H3) level = 3;
2382
- if (/^제\d+[조장절편]/.test(text) && text.length <= 50) {
2493
+ if (block.style?.fontSize && baseFontSize > 0) {
2494
+ const ratio = block.style.fontSize / baseFontSize;
2495
+ if (ratio >= HEADING_RATIO_H1) level = 1;
2496
+ else if (ratio >= HEADING_RATIO_H2) level = 2;
2497
+ else if (ratio >= HEADING_RATIO_H3) level = 3;
2498
+ }
2499
+ if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
2500
+ if (level === 0) level = 2;
2501
+ } else if (/^제\d+(조의?\d*)\s*[\((]/.test(text) && text.length <= 80) {
2383
2502
  if (level === 0) level = 3;
2384
2503
  }
2385
2504
  if (level > 0) {
@@ -2611,13 +2730,20 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2611
2730
  while (i < records.length) {
2612
2731
  const rec = records[i];
2613
2732
  if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
2614
- const { paragraph, tables, nextIdx, charShapeIds } = parseParagraphWithTables(records, i);
2733
+ const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
2615
2734
  if (paragraph) {
2616
2735
  const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
2617
2736
  if (docInfo && charShapeIds.length > 0) {
2618
2737
  const style = resolveCharStyle(charShapeIds, docInfo);
2619
2738
  if (style) block.style = style;
2620
2739
  }
2740
+ if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
2741
+ const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
2742
+ if (ol >= 1 && ol <= 6) {
2743
+ block.type = "heading";
2744
+ block.level = ol;
2745
+ }
2746
+ }
2621
2747
  blocks.push(block);
2622
2748
  }
2623
2749
  for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
@@ -2637,7 +2763,10 @@ function parseSection(records, docInfo, warnings, sectionNum) {
2637
2763
  if (binId >= 0) {
2638
2764
  blocks.push({ type: "image", text: String(binId), pageNumber: sectionNum });
2639
2765
  } else {
2640
- warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
2766
+ const boxText = extractTextBoxText(records, i);
2767
+ if (boxText) {
2768
+ blocks.push({ type: "paragraph", text: boxText, pageNumber: sectionNum });
2769
+ }
2641
2770
  }
2642
2771
  } else if (ctrlId === " elo" || ctrlId === "ole ") {
2643
2772
  warnings.push({ page: sectionNum, message: `\uC2A4\uD0B5\uB41C \uC81C\uC5B4 \uC694\uC18C: ${ctrlId.trim()}`, code: "SKIPPED_IMAGE" });
@@ -2676,6 +2805,19 @@ function extractNoteText(records, ctrlIdx) {
2676
2805
  }
2677
2806
  return texts.length > 0 ? texts.join(" ") : null;
2678
2807
  }
2808
+ function extractTextBoxText(records, ctrlIdx) {
2809
+ const ctrlLevel = records[ctrlIdx].level;
2810
+ const texts = [];
2811
+ for (let j = ctrlIdx + 1; j < records.length && j < ctrlIdx + 200; j++) {
2812
+ const r = records[j];
2813
+ if (r.level <= ctrlLevel) break;
2814
+ if (r.tagId === TAG_PARA_TEXT) {
2815
+ const t = extractText(r.data).trim();
2816
+ if (t) texts.push(t);
2817
+ }
2818
+ }
2819
+ return texts.length > 0 ? texts.join("\n") : null;
2820
+ }
2679
2821
  function extractHyperlinkUrl(data) {
2680
2822
  try {
2681
2823
  const httpSig = Buffer.from("http", "utf16le");
@@ -2721,6 +2863,8 @@ function parseParagraphWithTables(records, startIdx) {
2721
2863
  let text = "";
2722
2864
  const tables = [];
2723
2865
  const charShapeIds = [];
2866
+ const paraHeaderData = records[startIdx].data;
2867
+ const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
2724
2868
  let i = startIdx + 1;
2725
2869
  while (i < records.length) {
2726
2870
  const rec = records[i];
@@ -2745,7 +2889,7 @@ function parseParagraphWithTables(records, startIdx) {
2745
2889
  i++;
2746
2890
  }
2747
2891
  const trimmed = text.trim();
2748
- return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds };
2892
+ return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
2749
2893
  }
2750
2894
  function parseTableBlock(records, startIdx) {
2751
2895
  const tableLevel = records[startIdx].level;