kordoc 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +291 -291
- package/dist/{chunk-EVWOJ4T5.js → chunk-25TXW6EP.js} +2 -2
- package/dist/chunk-25TXW6EP.js.map +1 -0
- package/dist/{chunk-MOL7MDBG.js → chunk-3TBUDJDE.js} +1 -1
- package/dist/chunk-3TBUDJDE.js.map +1 -0
- package/dist/{chunk-XJYM2AUA.js → chunk-4UH6ABAY.js} +83 -20
- package/dist/chunk-4UH6ABAY.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +79 -16
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +79 -16
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +3 -3
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-OF5I4PQY.js +8 -0
- package/dist/{provider-A4FHJSID.js → provider-EU3CG724.js} +1 -1
- package/dist/provider-EU3CG724.js.map +1 -0
- package/dist/{utils-6JEIFBCJ.js → utils-BTZ4WSYX.js} +2 -2
- package/dist/{watch-BCPDLGOE.js → watch-QD3PDNXQ.js} +4 -4
- package/dist/watch-QD3PDNXQ.js.map +1 -0
- package/package.json +1 -1
- package/dist/chunk-EVWOJ4T5.js.map +0 -1
- package/dist/chunk-MOL7MDBG.js.map +0 -1
- package/dist/chunk-XJYM2AUA.js.map +0 -1
- package/dist/page-range-737B4EZW.js +0 -8
- package/dist/provider-A4FHJSID.js.map +0 -1
- package/dist/watch-BCPDLGOE.js.map +0 -1
- /package/dist/{page-range-737B4EZW.js.map → page-range-OF5I4PQY.js.map} +0 -0
- /package/dist/{utils-6JEIFBCJ.js.map → utils-BTZ4WSYX.js.map} +0 -0
package/dist/cli.js
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
import {
|
|
3
3
|
detectFormat,
|
|
4
4
|
parse
|
|
5
|
-
} from "./chunk-XJYM2AUA.js";
|
|
5
|
+
} from "./chunk-4UH6ABAY.js";
|
|
6
6
|
import {
|
|
7
7
|
VERSION,
|
|
8
8
|
toArrayBuffer
|
|
9
|
-
} from "./chunk-EVWOJ4T5.js";
|
|
10
|
-
import "./chunk-MOL7MDBG.js";
|
|
9
|
+
} from "./chunk-25TXW6EP.js";
|
|
10
|
+
import "./chunk-3TBUDJDE.js";
|
|
11
11
|
|
|
12
12
|
// src/cli.ts
|
|
13
13
|
import { readFileSync, writeFileSync, mkdirSync, statSync } from "fs";
|
|
@@ -92,7 +92,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
|
|
|
92
92
|
process.stdout.write(output + "\n");
|
|
93
93
|
}
|
|
94
94
|
} catch (err) {
|
|
95
|
-
const { sanitizeError } = await import("./utils-6JEIFBCJ.js");
|
|
95
|
+
const { sanitizeError } = await import("./utils-BTZ4WSYX.js");
|
|
96
96
|
process.stderr.write(`
|
|
97
97
|
[kordoc] ERROR: ${fileName} \u2014 ${sanitizeError(err)}
|
|
98
98
|
`);
|
|
@@ -101,7 +101,7 @@ program.name("kordoc").description("\uBAA8\uB450 \uD30C\uC2F1\uD574\uBC84\uB9AC\
|
|
|
101
101
|
}
|
|
102
102
|
});
|
|
103
103
|
program.command("watch <dir>").description("\uB514\uB809\uD1A0\uB9AC \uAC10\uC2DC \u2014 \uC0C8 \uBB38\uC11C \uC790\uB3D9 \uBCC0\uD658").option("--webhook <url>", "\uACB0\uACFC \uC804\uC1A1 \uC6F9\uD6C5 URL").option("-d, --out-dir <dir>", "\uBCC0\uD658 \uACB0\uACFC \uCD9C\uB825 \uB514\uB809\uD1A0\uB9AC").option("-p, --pages <range>", "\uD398\uC774\uC9C0/\uC139\uC158 \uBC94\uC704").option("--format <type>", "\uCD9C\uB825 \uD615\uC2DD: markdown \uB610\uB294 json", "markdown").option("--silent", "\uC9C4\uD589 \uBA54\uC2DC\uC9C0 \uC228\uAE30\uAE30").action(async (dir, opts) => {
|
|
104
|
-
const { watchDirectory } = await import("./watch-BCPDLGOE.js");
|
|
104
|
+
const { watchDirectory } = await import("./watch-QD3PDNXQ.js");
|
|
105
105
|
await watchDirectory({
|
|
106
106
|
dir,
|
|
107
107
|
outDir: opts.outDir,
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\n\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\nimport { basename, resolve } from \"path\"\nimport { Command } from \"commander\"\nimport { parse, detectFormat } from \"./index.js\"\nimport type { ParseOptions } from \"./types.js\"\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\n\nconst program = new Command()\n\nprogram\n .name(\"kordoc\")\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\n .version(VERSION)\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (files: string[], opts) => {\n const validFormats = [\"markdown\", \"json\"]\n if (!validFormats.includes(opts.format)) {\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\n process.exit(1)\n }\n for (let fi = 0; fi < files.length; fi++) {\n const filePath = files[fi]\n const absPath = resolve(filePath)\n const fileName = basename(absPath)\n const filePrefix = files.length > 1 ? 
`[${fi + 1}/${files.length}] ` : \"\"\n\n try {\n const fileSize = statSync(absPath).size\n if (fileSize > 500 * 1024 * 1024) {\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\n process.exitCode = 1\n continue\n }\n const buffer = readFileSync(absPath)\n const arrayBuffer = toArrayBuffer(buffer)\n const format = detectFormat(arrayBuffer)\n\n if (!opts.silent) {\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\n }\n\n const parseOptions: ParseOptions = {}\n if (opts.pages) parseOptions.pages = opts.pages as string\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\n if (!opts.silent) {\n parseOptions.onProgress = (current: number, total: number) => {\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\n }\n }\n const result = await parse(arrayBuffer, parseOptions)\n\n if (!result.success) {\n process.stderr.write(` FAIL\\n`)\n process.stderr.write(` → ${result.error}\\n`)\n process.exitCode = 1\n continue\n }\n\n if (!opts.silent) process.stderr.write(` OK\\n`)\n\n let markdown = result.markdown\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\n if (opts.outDir && result.images?.length) {\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"\n }\n const output = opts.format === \"json\"\n ? 
JSON.stringify(result, null, 2)\n : markdown\n\n // 이미지 저장 (--out-dir 또는 --output 시)\n const saveImages = (dir: string) => {\n if (!result.images?.length) return\n const imgDir = resolve(dir, \"images\")\n mkdirSync(imgDir, { recursive: true })\n for (const img of result.images) {\n writeFileSync(resolve(imgDir, img.filename), img.data)\n }\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\n }\n\n if (opts.output && files.length === 1) {\n writeFileSync(opts.output, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\n saveImages(resolve(opts.output, \"..\"))\n } else if (opts.outDir) {\n mkdirSync(opts.outDir, { recursive: true })\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\n writeFileSync(outPath, output, \"utf-8\")\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\n saveImages(opts.outDir)\n } else {\n process.stdout.write(output + \"\\n\")\n }\n } catch (err) {\n const { sanitizeError } = await import(\"./utils.js\")\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\n process.exitCode = 1\n }\n }\n })\n\nprogram\n .command(\"watch <dir>\")\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\n .option(\"--silent\", \"진행 메시지 숨기기\")\n .action(async (dir: string, opts) => {\n const { watchDirectory } = await import(\"./watch.js\")\n await watchDirectory({\n dir,\n outDir: opts.outDir,\n webhook: opts.webhook,\n format: opts.format,\n pages: opts.pages,\n silent: opts.silent,\n })\n 
})\n\nprogram.parse()\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OA
AO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["/** kordoc CLI — 모두 파싱해버리겠다 */\r\n\r\nimport { readFileSync, writeFileSync, mkdirSync, statSync } from \"fs\"\r\nimport { basename, resolve } from \"path\"\r\nimport { Command } from \"commander\"\r\nimport { parse, detectFormat } from \"./index.js\"\r\nimport type { ParseOptions } from \"./types.js\"\r\nimport { VERSION, toArrayBuffer } from \"./utils.js\"\r\n\r\nconst program = new Command()\r\n\r\nprogram\r\n .name(\"kordoc\")\r\n .description(\"모두 파싱해버리겠다 — HWP, HWPX, PDF, XLSX, DOCX → Markdown\")\r\n .version(VERSION)\r\n .argument(\"<files...>\", \"변환할 파일 경로 (HWP, HWPX, PDF, XLSX, DOCX)\")\r\n .option(\"-o, --output <path>\", \"출력 파일 경로 (단일 파일 시)\")\r\n .option(\"-d, --out-dir <dir>\", \"출력 디렉토리 (다중 파일 시)\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위 (예: 1-3, 1,3,5)\")\r\n .option(\"--format <type>\", \"출력 형식: markdown (기본) 또는 json\", \"markdown\")\r\n .option(\"--no-header-footer\", \"PDF 머리글/바닥글 자동 제거\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (files: string[], opts) => {\r\n const validFormats = [\"markdown\", \"json\"]\r\n if (!validFormats.includes(opts.format)) {\r\n process.stderr.write(`[kordoc] 지원하지 않는 형식: ${opts.format} (markdown 또는 json)\\n`)\r\n process.exit(1)\r\n }\r\n for (let fi = 0; fi < files.length; fi++) {\r\n const filePath = files[fi]\r\n const absPath = resolve(filePath)\r\n const fileName = basename(absPath)\r\n const filePrefix = files.length > 1 ? 
`[${fi + 1}/${files.length}] ` : \"\"\r\n\r\n try {\r\n const fileSize = statSync(absPath).size\r\n if (fileSize > 500 * 1024 * 1024) {\r\n process.stderr.write(`\\n[kordoc] SKIP: ${fileName} — 파일이 너무 큽니다 (${(fileSize / 1024 / 1024).toFixed(1)}MB)\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n const buffer = readFileSync(absPath)\r\n const arrayBuffer = toArrayBuffer(buffer)\r\n const format = detectFormat(arrayBuffer)\r\n\r\n if (!opts.silent) {\r\n process.stderr.write(`[kordoc] ${filePrefix}${fileName} (${format}) ...`)\r\n }\r\n\r\n const parseOptions: ParseOptions = {}\r\n if (opts.pages) parseOptions.pages = opts.pages as string\r\n if (opts.headerFooter === false) parseOptions.removeHeaderFooter = false\r\n if (!opts.silent) {\r\n parseOptions.onProgress = (current: number, total: number) => {\r\n process.stderr.write(`\\r[kordoc] ${filePrefix}${fileName} (${format}) [${current}/${total}]`)\r\n }\r\n }\r\n const result = await parse(arrayBuffer, parseOptions)\r\n\r\n if (!result.success) {\r\n process.stderr.write(` FAIL\\n`)\r\n process.stderr.write(` → ${result.error}\\n`)\r\n process.exitCode = 1\r\n continue\r\n }\r\n\r\n if (!opts.silent) process.stderr.write(` OK\\n`)\r\n\r\n let markdown = result.markdown\r\n // --out-dir 시 이미지 참조 경로에 images/ 접두사 추가\r\n if (opts.outDir && result.images?.length) {\r\n markdown = markdown.replace(/!\\[image\\]\\(image_/g, \"\r\n }\r\n const output = opts.format === \"json\"\r\n ? 
JSON.stringify(result, null, 2)\r\n : markdown\r\n\r\n // 이미지 저장 (--out-dir 또는 --output 시)\r\n const saveImages = (dir: string) => {\r\n if (!result.images?.length) return\r\n const imgDir = resolve(dir, \"images\")\r\n mkdirSync(imgDir, { recursive: true })\r\n for (const img of result.images) {\r\n writeFileSync(resolve(imgDir, img.filename), img.data)\r\n }\r\n if (!opts.silent) process.stderr.write(` → ${result.images.length}개 이미지 → ${imgDir}\\n`)\r\n }\r\n\r\n if (opts.output && files.length === 1) {\r\n writeFileSync(opts.output, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${opts.output}\\n`)\r\n saveImages(resolve(opts.output, \"..\"))\r\n } else if (opts.outDir) {\r\n mkdirSync(opts.outDir, { recursive: true })\r\n const outExt = opts.format === \"json\" ? \".json\" : \".md\"\r\n const outPath = resolve(opts.outDir, fileName.replace(/\\.[^.]+$/, outExt))\r\n writeFileSync(outPath, output, \"utf-8\")\r\n if (!opts.silent) process.stderr.write(` → ${outPath}\\n`)\r\n saveImages(opts.outDir)\r\n } else {\r\n process.stdout.write(output + \"\\n\")\r\n }\r\n } catch (err) {\r\n const { sanitizeError } = await import(\"./utils.js\")\r\n process.stderr.write(`\\n[kordoc] ERROR: ${fileName} — ${sanitizeError(err)}\\n`)\r\n process.exitCode = 1\r\n }\r\n }\r\n })\r\n\r\nprogram\r\n .command(\"watch <dir>\")\r\n .description(\"디렉토리 감시 — 새 문서 자동 변환\")\r\n .option(\"--webhook <url>\", \"결과 전송 웹훅 URL\")\r\n .option(\"-d, --out-dir <dir>\", \"변환 결과 출력 디렉토리\")\r\n .option(\"-p, --pages <range>\", \"페이지/섹션 범위\")\r\n .option(\"--format <type>\", \"출력 형식: markdown 또는 json\", \"markdown\")\r\n .option(\"--silent\", \"진행 메시지 숨기기\")\r\n .action(async (dir: string, opts) => {\r\n const { watchDirectory } = await import(\"./watch.js\")\r\n await watchDirectory({\r\n dir,\r\n outDir: opts.outDir,\r\n webhook: opts.webhook,\r\n format: opts.format,\r\n pages: opts.pages,\r\n silent: opts.silent,\r\n })\r\n 
})\r\n\r\nprogram.parse()\r\n"],"mappings":";;;;;;;;;;;;AAEA,SAAS,cAAc,eAAe,WAAW,gBAAgB;AACjE,SAAS,UAAU,eAAe;AAClC,SAAS,eAAe;AAKxB,IAAM,UAAU,IAAI,QAAQ;AAE5B,QACG,KAAK,QAAQ,EACb,YAAY,2GAAoD,EAChE,QAAQ,OAAO,EACf,SAAS,cAAc,2EAAwC,EAC/D,OAAO,uBAAuB,2EAAoB,EAClD,OAAO,uBAAuB,0EAAmB,EACjD,OAAO,uBAAuB,mEAA2B,EACzD,OAAO,mBAAmB,wEAAgC,UAAU,EACpE,OAAO,sBAAsB,qEAAmB,EAChD,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,OAAiB,SAAS;AACvC,QAAM,eAAe,CAAC,YAAY,MAAM;AACxC,MAAI,CAAC,aAAa,SAAS,KAAK,MAAM,GAAG;AACvC,YAAQ,OAAO,MAAM,gEAAwB,KAAK,MAAM;AAAA,CAAuB;AAC/E,YAAQ,KAAK,CAAC;AAAA,EAChB;AACA,WAAS,KAAK,GAAG,KAAK,MAAM,QAAQ,MAAM;AACxC,UAAM,WAAW,MAAM,EAAE;AACzB,UAAM,UAAU,QAAQ,QAAQ;AAChC,UAAM,WAAW,SAAS,OAAO;AACjC,UAAM,aAAa,MAAM,SAAS,IAAI,IAAI,KAAK,CAAC,IAAI,MAAM,MAAM,OAAO;AAEvE,QAAI;AACF,YAAM,WAAW,SAAS,OAAO,EAAE;AACnC,UAAI,WAAW,MAAM,OAAO,MAAM;AAChC,gBAAQ,OAAO,MAAM;AAAA,iBAAoB,QAAQ,gEAAmB,WAAW,OAAO,MAAM,QAAQ,CAAC,CAAC;AAAA,CAAO;AAC7G,gBAAQ,WAAW;AACnB;AAAA,MACF;AACA,YAAM,SAAS,aAAa,OAAO;AACnC,YAAM,cAAc,cAAc,MAAM;AACxC,YAAM,SAAS,aAAa,WAAW;AAEvC,UAAI,CAAC,KAAK,QAAQ;AAChB,gBAAQ,OAAO,MAAM,YAAY,UAAU,GAAG,QAAQ,KAAK,MAAM,OAAO;AAAA,MAC1E;AAEA,YAAM,eAA6B,CAAC;AACpC,UAAI,KAAK,MAAO,cAAa,QAAQ,KAAK;AAC1C,UAAI,KAAK,iBAAiB,MAAO,cAAa,qBAAqB;AACnE,UAAI,CAAC,KAAK,QAAQ;AAChB,qBAAa,aAAa,CAAC,SAAiB,UAAkB;AAC5D,kBAAQ,OAAO,MAAM,cAAc,UAAU,GAAG,QAAQ,KAAK,MAAM,MAAM,OAAO,IAAI,KAAK,GAAG;AAAA,QAC9F;AAAA,MACF;AACA,YAAM,SAAS,MAAM,MAAM,aAAa,YAAY;AAEpD,UAAI,CAAC,OAAO,SAAS;AACnB,gBAAQ,OAAO,MAAM;AAAA,CAAS;AAC9B,gBAAQ,OAAO,MAAM,YAAO,OAAO,KAAK;AAAA,CAAI;AAC5C,gBAAQ,WAAW;AACnB;AAAA,MACF;AAEA,UAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM;AAAA,CAAO;AAE9C,UAAI,WAAW,OAAO;AAEtB,UAAI,KAAK,UAAU,OAAO,QAAQ,QAAQ;AACxC,mBAAW,SAAS,QAAQ,uBAAuB,wBAAwB;AAAA,MAC7E;AACA,YAAM,SAAS,KAAK,WAAW,SAC3B,KAAK,UAAU,QAAQ,MAAM,CAAC,IAC9B;AAGJ,YAAM,aAAa,CAAC,QAAgB;AAClC,YAAI,CAAC,OAAO,QAAQ,OAAQ;AAC5B,cAAM,SAAS,QAAQ,KAAK,QAAQ;AACpC,kBAAU,QAAQ,EAAE,WAAW,KAAK,CAAC;AACrC,mBAAW,OAAO,OAAO,QAAQ;AAC/B,wBAAc,QAAQ,QAAQ,IAAI,QAAQ,GAAG,IAAI,IAAI;AAAA,QACvD;AACA,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,Y
AAO,OAAO,OAAO,MAAM,oCAAW,MAAM;AAAA,CAAI;AAAA,MACzF;AAEA,UAAI,KAAK,UAAU,MAAM,WAAW,GAAG;AACrC,sBAAc,KAAK,QAAQ,QAAQ,OAAO;AAC1C,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,KAAK,MAAM;AAAA,CAAI;AAC7D,mBAAW,QAAQ,KAAK,QAAQ,IAAI,CAAC;AAAA,MACvC,WAAW,KAAK,QAAQ;AACtB,kBAAU,KAAK,QAAQ,EAAE,WAAW,KAAK,CAAC;AAC1C,cAAM,SAAS,KAAK,WAAW,SAAS,UAAU;AAClD,cAAM,UAAU,QAAQ,KAAK,QAAQ,SAAS,QAAQ,YAAY,MAAM,CAAC;AACzE,sBAAc,SAAS,QAAQ,OAAO;AACtC,YAAI,CAAC,KAAK,OAAQ,SAAQ,OAAO,MAAM,YAAO,OAAO;AAAA,CAAI;AACzD,mBAAW,KAAK,MAAM;AAAA,MACxB,OAAO;AACL,gBAAQ,OAAO,MAAM,SAAS,IAAI;AAAA,MACpC;AAAA,IACF,SAAS,KAAK;AACZ,YAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAY;AACnD,cAAQ,OAAO,MAAM;AAAA,kBAAqB,QAAQ,WAAM,cAAc,GAAG,CAAC;AAAA,CAAI;AAC9E,cAAQ,WAAW;AAAA,IACrB;AAAA,EACF;AACF,CAAC;AAEH,QACG,QAAQ,aAAa,EACrB,YAAY,4FAAsB,EAClC,OAAO,mBAAmB,4CAAc,EACxC,OAAO,uBAAuB,iEAAe,EAC7C,OAAO,uBAAuB,8CAAW,EACzC,OAAO,mBAAmB,yDAA2B,UAAU,EAC/D,OAAO,YAAY,oDAAY,EAC/B,OAAO,OAAO,KAAa,SAAS;AACnC,QAAM,EAAE,eAAe,IAAI,MAAM,OAAO,qBAAY;AACpD,QAAM,eAAe;AAAA,IACnB;AAAA,IACA,QAAQ,KAAK;AAAA,IACb,SAAS,KAAK;AAAA,IACd,QAAQ,KAAK;AAAA,IACb,OAAO,KAAK;AAAA,IACZ,QAAQ,KAAK;AAAA,EACf,CAAC;AACH,CAAC;AAEH,QAAQ,MAAM;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -182,7 +182,7 @@ var import_zlib = require("zlib");
|
|
|
182
182
|
var import_xmldom = require("@xmldom/xmldom");
|
|
183
183
|
|
|
184
184
|
// src/utils.ts
|
|
185
|
-
var VERSION = true ? "2.0.2" : "0.0.0-dev";
|
|
185
|
+
var VERSION = true ? "2.0.3" : "0.0.0-dev";
|
|
186
186
|
function toArrayBuffer(buf) {
|
|
187
187
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
188
188
|
return buf.buffer;
|
|
@@ -371,6 +371,47 @@ function sanitizeText(text) {
|
|
|
371
371
|
}
|
|
372
372
|
return result;
|
|
373
373
|
}
|
|
374
|
+
function flattenLayoutTables(blocks) {
|
|
375
|
+
const result = [];
|
|
376
|
+
for (const block of blocks) {
|
|
377
|
+
if (block.type !== "table" || !block.table) {
|
|
378
|
+
result.push(block);
|
|
379
|
+
continue;
|
|
380
|
+
}
|
|
381
|
+
const { rows: numRows, cols: numCols, cells } = block.table;
|
|
382
|
+
if (numRows === 1 && numCols === 1) {
|
|
383
|
+
result.push(block);
|
|
384
|
+
continue;
|
|
385
|
+
}
|
|
386
|
+
if (numRows <= 3) {
|
|
387
|
+
let totalNewlines = 0;
|
|
388
|
+
let totalTextLen = 0;
|
|
389
|
+
for (let r = 0; r < numRows; r++) {
|
|
390
|
+
for (let c = 0; c < numCols; c++) {
|
|
391
|
+
const t = cells[r]?.[c]?.text || "";
|
|
392
|
+
totalNewlines += (t.match(/\n/g) || []).length;
|
|
393
|
+
totalTextLen += t.length;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
if (totalNewlines > 5 || numRows <= 2 && totalTextLen > 300) {
|
|
397
|
+
for (let r = 0; r < numRows; r++) {
|
|
398
|
+
for (let c = 0; c < numCols; c++) {
|
|
399
|
+
const cellText = cells[r]?.[c]?.text?.trim();
|
|
400
|
+
if (!cellText) continue;
|
|
401
|
+
for (const line of cellText.split("\n")) {
|
|
402
|
+
const trimmed = line.trim();
|
|
403
|
+
if (!trimmed) continue;
|
|
404
|
+
result.push({ type: "paragraph", text: trimmed, pageNumber: block.pageNumber });
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
continue;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
result.push(block);
|
|
412
|
+
}
|
|
413
|
+
return result;
|
|
414
|
+
}
|
|
374
415
|
function blocksToMarkdown(blocks) {
|
|
375
416
|
const lines = [];
|
|
376
417
|
for (let i = 0; i < blocks.length; i++) {
|
|
@@ -1269,8 +1310,9 @@ var TAG_CHAR_SHAPE = 68;
|
|
|
1269
1310
|
var TAG_CTRL_HEADER = 71;
|
|
1270
1311
|
var TAG_LIST_HEADER = 72;
|
|
1271
1312
|
var TAG_TABLE = 77;
|
|
1272
|
-
var TAG_DOC_CHAR_SHAPE =
|
|
1273
|
-
var
|
|
1313
|
+
var TAG_DOC_CHAR_SHAPE = 21;
|
|
1314
|
+
var TAG_DOC_PARA_SHAPE = 25;
|
|
1315
|
+
var TAG_DOC_STYLE = 26;
|
|
1274
1316
|
var CHAR_LINE = 0;
|
|
1275
1317
|
var CHAR_SECTION_BREAK = 10;
|
|
1276
1318
|
var CHAR_PARA = 13;
|
|
@@ -1326,8 +1368,14 @@ function parseFileHeader(data) {
|
|
|
1326
1368
|
}
|
|
1327
1369
|
function parseDocInfo(records) {
|
|
1328
1370
|
const charShapes = [];
|
|
1371
|
+
const paraShapes = [];
|
|
1329
1372
|
const styles = [];
|
|
1330
1373
|
for (const rec of records) {
|
|
1374
|
+
if (rec.tagId === TAG_DOC_PARA_SHAPE && rec.data.length >= 4) {
|
|
1375
|
+
const flags = rec.data.readUInt32LE(0);
|
|
1376
|
+
const outlineLevel = flags >> 25 & 7;
|
|
1377
|
+
paraShapes.push({ outlineLevel });
|
|
1378
|
+
}
|
|
1331
1379
|
if (rec.tagId === TAG_DOC_CHAR_SHAPE && rec.data.length >= 18) {
|
|
1332
1380
|
if (rec.data.length >= 50) {
|
|
1333
1381
|
const fontSize = rec.data.readUInt32LE(42);
|
|
@@ -1367,7 +1415,7 @@ function parseDocInfo(records) {
|
|
|
1367
1415
|
}
|
|
1368
1416
|
}
|
|
1369
1417
|
}
|
|
1370
|
-
return { charShapes, styles };
|
|
1418
|
+
return { charShapes, paraShapes, styles };
|
|
1371
1419
|
}
|
|
1372
1420
|
function extractText(data) {
|
|
1373
1421
|
let result = "";
|
|
@@ -2379,12 +2427,13 @@ function parseHwp5Document(buffer, options) {
|
|
|
2379
2427
|
}
|
|
2380
2428
|
}
|
|
2381
2429
|
const images = cfb ? extractHwp5Images(cfb, blocks, compressed, warnings) : extractHwp5ImagesLenient(lenientCfb, blocks, compressed, warnings);
|
|
2430
|
+
const flatBlocks = flattenLayoutTables(blocks);
|
|
2382
2431
|
if (docInfo) {
|
|
2383
|
-
detectHwp5Headings(blocks, docInfo);
|
|
2432
|
+
detectHwp5Headings(flatBlocks, docInfo);
|
|
2384
2433
|
}
|
|
2385
|
-
const outline =
|
|
2386
|
-
const markdown = blocksToMarkdown(blocks);
|
|
2387
|
-
return { markdown, blocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2434
|
+
const outline = flatBlocks.filter((b) => b.type === "heading" && b.level && b.text).map((b) => ({ level: b.level, text: b.text, pageNumber: b.pageNumber }));
|
|
2435
|
+
const markdown = blocksToMarkdown(flatBlocks);
|
|
2436
|
+
return { markdown, blocks: flatBlocks, metadata, outline: outline.length > 0 ? outline : void 0, warnings: warnings.length > 0 ? warnings : void 0, images: images.length > 0 ? images : void 0 };
|
|
2388
2437
|
}
|
|
2389
2438
|
function parseDocInfoStream(cfb, compressed) {
|
|
2390
2439
|
try {
|
|
@@ -2435,16 +2484,21 @@ function detectHwp5Headings(blocks, docInfo) {
|
|
|
2435
2484
|
}
|
|
2436
2485
|
if (baseFontSize <= 0) return;
|
|
2437
2486
|
for (const block of blocks) {
|
|
2438
|
-
if (block.type
|
|
2487
|
+
if (block.type === "heading") continue;
|
|
2488
|
+
if (block.type !== "paragraph" || !block.text) continue;
|
|
2439
2489
|
const text = block.text.trim();
|
|
2440
2490
|
if (text.length === 0 || text.length > 200) continue;
|
|
2441
2491
|
if (/^\d+$/.test(text)) continue;
|
|
2442
|
-
const ratio = block.style.fontSize / baseFontSize;
|
|
2443
2492
|
let level = 0;
|
|
2444
|
-
if (
|
|
2445
|
-
|
|
2446
|
-
|
|
2447
|
-
|
|
2493
|
+
if (block.style?.fontSize && baseFontSize > 0) {
|
|
2494
|
+
const ratio = block.style.fontSize / baseFontSize;
|
|
2495
|
+
if (ratio >= HEADING_RATIO_H1) level = 1;
|
|
2496
|
+
else if (ratio >= HEADING_RATIO_H2) level = 2;
|
|
2497
|
+
else if (ratio >= HEADING_RATIO_H3) level = 3;
|
|
2498
|
+
}
|
|
2499
|
+
if (/^제\d+[장절편]\s/.test(text) && text.length <= 50) {
|
|
2500
|
+
if (level === 0) level = 2;
|
|
2501
|
+
} else if (/^제\d+(조의?\d*)\s*[\((]/.test(text) && text.length <= 80) {
|
|
2448
2502
|
if (level === 0) level = 3;
|
|
2449
2503
|
}
|
|
2450
2504
|
if (level > 0) {
|
|
@@ -2676,13 +2730,20 @@ function parseSection(records, docInfo, warnings, sectionNum) {
|
|
|
2676
2730
|
while (i < records.length) {
|
|
2677
2731
|
const rec = records[i];
|
|
2678
2732
|
if (rec.tagId === TAG_PARA_HEADER && rec.level === 0) {
|
|
2679
|
-
const { paragraph, tables, nextIdx, charShapeIds } = parseParagraphWithTables(records, i);
|
|
2733
|
+
const { paragraph, tables, nextIdx, charShapeIds, paraShapeId } = parseParagraphWithTables(records, i);
|
|
2680
2734
|
if (paragraph) {
|
|
2681
2735
|
const block = { type: "paragraph", text: paragraph, pageNumber: sectionNum };
|
|
2682
2736
|
if (docInfo && charShapeIds.length > 0) {
|
|
2683
2737
|
const style = resolveCharStyle(charShapeIds, docInfo);
|
|
2684
2738
|
if (style) block.style = style;
|
|
2685
2739
|
}
|
|
2740
|
+
if (docInfo && paraShapeId >= 0 && paraShapeId < docInfo.paraShapes.length) {
|
|
2741
|
+
const ol = docInfo.paraShapes[paraShapeId].outlineLevel;
|
|
2742
|
+
if (ol >= 1 && ol <= 6) {
|
|
2743
|
+
block.type = "heading";
|
|
2744
|
+
block.level = ol;
|
|
2745
|
+
}
|
|
2746
|
+
}
|
|
2686
2747
|
blocks.push(block);
|
|
2687
2748
|
}
|
|
2688
2749
|
for (const t of tables) blocks.push({ type: "table", table: t, pageNumber: sectionNum });
|
|
@@ -2802,6 +2863,8 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
2802
2863
|
let text = "";
|
|
2803
2864
|
const tables = [];
|
|
2804
2865
|
const charShapeIds = [];
|
|
2866
|
+
const paraHeaderData = records[startIdx].data;
|
|
2867
|
+
const paraShapeId = paraHeaderData.length >= 10 ? paraHeaderData.readUInt16LE(8) : -1;
|
|
2805
2868
|
let i = startIdx + 1;
|
|
2806
2869
|
while (i < records.length) {
|
|
2807
2870
|
const rec = records[i];
|
|
@@ -2826,7 +2889,7 @@ function parseParagraphWithTables(records, startIdx) {
|
|
|
2826
2889
|
i++;
|
|
2827
2890
|
}
|
|
2828
2891
|
const trimmed = text.trim();
|
|
2829
|
-
return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds };
|
|
2892
|
+
return { paragraph: trimmed || null, tables, nextIdx: i, charShapeIds, paraShapeId };
|
|
2830
2893
|
}
|
|
2831
2894
|
function parseTableBlock(records, startIdx) {
|
|
2832
2895
|
const tableLevel = records[startIdx].level;
|