@clazic/kordoc 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/{chunk-5CILZHRW.js → chunk-TND4YFBV.js} +2 -2
- package/dist/{chunk-25ZYYLVP.js → chunk-TS3F57LY.js} +158 -6
- package/dist/chunk-TS3F57LY.js.map +1 -0
- package/dist/cli.js +52 -5
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +333 -135
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -2
- package/dist/index.d.ts +71 -2
- package/dist/index.js +320 -125
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +43 -2
- package/dist/mcp.js.map +1 -1
- package/dist/{utils-H2BL5GNR.js → utils-F66K7PXH.js} +2 -2
- package/dist/{watch-D6ODQLPJ.js → watch-2S5ULHAM.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-25ZYYLVP.js.map +0 -1
- /package/dist/{chunk-5CILZHRW.js.map → chunk-TND4YFBV.js.map} +0 -0
- /package/dist/{utils-H2BL5GNR.js.map → utils-F66K7PXH.js.map} +0 -0
- /package/dist/{watch-D6ODQLPJ.js.map → watch-2S5ULHAM.js.map} +0 -0
package/README.md
CHANGED
|
@@ -126,6 +126,25 @@ if (result.success) {
|
|
|
126
126
|
}
|
|
127
127
|
```
|
|
128
128
|
|
|
129
|
+
### PDF 변환 (HWP/HWPX → PDF)
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
import { convertToPdf } from "@clazic/kordoc"
|
|
133
|
+
import { readFileSync, writeFileSync } from "fs"
|
|
134
|
+
|
|
135
|
+
const buffer = readFileSync("사업계획서.hwp")
|
|
136
|
+
const result = await convertToPdf(buffer)
|
|
137
|
+
|
|
138
|
+
if (result.success) {
|
|
139
|
+
writeFileSync("사업계획서.pdf", result.pdf)
|
|
140
|
+
console.log(`변환 완료: ${result.sourceFormat} → PDF`)
|
|
141
|
+
} else {
|
|
142
|
+
console.error(`변환 실패: ${result.error}`)
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
**요구사항**: 시스템에 [LibreOffice](https://www.libreoffice.org/)가 설치되어 있어야 합니다.
|
|
147
|
+
|
|
129
148
|
### 문서 비교 (신구대조표)
|
|
130
149
|
|
|
131
150
|
```typescript
|
|
@@ -272,6 +291,11 @@ kordoc convert 보고서.md -o 최종보고서.hwpx # 출력 경로 지정
|
|
|
272
291
|
kordoc convert 보고서.md --image-dir ./이미지 # 이미지 폴더 지정
|
|
273
292
|
kordoc convert 보고서.md --template 기본.hwpx # HWPX 템플릿 적용
|
|
274
293
|
|
|
294
|
+
# HWP/HWPX → PDF 변환
|
|
295
|
+
kordoc convert-pdf 사업계획서.hwp # → 사업계획서.pdf
|
|
296
|
+
kordoc convert-pdf 보고서.hwpx -o 결과.pdf # 출력 경로 지정
|
|
297
|
+
kordoc convert-pdf 문서.hwp --pages 1-3 # 페이지 범위
|
|
298
|
+
|
|
275
299
|
# 폴더 감시 모드
|
|
276
300
|
kordoc watch ./수신함 -d ./변환결과 # 폴더 감시 모드
|
|
277
301
|
kordoc watch ./문서 --webhook https://api/hook # 웹훅 알림
|
|
@@ -381,12 +405,13 @@ HWP 5.x를 제외한 모든 포맷 지원. 설치 없이 바로 연결 가능합
|
|
|
381
405
|
}
|
|
382
406
|
```
|
|
383
407
|
|
|
384
|
-
**
|
|
408
|
+
**9개 도구:**
|
|
385
409
|
|
|
386
410
|
| 도구 | 설명 |
|
|
387
411
|
|------|------|
|
|
388
412
|
| `parse_document` | HWP/HWPX/PDF/XLSX/DOCX → 마크다운 (메타데이터·이미지 포함, `image_dir` 지원) |
|
|
389
413
|
| `convert_document` | Markdown → HWPX 또는 XLSX 변환 (이미지 폴더·템플릿 지원) |
|
|
414
|
+
| `convert_to_pdf` | HWP/HWPX → PDF 변환 (페이지 범위·타임아웃 지원) |
|
|
390
415
|
| `detect_format` | 매직 바이트로 포맷 감지 |
|
|
391
416
|
| `parse_metadata` | 메타데이터만 빠르게 추출 |
|
|
392
417
|
| `parse_pages` | 특정 페이지 범위만 파싱 |
|
|
@@ -406,6 +431,7 @@ HWP 5.x를 제외한 모든 포맷 지원. 설치 없이 바로 연결 가능합
|
|
|
406
431
|
| `parsePdf(buffer, options?)` | PDF 전용 |
|
|
407
432
|
| `parseXlsx(buffer, options?)` | XLSX 전용 |
|
|
408
433
|
| `parseDocx(buffer, options?)` | DOCX 전용 |
|
|
434
|
+
| `convertToPdf(input, options?)` | HWP/HWPX → PDF 변환 |
|
|
409
435
|
| `detectFormat(buffer)` | `"hwpx" \| "hwp" \| "pdf" \| "xlsx" \| "docx" \| "unknown"` |
|
|
410
436
|
|
|
411
437
|
### 고급 함수
|
|
@@ -429,6 +455,8 @@ import type {
|
|
|
429
455
|
FormField, FormResult,
|
|
430
456
|
OcrProvider, WatchOptions,
|
|
431
457
|
MarkdownToXlsxOptions,
|
|
458
|
+
// PDF 변환 타입
|
|
459
|
+
ConvertToPdfOptions, ConvertToPdfResult,
|
|
432
460
|
} from "@clazic/kordoc"
|
|
433
461
|
```
|
|
434
462
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/utils.ts
|
|
4
|
-
var VERSION = true ? "2.5.
|
|
4
|
+
var VERSION = true ? "2.5.2" : "0.0.0-dev";
|
|
5
5
|
function toArrayBuffer(buf) {
|
|
6
6
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
7
7
|
return buf.buffer;
|
|
@@ -105,4 +105,4 @@ export {
|
|
|
105
105
|
classifyError,
|
|
106
106
|
normalizeKordocError
|
|
107
107
|
};
|
|
108
|
-
//# sourceMappingURL=chunk-
|
|
108
|
+
//# sourceMappingURL=chunk-TND4YFBV.js.map
|
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
precheckZipSize,
|
|
8
8
|
sanitizeHref,
|
|
9
9
|
toArrayBuffer
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-TND4YFBV.js";
|
|
11
11
|
import {
|
|
12
12
|
parsePageRange
|
|
13
13
|
} from "./chunk-MOL7MDBG.js";
|
|
@@ -6686,7 +6686,7 @@ function mergeKoreanLines(text) {
|
|
|
6686
6686
|
}
|
|
6687
6687
|
|
|
6688
6688
|
// src/index.ts
|
|
6689
|
-
import { readFile } from "fs/promises";
|
|
6689
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
6690
6690
|
|
|
6691
6691
|
// src/xlsx/parser.ts
|
|
6692
6692
|
import JSZip3 from "jszip";
|
|
@@ -9821,10 +9821,161 @@ async function markdownToXlsx(markdown, options) {
|
|
|
9821
9821
|
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
|
|
9822
9822
|
}
|
|
9823
9823
|
|
|
9824
|
-
// src/
|
|
9825
|
-
import {
|
|
9824
|
+
// src/convert/index.ts
|
|
9825
|
+
import { readFile } from "fs/promises";
|
|
9826
|
+
|
|
9827
|
+
// src/convert/libreoffice.ts
|
|
9826
9828
|
import libre from "libreoffice-convert";
|
|
9829
|
+
|
|
9830
|
+
// src/convert/error.ts
|
|
9831
|
+
/**
 * Error type used by the PDF conversion code.
 *
 * Pairs the human-readable message with a machine-readable `code` so callers
 * can branch on the failure category without parsing the message text.
 */
var ConvertError = class extends Error {
  /**
   * @param {string} code - Failure category; this file passes
   *   "SOFFICE_NOT_FOUND", "TIMEOUT" and "CONVERT_FAILED".
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(code, message) {
    super(message);
    // `code` is listed before `name` so the own-property order is unchanged.
    Object.assign(this, { code, name: "ConvertError" });
  }
};
|
|
9838
|
+
|
|
9839
|
+
// src/convert/libreoffice.ts
|
|
9827
9840
|
var libreConvert = libre.convert;
|
|
9841
|
+
/**
 * Checks that the `soffice` command can be run before a conversion starts.
 *
 * Lazily loads `runCommand` from the utils chunk and calls it with
 * `soffice --version`. Any failure of that call is reported as a
 * `ConvertError` with code "SOFFICE_NOT_FOUND". A failure of the dynamic
 * import itself is not translated and propagates as-is.
 *
 * @returns {Promise<void>} Resolves when the command call succeeds.
 * @throws {ConvertError} When the command call fails.
 */
async function assertSofficeAvailable() {
  const { runCommand: probe } = await import("./utils-F66K7PXH.js");
  let launched = true;
  try {
    await probe("soffice", ["--version"]);
  } catch {
    launched = false;
  }
  if (launched) {
    return;
  }
  throw new ConvertError(
    "SOFFICE_NOT_FOUND",
    "soffice\uB97C \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4. LibreOffice\uB97C \uC124\uCE58\uD574 \uC8FC\uC138\uC694."
  );
}
|
|
9852
|
+
/**
 * Converts a document buffer through LibreOffice with a time limit.
 *
 * Wraps the callback-style `libreConvert` in a promise and races it against a
 * timer. The timer is cleared on every path where `libreConvert` reports back
 * or fails to start, so it cannot keep the event loop alive afterwards.
 *
 * @param {Buffer} buffer - Source document bytes handed to `libreConvert`.
 * @param {string} targetExt - Target extension passed to `libreConvert`
 *   (this file calls it with ".pdf").
 * @param {number} [timeoutMs=60000] - Milliseconds to wait before rejecting.
 * @returns {Promise<*>} Resolves with the value `libreConvert` passes as the
 *   second callback argument (presumably a Buffer - confirm against the
 *   libreoffice-convert typings).
 * @throws {ConvertError} "TIMEOUT" when the limit elapses first;
 *   "CONVERT_FAILED" when the callback reports an error or no output.
 *   An error thrown synchronously by `libreConvert` is re-raised unchanged.
 */
async function convertBuffer(buffer, targetExt, timeoutMs = 6e4) {
  return new Promise((resolve2, reject) => {
    const timer = setTimeout(() => {
      // NOTE(review): the conversion itself is not cancelled here; only the
      // promise is settled. A late callback becomes a harmless no-op.
      reject(
        new ConvertError("TIMEOUT", `\uBCC0\uD658 \uD0C0\uC784\uC544\uC6C3 (${timeoutMs}ms \uCD08\uACFC)`)
      );
    }, timeoutMs);
    try {
      libreConvert(buffer, targetExt, void 0, (err, done) => {
        clearTimeout(timer);
        if (err || !done) {
          reject(
            new ConvertError(
              "CONVERT_FAILED",
              err?.message ?? "LibreOffice \uBCC0\uD658 \uC2E4\uD328"
            )
          );
          return;
        }
        resolve2(done);
      });
    } catch (err) {
      // A synchronous throw never reaches the callback above, so the timer
      // must be released here; otherwise it stays pending for `timeoutMs`.
      clearTimeout(timer);
      reject(err);
    }
  });
}
|
|
9874
|
+
|
|
9875
|
+
// src/convert/index.ts
|
|
9876
|
+
// Module-level mutex state: at most one conversion holds the lock at a time.
var isConverting = false;
// FIFO of thunks; each one hands the lock to a waiting caller when invoked.
var queue = [];

/**
 * Builds the release callback for one lock holder.
 *
 * The callback is one-shot: only the first call frees the lock and wakes the
 * next waiter. Later calls are ignored, so an accidental double release
 * cannot let two waiters hold the lock at the same time.
 *
 * @returns {() => void} Release function for the current holder.
 */
function createConvertRelease() {
  let released = false;
  return () => {
    if (released) {
      return;
    }
    released = true;
    isConverting = false;
    const next = queue.shift();
    // The thunk re-takes the lock synchronously for the next waiter.
    next?.();
  };
}

/**
 * Acquires the conversion lock, waiting in FIFO order if it is already held.
 *
 * @returns {Promise<() => void>} Resolves with the release function once the
 *   caller owns the lock. The caller must invoke it exactly once when done
 *   (extra calls are harmless no-ops).
 */
async function acquireConvertLock() {
  if (!isConverting) {
    isConverting = true;
    return createConvertRelease();
  }
  return new Promise((resolve2) => {
    queue.push(() => {
      isConverting = true;
      resolve2(createConvertRelease());
    });
  });
}
|
|
9898
|
+
/**
 * Converts an HWP or HWPX document to PDF.
 *
 * Every failure is reported through the returned result object
 * (`success: false` plus `code`, `error` and `stage`) rather than by
 * rejecting, including unexpected errors raised while checking for soffice.
 *
 * Steps: read the input, enforce the 500MB size limit, detect the format,
 * check that soffice is available, then convert while holding the
 * module-wide conversion lock so conversions run one at a time.
 *
 * @param {string|Buffer|ArrayBuffer|Uint8Array} input - A file path (string)
 *   or the document bytes. Non-Buffer values are passed to `Buffer.from`.
 * @param {object} [options]
 * @param {number} [options.timeoutMs] - Forwarded to `convertBuffer`; its
 *   default applies when omitted.
 * @param {(percent: number, stage: string) => void} [options.onProgress] -
 *   Called with (10, "convert") before and (100, "done") after conversion.
 * @returns {Promise<object>} `{ success: true, pdf: Uint8Array, sourceFormat }`
 *   on success, otherwise `{ success: false, code, error, stage }` where
 *   stage is "detect", "validate" or "convert".
 */
async function convertToPdf(input, options) {
  let buffer;
  try {
    if (typeof input === "string") {
      buffer = await readFile(input);
    } else if (Buffer.isBuffer(input)) {
      buffer = input;
    } else {
      buffer = Buffer.from(input);
    }
  } catch (err) {
    return {
      success: false,
      code: "PARSE_ERROR",
      error: `\uC785\uB825 \uC77D\uAE30 \uC2E4\uD328: ${err instanceof Error ? err.message : String(err)}`,
      stage: "detect"
    };
  }
  const MAX_FILE_SIZE = 500 * 1024 * 1024;
  if (buffer.length > MAX_FILE_SIZE) {
    return {
      success: false,
      code: "FILE_TOO_LARGE",
      error: `\uD30C\uC77C \uD06C\uAE30 \uCD08\uACFC: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (\uCD5C\uB300 500MB)`,
      stage: "detect"
    };
  }
  const format = detectFormat(toArrayBuffer(buffer));
  if (format !== "hwp" && format !== "hwpx") {
    return {
      success: false,
      code: "UNSUPPORTED_FORMAT",
      error: `\uC9C0\uC6D0\uD558\uC9C0 \uC54A\uB294 \uD3EC\uB9F7\uC785\uB2C8\uB2E4: ${format}`,
      stage: "detect"
    };
  }
  try {
    await assertSofficeAvailable();
  } catch (err) {
    // Unexpected errors are classified the same way the convert stage does,
    // so callers only ever have to inspect the result object.
    return {
      success: false,
      code: err instanceof ConvertError ? err.code : classifyError(err),
      error: err instanceof Error ? err.message : String(err),
      stage: "validate"
    };
  }
  const releaseLock = await acquireConvertLock();
  try {
    options?.onProgress?.(10, "convert");
    const pdf = await convertBuffer(buffer, ".pdf", options?.timeoutMs);
    options?.onProgress?.(100, "done");
    return {
      success: true,
      // Copy into a plain Uint8Array so the result does not expose a Buffer.
      pdf: new Uint8Array(pdf),
      sourceFormat: format
    };
  } catch (err) {
    if (err instanceof ConvertError) {
      return {
        success: false,
        code: err.code,
        error: err.message,
        stage: "convert"
      };
    }
    return {
      success: false,
      code: classifyError(err),
      error: err instanceof Error ? err.message : "\uBCC0\uD658 \uC2E4\uD328",
      stage: "convert"
    };
  } finally {
    // NOTE(review): after a TIMEOUT the lock is released even though
    // convertBuffer does not cancel the running job - confirm this is intended.
    releaseLock();
  }
}
|
|
9976
|
+
|
|
9977
|
+
// src/pipeline/unified-ocr.ts
|
|
9978
|
+
import { performance } from "perf_hooks";
|
|
9828
9979
|
var OCR_PROMPT = [
|
|
9829
9980
|
"\uC774 PDF \uD398\uC774\uC9C0 \uC774\uBBF8\uC9C0\uC5D0\uC11C \uD14D\uC2A4\uD2B8\uC640 \uD45C\uB97C \uCD94\uCD9C\uD558\uC5EC Markdown\uC73C\uB85C \uBCC0\uD658\uD558\uACE0, OCR \uC624\uC778\uC2DD \uC624\uB958\uB97C \uC989\uC2DC \uAD50\uC815\uD558\uC5EC \uCD5C\uC885 \uACB0\uACFC\uBB3C\uC744 \uCD9C\uB825\uD558\uC138\uC694.",
|
|
9830
9981
|
"",
|
|
@@ -9864,7 +10015,7 @@ async function parse2(input, options) {
|
|
|
9864
10015
|
let buffer;
|
|
9865
10016
|
if (typeof input === "string") {
|
|
9866
10017
|
try {
|
|
9867
|
-
const buf = await
|
|
10018
|
+
const buf = await readFile2(input);
|
|
9868
10019
|
buffer = toArrayBuffer(buf);
|
|
9869
10020
|
} catch (err) {
|
|
9870
10021
|
const msg = err instanceof Error && "code" in err && err.code === "ENOENT" ? `\uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4: ${input}` : `\uD30C\uC77C \uC77D\uAE30 \uC2E4\uD328: ${input}`;
|
|
@@ -10199,6 +10350,7 @@ export {
|
|
|
10199
10350
|
extractFormFields,
|
|
10200
10351
|
markdownToHwpx,
|
|
10201
10352
|
markdownToXlsx,
|
|
10353
|
+
convertToPdf,
|
|
10202
10354
|
parse2 as parse
|
|
10203
10355
|
};
|
|
10204
10356
|
/*! Bundled license information:
|
|
@@ -10206,4 +10358,4 @@ export {
|
|
|
10206
10358
|
cfb/cfb.js:
|
|
10207
10359
|
(*! crc32.js (C) 2014-present SheetJS -- http://sheetjs.com *)
|
|
10208
10360
|
*/
|
|
10209
|
-
//# sourceMappingURL=chunk-
|
|
10361
|
+
//# sourceMappingURL=chunk-TS3F57LY.js.map
|