@clazic/kordoc 2.1.6 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -4
- package/dist/chunk-3NF22UFF.js +9617 -0
- package/dist/chunk-3NF22UFF.js.map +1 -0
- package/dist/{chunk-TFYOEQE2.js → chunk-7MXQWWUW.js} +2 -2
- package/dist/chunk-ZWE3DS7E.js +39 -0
- package/dist/cli.js +114 -11
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +2985 -179
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +36 -4
- package/dist/index.d.ts +36 -4
- package/dist/index.js +3009 -178
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +100 -7
- package/dist/mcp.js.map +1 -1
- package/dist/{page-range-737B4EZW.js → page-range-ALIRXAL5.js} +2 -1
- package/dist/provider-XVKP5OGI.js +167 -0
- package/dist/provider-XVKP5OGI.js.map +1 -0
- package/dist/resolve-Z4DEPDUS.js +179 -0
- package/dist/resolve-Z4DEPDUS.js.map +1 -0
- package/dist/tesseract-provider-UNJOI25M.js +24 -0
- package/dist/tesseract-provider-UNJOI25M.js.map +1 -0
- package/dist/{utils-7JE5SKSL.js → utils-I4UIMOH7.js} +3 -2
- package/dist/utils-I4UIMOH7.js.map +1 -0
- package/dist/{watch-XALC6VOR.js → watch-XPLMUIZB.js} +4 -3
- package/dist/{watch-XALC6VOR.js.map → watch-XPLMUIZB.js.map} +1 -1
- package/package.json +5 -2
- package/dist/chunk-H7HMKSLX.js +0 -5494
- package/dist/chunk-H7HMKSLX.js.map +0 -1
- package/dist/provider-A4FHJSID.js +0 -38
- package/dist/provider-A4FHJSID.js.map +0 -1
- /package/dist/{chunk-TFYOEQE2.js.map → chunk-7MXQWWUW.js.map} +0 -0
- /package/dist/{page-range-737B4EZW.js.map → chunk-ZWE3DS7E.js.map} +0 -0
- /package/dist/{utils-7JE5SKSL.js.map → page-range-ALIRXAL5.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -102,6 +102,14 @@ interface ParseOptions {
|
|
|
102
102
|
pages?: number[] | string;
|
|
103
103
|
/** 이미지 기반 PDF용 OCR 프로바이더 (선택) */
|
|
104
104
|
ocr?: OcrProvider;
|
|
105
|
+
/**
|
|
106
|
+
* OCR 모드 (CLI 자동 탐색용).
|
|
107
|
+
* - "auto": 설치된 CLI 자동 탐색 (gemini→claude→codex→ollama→tesseract)
|
|
108
|
+
* - "gemini"|"claude"|"codex"|"ollama"|"tesseract": 특정 도구 강제 지정
|
|
109
|
+
* - "off": OCR 비활성화 (이미지 기반 PDF면 에러)
|
|
110
|
+
* - undefined: 라이브러리 API 기존 동작 유지 (자동 탐색 안 함)
|
|
111
|
+
*/
|
|
112
|
+
ocrMode?: OcrMode;
|
|
105
113
|
/** 진행률 콜백 — current: 현재 페이지/섹션, total: 전체 수 */
|
|
106
114
|
onProgress?: (current: number, total: number) => void;
|
|
107
115
|
/** PDF 머리글/바닥글 자동 제거 */
|
|
@@ -116,7 +124,7 @@ interface ParseWarning {
|
|
|
116
124
|
/** 구조화된 경고 코드 */
|
|
117
125
|
code: WarningCode;
|
|
118
126
|
}
|
|
119
|
-
type WarningCode = "SKIPPED_IMAGE" | "SKIPPED_OLE" | "TRUNCATED_TABLE" | "OCR_FALLBACK" | "UNSUPPORTED_ELEMENT" | "BROKEN_ZIP_RECOVERY" | "HIDDEN_TEXT_FILTERED" | "MALFORMED_XML" | "PARTIAL_PARSE" | "LENIENT_CFB_RECOVERY";
|
|
127
|
+
type WarningCode = "SKIPPED_IMAGE" | "SKIPPED_OLE" | "TRUNCATED_TABLE" | "OCR_FALLBACK" | "UNSUPPORTED_ELEMENT" | "BROKEN_ZIP_RECOVERY" | "HIDDEN_TEXT_FILTERED" | "MALFORMED_XML" | "PARTIAL_PARSE" | "LENIENT_CFB_RECOVERY" | "OCR_PAGE_FAILED" | "OCR_CLI_FALLBACK";
|
|
120
128
|
/** 문서 구조 (헤딩 트리) */
|
|
121
129
|
interface OutlineItem {
|
|
122
130
|
level: number;
|
|
@@ -204,8 +212,15 @@ interface FormResult {
|
|
|
204
212
|
/** 양식 확신도 (0-1) */
|
|
205
213
|
confidence: number;
|
|
206
214
|
}
|
|
207
|
-
/**
|
|
208
|
-
|
|
215
|
+
/** Vision LLM이 반환하는 구조화된 OCR 결과 */
|
|
216
|
+
interface StructuredOcrResult {
|
|
217
|
+
/** 구조화된 Markdown (테이블/헤딩/리스트 포함) */
|
|
218
|
+
markdown: string;
|
|
219
|
+
}
|
|
220
|
+
/** OCR 모드 — CLI --ocr 옵션 허용값 */
|
|
221
|
+
type OcrMode = "auto" | "gemini" | "claude" | "codex" | "ollama" | "tesseract" | "off";
|
|
222
|
+
/** 사용자 제공 OCR 함수 — 페이지 이미지를 받아 텍스트 또는 구조화된 결과 반환 */
|
|
223
|
+
type OcrProvider = (pageImage: Uint8Array, pageNumber: number, mimeType: "image/png") => Promise<string | StructuredOcrResult>;
|
|
209
224
|
interface WatchOptions {
|
|
210
225
|
dir: string;
|
|
211
226
|
outDir?: string;
|
|
@@ -252,6 +267,23 @@ interface MarkdownToHwpxOptions {
|
|
|
252
267
|
*/
|
|
253
268
|
declare function markdownToHwpx(markdown: string, options?: MarkdownToHwpxOptions | ArrayBuffer): Promise<ArrayBuffer>;
|
|
254
269
|
|
|
270
|
+
/**
|
|
271
|
+
* Markdown → XLSX 변환기
|
|
272
|
+
*
|
|
273
|
+
* 지원: 헤딩, 단락, 코드, blockquote, hr, 테이블(별도 시트), 이미지
|
|
274
|
+
*/
|
|
275
|
+
|
|
276
|
+
interface MarkdownToXlsxOptions {
|
|
277
|
+
warnings?: string[];
|
|
278
|
+
images?: ExtractedImage[];
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* 마크다운 텍스트를 XLSX (ArrayBuffer)로 변환.
|
|
282
|
+
* @param markdown 마크다운 텍스트
|
|
283
|
+
* @param options 경고 수집, 이미지 데이터 등
|
|
284
|
+
*/
|
|
285
|
+
declare function markdownToXlsx(markdown: string, options?: MarkdownToXlsxOptions): Promise<ArrayBuffer>;
|
|
286
|
+
|
|
255
287
|
/** 매직 바이트 기반 파일 포맷 감지 */
|
|
256
288
|
|
|
257
289
|
/** ZIP 파일 여부: PK\x03\x04 */
|
|
@@ -307,4 +339,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions): Promise
|
|
|
307
339
|
/** DOCX 파일을 Markdown으로 변환 */
|
|
308
340
|
declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions): Promise<ParseResult>;
|
|
309
341
|
|
|
310
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|
|
342
|
+
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|
package/dist/index.d.ts
CHANGED
|
@@ -102,6 +102,14 @@ interface ParseOptions {
|
|
|
102
102
|
pages?: number[] | string;
|
|
103
103
|
/** 이미지 기반 PDF용 OCR 프로바이더 (선택) */
|
|
104
104
|
ocr?: OcrProvider;
|
|
105
|
+
/**
|
|
106
|
+
* OCR 모드 (CLI 자동 탐색용).
|
|
107
|
+
* - "auto": 설치된 CLI 자동 탐색 (gemini→claude→codex→ollama→tesseract)
|
|
108
|
+
* - "gemini"|"claude"|"codex"|"ollama"|"tesseract": 특정 도구 강제 지정
|
|
109
|
+
* - "off": OCR 비활성화 (이미지 기반 PDF면 에러)
|
|
110
|
+
* - undefined: 라이브러리 API 기존 동작 유지 (자동 탐색 안 함)
|
|
111
|
+
*/
|
|
112
|
+
ocrMode?: OcrMode;
|
|
105
113
|
/** 진행률 콜백 — current: 현재 페이지/섹션, total: 전체 수 */
|
|
106
114
|
onProgress?: (current: number, total: number) => void;
|
|
107
115
|
/** PDF 머리글/바닥글 자동 제거 */
|
|
@@ -116,7 +124,7 @@ interface ParseWarning {
|
|
|
116
124
|
/** 구조화된 경고 코드 */
|
|
117
125
|
code: WarningCode;
|
|
118
126
|
}
|
|
119
|
-
type WarningCode = "SKIPPED_IMAGE" | "SKIPPED_OLE" | "TRUNCATED_TABLE" | "OCR_FALLBACK" | "UNSUPPORTED_ELEMENT" | "BROKEN_ZIP_RECOVERY" | "HIDDEN_TEXT_FILTERED" | "MALFORMED_XML" | "PARTIAL_PARSE" | "LENIENT_CFB_RECOVERY";
|
|
127
|
+
type WarningCode = "SKIPPED_IMAGE" | "SKIPPED_OLE" | "TRUNCATED_TABLE" | "OCR_FALLBACK" | "UNSUPPORTED_ELEMENT" | "BROKEN_ZIP_RECOVERY" | "HIDDEN_TEXT_FILTERED" | "MALFORMED_XML" | "PARTIAL_PARSE" | "LENIENT_CFB_RECOVERY" | "OCR_PAGE_FAILED" | "OCR_CLI_FALLBACK";
|
|
120
128
|
/** 문서 구조 (헤딩 트리) */
|
|
121
129
|
interface OutlineItem {
|
|
122
130
|
level: number;
|
|
@@ -204,8 +212,15 @@ interface FormResult {
|
|
|
204
212
|
/** 양식 확신도 (0-1) */
|
|
205
213
|
confidence: number;
|
|
206
214
|
}
|
|
207
|
-
/**
|
|
208
|
-
|
|
215
|
+
/** Vision LLM이 반환하는 구조화된 OCR 결과 */
|
|
216
|
+
interface StructuredOcrResult {
|
|
217
|
+
/** 구조화된 Markdown (테이블/헤딩/리스트 포함) */
|
|
218
|
+
markdown: string;
|
|
219
|
+
}
|
|
220
|
+
/** OCR 모드 — CLI --ocr 옵션 허용값 */
|
|
221
|
+
type OcrMode = "auto" | "gemini" | "claude" | "codex" | "ollama" | "tesseract" | "off";
|
|
222
|
+
/** 사용자 제공 OCR 함수 — 페이지 이미지를 받아 텍스트 또는 구조화된 결과 반환 */
|
|
223
|
+
type OcrProvider = (pageImage: Uint8Array, pageNumber: number, mimeType: "image/png") => Promise<string | StructuredOcrResult>;
|
|
209
224
|
interface WatchOptions {
|
|
210
225
|
dir: string;
|
|
211
226
|
outDir?: string;
|
|
@@ -252,6 +267,23 @@ interface MarkdownToHwpxOptions {
|
|
|
252
267
|
*/
|
|
253
268
|
declare function markdownToHwpx(markdown: string, options?: MarkdownToHwpxOptions | ArrayBuffer): Promise<ArrayBuffer>;
|
|
254
269
|
|
|
270
|
+
/**
|
|
271
|
+
* Markdown → XLSX 변환기
|
|
272
|
+
*
|
|
273
|
+
* 지원: 헤딩, 단락, 코드, blockquote, hr, 테이블(별도 시트), 이미지
|
|
274
|
+
*/
|
|
275
|
+
|
|
276
|
+
interface MarkdownToXlsxOptions {
|
|
277
|
+
warnings?: string[];
|
|
278
|
+
images?: ExtractedImage[];
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* 마크다운 텍스트를 XLSX (ArrayBuffer)로 변환.
|
|
282
|
+
* @param markdown 마크다운 텍스트
|
|
283
|
+
* @param options 경고 수집, 이미지 데이터 등
|
|
284
|
+
*/
|
|
285
|
+
declare function markdownToXlsx(markdown: string, options?: MarkdownToXlsxOptions): Promise<ArrayBuffer>;
|
|
286
|
+
|
|
255
287
|
/** 매직 바이트 기반 파일 포맷 감지 */
|
|
256
288
|
|
|
257
289
|
/** ZIP 파일 여부: PK\x03\x04 */
|
|
@@ -307,4 +339,4 @@ declare function parseXlsx(buffer: ArrayBuffer, options?: ParseOptions): Promise
|
|
|
307
339
|
/** DOCX 파일을 Markdown으로 변환 */
|
|
308
340
|
declare function parseDocx(buffer: ArrayBuffer, options?: ParseOptions): Promise<ParseResult>;
|
|
309
341
|
|
|
310
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|
|
342
|
+
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FormField, type FormResult, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToXlsxOptions, type OcrMode, type OcrProvider, type OutlineItem, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type StructuredOcrResult, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, compare, detectFormat, detectZipFormat, diffBlocks, extractFormFields, isHwpxFile, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToXlsx, parse, parseDocx, parseHwp, parseHwpx, parsePdf, parseXlsx };
|