kordoc 2.8.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +499 -450
- package/dist/{chunk-HXWPJPRO.cjs → chunk-FWAXCTSX.cjs} +2 -2
- package/dist/chunk-FWAXCTSX.cjs.map +1 -0
- package/dist/{chunk-3QA624ON.js → chunk-GQQNAYZA.js} +6 -6
- package/dist/chunk-GQQNAYZA.js.map +1 -0
- package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-XSF3N6GU.js → chunk-ODF24QXC.js} +2 -2
- package/dist/chunk-ODF24QXC.js.map +1 -0
- package/dist/{chunk-DLQY6FJH.js → chunk-Z6TLTWYK.js} +2 -2
- package/dist/chunk-Z6TLTWYK.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
- package/dist/formula-JCNF43NE.js +0 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +99 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +29 -1
- package/dist/index.d.ts +29 -1
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-ZQQM6J7T.js → parser-BKYM3LKN.js} +146 -6
- package/dist/parser-BKYM3LKN.js.map +1 -0
- package/dist/{parser-LKF6PGPD.cjs → parser-BTIPAEDZ.cjs} +159 -19
- package/dist/parser-BTIPAEDZ.cjs.map +1 -0
- package/dist/{parser-UCO6WPUW.js → parser-FJNQEW7K.js} +146 -6
- package/dist/parser-FJNQEW7K.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-MRHNFJPC.js → watch-SBLSWHL7.js} +4 -4
- package/dist/watch-SBLSWHL7.js.map +1 -0
- package/package.json +98 -98
- package/dist/chunk-3QA624ON.js.map +0 -1
- package/dist/chunk-5CJGKKMZ.js.map +0 -1
- package/dist/chunk-DLQY6FJH.js.map +0 -1
- package/dist/chunk-HXWPJPRO.cjs.map +0 -1
- package/dist/chunk-XSF3N6GU.js.map +0 -1
- package/dist/parser-LKF6PGPD.cjs.map +0 -1
- package/dist/parser-UCO6WPUW.js.map +0 -1
- package/dist/parser-ZQQM6J7T.js.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-MRHNFJPC.js.map +0 -1
- package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
|
|
|
164
164
|
warnings?: ParseWarning[];
|
|
165
165
|
/** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
|
|
166
166
|
images?: ExtractedImage[];
|
|
167
|
+
/** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
|
|
168
|
+
pageQuality?: PageQuality[];
|
|
169
|
+
/** 문서 단위 품질 요약 — PDF에서만 제공 */
|
|
170
|
+
qualitySummary?: DocumentQualitySummary;
|
|
171
|
+
}
|
|
172
|
+
/** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
|
|
173
|
+
interface PageQuality {
|
|
174
|
+
page: number;
|
|
175
|
+
textChars: number;
|
|
176
|
+
hangulRatio: number;
|
|
177
|
+
controlCharRatio: number;
|
|
178
|
+
replacementCharRatio: number;
|
|
179
|
+
puaRatio: number;
|
|
180
|
+
needsOcr: boolean;
|
|
181
|
+
ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
|
|
182
|
+
}
|
|
183
|
+
/** 문서 단위 품질 요약 (PDF 전용). */
|
|
184
|
+
interface DocumentQualitySummary {
|
|
185
|
+
totalPages: number;
|
|
186
|
+
totalTextChars: number;
|
|
187
|
+
avgHangulRatio: number;
|
|
188
|
+
avgControlCharRatio: number;
|
|
189
|
+
avgReplacementCharRatio: number;
|
|
190
|
+
avgPuaRatio: number;
|
|
191
|
+
lowTextPageCount: number;
|
|
192
|
+
highPuaPageCount: number;
|
|
193
|
+
needsOcr: boolean;
|
|
194
|
+
ocrCandidatePages: number[];
|
|
167
195
|
}
|
|
168
196
|
/** 추출된 이미지 — ParseSuccess.images에 포함 */
|
|
169
197
|
interface ExtractedImage {
|
|
@@ -491,4 +519,4 @@ interface FillFormOutput {
|
|
|
491
519
|
*/
|
|
492
520
|
declare function fillForm(input: string | ArrayBuffer | Buffer, values: Record<string, string>, outputFormat?: FillOutputFormat): Promise<FillFormOutput>;
|
|
493
521
|
|
|
494
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FillFormOutput, type FillOutputFormat, type FillResult, type FormField, type FormResult, type HwpxFillResult, type HwpxTheme, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToHwpxOptions, type OcrProvider, type OutlineItem, type PageMargin, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type PrintOptions, type PrintPreset, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, blocksToPdf, compare, detectFormat, detectOle2Format, detectZipFormat, diffBlocks, extractFormFields, fillForm, fillFormFields, fillHwpx, isHwpxFile, isLabelCell, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToPdf, parse, parseDocx, parseHwp, parseHwp3, parseHwpml, parseHwpx, parsePdf, parseXls, parseXlsx, renderHtml };
|
|
522
|
+
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type DocumentQualitySummary, type ErrorCode, type ExtractedImage, type FileType, type FillFormOutput, type FillOutputFormat, type FillResult, type FormField, type FormResult, type HwpxFillResult, type HwpxTheme, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToHwpxOptions, type OcrProvider, type OutlineItem, type PageMargin, type PageQuality, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type PrintOptions, type PrintPreset, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, blocksToPdf, compare, detectFormat, detectOle2Format, detectZipFormat, diffBlocks, extractFormFields, fillForm, fillFormFields, fillHwpx, isHwpxFile, isLabelCell, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToPdf, parse, parseDocx, parseHwp, parseHwp3, parseHwpml, parseHwpx, parsePdf, parseXls, parseXlsx, renderHtml };
|
package/dist/index.d.ts
CHANGED
|
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
|
|
|
164
164
|
warnings?: ParseWarning[];
|
|
165
165
|
/** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
|
|
166
166
|
images?: ExtractedImage[];
|
|
167
|
+
/** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
|
|
168
|
+
pageQuality?: PageQuality[];
|
|
169
|
+
/** 문서 단위 품질 요약 — PDF에서만 제공 */
|
|
170
|
+
qualitySummary?: DocumentQualitySummary;
|
|
171
|
+
}
|
|
172
|
+
/** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
|
|
173
|
+
interface PageQuality {
|
|
174
|
+
page: number;
|
|
175
|
+
textChars: number;
|
|
176
|
+
hangulRatio: number;
|
|
177
|
+
controlCharRatio: number;
|
|
178
|
+
replacementCharRatio: number;
|
|
179
|
+
puaRatio: number;
|
|
180
|
+
needsOcr: boolean;
|
|
181
|
+
ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
|
|
182
|
+
}
|
|
183
|
+
/** 문서 단위 품질 요약 (PDF 전용). */
|
|
184
|
+
interface DocumentQualitySummary {
|
|
185
|
+
totalPages: number;
|
|
186
|
+
totalTextChars: number;
|
|
187
|
+
avgHangulRatio: number;
|
|
188
|
+
avgControlCharRatio: number;
|
|
189
|
+
avgReplacementCharRatio: number;
|
|
190
|
+
avgPuaRatio: number;
|
|
191
|
+
lowTextPageCount: number;
|
|
192
|
+
highPuaPageCount: number;
|
|
193
|
+
needsOcr: boolean;
|
|
194
|
+
ocrCandidatePages: number[];
|
|
167
195
|
}
|
|
168
196
|
/** 추출된 이미지 — ParseSuccess.images에 포함 */
|
|
169
197
|
interface ExtractedImage {
|
|
@@ -491,4 +519,4 @@ interface FillFormOutput {
|
|
|
491
519
|
*/
|
|
492
520
|
declare function fillForm(input: string | ArrayBuffer | Buffer, values: Record<string, string>, outputFormat?: FillOutputFormat): Promise<FillFormOutput>;
|
|
493
521
|
|
|
494
|
-
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type ErrorCode, type ExtractedImage, type FileType, type FillFormOutput, type FillOutputFormat, type FillResult, type FormField, type FormResult, type HwpxFillResult, type HwpxTheme, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToHwpxOptions, type OcrProvider, type OutlineItem, type PageMargin, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type PrintOptions, type PrintPreset, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, blocksToPdf, compare, detectFormat, detectOle2Format, detectZipFormat, diffBlocks, extractFormFields, fillForm, fillFormFields, fillHwpx, isHwpxFile, isLabelCell, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToPdf, parse, parseDocx, parseHwp, parseHwp3, parseHwpml, parseHwpx, parsePdf, parseXls, parseXlsx, renderHtml };
|
|
522
|
+
export { type BlockDiff, type BoundingBox, type CellContext, type CellDiff, type DiffChangeType, type DiffResult, type DocumentMetadata, type DocumentQualitySummary, type ErrorCode, type ExtractedImage, type FileType, type FillFormOutput, type FillOutputFormat, type FillResult, type FormField, type FormResult, type HwpxFillResult, type HwpxTheme, type IRBlock, type IRBlockType, type IRCell, type IRTable, type ImageData, type InlineStyle, type MarkdownToHwpxOptions, type OcrProvider, type OutlineItem, type PageMargin, type PageQuality, type ParseFailure, type ParseOptions, type ParseResult, type ParseSuccess, type ParseWarning, type PrintOptions, type PrintPreset, VERSION, type WarningCode, type WatchOptions, blocksToMarkdown, blocksToPdf, compare, detectFormat, detectOle2Format, detectZipFormat, diffBlocks, extractFormFields, fillForm, fillFormFields, fillHwpx, isHwpxFile, isLabelCell, isOldHwpFile, isPdfFile, isZipFile, markdownToHwpx, markdownToPdf, parse, parseDocx, parseHwp, parseHwp3, parseHwpml, parseHwpx, parsePdf, parseXls, parseXlsx, renderHtml };
|
package/dist/index.js
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
sanitizeHref,
|
|
17
17
|
stripDtd,
|
|
18
18
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-DLQY6FJH.js";
|
|
19
|
+
} from "./chunk-Z6TLTWYK.js";
|
|
20
20
|
import {
|
|
21
21
|
parsePageRange
|
|
22
22
|
} from "./chunk-SBVRCJFH.js";
|
|
@@ -19281,7 +19281,7 @@ async function parseHwp(buffer, options) {
|
|
|
19281
19281
|
async function parsePdf(buffer, options) {
|
|
19282
19282
|
let parsePdfDocument;
|
|
19283
19283
|
try {
|
|
19284
|
-
const mod = await import("./parser-ZQQM6J7T.js");
|
|
19284
|
+
const mod = await import("./parser-BKYM3LKN.js");
|
|
19285
19285
|
parsePdfDocument = mod.parsePdfDocument;
|
|
19286
19286
|
} catch {
|
|
19287
19287
|
return {
|
|
@@ -19292,8 +19292,8 @@ async function parsePdf(buffer, options) {
|
|
|
19292
19292
|
};
|
|
19293
19293
|
}
|
|
19294
19294
|
try {
|
|
19295
|
-
const { markdown, blocks, metadata, outline, warnings, isImageBased } = await parsePdfDocument(buffer, options);
|
|
19296
|
-
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
19295
|
+
const { markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary } = await parsePdfDocument(buffer, options);
|
|
19296
|
+
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary };
|
|
19297
19297
|
} catch (err) {
|
|
19298
19298
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
19299
19299
|
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err), isImageBased };
|