kordoc 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +469 -450
- package/dist/{chunk-3QA624ON.js → chunk-M24KMDAR.js} +6 -6
- package/dist/chunk-M24KMDAR.js.map +1 -0
- package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
- package/dist/chunk-MEPHGCPQ.js.map +1 -0
- package/dist/chunk-MOL7MDBG.js +0 -0
- package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
- package/dist/{chunk-HXWPJPRO.cjs → chunk-QB7CS534.cjs} +2 -2
- package/dist/chunk-QB7CS534.cjs.map +1 -0
- package/dist/{chunk-DLQY6FJH.js → chunk-RXZLTACX.js} +2 -2
- package/dist/chunk-RXZLTACX.js.map +1 -0
- package/dist/{chunk-XSF3N6GU.js → chunk-SJ5TPMBT.js} +2 -2
- package/dist/chunk-SJ5TPMBT.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/cli.js.map +1 -1
- package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
- package/dist/formula-JCNF43NE.js +0 -0
- package/dist/formula-XGG6ZP42.cjs.map +1 -1
- package/dist/index.cjs +99 -99
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +5 -5
- package/dist/mcp.js.map +1 -1
- package/dist/page-range-3C7UGGEK.cjs.map +1 -1
- package/dist/page-range-737B4EZW.js +0 -0
- package/dist/{parser-LKF6PGPD.cjs → parser-EL5YETUA.cjs} +159 -19
- package/dist/parser-EL5YETUA.cjs.map +1 -0
- package/dist/{parser-ZQQM6J7T.js → parser-OMPBVEFU.js} +146 -6
- package/dist/parser-OMPBVEFU.js.map +1 -0
- package/dist/{parser-UCO6WPUW.js → parser-XBYGROQB.js} +146 -6
- package/dist/parser-XBYGROQB.js.map +1 -0
- package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
- package/dist/provider-2SEHU2FM.js.map +1 -0
- package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
- package/dist/provider-AKROB7WQ.js.map +1 -0
- package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
- package/dist/provider-SNONEZNW.cjs.map +1 -0
- package/dist/setup-57FB3LSP.js +0 -0
- package/dist/{watch-MRHNFJPC.js → watch-ULLLK7ID.js} +4 -4
- package/dist/watch-ULLLK7ID.js.map +1 -0
- package/package.json +98 -98
- package/dist/chunk-3QA624ON.js.map +0 -1
- package/dist/chunk-5CJGKKMZ.js.map +0 -1
- package/dist/chunk-DLQY6FJH.js.map +0 -1
- package/dist/chunk-HXWPJPRO.cjs.map +0 -1
- package/dist/chunk-XSF3N6GU.js.map +0 -1
- package/dist/parser-LKF6PGPD.cjs.map +0 -1
- package/dist/parser-UCO6WPUW.js.map +0 -1
- package/dist/parser-ZQQM6J7T.js.map +0 -1
- package/dist/provider-7H4CPZYS.js.map +0 -1
- package/dist/provider-WPIYEALY.js.map +0 -1
- package/dist/provider-YN2SSK4X.cjs.map +0 -1
- package/dist/watch-MRHNFJPC.js.map +0 -1
- package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/index.d.cts
CHANGED
|
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
|
|
|
164
164
|
warnings?: ParseWarning[];
|
|
165
165
|
/** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
|
|
166
166
|
images?: ExtractedImage[];
|
|
167
|
+
/** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
|
|
168
|
+
pageQuality?: PageQuality[];
|
|
169
|
+
/** 문서 단위 품질 요약 — PDF에서만 제공 */
|
|
170
|
+
qualitySummary?: DocumentQualitySummary;
|
|
171
|
+
}
|
|
172
|
+
/** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
|
|
173
|
+
interface PageQuality {
|
|
174
|
+
page: number;
|
|
175
|
+
textChars: number;
|
|
176
|
+
hangulRatio: number;
|
|
177
|
+
controlCharRatio: number;
|
|
178
|
+
replacementCharRatio: number;
|
|
179
|
+
puaRatio: number;
|
|
180
|
+
needsOcr: boolean;
|
|
181
|
+
ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
|
|
182
|
+
}
|
|
183
|
+
/** 문서 단위 품질 요약 (PDF 전용). */
|
|
184
|
+
interface DocumentQualitySummary {
|
|
185
|
+
totalPages: number;
|
|
186
|
+
totalTextChars: number;
|
|
187
|
+
avgHangulRatio: number;
|
|
188
|
+
avgControlCharRatio: number;
|
|
189
|
+
avgReplacementCharRatio: number;
|
|
190
|
+
avgPuaRatio: number;
|
|
191
|
+
lowTextPageCount: number;
|
|
192
|
+
highPuaPageCount: number;
|
|
193
|
+
needsOcr: boolean;
|
|
194
|
+
ocrCandidatePages: number[];
|
|
167
195
|
}
|
|
168
196
|
/** 추출된 이미지 — ParseSuccess.images에 포함 */
|
|
169
197
|
interface ExtractedImage {
|
package/dist/index.d.ts
CHANGED
|
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
|
|
|
164
164
|
warnings?: ParseWarning[];
|
|
165
165
|
/** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
|
|
166
166
|
images?: ExtractedImage[];
|
|
167
|
+
/** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
|
|
168
|
+
pageQuality?: PageQuality[];
|
|
169
|
+
/** 문서 단위 품질 요약 — PDF에서만 제공 */
|
|
170
|
+
qualitySummary?: DocumentQualitySummary;
|
|
171
|
+
}
|
|
172
|
+
/** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
|
|
173
|
+
interface PageQuality {
|
|
174
|
+
page: number;
|
|
175
|
+
textChars: number;
|
|
176
|
+
hangulRatio: number;
|
|
177
|
+
controlCharRatio: number;
|
|
178
|
+
replacementCharRatio: number;
|
|
179
|
+
puaRatio: number;
|
|
180
|
+
needsOcr: boolean;
|
|
181
|
+
ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
|
|
182
|
+
}
|
|
183
|
+
/** 문서 단위 품질 요약 (PDF 전용). */
|
|
184
|
+
interface DocumentQualitySummary {
|
|
185
|
+
totalPages: number;
|
|
186
|
+
totalTextChars: number;
|
|
187
|
+
avgHangulRatio: number;
|
|
188
|
+
avgControlCharRatio: number;
|
|
189
|
+
avgReplacementCharRatio: number;
|
|
190
|
+
avgPuaRatio: number;
|
|
191
|
+
lowTextPageCount: number;
|
|
192
|
+
highPuaPageCount: number;
|
|
193
|
+
needsOcr: boolean;
|
|
194
|
+
ocrCandidatePages: number[];
|
|
167
195
|
}
|
|
168
196
|
/** 추출된 이미지 — ParseSuccess.images에 포함 */
|
|
169
197
|
interface ExtractedImage {
|
package/dist/index.js
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
sanitizeHref,
|
|
17
17
|
stripDtd,
|
|
18
18
|
toArrayBuffer
|
|
19
|
-
} from "./chunk-DLQY6FJH.js";
|
|
19
|
+
} from "./chunk-RXZLTACX.js";
|
|
20
20
|
import {
|
|
21
21
|
parsePageRange
|
|
22
22
|
} from "./chunk-SBVRCJFH.js";
|
|
@@ -19281,7 +19281,7 @@ async function parseHwp(buffer, options) {
|
|
|
19281
19281
|
async function parsePdf(buffer, options) {
|
|
19282
19282
|
let parsePdfDocument;
|
|
19283
19283
|
try {
|
|
19284
|
-
const mod = await import("./parser-ZQQM6J7T.js");
|
|
19284
|
+
const mod = await import("./parser-OMPBVEFU.js");
|
|
19285
19285
|
parsePdfDocument = mod.parsePdfDocument;
|
|
19286
19286
|
} catch {
|
|
19287
19287
|
return {
|
|
@@ -19292,8 +19292,8 @@ async function parsePdf(buffer, options) {
|
|
|
19292
19292
|
};
|
|
19293
19293
|
}
|
|
19294
19294
|
try {
|
|
19295
|
-
const { markdown, blocks, metadata, outline, warnings, isImageBased } = await parsePdfDocument(buffer, options);
|
|
19296
|
-
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
|
|
19295
|
+
const { markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary } = await parsePdfDocument(buffer, options);
|
|
19296
|
+
return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary };
|
|
19297
19297
|
} catch (err) {
|
|
19298
19298
|
const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
|
|
19299
19299
|
return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err), isImageBased };
|