kordoc 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +469 -450
  2. package/dist/{chunk-3QA624ON.js → chunk-M24KMDAR.js} +6 -6
  3. package/dist/chunk-M24KMDAR.js.map +1 -0
  4. package/dist/{chunk-5CJGKKMZ.js → chunk-MEPHGCPQ.js} +1 -1
  5. package/dist/chunk-MEPHGCPQ.js.map +1 -0
  6. package/dist/chunk-MOL7MDBG.js +0 -0
  7. package/dist/chunk-MUOQXDZ4.cjs.map +1 -1
  8. package/dist/{chunk-HXWPJPRO.cjs → chunk-QB7CS534.cjs} +2 -2
  9. package/dist/chunk-QB7CS534.cjs.map +1 -0
  10. package/dist/{chunk-DLQY6FJH.js → chunk-RXZLTACX.js} +2 -2
  11. package/dist/chunk-RXZLTACX.js.map +1 -0
  12. package/dist/{chunk-XSF3N6GU.js → chunk-SJ5TPMBT.js} +2 -2
  13. package/dist/chunk-SJ5TPMBT.js.map +1 -0
  14. package/dist/cli.js +4 -4
  15. package/dist/cli.js.map +1 -1
  16. package/dist/{detect-PJZMUL2Z.js → detect-RI2MQ33K.js} +2 -2
  17. package/dist/formula-JCNF43NE.js +0 -0
  18. package/dist/formula-XGG6ZP42.cjs.map +1 -1
  19. package/dist/index.cjs +99 -99
  20. package/dist/index.cjs.map +1 -1
  21. package/dist/index.d.cts +28 -0
  22. package/dist/index.d.ts +28 -0
  23. package/dist/index.js +4 -4
  24. package/dist/index.js.map +1 -1
  25. package/dist/mcp.js +5 -5
  26. package/dist/mcp.js.map +1 -1
  27. package/dist/page-range-3C7UGGEK.cjs.map +1 -1
  28. package/dist/page-range-737B4EZW.js +0 -0
  29. package/dist/{parser-LKF6PGPD.cjs → parser-EL5YETUA.cjs} +159 -19
  30. package/dist/parser-EL5YETUA.cjs.map +1 -0
  31. package/dist/{parser-ZQQM6J7T.js → parser-OMPBVEFU.js} +146 -6
  32. package/dist/parser-OMPBVEFU.js.map +1 -0
  33. package/dist/{parser-UCO6WPUW.js → parser-XBYGROQB.js} +146 -6
  34. package/dist/parser-XBYGROQB.js.map +1 -0
  35. package/dist/{provider-WPIYEALY.js → provider-2SEHU2FM.js} +1 -1
  36. package/dist/provider-2SEHU2FM.js.map +1 -0
  37. package/dist/{provider-7H4CPZYS.js → provider-AKROB7WQ.js} +1 -1
  38. package/dist/provider-AKROB7WQ.js.map +1 -0
  39. package/dist/{provider-YN2SSK4X.cjs → provider-SNONEZNW.cjs} +1 -1
  40. package/dist/provider-SNONEZNW.cjs.map +1 -0
  41. package/dist/setup-57FB3LSP.js +0 -0
  42. package/dist/{watch-MRHNFJPC.js → watch-ULLLK7ID.js} +4 -4
  43. package/dist/watch-ULLLK7ID.js.map +1 -0
  44. package/package.json +98 -98
  45. package/dist/chunk-3QA624ON.js.map +0 -1
  46. package/dist/chunk-5CJGKKMZ.js.map +0 -1
  47. package/dist/chunk-DLQY6FJH.js.map +0 -1
  48. package/dist/chunk-HXWPJPRO.cjs.map +0 -1
  49. package/dist/chunk-XSF3N6GU.js.map +0 -1
  50. package/dist/parser-LKF6PGPD.cjs.map +0 -1
  51. package/dist/parser-UCO6WPUW.js.map +0 -1
  52. package/dist/parser-ZQQM6J7T.js.map +0 -1
  53. package/dist/provider-7H4CPZYS.js.map +0 -1
  54. package/dist/provider-WPIYEALY.js.map +0 -1
  55. package/dist/provider-YN2SSK4X.cjs.map +0 -1
  56. package/dist/watch-MRHNFJPC.js.map +0 -1
  57. package/dist/{detect-PJZMUL2Z.js.map → detect-RI2MQ33K.js.map} +0 -0
package/dist/index.d.cts CHANGED
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
164
164
  warnings?: ParseWarning[];
165
165
  /** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
166
166
  images?: ExtractedImage[];
167
+ /** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
168
+ pageQuality?: PageQuality[];
169
+ /** 문서 단위 품질 요약 — PDF에서만 제공 */
170
+ qualitySummary?: DocumentQualitySummary;
171
+ }
172
+ /** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
173
+ interface PageQuality {
174
+ page: number;
175
+ textChars: number;
176
+ hangulRatio: number;
177
+ controlCharRatio: number;
178
+ replacementCharRatio: number;
179
+ puaRatio: number;
180
+ needsOcr: boolean;
181
+ ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
182
+ }
183
+ /** 문서 단위 품질 요약 (PDF 전용). */
184
+ interface DocumentQualitySummary {
185
+ totalPages: number;
186
+ totalTextChars: number;
187
+ avgHangulRatio: number;
188
+ avgControlCharRatio: number;
189
+ avgReplacementCharRatio: number;
190
+ avgPuaRatio: number;
191
+ lowTextPageCount: number;
192
+ highPuaPageCount: number;
193
+ needsOcr: boolean;
194
+ ocrCandidatePages: number[];
167
195
  }
168
196
  /** 추출된 이미지 — ParseSuccess.images에 포함 */
169
197
  interface ExtractedImage {
package/dist/index.d.ts CHANGED
@@ -164,6 +164,34 @@ interface ParseSuccess extends ParseResultBase {
164
164
  warnings?: ParseWarning[];
165
165
  /** 추출된 이미지 목록 — 마크다운에서 파일명으로 참조됨 */
166
166
  images?: ExtractedImage[];
167
+ /** 페이지별 텍스트 품질 신호 — PDF에서만 제공 */
168
+ pageQuality?: PageQuality[];
169
+ /** 문서 단위 품질 요약 — PDF에서만 제공 */
170
+ qualitySummary?: DocumentQualitySummary;
171
+ }
172
+ /** 페이지별 텍스트 품질 신호 (PDF 전용). 자세한 설명은 src/pdf/quality.ts */
173
+ interface PageQuality {
174
+ page: number;
175
+ textChars: number;
176
+ hangulRatio: number;
177
+ controlCharRatio: number;
178
+ replacementCharRatio: number;
179
+ puaRatio: number;
180
+ needsOcr: boolean;
181
+ ocrReason?: "low_text" | "high_pua" | "high_control" | "high_replacement";
182
+ }
183
+ /** 문서 단위 품질 요약 (PDF 전용). */
184
+ interface DocumentQualitySummary {
185
+ totalPages: number;
186
+ totalTextChars: number;
187
+ avgHangulRatio: number;
188
+ avgControlCharRatio: number;
189
+ avgReplacementCharRatio: number;
190
+ avgPuaRatio: number;
191
+ lowTextPageCount: number;
192
+ highPuaPageCount: number;
193
+ needsOcr: boolean;
194
+ ocrCandidatePages: number[];
167
195
  }
168
196
  /** 추출된 이미지 — ParseSuccess.images에 포함 */
169
197
  interface ExtractedImage {
package/dist/index.js CHANGED
@@ -16,7 +16,7 @@ import {
16
16
  sanitizeHref,
17
17
  stripDtd,
18
18
  toArrayBuffer
19
- } from "./chunk-DLQY6FJH.js";
19
+ } from "./chunk-RXZLTACX.js";
20
20
  import {
21
21
  parsePageRange
22
22
  } from "./chunk-SBVRCJFH.js";
@@ -19281,7 +19281,7 @@ async function parseHwp(buffer, options) {
19281
19281
  async function parsePdf(buffer, options) {
19282
19282
  let parsePdfDocument;
19283
19283
  try {
19284
- const mod = await import("./parser-ZQQM6J7T.js");
19284
+ const mod = await import("./parser-OMPBVEFU.js");
19285
19285
  parsePdfDocument = mod.parsePdfDocument;
19286
19286
  } catch {
19287
19287
  return {
@@ -19292,8 +19292,8 @@ async function parsePdf(buffer, options) {
19292
19292
  };
19293
19293
  }
19294
19294
  try {
19295
- const { markdown, blocks, metadata, outline, warnings, isImageBased } = await parsePdfDocument(buffer, options);
19296
- return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased };
19295
+ const { markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary } = await parsePdfDocument(buffer, options);
19296
+ return { success: true, fileType: "pdf", markdown, blocks, metadata, outline, warnings, isImageBased, pageQuality, qualitySummary };
19297
19297
  } catch (err) {
19298
19298
  const isImageBased = err instanceof Error && "isImageBased" in err ? true : void 0;
19299
19299
  return { success: false, fileType: "pdf", error: err instanceof Error ? err.message : "PDF \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err), isImageBased };