npm - kordoc - Versions diffs - 2.2.6 → 2.4.0 - Mend

kordoc 2.2.6 → 2.4.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +22 -3
package/dist/{chunk-RF6UJXR3.js → chunk-KSBPABBQ.js} +482 -78
package/dist/chunk-KSBPABBQ.js.map +1 -0
package/dist/{chunk-5Y2Q3BRW.js → chunk-M3E3C5GS.js} +8 -1
package/dist/chunk-M3E3C5GS.js.map +1 -0
package/dist/{chunk-FCQEF2ZM.js → chunk-VJPDY4YT.js} +2 -2
package/dist/{chunk-NL5XLN5R.js.map → chunk-VJPDY4YT.js.map} +1 -1
package/dist/{chunk-HXUCZ2IL.cjs → chunk-VLSATRNQ.cjs} +2 -2
package/dist/{chunk-HXUCZ2IL.cjs.map → chunk-VLSATRNQ.cjs.map} +1 -1
package/dist/{chunk-NL5XLN5R.js → chunk-XG5CQUSC.js} +2 -2
package/dist/{chunk-FCQEF2ZM.js.map → chunk-XG5CQUSC.js.map} +1 -1
package/dist/cli.js +5 -5
package/dist/cli.js.map +1 -1
package/dist/{detect-GYK3HKD5.js → detect-I7YIS4Q6.js} +4 -2
package/dist/index.cjs +608 -197
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +6 -2
package/dist/index.d.ts +6 -2
package/dist/index.js +500 -89
package/dist/index.js.map +1 -1
package/dist/mcp.js +5 -5
package/dist/{parser-AMP7MAOH.js → parser-4275GJRB.js} +45 -42
package/dist/{parser-AMP7MAOH.js.map → parser-4275GJRB.js.map} +1 -1
package/dist/{parser-KOWPTDJU.cjs → parser-STAOZMUC.cjs} +61 -58
package/dist/{parser-KOWPTDJU.cjs.map → parser-STAOZMUC.cjs.map} +1 -1
package/dist/{parser-43IAQ5KE.js → parser-XRUZEFZT.js} +45 -42
package/dist/{parser-43IAQ5KE.js.map → parser-XRUZEFZT.js.map} +1 -1
package/dist/{watch-IUQXOXW3.js → watch-BFLNFJBE.js} +4 -4
package/package.json +2 -2
package/dist/chunk-5Y2Q3BRW.js.map +0 -1
package/dist/chunk-RF6UJXR3.js.map +0 -1
package/dist/{detect-GYK3HKD5.js.map → detect-I7YIS4Q6.js.map} +0 -0
package/dist/{watch-IUQXOXW3.js.map → watch-BFLNFJBE.js.map} +0 -0

package/dist/mcp.js CHANGED Viewed

@@ -8,18 +8,18 @@ import {
   fillHwpx,
   markdownToHwpx,
   parse
-} from "./chunk-RF6UJXR3.js";
+} from "./chunk-KSBPABBQ.js";
 import {
   detectFormat,
   detectZipFormat
-} from "./chunk-5Y2Q3BRW.js";
+} from "./chunk-M3E3C5GS.js";
 import {
   KordocError,
   VERSION,
   blocksToMarkdown,
   sanitizeError,
   toArrayBuffer
-} from "./chunk-FCQEF2ZM.js";
+} from "./chunk-VJPDY4YT.js";
 import "./chunk-MOL7MDBG.js";
 // src/mcp.ts
@@ -178,7 +178,7 @@ server.tool(
       let metadata;
       let effectiveFormat = format;
       if (format === "hwpx") {
-        const { detectZipFormat: detectZipFormat2 } = await import("./detect-GYK3HKD5.js");
+        const { detectZipFormat: detectZipFormat2 } = await import("./detect-I7YIS4Q6.js");
         const zipFormat = await detectZipFormat2(buffer);
         if (zipFormat === "xlsx" || zipFormat === "docx") effectiveFormat = zipFormat;
       }
@@ -191,7 +191,7 @@ server.tool(
           break;
         case "pdf":
           try {
-            const { extractPdfMetadataOnly } = await import("./parser-AMP7MAOH.js");
+            const { extractPdfMetadataOnly } = await import("./parser-4275GJRB.js");
             metadata = await extractPdfMetadataOnly(buffer);
           } catch {
             metadata = void 0;

package/dist/{parser-AMP7MAOH.js → parser-4275GJRB.js} RENAMED Viewed

@@ -7,7 +7,7 @@ import {
   blocksToMarkdown,
   safeMax,
   safeMin
-} from "./chunk-FCQEF2ZM.js";
+} from "./chunk-VJPDY4YT.js";
 import {
   parsePageRange
 } from "./chunk-MOL7MDBG.js";
@@ -2187,10 +2187,11 @@ function detectSpecialKoreanTables(blocks) {
   return result;
 }
 function removeHeaderFooterBlocks(blocks, pageHeights, warnings) {
-  const ZONE_RATIO = 0.1;
+  const ZONE_RATIO = 0.12;
   const MIN_REPEAT = 3;
-  const headerTexts = /* @__PURE__ */ new Map();
-  const footerTexts = /* @__PURE__ */ new Map();
+  const Y_BUCKET = 5;
+  const topEntries = [];
+  const bottomEntries = [];
   for (let bi = 0; bi < blocks.length; bi++) {
     const b = blocks[bi];
     if (!b.bbox || !b.pageNumber || !b.text?.trim()) continue;
@@ -2198,49 +2199,51 @@ function removeHeaderFooterBlocks(blocks, pageHeights, warnings) {
     if (!ph) continue;
     const blockTop = ph - (b.bbox.y + b.bbox.height);
     const blockBottom = ph - b.bbox.y;
-    if (blockBottom <= ph * ZONE_RATIO) {
-      const arr = footerTexts.get(b.pageNumber) || [];
-      arr.push(b.text.trim());
-      footerTexts.set(b.pageNumber, arr);
-    } else if (blockTop >= ph * (1 - ZONE_RATIO)) {
-      const arr = headerTexts.get(b.pageNumber) || [];
-      arr.push(b.text.trim());
-      headerTexts.set(b.pageNumber, arr);
-    }
-  }
-  const repeatedPatterns = /* @__PURE__ */ new Set();
-  for (const textsMap of [headerTexts, footerTexts]) {
+    const entry = { blockIdx: bi, page: b.pageNumber, y: b.bbox.y, text: b.text.trim() };
+    if (blockBottom <= ph * ZONE_RATIO) bottomEntries.push(entry);
+    else if (blockTop >= ph * (1 - ZONE_RATIO)) topEntries.push(entry);
+  }
+  const removeSet = /* @__PURE__ */ new Set();
+  for (const entries of [topEntries, bottomEntries]) {
+    if (entries.length === 0) continue;
     const patternCount = /* @__PURE__ */ new Map();
-    for (const [, texts] of textsMap) {
-      for (const t of texts) {
-        const normalized = t.replace(/\d+/g, "#");
-        patternCount.set(normalized, (patternCount.get(normalized) || 0) + 1);
+    const patternPages = /* @__PURE__ */ new Map();
+    for (const e of entries) {
+      const norm = e.text.replace(/\d+/g, "#");
+      patternCount.set(norm, (patternCount.get(norm) || 0) + 1);
+      const pages = patternPages.get(norm) || /* @__PURE__ */ new Set();
+      pages.add(e.page);
+      patternPages.set(norm, pages);
+    }
+    const repeatedPatterns = /* @__PURE__ */ new Set();
+    for (const [p, count] of patternCount) {
+      if (count >= MIN_REPEAT && (patternPages.get(p)?.size ?? 0) >= MIN_REPEAT) {
+        repeatedPatterns.add(p);
       }
     }
-    for (const [pattern, count] of patternCount) {
-      if (count >= MIN_REPEAT) repeatedPatterns.add(pattern);
-    }
-  }
-  if (repeatedPatterns.size === 0) return [];
-  const removeIndices = [];
-  for (let bi = 0; bi < blocks.length; bi++) {
-    const b = blocks[bi];
-    if (!b.bbox || !b.pageNumber || !b.text?.trim()) continue;
-    const ph = pageHeights.get(b.bbox.page) || pageHeights.get(b.pageNumber);
-    if (!ph) continue;
-    const blockTop = ph - (b.bbox.y + b.bbox.height);
-    const blockBottom = ph - b.bbox.y;
-    const inZone = blockBottom <= ph * ZONE_RATIO || blockTop >= ph * (1 - ZONE_RATIO);
-    if (!inZone) continue;
-    const normalized = b.text.trim().replace(/\d+/g, "#");
-    if (repeatedPatterns.has(normalized)) {
-      removeIndices.push(bi);
+    const bucketPages = /* @__PURE__ */ new Map();
+    for (const e of entries) {
+      const bucket = Math.round(e.y / Y_BUCKET);
+      const pages = bucketPages.get(bucket) || /* @__PURE__ */ new Set();
+      pages.add(e.page);
+      bucketPages.set(bucket, pages);
+    }
+    const repeatedBuckets = /* @__PURE__ */ new Set();
+    for (const [b, pages] of bucketPages) {
+      if (pages.size >= MIN_REPEAT) repeatedBuckets.add(b);
+    }
+    for (const e of entries) {
+      const norm = e.text.replace(/\d+/g, "#");
+      const bucket = Math.round(e.y / Y_BUCKET);
+      if (repeatedPatterns.has(norm) || repeatedBuckets.has(bucket)) {
+        removeSet.add(e.blockIdx);
+      }
     }
   }
-  if (removeIndices.length > 0) {
-    warnings.push({ message: `${removeIndices.length}\uAC1C \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC694\uC18C \uC81C\uAC70\uB428`, code: "HIDDEN_TEXT_FILTERED" });
+  if (removeSet.size > 0) {
+    warnings.push({ message: `${removeSet.size}\uAC1C \uBA38\uB9AC\uAE00/\uBC14\uB2E5\uAE00 \uC694\uC18C \uC81C\uAC70\uB428`, code: "HIDDEN_TEXT_FILTERED" });
   }
-  return removeIndices;
+  return [...removeSet].sort((a, b) => a - b);
 }
 function mergeKoreanLines(text) {
   if (!text) return "";
@@ -2276,4 +2279,4 @@ export {
   extractPdfMetadataOnly,
   parsePdfDocument
 };
-//# sourceMappingURL=parser-AMP7MAOH.js.map
+//# sourceMappingURL=parser-4275GJRB.js.map