kordoc 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -139,7 +139,7 @@ import { inflateRawSync } from "zlib";
139
139
  import { DOMParser } from "@xmldom/xmldom";
140
140
 
141
141
  // src/utils.ts
142
- var VERSION = true ? "2.2.3" : "0.0.0-dev";
142
+ var VERSION = true ? "2.2.4" : "0.0.0-dev";
143
143
  function toArrayBuffer(buf) {
144
144
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
145
145
  return buf.buffer;
@@ -5563,21 +5563,21 @@ async function parseXlsxDocument(buffer, options) {
5563
5563
  import JSZip4 from "jszip";
5564
5564
  import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
5565
5565
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
5566
- function getChildElements(parent, localName) {
5566
+ function getChildElements(parent, localName2) {
5567
5567
  const result = [];
5568
5568
  const children = parent.childNodes;
5569
5569
  for (let i = 0; i < children.length; i++) {
5570
5570
  const node = children[i];
5571
5571
  if (node.nodeType === 1) {
5572
5572
  const el = node;
5573
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5573
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5574
5574
  result.push(el);
5575
5575
  }
5576
5576
  }
5577
5577
  }
5578
5578
  return result;
5579
5579
  }
5580
- function findElements(parent, localName) {
5580
+ function findElements(parent, localName2) {
5581
5581
  const result = [];
5582
5582
  const walk = (node) => {
5583
5583
  const children = node.childNodes;
@@ -5585,7 +5585,7 @@ function findElements(parent, localName) {
5585
5585
  const child = children[i];
5586
5586
  if (child.nodeType === 1) {
5587
5587
  const el = child;
5588
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5588
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5589
5589
  result.push(el);
5590
5590
  }
5591
5591
  walk(el);
@@ -5595,11 +5595,11 @@ function findElements(parent, localName) {
5595
5595
  walk(parent);
5596
5596
  return result;
5597
5597
  }
5598
- function getAttr(el, localName) {
5598
+ function getAttr(el, localName2) {
5599
5599
  const attrs = el.attributes;
5600
5600
  for (let i = 0; i < attrs.length; i++) {
5601
5601
  const attr = attrs[i];
5602
- if (attr.localName === localName || attr.name === localName) return attr.value;
5602
+ if (attr.localName === localName2 || attr.name === localName2) return attr.value;
5603
5603
  }
5604
5604
  return null;
5605
5605
  }
@@ -5946,11 +5946,11 @@ async function parseDocxDocument(buffer, options) {
5946
5946
  const node = children[i];
5947
5947
  if (node.nodeType !== 1) continue;
5948
5948
  const el = node;
5949
- const localName = el.localName ?? el.tagName?.split(":").pop();
5950
- if (localName === "p") {
5949
+ const localName2 = el.localName ?? el.tagName?.split(":").pop();
5950
+ if (localName2 === "p") {
5951
5951
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
5952
5952
  if (block) blocks.push(block);
5953
- } else if (localName === "tbl") {
5953
+ } else if (localName2 === "tbl") {
5954
5954
  const block = parseTable(el, styles, numbering, footnotes, rels);
5955
5955
  if (block) blocks.push(block);
5956
5956
  }
@@ -5988,183 +5988,6 @@ async function parseDocxDocument(buffer, options) {
5988
5988
  };
5989
5989
  }
5990
5990
 
5991
- // src/diff/text-diff.ts
5992
- function similarity(a, b) {
5993
- if (a === b) return 1;
5994
- if (!a || !b) return 0;
5995
- const maxLen = Math.max(a.length, b.length);
5996
- if (maxLen === 0) return 1;
5997
- return 1 - levenshtein(a, b) / maxLen;
5998
- }
5999
- function normalizedSimilarity(a, b) {
6000
- return similarity(normalize(a), normalize(b));
6001
- }
6002
- function normalize(s) {
6003
- return s.replace(/\s+/g, " ").trim();
6004
- }
6005
- var MAX_LEVENSHTEIN_LEN = 1e4;
6006
- function levenshtein(a, b) {
6007
- if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
6008
- const sampleLen = Math.min(500, a.length, b.length);
6009
- let diffs = 0;
6010
- for (let i = 0; i < sampleLen; i++) if (a[i] !== b[i]) diffs++;
6011
- const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
6012
- return Math.abs(a.length - b.length) + Math.round(Math.min(a.length, b.length) * sampleRate);
6013
- }
6014
- if (a.length > b.length) [a, b] = [b, a];
6015
- const m = a.length;
6016
- const n = b.length;
6017
- let prev = Array.from({ length: m + 1 }, (_, i) => i);
6018
- let curr = new Array(m + 1);
6019
- for (let j = 1; j <= n; j++) {
6020
- curr[0] = j;
6021
- for (let i = 1; i <= m; i++) {
6022
- if (a[i - 1] === b[j - 1]) {
6023
- curr[i] = prev[i - 1];
6024
- } else {
6025
- curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
6026
- }
6027
- }
6028
- ;
6029
- [prev, curr] = [curr, prev];
6030
- }
6031
- return prev[m];
6032
- }
6033
-
6034
- // src/diff/compare.ts
6035
- var SIMILARITY_THRESHOLD = 0.4;
6036
- async function compare(bufferA, bufferB, options) {
6037
- const [resultA, resultB] = await Promise.all([
6038
- parse(bufferA, options),
6039
- parse(bufferB, options)
6040
- ]);
6041
- if (!resultA.success) throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
6042
- if (!resultB.success) throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
6043
- return diffBlocks(resultA.blocks, resultB.blocks);
6044
- }
6045
- function diffBlocks(blocksA, blocksB) {
6046
- const aligned = alignBlocks(blocksA, blocksB);
6047
- const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
6048
- const diffs = [];
6049
- for (const [a, b] of aligned) {
6050
- if (a && b) {
6051
- const sim = blockSimilarity(a, b);
6052
- if (sim >= 0.99) {
6053
- diffs.push({ type: "unchanged", before: a, after: b, similarity: 1 });
6054
- stats.unchanged++;
6055
- } else {
6056
- const diff = { type: "modified", before: a, after: b, similarity: sim };
6057
- if (a.type === "table" && b.type === "table" && a.table && b.table) {
6058
- diff.cellDiffs = diffTableCells(a.table, b.table);
6059
- }
6060
- diffs.push(diff);
6061
- stats.modified++;
6062
- }
6063
- } else if (a) {
6064
- diffs.push({ type: "removed", before: a });
6065
- stats.removed++;
6066
- } else if (b) {
6067
- diffs.push({ type: "added", after: b });
6068
- stats.added++;
6069
- }
6070
- }
6071
- return { stats, diffs };
6072
- }
6073
- function alignBlocks(a, b) {
6074
- const m = a.length, n = b.length;
6075
- if (m * n > 1e7) return fallbackAlign(a, b);
6076
- const simCache = /* @__PURE__ */ new Map();
6077
- const getSim = (i2, j2) => {
6078
- const key = `${i2},${j2}`;
6079
- let v = simCache.get(key);
6080
- if (v === void 0) {
6081
- v = blockSimilarity(a[i2], b[j2]);
6082
- simCache.set(key, v);
6083
- }
6084
- return v;
6085
- };
6086
- const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
6087
- for (let i2 = 1; i2 <= m; i2++) {
6088
- for (let j2 = 1; j2 <= n; j2++) {
6089
- if (getSim(i2 - 1, j2 - 1) >= SIMILARITY_THRESHOLD) {
6090
- dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
6091
- } else {
6092
- dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
6093
- }
6094
- }
6095
- }
6096
- const pairs = [];
6097
- let i = m, j = n;
6098
- while (i > 0 && j > 0) {
6099
- if (getSim(i - 1, j - 1) >= SIMILARITY_THRESHOLD && dp[i][j] === dp[i - 1][j - 1] + 1) {
6100
- pairs.push([i - 1, j - 1]);
6101
- i--;
6102
- j--;
6103
- } else if (dp[i - 1][j] >= dp[i][j - 1]) {
6104
- i--;
6105
- } else {
6106
- j--;
6107
- }
6108
- }
6109
- pairs.reverse();
6110
- const result = [];
6111
- let ai = 0, bi = 0;
6112
- for (const [pi, pj] of pairs) {
6113
- while (ai < pi) result.push([a[ai++], null]);
6114
- while (bi < pj) result.push([null, b[bi++]]);
6115
- result.push([a[ai++], b[bi++]]);
6116
- }
6117
- while (ai < m) result.push([a[ai++], null]);
6118
- while (bi < n) result.push([null, b[bi++]]);
6119
- return result;
6120
- }
6121
- function fallbackAlign(a, b) {
6122
- const result = [];
6123
- const len = Math.max(a.length, b.length);
6124
- for (let i = 0; i < len; i++) {
6125
- result.push([a[i] || null, b[i] || null]);
6126
- }
6127
- return result;
6128
- }
6129
- function blockSimilarity(a, b) {
6130
- if (a.type !== b.type) return 0;
6131
- if (a.text !== void 0 && b.text !== void 0) {
6132
- return normalizedSimilarity(a.text || "", b.text || "");
6133
- }
6134
- if (a.type === "table" && a.table && b.table) {
6135
- return tableSimilarity(a.table, b.table);
6136
- }
6137
- if (a.type === b.type) return 1;
6138
- return 0;
6139
- }
6140
- function tableSimilarity(a, b) {
6141
- const dimSim = 1 - Math.abs(a.rows * a.cols - b.rows * b.cols) / Math.max(a.rows * a.cols, b.rows * b.cols, 1);
6142
- const textsA = a.cells.flat().map((c) => c.text).join(" ");
6143
- const textsB = b.cells.flat().map((c) => c.text).join(" ");
6144
- const contentSim = normalizedSimilarity(textsA, textsB);
6145
- return dimSim * 0.3 + contentSim * 0.7;
6146
- }
6147
- function diffTableCells(a, b) {
6148
- const maxRows = Math.max(a.rows, b.rows);
6149
- const maxCols = Math.max(a.cols, b.cols);
6150
- const result = [];
6151
- for (let r = 0; r < maxRows; r++) {
6152
- const row = [];
6153
- for (let c = 0; c < maxCols; c++) {
6154
- const cellA = r < a.rows && c < a.cols ? a.cells[r][c].text : void 0;
6155
- const cellB = r < b.rows && c < b.cols ? b.cells[r][c].text : void 0;
6156
- let type;
6157
- if (cellA === void 0) type = "added";
6158
- else if (cellB === void 0) type = "removed";
6159
- else if (cellA === cellB) type = "unchanged";
6160
- else type = "modified";
6161
- row.push({ type, before: cellA, after: cellB });
6162
- }
6163
- result.push(row);
6164
- }
6165
- return result;
6166
- }
6167
-
6168
5991
  // src/form/recognize.ts
6169
5992
  var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6170
5993
  "\uC131\uBA85",
@@ -6205,15 +6028,20 @@ var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6205
6028
  "\uB2E8\uAC00",
6206
6029
  "\uD569\uACC4",
6207
6030
  "\uACC4",
6208
- "\uC18C\uACC4"
6031
+ "\uC18C\uACC4",
6032
+ "\uB4F1\uB85D\uAE30\uC900\uC9C0",
6033
+ "\uBCF8\uC801",
6034
+ "\uC704\uC784\uC778",
6035
+ "\uCCAD\uAD6C\uC0AC\uC720",
6036
+ "\uC18C\uBA85\uC790\uB8CC"
6209
6037
  ]);
6210
6038
  function isLabelCell(text) {
6211
- const trimmed = text.trim();
6039
+ const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
6212
6040
  if (!trimmed || trimmed.length > 30) return false;
6213
6041
  for (const kw of LABEL_KEYWORDS) {
6214
6042
  if (trimmed.includes(kw)) return true;
6215
6043
  }
6216
- if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
6044
+ if (/^[가-힣\s()()·::]+$/.test(trimmed) && trimmed.replace(/\s/g, "").length >= 2 && trimmed.replace(/\s/g, "").length <= 8 && !/\d/.test(trimmed)) return true;
6217
6045
  if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
6218
6046
  return false;
6219
6047
  }
@@ -6236,63 +6064,572 @@ function extractFormFields(blocks) {
6236
6064
  fields.push(...inlineFields);
6237
6065
  }
6238
6066
  }
6239
- const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
6240
- return { fields, confidence: Math.min(confidence, 1) };
6067
+ const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
6068
+ return { fields, confidence: Math.min(confidence, 1) };
6069
+ }
6070
/**
 * Extract label/value form fields from one parsed table.
 *
 * Strategy 1 (horizontal pairs): every cell accepted by `isLabelCell` is paired
 * with the cell immediately to its right; the pair is emitted even when the
 * value is empty, so blank form fields are still reported.
 * Strategy 2 (header row): used only when strategy 1 produced nothing. If every
 * cell of row 0 is non-empty and at most 20 characters long, each non-empty
 * cell below is emitted with its column header as the label.
 *
 * Missing rows or cells (ragged tables) are skipped instead of throwing.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      const row = table.cells[r];
      if (!row) continue; // declared row is absent
      for (let c = 0; c < table.cols - 1; c++) {
        const labelCell = row[c];
        const valueCell = row[c + 1];
        if (!labelCell || !valueCell) continue;
        if (!isLabelCell(labelCell.text)) continue;
        fields.push({
          // Drop one trailing ASCII or full-width colon from the label.
          label: labelCell.text.trim().replace(/[:\uFF1A]\s*$/, ""),
          value: valueCell.text.trim(),
          row: r,
          col: c,
        });
      }
    }
  }
  if (fields.length > 0 || table.rows < 2 || table.cols < 2) return fields;

  const headerRow = table.cells[0] ?? [];
  const looksLikeHeader = headerRow.every((cell) => {
    const t = cell?.text.trim() ?? "";
    return t.length > 0 && t.length <= 20;
  });
  if (!looksLikeHeader) return fields;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const label = headerRow[c]?.text.trim();
      const value = table.cells[r]?.[c]?.text.trim();
      if (label && value) fields.push({ label, value, row: r, col: c });
    }
  }
  return fields;
}
6108
/**
 * Extract "label: value" pairs from free text.
 *
 * A label is 2-10 Hangul/Latin letters, followed by an ASCII or full-width
 * colon, followed by up to 100 characters that are not a newline, comma or
 * semicolon. Pairs whose value is blank after trimming are dropped.
 * Inline fields carry no table position, so row/col are -1.
 *
 * @param {string} text
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  // The separator class spells the full-width colon as an escape so it cannot
  // be lost to character normalisation of the source file.
  const pattern = /([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{1,100})/g;
  const fields = [];
  for (const [, rawLabel, rawValue] of text.matchAll(pattern)) {
    const value = rawValue.trim();
    if (value) fields.push({ label: rawLabel.trim(), value, row: -1, col: -1 });
  }
  return fields;
}
6121
+
6122
+ // src/form/match.ts
6123
/**
 * Reduce a label to its comparison key by deleting every whitespace character,
 * ASCII and full-width colon, ASCII and full-width parenthesis, and the middle
 * dot (U+00B7).
 *
 * The full-width members are written as escapes: the class previously listed
 * ":" and "()" twice, which left full-width punctuation inside the key.
 *
 * @param {string} label
 * @returns {string}
 */
function normalizeLabel(label) {
  // Removing \s everywhere also covers leading/trailing whitespace.
  return label.replace(/[:\uFF1A\s()\uFF08\uFF09·]/g, "");
}
6126
/**
 * Find the key of `values` that corresponds to a normalized cell label.
 *
 * An exact key wins outright. Otherwise a key qualifies when one string is a
 * prefix of the other and the shorter one is at least 60% as long as the
 * longer one; among qualifying keys the one with the longest shared prefix
 * (strictly longer than any seen before) is returned.
 *
 * @param {string} cellLabel - normalized label text
 * @param {Map<string, *>} values - normalized key -> value
 * @returns {string|undefined} matching key, or undefined when none qualifies
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) return cellLabel;
  let winner;
  let winnerLen = 0;
  for (const candidate of values.keys()) {
    let shorter;
    let longer;
    if (cellLabel.startsWith(candidate)) {
      shorter = candidate;
      longer = cellLabel;
    } else if (candidate.startsWith(cellLabel)) {
      shorter = cellLabel;
      longer = candidate;
    } else {
      continue;
    }
    const coversEnough = shorter.length >= longer.length * 0.6;
    if (coversEnough && shorter.length > winnerLen) {
      winnerLen = shorter.length;
      winner = candidate;
    }
  }
  return winner;
}
6145
/**
 * Tell whether a cell's text contains one of the known label keywords.
 *
 * Trailing footnote markers (superscript digits, "*", "※") are removed first;
 * text that is then empty or longer than 15 characters is rejected.
 *
 * @param {string} text
 * @returns {boolean}
 */
function isKeywordLabel(text) {
  const core = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (core.length === 0 || core.length > 15) return false;
  return [...LABEL_KEYWORDS].some((kw) => core.includes(kw));
}
6153
/**
 * Fill placeholders that live inside a single cell's text.
 *
 * Three patterns are handled, in this order:
 *   1. `prefix(   )suffix` - blank parentheses between letters; looked up by
 *      the normalized `prefix + suffix`, then by the normalized prefix alone.
 *   2. `□keyword` - an empty checkbox; ticked (replaced by U+2611) when the
 *      supplied value is one of the accepted "checked" tokens or blank.
 *   3. `(keyword:   )` - blank after a colon inside parentheses; rewritten
 *      with a full-width colon followed by the value.
 *
 * Every key that is used is added to `matchedLabels` (mutated in place).
 *
 * @param {string} cellText
 * @param {Map<string, *>} values - normalized key -> value
 * @param {Set<string>} matchedLabels
 * @returns {{text: string, matches: Array<{key: string, label: string, value: *}>}|null}
 *   the rewritten text and what was filled, or null when nothing matched
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const CHECKED_TOKENS = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"];
  const matches = [];
  const record = (key, label, value) => {
    matchedLabels.add(key);
    matches.push({ key, label, value });
  };

  let text = cellText.replace(
    /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
    (match, prefix, suffix) => {
      const label = prefix + suffix;
      const fullKey = normalizeLabel(label);
      const prefixKey = normalizeLabel(prefix);
      const matchKey = values.has(fullKey) ? fullKey : values.has(prefixKey) ? prefixKey : undefined;
      if (matchKey === undefined) return match;
      const newValue = values.get(matchKey);
      record(matchKey, label, newValue);
      return `${prefix}(${newValue})${suffix}`;
    }
  );

  text = text.replace(
    /□([가-힣A-Za-z]+)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      // Callers may pass booleans or numbers for checkboxes; coerce before
      // trimming so `true` / `1` behave like "true" / "1" instead of throwing.
      const token = String(values.get(matchKey)).trim();
      const isTruthy = token === "" || CHECKED_TOKENS.includes(token);
      if (!isTruthy) return match;
      record(matchKey, `\u25A1${keyword}`, "\u2611");
      return `\u2611${keyword}`;
    }
  );

  text = text.replace(
    // Accept both the ASCII and the full-width colon before the blank.
    /\(([가-힣A-Za-z]+)[:\uFF1A]\s{1,}\)/g,
    (match, keyword) => {
      const matchKey = normalizeLabel(keyword);
      if (!values.has(matchKey)) return match;
      const newValue = values.get(matchKey);
      record(matchKey, keyword, newValue);
      return `(${keyword}\uFF1A${newValue})`;
    }
  );

  return matches.length > 0 ? { text, matches } : null;
}
6197
/**
 * Build a Map from normalized label to value out of a plain `{label: value}`
 * object. When two labels normalize to the same key, the later entry's value
 * replaces the earlier one.
 *
 * @param {Object<string, *>} values
 * @returns {Map<string, *>}
 */
function normalizeValues(values) {
  const pairs = Object.entries(values).map(([label, value]) => [normalizeLabel(label), value]);
  return new Map(pairs);
}
6204
/**
 * List the caller-supplied labels that were never filled.
 *
 * Each normalized key absent from `matchedLabels` is reported using the first
 * original label that normalizes to it, or the normalized key itself when no
 * original label does.
 *
 * The original-label lookup is built once, instead of re-normalizing every
 * original key for every unmatched key.
 *
 * @param {Map<string, *>} normalizedValues
 * @param {Set<string>} matchedLabels
 * @param {Object<string, *>} originalValues
 * @returns {string[]}
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  const firstOriginalByKey = new Map();
  for (const orig of Object.keys(originalValues)) {
    const key = normalizeLabel(orig);
    if (!firstOriginalByKey.has(key)) firstOriginalByKey.set(key, orig);
  }
  const unmatched = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) continue;
    unmatched.push(firstOriginalByKey.has(key) ? firstOriginalByKey.get(key) : key);
  }
  return unmatched;
}
6212
+
6213
+ // src/form/filler.ts
6214
/**
 * Fill form values into a deep copy of the parsed blocks.
 *
 * Three passes run over the copy:
 *   1. in-cell placeholders of every table cell (`fillInCellPatterns`);
 *   2. label -> neighbouring value cells and header-row tables (`fillTable`);
 *   3. inline "label: value" text of paragraph blocks (`fillInlineFields`).
 *
 * The cells touched by pass 1 are tracked per table. They were previously
 * kept in one set keyed only by "row,col", so a pattern fill in one table made
 * pass 2 take the "prepend" path at the same coordinates of every other table.
 *
 * @param {Array<Object>} blocks - parsed blocks; not mutated
 * @param {Object<string, *>} values - label -> value supplied by the caller
 * @returns {{blocks: Array<Object>, filled: Array<Object>, unmatched: string[]}}
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = new Set();
  const normalizedValues = normalizeValues(values);

  const tables = [];
  for (const block of cloned) {
    if (block.type === "table" && block.table) tables.push(block.table);
  }

  // Pass 1: in-cell placeholders, remembering which cells were rewritten.
  const patternCellsByTable = new Map();
  for (const table of tables) {
    let patternFilledCells = patternCellsByTable.get(table);
    if (!patternFilledCells) {
      patternFilledCells = new Set();
      patternCellsByTable.set(table, patternFilledCells);
    }
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols; c++) {
        const cell = table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (!result) continue;
        cell.text = result.text;
        patternFilledCells.add(`${r},${c}`);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: r, col: c });
        }
      }
    }
  }

  // Pass 2: label/value cells, with this table's own pattern-filled set.
  for (const table of tables) {
    fillTable(table, normalizedValues, filled, matchedLabels, patternCellsByTable.get(table));
  }

  // Pass 3: inline fields in paragraphs.
  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
6249
/**
 * Fill one table in place.
 *
 * Pass A (label -> right neighbour): for each cell accepted by `isLabelCell`
 * whose right neighbour is not itself a keyword label, the neighbour's text is
 * replaced by the matching value. If that neighbour was already rewritten by
 * the in-cell pattern pass, the value is prepended instead of overwriting.
 *
 * Pass B (header row): runs when the table has at least two rows and every
 * cell of row 0 is a short label. Each key not yet matched is written into the
 * first data row that has the column; marking it matched stops later rows.
 *
 * Missing rows or cells are skipped rather than dereferenced.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table - mutated
 * @param {Map<string, *>} values - normalized key -> value
 * @param {Array<Object>} filled - receives one record per filled cell
 * @param {Set<string>} matchedLabels - receives every key used
 * @param {Set<string>} [patternFilledCells] - "row,col" ids rewritten earlier
 * @returns {void}
 */
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
  if (table.cols < 2) return;

  for (let r = 0; r < table.rows; r++) {
    const row = table.cells[r];
    if (!row) continue;
    for (let c = 0; c < table.cols - 1; c++) {
      const labelCell = row[c];
      const valueCell = row[c + 1];
      if (!labelCell || !valueCell) continue;
      if (!isLabelCell(labelCell.text)) continue;
      if (isKeywordLabel(valueCell.text)) continue; // neighbour is another label
      const normalizedCellLabel = normalizeLabel(labelCell.text);
      if (!normalizedCellLabel) continue;
      const matchKey = findMatchingKey(normalizedCellLabel, values);
      if (matchKey === undefined) continue;
      const newValue = values.get(matchKey);
      valueCell.text = patternFilledCells?.has(`${r},${c + 1}`)
        ? `${newValue} ${valueCell.text}`
        : newValue;
      matchedLabels.add(matchKey);
      filled.push({
        // Drop one trailing ASCII or full-width colon from the label.
        label: labelCell.text.trim().replace(/[:\uFF1A]\s*$/, ""),
        value: newValue,
        row: r,
        col: c,
      });
    }
  }

  if (table.rows < 2) return;
  const headerRow = table.cells[0];
  if (!headerRow) return;
  const allLabels = headerRow.every((cell) => {
    const t = cell?.text.trim() ?? "";
    return t.length > 0 && t.length <= 20 && isLabelCell(t);
  });
  if (!allLabels) return;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const target = table.cells[r]?.[c];
      if (!headerCell || !target) continue;
      const matchKey = findMatchingKey(normalizeLabel(headerCell.text), values);
      if (matchKey === undefined) continue;
      if (matchedLabels.has(matchKey)) continue;
      const newValue = values.get(matchKey);
      target.text = newValue;
      matchedLabels.add(matchKey);
      filled.push({ label: headerCell.text.trim(), value: newValue, row: r, col: c });
    }
  }
}
6303
/**
 * Replace the value part of inline "label: value" fields in a paragraph.
 *
 * A field is 2-10 Hangul/Latin letters, an ASCII or full-width colon, and up
 * to 100 characters that are not a newline, comma or semicolon. When the label
 * resolves to a key via `findMatchingKey`, the whole field is rewritten as
 * `label: newValue` (ASCII colon plus one space); other fields are untouched.
 *
 * @param {string} text
 * @param {Map<string, *>} values - normalized key -> value
 * @param {Array<Object>} filled - receives one record per replacement (row/col -1)
 * @param {Set<string>} matchedLabels - receives every key used
 * @returns {string} the rewritten text
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  // Full-width colon written as an escape; the class previously held ":" twice.
  const fieldPattern = /([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{0,100})/g;
  return text.replace(fieldPattern, (match, rawLabel) => {
    const matchKey = findMatchingKey(normalizeLabel(rawLabel), values);
    if (matchKey === undefined) return match;
    const newValue = values.get(matchKey);
    matchedLabels.add(matchKey);
    filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
    return `${rawLabel}: ${newValue}`;
  });
}
6322
+
6323
+ // src/form/filler-hwpx.ts
6324
+ import JSZip5 from "jszip";
6325
+ import { DOMParser as DOMParser4, XMLSerializer } from "@xmldom/xmldom";
6326
/**
 * Fill form values directly into the section XML of an HWPX archive.
 *
 * For every zip entry whose name ends in `section<N>.xml` the XML is parsed
 * and three passes run:
 *   1. in-cell placeholders of every table cell (`fillInCellPatterns`);
 *   2. label -> next cell within each row, then header-row tables;
 *   3. inline "label: value" fields of paragraphs outside tables.
 * A section is re-serialised into the archive only if something changed.
 *
 * Changes from the previous revision:
 *   - colon character classes accept the full-width colon (written as an
 *     escape; the classes previously held ":" twice);
 *   - pass 3 fills every matching inline field of a paragraph instead of
 *     stopping after the first. Replacements are applied back to front and the
 *     text nodes are re-collected each time, so offsets computed on the
 *     original text stay valid.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} hwpxBuffer - archive bytes handed to JSZip
 * @param {Object<string, *>} values - label -> value supplied by the caller
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array<Object>, unmatched: string[]}>}
 * @throws {KordocError} when the archive contains no section file
 */
async function fillHwpx(hwpxBuffer, values) {
  const zip = await JSZip5.loadAsync(hwpxBuffer);
  const filled = [];
  const matchedLabels = new Set();
  const normalizedValues = normalizeValues(values);
  const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
  if (sectionFiles.length === 0) {
    throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  const xmlParser = new DOMParser4();
  const xmlSerializer = new XMLSerializer();
  const stripTrailingColon = (label) => label.trim().replace(/[:\uFF1A]\s*$/, "");

  for (const sectionPath of sectionFiles) {
    const zipEntry = zip.file(sectionPath);
    if (!zipEntry) continue;
    const rawXml = await zipEntry.async("text");
    const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
    if (!doc.documentElement) continue;
    let modified = false;
    const tables = findAllElements(doc.documentElement, "tbl");

    // Pass 1: placeholders inside a single cell.
    const cellPatternApplied = new Set();
    for (const tblEl of tables) {
      for (const tcEl of findAllElements(tblEl, "tc")) {
        const tNodes = collectCellTextNodes(tcEl);
        const fullText = tNodes.map((n) => n.text).join("");
        const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
        if (!result) continue;
        applyTextReplacements(tNodes, fullText, result.text);
        cellPatternApplied.add(tcEl);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
        }
        modified = true;
      }
    }

    // Pass 2: label cell -> following cell, then header-row tables.
    for (const tblEl of tables) {
      const rows = findDirectChildren(tblEl, "tr");
      for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        const cells = findDirectChildren(rows[rowIdx], "tc");
        for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
          const labelText = extractCellText(cells[colIdx]);
          if (!isLabelCell(labelText)) continue;
          const valueCell = cells[colIdx + 1];
          if (isKeywordLabel(extractCellText(valueCell))) continue;
          const normalizedCellLabel = normalizeLabel(labelText);
          if (!normalizedCellLabel) continue;
          const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
          if (matchKey === undefined) continue;
          const newValue = normalizedValues.get(matchKey);
          if (cellPatternApplied.has(valueCell)) {
            // Keep what pass 1 wrote; put the value in front of it.
            prependCellText(valueCell, newValue);
          } else {
            replaceCellText(valueCell, newValue);
          }
          matchedLabels.add(matchKey);
          filled.push({ label: stripTrailingColon(labelText), value: newValue, row: rowIdx, col: colIdx });
          modified = true;
        }
      }

      if (rows.length < 2) continue;
      const headerCells = findDirectChildren(rows[0], "tc");
      const allLabels = headerCells.every((cell) => {
        const t = extractCellText(cell).trim();
        return t.length > 0 && t.length <= 20 && isLabelCell(t);
      });
      if (!allLabels) continue;
      for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
        const dataCells = findDirectChildren(rows[rowIdx], "tc");
        const width = Math.min(headerCells.length, dataCells.length);
        for (let colIdx = 0; colIdx < width; colIdx++) {
          const headerText = extractCellText(headerCells[colIdx]);
          const matchKey = findMatchingKey(normalizeLabel(headerText), normalizedValues);
          if (matchKey === undefined) continue;
          if (matchedLabels.has(matchKey)) continue; // each key is written once
          const newValue = normalizedValues.get(matchKey);
          replaceCellText(dataCells[colIdx], newValue);
          matchedLabels.add(matchKey);
          filled.push({ label: headerText.trim(), value: newValue, row: rowIdx, col: colIdx });
          modified = true;
        }
      }
    }

    // Pass 3: inline "label: value" fields in paragraphs outside tables.
    for (const pEl of findAllElements(doc.documentElement, "p")) {
      if (isInsideTable(pEl)) continue;
      const fullText = collectTextNodes(pEl).map((n) => n.text).join("");
      const inlineMatches = [...fullText.matchAll(/([가-힣A-Za-z]{2,10})\s*[:\uFF1A]\s*([^\n,;]{0,100})/g)];
      const paragraphFills = [];
      // Back to front: text before an edited range is unchanged, so the
      // offsets of earlier matches remain correct.
      for (let k = inlineMatches.length - 1; k >= 0; k--) {
        const match = inlineMatches[k];
        const rawLabel = match[1];
        const matchKey = findMatchingKey(normalizeLabel(rawLabel), normalizedValues);
        if (matchKey === undefined) continue;
        const newValue = normalizedValues.get(matchKey);
        const valueEnd = match.index + match[0].length;
        const valueStart = valueEnd - match[2].length;
        // Re-collect: node text captured before a previous edit is stale.
        replaceTextRange(collectTextNodes(pEl), valueStart, valueEnd, newValue);
        matchedLabels.add(matchKey);
        paragraphFills.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
        modified = true;
      }
      filled.push(...paragraphFills.reverse()); // report in document order
    }

    if (modified) {
      zip.file(sectionPath, xmlSerializer.serializeToString(doc));
    }
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
  return { buffer, filled, unmatched };
}
6452
/**
 * Return an element's tag name without its namespace prefix.
 *
 * Uses `tagName`, falling back to `localName`, then to the empty string, and
 * removes one leading `prefix:` (at least one character before the colon).
 *
 * @param {{tagName?: string, localName?: string}} el
 * @returns {string}
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  return qualified.replace(/^[^:]+:/, "");
}
6455
/**
 * Collect every descendant element of `node` whose prefix-less tag name equals
 * `tagLocalName`, in document (pre-order) order. `node` itself is never
 * included.
 *
 * The traversal uses an explicit stack instead of recursion, so its depth is
 * not limited by the JavaScript call stack on deeply nested XML.
 *
 * @param {{childNodes?: ArrayLike<Object>}} node
 * @param {string} tagLocalName
 * @returns {Array<Object>}
 */
function findAllElements(node, tagLocalName) {
  const result = [];
  const pending = [];
  // Push element children in reverse so that pop() yields them left to right.
  const pushChildren = (parent) => {
    const children = parent.childNodes;
    if (!children) return;
    for (let i = children.length - 1; i >= 0; i--) {
      if (children[i].nodeType === 1) pending.push(children[i]);
    }
  };
  pushChildren(node);
  while (pending.length > 0) {
    const el = pending.pop();
    if (localName(el) === tagLocalName) result.push(el);
    pushChildren(el);
  }
  return result;
}
6470
/**
 * Return the direct element children of `parent` whose prefix-less tag name
 * equals `tagLocalName`, in child order. Yields an empty array when `parent`
 * has no `childNodes`.
 *
 * @param {{childNodes?: ArrayLike<Object>}} parent
 * @param {string} tagLocalName
 * @returns {Array<Object>}
 */
function findDirectChildren(parent, tagLocalName) {
  const kids = parent.childNodes;
  if (!kids) return [];
  return Array.from(kids).filter(
    (kid) => kid.nodeType === 1 && localName(kid) === tagLocalName
  );
}
6482
/**
 * Report whether any ancestor of `el` is an element whose prefix-less tag name
 * is "tbl". `el` itself is not examined.
 *
 * @param {{parentNode?: Object}} el
 * @returns {boolean}
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
6490
/**
 * Concatenate the text of a table cell element.
 *
 * Starting at the cell, text nodes contribute their content; the traversal
 * descends only into `t`, `run`, `r`, `p` and `subList` elements; `tab`
 * contributes a space and `br` a newline. All other elements are ignored
 * together with their content.
 *
 * @param {{childNodes?: ArrayLike<Object>}} tcEl
 * @returns {string}
 */
function extractCellText(tcEl) {
  const DESCEND_INTO = new Set(["t", "run", "r", "p", "subList"]);
  const pieces = [];
  const visit = (node) => {
    const kids = node.childNodes;
    if (!kids) return;
    for (let i = 0; i < kids.length; i++) {
      const kid = kids[i];
      if (kid.nodeType === 3) {
        pieces.push(kid.textContent || "");
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (DESCEND_INTO.has(tag)) {
        visit(kid);
      } else if (tag === "tab") {
        pieces.push(" ");
      } else if (tag === "br") {
        pieces.push("\n");
      }
    }
  };
  visit(tcEl);
  return pieces.join("");
}
6511
/**
 * Put `text` plus one space in front of the content of the first `t` element
 * found in the cell. Does nothing when the cell has no `t` element. The
 * element's children are replaced by a single new text node.
 *
 * @param {Object} tcEl - cell element
 * @param {string} text - value to prepend
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (!firstT) return;
  const combined = text + " " + (firstT.textContent || "");
  clearChildren(firstT);
  firstT.appendChild(firstT.ownerDocument.createTextNode(combined));
}
6519
/**
 * Replace the whole text of a table cell with `newValue`.
 *
 * The value goes into the first paragraph: into its first `run`/`r` element
 * when it has any (all further runs are blanked), otherwise into its first
 * `t` element (all further `t` elements are emptied). Every later paragraph
 * that still has a parent keeps its structure but has its runs blanked and its
 * `t` elements emptied. Does nothing when the cell has no paragraph.
 *
 * NOTE(review): when the first paragraph has neither a run containing a `t`
 * nor any `t` element, nothing appears to be written - confirm whether empty
 * cells need a text element created for them.
 *
 * @param {Object} tcEl - cell element
 * @param {string} newValue
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const runsOf = (p) => [...findAllElements(p, "run"), ...findAllElements(p, "r")];
  const [firstP, ...laterParagraphs] = findAllElements(tcEl, "p");
  if (!firstP) return;

  const firstRuns = runsOf(firstP);
  if (firstRuns.length > 0) {
    firstRuns.forEach((run, idx) => setRunText(run, idx === 0 ? newValue : ""));
  } else {
    const [headT, ...tailTs] = findAllElements(firstP, "t");
    if (headT) {
      clearChildren(headT);
      headT.appendChild(headT.ownerDocument.createTextNode(newValue));
      for (const t of tailTs) clearChildren(t);
    }
  }

  for (const p of laterParagraphs) {
    if (!p.parentNode) continue;
    for (const run of runsOf(p)) setRunText(run, "");
    for (const t of findAllElements(p, "t")) clearChildren(t);
  }
}
6242
- function extractFromTable(table) {
6243
- const fields = [];
6244
- if (table.cols >= 2) {
6245
- for (let r = 0; r < table.rows; r++) {
6246
- for (let c = 0; c < table.cols - 1; c++) {
6247
- const labelCell = table.cells[r][c];
6248
- const valueCell = table.cells[r][c + 1];
6249
- if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
6250
- fields.push({
6251
- label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6252
- value: valueCell.text.trim(),
6253
- row: r,
6254
- col: c
6255
- });
6256
- }
6257
- }
6549
+ function setRunText(runEl, text) {
6550
+ const tElements = findAllElements(runEl, "t");
6551
+ if (tElements.length > 0) {
6552
+ clearChildren(tElements[0]);
6553
+ tElements[0].appendChild(tElements[0].ownerDocument.createTextNode(text));
6554
+ for (let i = 1; i < tElements.length; i++) {
6555
+ clearChildren(tElements[i]);
6258
6556
  }
6259
6557
  }
6260
- if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
6261
- const headerRow = table.cells[0];
6262
- const allLabels = headerRow.every((cell) => {
6263
- const t = cell.text.trim();
6264
- return t.length > 0 && t.length <= 20;
6265
- });
6266
- if (allLabels) {
6267
- for (let r = 1; r < table.rows; r++) {
6268
- for (let c = 0; c < table.cols; c++) {
6269
- const label = headerRow[c].text.trim();
6270
- const value = table.cells[r][c].text.trim();
6271
- if (label && value) {
6272
- fields.push({ label, value, row: r, col: c });
6273
- }
6274
- }
6275
- }
6276
- }
6558
+ }
6559
/**
 * Remove every child node of `el`, one at a time from the front.
 *
 * @param {{firstChild: Object|null, removeChild: function(Object): *}} el
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
6562
+ function collectTextNodes(pEl) {
6563
+ const tElements = findAllElements(pEl, "t");
6564
+ const result = [];
6565
+ let offset = 0;
6566
+ for (const t of tElements) {
6567
+ const text = t.textContent || "";
6568
+ result.push({ element: t, text, offset });
6569
+ offset += text.length;
6277
6570
  }
6278
- return fields;
6571
+ return result;
6279
6572
  }
6280
- function extractInlineFields(text) {
6281
- const fields = [];
6282
- const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
6283
- let match;
6284
- while ((match = pattern.exec(text)) !== null) {
6285
- const label = match[1].trim();
6286
- const value = match[2].trim();
6287
- if (value) {
6288
- fields.push({ label, value, row: -1, col: -1 });
6573
+ function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
6574
+ let replaced = false;
6575
+ for (const node of tNodes) {
6576
+ const nodeStart = node.offset;
6577
+ const nodeEnd = node.offset + node.text.length;
6578
+ if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
6579
+ const localStart = Math.max(0, globalStart - nodeStart);
6580
+ const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
6581
+ if (!replaced) {
6582
+ const before = node.text.slice(0, localStart);
6583
+ const after = node.text.slice(localEnd);
6584
+ const newText = before + newValue + after;
6585
+ clearChildren(node.element);
6586
+ node.element.appendChild(node.element.ownerDocument.createTextNode(newText));
6587
+ replaced = true;
6588
+ } else {
6589
+ const before = node.text.slice(0, localStart);
6590
+ const after = node.text.slice(localEnd);
6591
+ const newText = before + after;
6592
+ clearChildren(node.element);
6593
+ node.element.appendChild(node.element.ownerDocument.createTextNode(newText));
6289
6594
  }
6290
6595
  }
6291
- return fields;
6596
+ }
6597
/**
 * List the `t` elements of a cell together with their text and the offset at
 * which that text starts in the concatenation of all of them.
 *
 * @param {Object} tcEl - cell element
 * @returns {Array<{element: Object, text: string, offset: number}>}
 */
function collectCellTextNodes(tcEl) {
  let runningOffset = 0;
  return findAllElements(tcEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: runningOffset };
    runningOffset += text.length;
    return entry;
  });
}
6608
/**
 * Write a rewritten cell text back into the cell's `t` elements.
 *
 * With a single text element its content is simply replaced. Otherwise the
 * common prefix and common suffix of the old and new text are skipped and only
 * the differing middle range is handed to `replaceTextRange`. Nothing happens
 * when the two texts are equal.
 *
 * @param {Array<{element: Object, text: string, offset: number}>} tNodes
 * @param {string} originalFull - concatenated text before the rewrite
 * @param {string} replacedFull - concatenated text after the rewrite
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const [{ element }] = tNodes;
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const shortest = Math.min(originalFull.length, replacedFull.length);
  let prefixLen = 0;
  while (prefixLen < shortest && originalFull[prefixLen] === replacedFull[prefixLen]) {
    prefixLen++;
  }
  // The suffix may not overlap the prefix in either string.
  const suffixLimit = shortest - prefixLen;
  let suffixLen = 0;
  while (
    suffixLen < suffixLimit &&
    originalFull[originalFull.length - 1 - suffixLen] === replacedFull[replacedFull.length - 1 - suffixLen]
  ) {
    suffixLen++;
  }

  const changedPart = replacedFull.slice(prefixLen, replacedFull.length - suffixLen);
  replaceTextRange(tNodes, prefixLen, originalFull.length - suffixLen, changedPart);
}
6293
6630
 
6294
6631
  // src/hwpx/generator.ts
6295
- import JSZip5 from "jszip";
6632
+ import JSZip6 from "jszip";
6296
6633
  var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
6297
6634
  var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
6298
6635
  var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
@@ -6319,7 +6656,7 @@ var PARA_LIST = 7;
6319
6656
  async function markdownToHwpx(markdown) {
6320
6657
  const blocks = parseMarkdownToBlocks(markdown);
6321
6658
  const sectionXml = blocksToSectionXml(blocks);
6322
- const zip = new JSZip5();
6659
+ const zip = new JSZip6();
6323
6660
  zip.file("mimetype", "application/hwp+zip", { compression: "STORE" });
6324
6661
  zip.file("META-INF/container.xml", generateContainerXml());
6325
6662
  zip.file("Contents/content.hpf", generateManifest());
@@ -6679,6 +7016,183 @@ function blocksToSectionXml(blocks) {
6679
7016
  </hs:sec>`;
6680
7017
  }
6681
7018
 
7019
+ // src/diff/text-diff.ts
7020
/**
 * Scores how alike two strings are on a 0..1 scale, where 1 means identical.
 *
 * The score is `1 - levenshtein(a, b) / max(a.length, b.length)`.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 1 for strictly equal inputs, 0 when either input is
 *   falsy (and they are not strictly equal), otherwise the normalized
 *   edit-distance score.
 */
function similarity(a, b) {
  if (a === b) {
    return 1;
  }
  const eitherMissing = !a || !b;
  if (eitherMissing) {
    return 0;
  }
  const longest = Math.max(a.length, b.length);
  // Guard against dividing by zero for zero-length (but truthy) inputs.
  return longest === 0 ? 1 : 1 - levenshtein(a, b) / longest;
}
7027
/**
 * Similarity of two strings after each has been passed through `normalize`.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Result of `similarity` on the normalized inputs.
 */
function normalizedSimilarity(a, b) {
  const left = normalize(a);
  const right = normalize(b);
  return similarity(left, right);
}
7030
/**
 * Collapses every run of whitespace into a single space and removes leading
 * and trailing whitespace.
 *
 * @param {string} s
 * @returns {string}
 */
function normalize(s) {
  // Splitting on whitespace runs leaves empty pieces only at the edges;
  // dropping them is what trimming the collapsed string would do.
  const words = s.split(/\s+/).filter((piece) => piece !== "");
  return words.join(" ");
}
7033
// Combined input length above which levenshtein() switches to an estimate.
var MAX_LEVENSHTEIN_LEN = 1e4;

/**
 * Edit distance (insert / delete / substitute, each costing 1) between two
 * strings, compared position by position with `===`.
 *
 * When `a.length + b.length` exceeds MAX_LEVENSHTEIN_LEN the exact distance is
 * not computed. Instead the mismatch rate over the first
 * `min(500, a.length, b.length)` positions is extrapolated to the shorter
 * length and added to the length difference.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Exact distance for small inputs, an estimate for large ones.
 */
function levenshtein(a, b) {
  const lenA = a.length;
  const lenB = b.length;

  if (lenA + lenB > MAX_LEVENSHTEIN_LEN) {
    const shorterLen = Math.min(lenA, lenB);
    const sampleLen = Math.min(500, lenA, lenB);
    let mismatches = 0;
    for (let k = 0; k < sampleLen; k += 1) {
      if (a[k] !== b[k]) mismatches += 1;
    }
    // With nothing to sample, assume everything differs.
    const mismatchRate = sampleLen > 0 ? mismatches / sampleLen : 1;
    return Math.abs(lenA - lenB) + Math.round(shorterLen * mismatchRate);
  }

  // Keep the DP rows as short as possible by indexing them on the shorter input.
  const shorter = lenA > lenB ? b : a;
  const longer = lenA > lenB ? a : b;
  const m = shorter.length;
  const n = longer.length;

  let previous = Array.from({ length: m + 1 }, (_, index) => index);
  let current = new Array(m + 1);
  for (let row = 1; row <= n; row += 1) {
    const target = longer[row - 1];
    current[0] = row;
    for (let col = 1; col <= m; col += 1) {
      current[col] =
        shorter[col - 1] === target
          ? previous[col - 1]
          : 1 + Math.min(previous[col - 1], previous[col], current[col - 1]);
    }
    // Recycle the two row buffers instead of allocating a new one per row.
    const recycled = previous;
    previous = current;
    current = recycled;
  }
  return previous[m];
}
7061
+
7062
+ // src/diff/compare.ts
7063
// Minimum block similarity for two blocks to be treated as a matching pair.
var SIMILARITY_THRESHOLD = 0.4;

/**
 * Parses two documents concurrently and diffs their blocks.
 *
 * @param {*} bufferA - First document, forwarded to `parse`.
 * @param {*} bufferB - Second document, forwarded to `parse`.
 * @param {*} [options] - Forwarded unchanged to both `parse` calls.
 * @returns {Promise<*>} Whatever `diffBlocks` returns for the two block lists.
 * @throws {Error} When either parse result has a falsy `success`; document A
 *   is checked first.
 */
async function compare(bufferA, bufferB, options) {
  const pendingA = parse(bufferA, options);
  const pendingB = parse(bufferB, options);
  const [resultA, resultB] = await Promise.all([pendingA, pendingB]);

  if (!resultA.success) {
    throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
  }
  if (!resultB.success) {
    throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
  }
  return diffBlocks(resultA.blocks, resultB.blocks);
}
7073
/**
 * Produces a block-level diff between two block lists.
 *
 * The lists are first aligned with `alignBlocks`; each aligned pair is then
 * classified:
 * - both sides present, similarity >= 0.99: "unchanged" (reported with similarity 1)
 * - both sides present, lower similarity: "modified" (plus `cellDiffs` when
 *   both blocks are tables carrying a `table` payload)
 * - only the left side present: "removed"
 * - only the right side present: "added"
 *
 * @param {Array<object>} blocksA - Blocks of the "before" document.
 * @param {Array<object>} blocksB - Blocks of the "after" document.
 * @returns {{stats: {added: number, removed: number, modified: number, unchanged: number}, diffs: Array<object>}}
 */
function diffBlocks(blocksA, blocksB) {
  const aligned = alignBlocks(blocksA, blocksB);
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];

  for (const [before, after] of aligned) {
    // One-sided pairs: a pure removal or a pure addition.
    if (!before || !after) {
      if (before) {
        diffs.push({ type: "removed", before });
        stats.removed += 1;
      } else if (after) {
        diffs.push({ type: "added", after });
        stats.added += 1;
      }
      continue;
    }

    const score = blockSimilarity(before, after);
    if (score >= 0.99) {
      diffs.push({ type: "unchanged", before, after, similarity: 1 });
      stats.unchanged += 1;
      continue;
    }

    const entry = { type: "modified", before, after, similarity: score };
    const bothTables =
      before.type === "table" && after.type === "table" && before.table && after.table;
    if (bothTables) {
      entry.cellDiffs = diffTableCells(before.table, after.table);
    }
    diffs.push(entry);
    stats.modified += 1;
  }

  return { stats, diffs };
}
7101
/**
 * Aligns two block lists with a longest-common-subsequence pass in which two
 * blocks "match" when `blockSimilarity` is at least SIMILARITY_THRESHOLD.
 *
 * The result walks both lists in order: every matched pair becomes
 * `[blockA, blockB]`; unmatched blocks become `[blockA, null]` or
 * `[null, blockB]`. Between two matches, unmatched left blocks are emitted
 * before unmatched right blocks.
 *
 * When `a.length * b.length` exceeds 1e7 the table is not built and
 * `fallbackAlign` is used instead.
 *
 * @param {Array<object>} a
 * @param {Array<object>} b
 * @returns {Array<[object|null, object|null]>}
 */
function alignBlocks(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows * cols > 1e7) return fallbackAlign(a, b);

  // matched[i][j]: does a[i] match b[j]?  lcs[i][j]: LCS length of a[0..i) and b[0..j).
  const matched = [];
  const lcs = [new Array(cols + 1).fill(0)];
  for (let i = 1; i <= rows; i += 1) {
    const flags = new Array(cols);
    const line = new Array(cols + 1).fill(0);
    const above = lcs[i - 1];
    for (let j = 1; j <= cols; j += 1) {
      const hit = blockSimilarity(a[i - 1], b[j - 1]) >= SIMILARITY_THRESHOLD;
      flags[j - 1] = hit;
      line[j] = hit ? above[j - 1] + 1 : Math.max(above[j], line[j - 1]);
    }
    matched.push(flags);
    lcs.push(line);
  }

  // Walk back from the bottom-right corner collecting the matched index pairs.
  const anchors = [];
  let i = rows;
  let j = cols;
  while (i > 0 && j > 0) {
    const takeDiagonal = matched[i - 1][j - 1] && lcs[i][j] === lcs[i - 1][j - 1] + 1;
    if (takeDiagonal) {
      anchors.push([i - 1, j - 1]);
      i -= 1;
      j -= 1;
    } else if (lcs[i - 1][j] >= lcs[i][j - 1]) {
      i -= 1;
    } else {
      j -= 1;
    }
  }
  anchors.reverse();

  // Merge: unmatched blocks before each anchor, then the anchor pair itself.
  const aligned = [];
  let nextA = 0;
  let nextB = 0;
  const emitUnmatched = (untilA, untilB) => {
    for (; nextA < untilA; nextA += 1) aligned.push([a[nextA], null]);
    for (; nextB < untilB; nextB += 1) aligned.push([null, b[nextB]]);
  };
  for (const [anchorA, anchorB] of anchors) {
    emitUnmatched(anchorA, anchorB);
    aligned.push([a[nextA], b[nextB]]);
    nextA += 1;
    nextB += 1;
  }
  emitUnmatched(rows, cols);
  return aligned;
}
7149
/**
 * Positional alignment: pairs the i-th entry of `a` with the i-th entry of
 * `b`, up to the longer length.
 *
 * @param {Array<object>} a
 * @param {Array<object>} b
 * @returns {Array<[object|null, object|null]>} Missing or falsy entries are
 *   represented as `null`.
 */
function fallbackAlign(a, b) {
  const pairCount = Math.max(a.length, b.length);
  return Array.from({ length: pairCount }, (_, index) => [a[index] || null, b[index] || null]);
}
7157
/**
 * Scores how alike two blocks are.
 *
 * - Different `type`: 0.
 * - Both blocks define `text`: whitespace-normalized text similarity.
 * - Table blocks that both carry a `table` payload: `tableSimilarity`.
 * - Anything else of the same type: 1.
 *
 * NOTE(review): when only one of two same-typed blocks defines `text`, this
 * falls through to the final case and returns 1. Confirm that is intended.
 *
 * @param {{type: string, text?: string, table?: object}} a
 * @param {{type: string, text?: string, table?: object}} b
 * @returns {number} Score produced by the matching rule above.
 */
function blockSimilarity(a, b) {
  if (a.type !== b.type) return 0;

  if (a.text !== undefined && b.text !== undefined) {
    return normalizedSimilarity(a.text || "", b.text || "");
  }
  if (a.type === "table" && a.table && b.table) {
    return tableSimilarity(a.table, b.table);
  }
  // Types are known to be equal here (mismatches returned above), so the old
  // trailing `a.type === b.type` re-check and `return 0` were dead code.
  return 1;
}
7168
/**
 * Scores how alike two tables are: 30% from how close their cell counts
 * (`rows * cols`) are and 70% from the normalized similarity of all cell
 * texts joined with spaces.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b
 * @returns {number} Weighted combination of the two scores.
 */
function tableSimilarity(a, b) {
  const areaA = a.rows * a.cols;
  const areaB = b.rows * b.cols;
  // The extra 1 keeps the denominator non-zero for two empty tables.
  const dimSim = 1 - Math.abs(areaA - areaB) / Math.max(areaA, areaB, 1);

  const joinCellTexts = (table) => table.cells.flat().map((cell) => cell.text).join(" ");
  const contentSim = normalizedSimilarity(joinCellTexts(a), joinCellTexts(b));

  return dimSim * 0.3 + contentSim * 0.7;
}
7175
/**
 * Compares two tables cell by cell over the union of their dimensions.
 *
 * Each result entry is `{ type, before, after }`, where `before` / `after`
 * are the cell texts (or `undefined` when that table has no such cell) and
 * `type` is:
 * - "added" when the left text is undefined
 * - "removed" when only the right text is undefined
 * - "unchanged" when both texts are strictly equal
 * - "modified" otherwise
 *
 * A cell counts as missing when it lies outside the table's declared
 * `rows` / `cols`, or when the `cells` grid has no entry at that position
 * (ragged or short grids), so malformed tables no longer raise a TypeError.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b
 * @returns {Array<Array<{type: string, before: (string|undefined), after: (string|undefined)}>>}
 *   A `max(rows) x max(cols)` grid of cell diffs.
 */
function diffTableCells(a, b) {
  const maxRows = Math.max(a.rows, b.rows);
  const maxCols = Math.max(a.cols, b.cols);

  // Optional chaining tolerates grids shorter than the declared dimensions.
  const textAt = (table, r, c) =>
    r < table.rows && c < table.cols ? table.cells[r]?.[c]?.text : undefined;

  const result = [];
  for (let r = 0; r < maxRows; r++) {
    const row = [];
    for (let c = 0; c < maxCols; c++) {
      const before = textAt(a, r, c);
      const after = textAt(b, r, c);
      let type;
      if (before === undefined) type = "added";
      else if (after === undefined) type = "removed";
      else if (before === after) type = "unchanged";
      else type = "modified";
      row.push({ type, before, after });
    }
    result.push(row);
  }
  return result;
}
7195
+
6682
7196
  // src/index.ts
6683
7197
  async function parse(input, options) {
6684
7198
  let buffer;
@@ -6755,6 +7269,45 @@ async function parseDocx(buffer, options) {
6755
7269
  return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
6756
7270
  }
6757
7271
  }
7272
/**
 * Fills a form document with the given values and returns it in the
 * requested output format.
 *
 * Input handling: a string is read with `readFile` and converted with
 * `toArrayBuffer`; a Node `Buffer` is converted with `toArrayBuffer`;
 * anything else is used as-is.
 *
 * Output formats:
 * - "hwpx-preserve": the input must be detected as HWPX by both
 *   `detectFormat` and `detectZipFormat`; the document is filled by
 *   `fillHwpx` and its buffer is returned.
 * - "hwpx": the input is parsed, its blocks are filled, rendered to
 *   markdown and converted with `markdownToHwpx`.
 * - any other value (default "markdown"): the filled markdown is returned.
 *
 * @param {string|Buffer|ArrayBuffer} input - Path, Buffer, or raw buffer.
 * @param {object} values - Field values passed to the fill routines.
 * @param {string} [outputFormat="markdown"]
 * @returns {Promise<{output: *, format: string, fill: object}>}
 * @throws {Error} When "hwpx-preserve" is requested for non-HWPX input, or
 *   when parsing reports failure.
 */
async function fillForm(input, values, outputFormat = "markdown") {
  let buffer = input;
  if (typeof input === "string") {
    buffer = toArrayBuffer(await readFile(input));
  } else if (Buffer.isBuffer(input)) {
    buffer = toArrayBuffer(input);
  }

  if (outputFormat === "hwpx-preserve") {
    const format = detectFormat(buffer);
    if (format !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${format})`);
    }
    // The coarse check passed; confirm the archive contents as well.
    const zipFormat = await detectZipFormat(buffer);
    if (zipFormat !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${zipFormat})`);
    }
    const { buffer: output, filled, unmatched } = await fillHwpx(buffer, values);
    return { output, format: "hwpx-preserve", fill: { filled, unmatched } };
  }

  const parsed = await parse(buffer);
  if (!parsed.success) {
    throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
  }

  const fill = fillFormFields(parsed.blocks, values);
  const markdown = blocksToMarkdown(fill.blocks);
  if (outputFormat !== "hwpx") {
    return { output: markdown, format: "markdown", fill };
  }
  const hwpxBuffer = await markdownToHwpx(markdown);
  return { output: hwpxBuffer, format: "hwpx", fill };
}
6758
7311
  export {
6759
7312
  VERSION,
6760
7313
  blocksToMarkdown,
@@ -6763,7 +7316,11 @@ export {
6763
7316
  detectZipFormat,
6764
7317
  diffBlocks,
6765
7318
  extractFormFields,
7319
+ fillForm,
7320
+ fillFormFields,
7321
+ fillHwpx,
6766
7322
  isHwpxFile,
7323
+ isLabelCell,
6767
7324
  isOldHwpFile,
6768
7325
  isPdfFile,
6769
7326
  isZipFile,