kordoc 2.2.3 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -121,7 +121,11 @@ __export(index_exports, {
121
121
  detectZipFormat: () => detectZipFormat,
122
122
  diffBlocks: () => diffBlocks,
123
123
  extractFormFields: () => extractFormFields,
124
+ fillForm: () => fillForm,
125
+ fillFormFields: () => fillFormFields,
126
+ fillHwpx: () => fillHwpx,
124
127
  isHwpxFile: () => isHwpxFile,
128
+ isLabelCell: () => isLabelCell,
125
129
  isOldHwpFile: () => isOldHwpFile,
126
130
  isPdfFile: () => isPdfFile,
127
131
  isZipFile: () => isZipFile,
@@ -183,7 +187,7 @@ var import_zlib = require("zlib");
183
187
  var import_xmldom = require("@xmldom/xmldom");
184
188
 
185
189
  // src/utils.ts
186
- var VERSION = true ? "2.2.3" : "0.0.0-dev";
190
+ var VERSION = true ? "2.2.4" : "0.0.0-dev";
187
191
  function toArrayBuffer(buf) {
188
192
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
189
193
  return buf.buffer;
@@ -5608,21 +5612,21 @@ async function parseXlsxDocument(buffer, options) {
5608
5612
  var import_jszip4 = __toESM(require("jszip"), 1);
5609
5613
  var import_xmldom3 = require("@xmldom/xmldom");
5610
5614
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
5611
- function getChildElements(parent, localName) {
5615
+ function getChildElements(parent, localName2) {
5612
5616
  const result = [];
5613
5617
  const children = parent.childNodes;
5614
5618
  for (let i = 0; i < children.length; i++) {
5615
5619
  const node = children[i];
5616
5620
  if (node.nodeType === 1) {
5617
5621
  const el = node;
5618
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5622
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5619
5623
  result.push(el);
5620
5624
  }
5621
5625
  }
5622
5626
  }
5623
5627
  return result;
5624
5628
  }
5625
- function findElements(parent, localName) {
5629
+ function findElements(parent, localName2) {
5626
5630
  const result = [];
5627
5631
  const walk = (node) => {
5628
5632
  const children = node.childNodes;
@@ -5630,7 +5634,7 @@ function findElements(parent, localName) {
5630
5634
  const child = children[i];
5631
5635
  if (child.nodeType === 1) {
5632
5636
  const el = child;
5633
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5637
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5634
5638
  result.push(el);
5635
5639
  }
5636
5640
  walk(el);
@@ -5640,11 +5644,11 @@ function findElements(parent, localName) {
5640
5644
  walk(parent);
5641
5645
  return result;
5642
5646
  }
5643
- function getAttr(el, localName) {
5647
+ function getAttr(el, localName2) {
5644
5648
  const attrs = el.attributes;
5645
5649
  for (let i = 0; i < attrs.length; i++) {
5646
5650
  const attr = attrs[i];
5647
- if (attr.localName === localName || attr.name === localName) return attr.value;
5651
+ if (attr.localName === localName2 || attr.name === localName2) return attr.value;
5648
5652
  }
5649
5653
  return null;
5650
5654
  }
@@ -5991,11 +5995,11 @@ async function parseDocxDocument(buffer, options) {
5991
5995
  const node = children[i];
5992
5996
  if (node.nodeType !== 1) continue;
5993
5997
  const el = node;
5994
- const localName = el.localName ?? el.tagName?.split(":").pop();
5995
- if (localName === "p") {
5998
+ const localName2 = el.localName ?? el.tagName?.split(":").pop();
5999
+ if (localName2 === "p") {
5996
6000
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
5997
6001
  if (block) blocks.push(block);
5998
- } else if (localName === "tbl") {
6002
+ } else if (localName2 === "tbl") {
5999
6003
  const block = parseTable(el, styles, numbering, footnotes, rels);
6000
6004
  if (block) blocks.push(block);
6001
6005
  }
@@ -6033,183 +6037,6 @@ async function parseDocxDocument(buffer, options) {
6033
6037
  };
6034
6038
  }
6035
6039
 
6036
- // src/diff/text-diff.ts
6037
- function similarity(a, b) {
6038
- if (a === b) return 1;
6039
- if (!a || !b) return 0;
6040
- const maxLen = Math.max(a.length, b.length);
6041
- if (maxLen === 0) return 1;
6042
- return 1 - levenshtein(a, b) / maxLen;
6043
- }
6044
- function normalizedSimilarity(a, b) {
6045
- return similarity(normalize(a), normalize(b));
6046
- }
6047
- function normalize(s) {
6048
- return s.replace(/\s+/g, " ").trim();
6049
- }
6050
- var MAX_LEVENSHTEIN_LEN = 1e4;
6051
- function levenshtein(a, b) {
6052
- if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
6053
- const sampleLen = Math.min(500, a.length, b.length);
6054
- let diffs = 0;
6055
- for (let i = 0; i < sampleLen; i++) if (a[i] !== b[i]) diffs++;
6056
- const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
6057
- return Math.abs(a.length - b.length) + Math.round(Math.min(a.length, b.length) * sampleRate);
6058
- }
6059
- if (a.length > b.length) [a, b] = [b, a];
6060
- const m = a.length;
6061
- const n = b.length;
6062
- let prev = Array.from({ length: m + 1 }, (_, i) => i);
6063
- let curr = new Array(m + 1);
6064
- for (let j = 1; j <= n; j++) {
6065
- curr[0] = j;
6066
- for (let i = 1; i <= m; i++) {
6067
- if (a[i - 1] === b[j - 1]) {
6068
- curr[i] = prev[i - 1];
6069
- } else {
6070
- curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
6071
- }
6072
- }
6073
- ;
6074
- [prev, curr] = [curr, prev];
6075
- }
6076
- return prev[m];
6077
- }
6078
-
6079
- // src/diff/compare.ts
6080
- var SIMILARITY_THRESHOLD = 0.4;
6081
- async function compare(bufferA, bufferB, options) {
6082
- const [resultA, resultB] = await Promise.all([
6083
- parse(bufferA, options),
6084
- parse(bufferB, options)
6085
- ]);
6086
- if (!resultA.success) throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
6087
- if (!resultB.success) throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
6088
- return diffBlocks(resultA.blocks, resultB.blocks);
6089
- }
6090
- function diffBlocks(blocksA, blocksB) {
6091
- const aligned = alignBlocks(blocksA, blocksB);
6092
- const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
6093
- const diffs = [];
6094
- for (const [a, b] of aligned) {
6095
- if (a && b) {
6096
- const sim = blockSimilarity(a, b);
6097
- if (sim >= 0.99) {
6098
- diffs.push({ type: "unchanged", before: a, after: b, similarity: 1 });
6099
- stats.unchanged++;
6100
- } else {
6101
- const diff = { type: "modified", before: a, after: b, similarity: sim };
6102
- if (a.type === "table" && b.type === "table" && a.table && b.table) {
6103
- diff.cellDiffs = diffTableCells(a.table, b.table);
6104
- }
6105
- diffs.push(diff);
6106
- stats.modified++;
6107
- }
6108
- } else if (a) {
6109
- diffs.push({ type: "removed", before: a });
6110
- stats.removed++;
6111
- } else if (b) {
6112
- diffs.push({ type: "added", after: b });
6113
- stats.added++;
6114
- }
6115
- }
6116
- return { stats, diffs };
6117
- }
6118
- function alignBlocks(a, b) {
6119
- const m = a.length, n = b.length;
6120
- if (m * n > 1e7) return fallbackAlign(a, b);
6121
- const simCache = /* @__PURE__ */ new Map();
6122
- const getSim = (i2, j2) => {
6123
- const key = `${i2},${j2}`;
6124
- let v = simCache.get(key);
6125
- if (v === void 0) {
6126
- v = blockSimilarity(a[i2], b[j2]);
6127
- simCache.set(key, v);
6128
- }
6129
- return v;
6130
- };
6131
- const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
6132
- for (let i2 = 1; i2 <= m; i2++) {
6133
- for (let j2 = 1; j2 <= n; j2++) {
6134
- if (getSim(i2 - 1, j2 - 1) >= SIMILARITY_THRESHOLD) {
6135
- dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
6136
- } else {
6137
- dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
6138
- }
6139
- }
6140
- }
6141
- const pairs = [];
6142
- let i = m, j = n;
6143
- while (i > 0 && j > 0) {
6144
- if (getSim(i - 1, j - 1) >= SIMILARITY_THRESHOLD && dp[i][j] === dp[i - 1][j - 1] + 1) {
6145
- pairs.push([i - 1, j - 1]);
6146
- i--;
6147
- j--;
6148
- } else if (dp[i - 1][j] >= dp[i][j - 1]) {
6149
- i--;
6150
- } else {
6151
- j--;
6152
- }
6153
- }
6154
- pairs.reverse();
6155
- const result = [];
6156
- let ai = 0, bi = 0;
6157
- for (const [pi, pj] of pairs) {
6158
- while (ai < pi) result.push([a[ai++], null]);
6159
- while (bi < pj) result.push([null, b[bi++]]);
6160
- result.push([a[ai++], b[bi++]]);
6161
- }
6162
- while (ai < m) result.push([a[ai++], null]);
6163
- while (bi < n) result.push([null, b[bi++]]);
6164
- return result;
6165
- }
6166
- function fallbackAlign(a, b) {
6167
- const result = [];
6168
- const len = Math.max(a.length, b.length);
6169
- for (let i = 0; i < len; i++) {
6170
- result.push([a[i] || null, b[i] || null]);
6171
- }
6172
- return result;
6173
- }
6174
- function blockSimilarity(a, b) {
6175
- if (a.type !== b.type) return 0;
6176
- if (a.text !== void 0 && b.text !== void 0) {
6177
- return normalizedSimilarity(a.text || "", b.text || "");
6178
- }
6179
- if (a.type === "table" && a.table && b.table) {
6180
- return tableSimilarity(a.table, b.table);
6181
- }
6182
- if (a.type === b.type) return 1;
6183
- return 0;
6184
- }
6185
- function tableSimilarity(a, b) {
6186
- const dimSim = 1 - Math.abs(a.rows * a.cols - b.rows * b.cols) / Math.max(a.rows * a.cols, b.rows * b.cols, 1);
6187
- const textsA = a.cells.flat().map((c) => c.text).join(" ");
6188
- const textsB = b.cells.flat().map((c) => c.text).join(" ");
6189
- const contentSim = normalizedSimilarity(textsA, textsB);
6190
- return dimSim * 0.3 + contentSim * 0.7;
6191
- }
6192
- function diffTableCells(a, b) {
6193
- const maxRows = Math.max(a.rows, b.rows);
6194
- const maxCols = Math.max(a.cols, b.cols);
6195
- const result = [];
6196
- for (let r = 0; r < maxRows; r++) {
6197
- const row = [];
6198
- for (let c = 0; c < maxCols; c++) {
6199
- const cellA = r < a.rows && c < a.cols ? a.cells[r][c].text : void 0;
6200
- const cellB = r < b.rows && c < b.cols ? b.cells[r][c].text : void 0;
6201
- let type;
6202
- if (cellA === void 0) type = "added";
6203
- else if (cellB === void 0) type = "removed";
6204
- else if (cellA === cellB) type = "unchanged";
6205
- else type = "modified";
6206
- row.push({ type, before: cellA, after: cellB });
6207
- }
6208
- result.push(row);
6209
- }
6210
- return result;
6211
- }
6212
-
6213
6040
  // src/form/recognize.ts
6214
6041
  var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6215
6042
  "\uC131\uBA85",
@@ -6250,15 +6077,20 @@ var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6250
6077
  "\uB2E8\uAC00",
6251
6078
  "\uD569\uACC4",
6252
6079
  "\uACC4",
6253
- "\uC18C\uACC4"
6080
+ "\uC18C\uACC4",
6081
+ "\uB4F1\uB85D\uAE30\uC900\uC9C0",
6082
+ "\uBCF8\uC801",
6083
+ "\uC704\uC784\uC778",
6084
+ "\uCCAD\uAD6C\uC0AC\uC720",
6085
+ "\uC18C\uBA85\uC790\uB8CC"
6254
6086
  ]);
6255
6087
  function isLabelCell(text) {
6256
- const trimmed = text.trim();
6088
+ const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
6257
6089
  if (!trimmed || trimmed.length > 30) return false;
6258
6090
  for (const kw of LABEL_KEYWORDS) {
6259
6091
  if (trimmed.includes(kw)) return true;
6260
6092
  }
6261
- if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
6093
+ if (/^[가-힣\s()()·::]+$/.test(trimmed) && trimmed.replace(/\s/g, "").length >= 2 && trimmed.replace(/\s/g, "").length <= 8 && !/\d/.test(trimmed)) return true;
6262
6094
  if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
6263
6095
  return false;
6264
6096
  }
@@ -6281,63 +6113,572 @@ function extractFormFields(blocks) {
6281
6113
  fields.push(...inlineFields);
6282
6114
  }
6283
6115
  }
6284
- const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
6285
- return { fields, confidence: Math.min(confidence, 1) };
6116
+ const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
6117
+ return { fields, confidence: Math.min(confidence, 1) };
6118
+ }
6119
+ function extractFromTable(table) {
6120
+ const fields = [];
6121
+ if (table.cols >= 2) {
6122
+ for (let r = 0; r < table.rows; r++) {
6123
+ for (let c = 0; c < table.cols - 1; c++) {
6124
+ const labelCell = table.cells[r][c];
6125
+ const valueCell = table.cells[r][c + 1];
6126
+ if (isLabelCell(labelCell.text)) {
6127
+ fields.push({
6128
+ label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6129
+ value: valueCell.text.trim(),
6130
+ row: r,
6131
+ col: c
6132
+ });
6133
+ }
6134
+ }
6135
+ }
6136
+ }
6137
+ if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
6138
+ const headerRow = table.cells[0];
6139
+ const allLabels = headerRow.every((cell) => {
6140
+ const t = cell.text.trim();
6141
+ return t.length > 0 && t.length <= 20;
6142
+ });
6143
+ if (allLabels) {
6144
+ for (let r = 1; r < table.rows; r++) {
6145
+ for (let c = 0; c < table.cols; c++) {
6146
+ const label = headerRow[c].text.trim();
6147
+ const value = table.cells[r][c].text.trim();
6148
+ if (label && value) {
6149
+ fields.push({ label, value, row: r, col: c });
6150
+ }
6151
+ }
6152
+ }
6153
+ }
6154
+ }
6155
+ return fields;
6156
+ }
6157
+ function extractInlineFields(text) {
6158
+ const fields = [];
6159
+ const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
6160
+ let match;
6161
+ while ((match = pattern.exec(text)) !== null) {
6162
+ const label = match[1].trim();
6163
+ const value = match[2].trim();
6164
+ if (value) {
6165
+ fields.push({ label, value, row: -1, col: -1 });
6166
+ }
6167
+ }
6168
+ return fields;
6169
+ }
6170
+
6171
+ // src/form/match.ts
6172
+ function normalizeLabel(label) {
6173
+ return label.trim().replace(/[::\s()()·]/g, "");
6174
+ }
6175
+ function findMatchingKey(cellLabel, values) {
6176
+ if (values.has(cellLabel)) return cellLabel;
6177
+ let bestKey;
6178
+ let bestLen = 0;
6179
+ for (const key of values.keys()) {
6180
+ if (cellLabel.startsWith(key)) {
6181
+ if (key.length >= cellLabel.length * 0.6 && key.length > bestLen) {
6182
+ bestLen = key.length;
6183
+ bestKey = key;
6184
+ }
6185
+ } else if (key.startsWith(cellLabel)) {
6186
+ if (cellLabel.length >= key.length * 0.6 && cellLabel.length > bestLen) {
6187
+ bestLen = cellLabel.length;
6188
+ bestKey = key;
6189
+ }
6190
+ }
6191
+ }
6192
+ return bestKey;
6193
+ }
6194
+ function isKeywordLabel(text) {
6195
+ const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
6196
+ if (!trimmed || trimmed.length > 15) return false;
6197
+ for (const kw of LABEL_KEYWORDS) {
6198
+ if (trimmed.includes(kw)) return true;
6199
+ }
6200
+ return false;
6201
+ }
6202
+ function fillInCellPatterns(cellText, values, matchedLabels) {
6203
+ let text = cellText;
6204
+ const matches = [];
6205
+ text = text.replace(
6206
+ /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
6207
+ (match, prefix, suffix) => {
6208
+ const label = prefix + suffix;
6209
+ const normalizedLabel = normalizeLabel(label);
6210
+ const matchKey = values.has(normalizedLabel) ? normalizedLabel : values.has(normalizeLabel(prefix)) ? normalizeLabel(prefix) : void 0;
6211
+ if (matchKey === void 0) return match;
6212
+ const newValue = values.get(matchKey);
6213
+ matchedLabels.add(matchKey);
6214
+ matches.push({ key: matchKey, label, value: newValue });
6215
+ return `${prefix}(${newValue})${suffix}`;
6216
+ }
6217
+ );
6218
+ text = text.replace(
6219
+ /□([가-힣A-Za-z]+)/g,
6220
+ (match, keyword) => {
6221
+ const normalizedKw = normalizeLabel(keyword);
6222
+ const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
6223
+ if (matchKey === void 0) return match;
6224
+ const val = values.get(matchKey);
6225
+ const isTruthy = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"].includes(val.trim()) || val.trim() === "";
6226
+ if (!isTruthy) return match;
6227
+ matchedLabels.add(matchKey);
6228
+ matches.push({ key: matchKey, label: `\u25A1${keyword}`, value: "\u2611" });
6229
+ return `\u2611${keyword}`;
6230
+ }
6231
+ );
6232
+ text = text.replace(
6233
+ /\(([가-힣A-Za-z]+)[::]\s{1,}\)/g,
6234
+ (match, keyword) => {
6235
+ const normalizedKw = normalizeLabel(keyword);
6236
+ const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
6237
+ if (matchKey === void 0) return match;
6238
+ const newValue = values.get(matchKey);
6239
+ matchedLabels.add(matchKey);
6240
+ matches.push({ key: matchKey, label: keyword, value: newValue });
6241
+ return `(${keyword}\uFF1A${newValue})`;
6242
+ }
6243
+ );
6244
+ return matches.length > 0 ? { text, matches } : null;
6245
+ }
6246
+ function normalizeValues(values) {
6247
+ const map = /* @__PURE__ */ new Map();
6248
+ for (const [label, value] of Object.entries(values)) {
6249
+ map.set(normalizeLabel(label), value);
6250
+ }
6251
+ return map;
6252
+ }
6253
+ function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
6254
+ return [...normalizedValues.keys()].filter((k) => !matchedLabels.has(k)).map((k) => {
6255
+ for (const orig of Object.keys(originalValues)) {
6256
+ if (normalizeLabel(orig) === k) return orig;
6257
+ }
6258
+ return k;
6259
+ });
6260
+ }
6261
+
6262
+ // src/form/filler.ts
6263
+ function fillFormFields(blocks, values) {
6264
+ const cloned = structuredClone(blocks);
6265
+ const filled = [];
6266
+ const matchedLabels = /* @__PURE__ */ new Set();
6267
+ const normalizedValues = normalizeValues(values);
6268
+ const patternFilledCells = /* @__PURE__ */ new Set();
6269
+ for (const block of cloned) {
6270
+ if (block.type !== "table" || !block.table) continue;
6271
+ for (let r = 0; r < block.table.rows; r++) {
6272
+ for (let c = 0; c < block.table.cols; c++) {
6273
+ const cell = block.table.cells[r]?.[c];
6274
+ if (!cell) continue;
6275
+ const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
6276
+ if (result) {
6277
+ cell.text = result.text;
6278
+ patternFilledCells.add(`${r},${c}`);
6279
+ for (const m of result.matches) {
6280
+ filled.push({ label: m.label, value: m.value, row: r, col: c });
6281
+ }
6282
+ }
6283
+ }
6284
+ }
6285
+ }
6286
+ for (const block of cloned) {
6287
+ if (block.type !== "table" || !block.table) continue;
6288
+ fillTable(block.table, normalizedValues, filled, matchedLabels, patternFilledCells);
6289
+ }
6290
+ for (const block of cloned) {
6291
+ if (block.type !== "paragraph" || !block.text) continue;
6292
+ const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
6293
+ if (newText !== block.text) block.text = newText;
6294
+ }
6295
+ const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
6296
+ return { blocks: cloned, filled, unmatched };
6297
+ }
6298
+ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
6299
+ if (table.cols < 2) return;
6300
+ for (let r = 0; r < table.rows; r++) {
6301
+ for (let c = 0; c < table.cols - 1; c++) {
6302
+ const labelCell = table.cells[r][c];
6303
+ const valueCell = table.cells[r][c + 1];
6304
+ if (!labelCell || !valueCell) continue;
6305
+ if (!isLabelCell(labelCell.text)) continue;
6306
+ if (isKeywordLabel(valueCell.text)) continue;
6307
+ const normalizedCellLabel = normalizeLabel(labelCell.text);
6308
+ if (!normalizedCellLabel) continue;
6309
+ const matchKey = findMatchingKey(normalizedCellLabel, values);
6310
+ if (matchKey === void 0) continue;
6311
+ const newValue = values.get(matchKey);
6312
+ if (patternFilledCells?.has(`${r},${c + 1}`)) {
6313
+ valueCell.text = newValue + " " + valueCell.text;
6314
+ } else {
6315
+ valueCell.text = newValue;
6316
+ }
6317
+ matchedLabels.add(matchKey);
6318
+ filled.push({
6319
+ label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6320
+ value: newValue,
6321
+ row: r,
6322
+ col: c
6323
+ });
6324
+ }
6325
+ }
6326
+ if (table.rows >= 2 && table.cols >= 2) {
6327
+ const headerRow = table.cells[0];
6328
+ const allLabels = headerRow.every((cell) => {
6329
+ const t = cell.text.trim();
6330
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
6331
+ });
6332
+ if (!allLabels) return;
6333
+ for (let r = 1; r < table.rows; r++) {
6334
+ for (let c = 0; c < table.cols; c++) {
6335
+ const headerLabel = normalizeLabel(headerRow[c].text);
6336
+ const matchKey = findMatchingKey(headerLabel, values);
6337
+ if (matchKey === void 0) continue;
6338
+ if (matchedLabels.has(matchKey)) continue;
6339
+ const newValue = values.get(matchKey);
6340
+ table.cells[r][c].text = newValue;
6341
+ matchedLabels.add(matchKey);
6342
+ filled.push({
6343
+ label: headerRow[c].text.trim(),
6344
+ value: newValue,
6345
+ row: r,
6346
+ col: c
6347
+ });
6348
+ }
6349
+ }
6350
+ }
6351
+ }
6352
+ function fillInlineFields(text, values, filled, matchedLabels) {
6353
+ return text.replace(
6354
+ /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g,
6355
+ (match, rawLabel, _oldValue) => {
6356
+ const normalized = normalizeLabel(rawLabel);
6357
+ const matchKey = findMatchingKey(normalized, values);
6358
+ if (matchKey === void 0) return match;
6359
+ const newValue = values.get(matchKey);
6360
+ matchedLabels.add(matchKey);
6361
+ filled.push({
6362
+ label: rawLabel.trim(),
6363
+ value: newValue,
6364
+ row: -1,
6365
+ col: -1
6366
+ });
6367
+ return `${rawLabel}: ${newValue}`;
6368
+ }
6369
+ );
6370
+ }
6371
+
6372
+ // src/form/filler-hwpx.ts
6373
+ var import_jszip5 = __toESM(require("jszip"), 1);
6374
+ var import_xmldom4 = require("@xmldom/xmldom");
6375
+ async function fillHwpx(hwpxBuffer, values) {
6376
+ const zip = await import_jszip5.default.loadAsync(hwpxBuffer);
6377
+ const filled = [];
6378
+ const matchedLabels = /* @__PURE__ */ new Set();
6379
+ const normalizedValues = normalizeValues(values);
6380
+ const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
6381
+ if (sectionFiles.length === 0) {
6382
+ throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
6383
+ }
6384
+ const xmlParser = new import_xmldom4.DOMParser();
6385
+ const xmlSerializer = new import_xmldom4.XMLSerializer();
6386
+ for (const sectionPath of sectionFiles) {
6387
+ const zipEntry = zip.file(sectionPath);
6388
+ if (!zipEntry) continue;
6389
+ const rawXml = await zipEntry.async("text");
6390
+ const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
6391
+ if (!doc.documentElement) continue;
6392
+ let modified = false;
6393
+ const tables = findAllElements(doc.documentElement, "tbl");
6394
+ const cellPatternApplied = /* @__PURE__ */ new Set();
6395
+ for (const tblEl of tables) {
6396
+ const allCells = findAllElements(tblEl, "tc");
6397
+ for (const tcEl of allCells) {
6398
+ const tNodes = collectCellTextNodes(tcEl);
6399
+ const fullText = tNodes.map((n) => n.text).join("");
6400
+ const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
6401
+ if (!result) continue;
6402
+ applyTextReplacements(tNodes, fullText, result.text);
6403
+ cellPatternApplied.add(tcEl);
6404
+ for (const m of result.matches) {
6405
+ filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
6406
+ }
6407
+ modified = true;
6408
+ }
6409
+ }
6410
+ for (const tblEl of tables) {
6411
+ const rows = findDirectChildren(tblEl, "tr");
6412
+ for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
6413
+ const trEl = rows[rowIdx];
6414
+ const cells = findDirectChildren(trEl, "tc");
6415
+ for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
6416
+ const labelText = extractCellText(cells[colIdx]);
6417
+ if (!isLabelCell(labelText)) continue;
6418
+ const valueCell = cells[colIdx + 1];
6419
+ const valueText = extractCellText(valueCell);
6420
+ if (isKeywordLabel(valueText)) continue;
6421
+ const normalizedCellLabel = normalizeLabel(labelText);
6422
+ if (!normalizedCellLabel) continue;
6423
+ const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
6424
+ if (matchKey === void 0) continue;
6425
+ const newValue = normalizedValues.get(matchKey);
6426
+ if (cellPatternApplied.has(valueCell)) {
6427
+ prependCellText(valueCell, newValue);
6428
+ } else {
6429
+ replaceCellText(valueCell, newValue);
6430
+ }
6431
+ matchedLabels.add(matchKey);
6432
+ filled.push({
6433
+ label: labelText.trim().replace(/[::]\s*$/, ""),
6434
+ value: newValue,
6435
+ row: rowIdx,
6436
+ col: colIdx
6437
+ });
6438
+ modified = true;
6439
+ }
6440
+ }
6441
+ if (rows.length >= 2) {
6442
+ const headerCells = findDirectChildren(rows[0], "tc");
6443
+ const allLabels = headerCells.every((cell) => {
6444
+ const t = extractCellText(cell).trim();
6445
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
6446
+ });
6447
+ if (allLabels) {
6448
+ for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
6449
+ const dataCells = findDirectChildren(rows[rowIdx], "tc");
6450
+ for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
6451
+ const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
6452
+ const matchKey = findMatchingKey(headerLabel, normalizedValues);
6453
+ if (matchKey === void 0) continue;
6454
+ if (matchedLabels.has(matchKey)) continue;
6455
+ const newValue = normalizedValues.get(matchKey);
6456
+ replaceCellText(dataCells[colIdx], newValue);
6457
+ matchedLabels.add(matchKey);
6458
+ filled.push({
6459
+ label: extractCellText(headerCells[colIdx]).trim(),
6460
+ value: newValue,
6461
+ row: rowIdx,
6462
+ col: colIdx
6463
+ });
6464
+ modified = true;
6465
+ }
6466
+ }
6467
+ }
6468
+ }
6469
+ }
6470
+ const allParagraphs = findAllElements(doc.documentElement, "p");
6471
+ for (const pEl of allParagraphs) {
6472
+ if (isInsideTable(pEl)) continue;
6473
+ const tNodes = collectTextNodes(pEl);
6474
+ const fullText = tNodes.map((n) => n.text).join("");
6475
+ const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
6476
+ let match;
6477
+ while ((match = pattern.exec(fullText)) !== null) {
6478
+ const rawLabel = match[1];
6479
+ const normalized = normalizeLabel(rawLabel);
6480
+ const matchKey = findMatchingKey(normalized, normalizedValues);
6481
+ if (matchKey === void 0) continue;
6482
+ const newValue = normalizedValues.get(matchKey);
6483
+ const valueStart = match.index + match[0].length - match[2].length;
6484
+ const valueEnd = match.index + match[0].length;
6485
+ replaceTextRange(tNodes, valueStart, valueEnd, newValue);
6486
+ matchedLabels.add(matchKey);
6487
+ filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
6488
+ modified = true;
6489
+ break;
6490
+ }
6491
+ }
6492
+ if (modified) {
6493
+ const newXml = xmlSerializer.serializeToString(doc);
6494
+ zip.file(sectionPath, newXml);
6495
+ }
6496
+ }
6497
+ const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
6498
+ const buffer = await zip.generateAsync({ type: "arraybuffer" });
6499
+ return { buffer, filled, unmatched };
6500
+ }
6501
+ function localName(el) {
6502
+ return (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
6503
+ }
6504
+ function findAllElements(node, tagLocalName) {
6505
+ const result = [];
6506
+ const walk = (n) => {
6507
+ const children = n.childNodes;
6508
+ if (!children) return;
6509
+ for (let i = 0; i < children.length; i++) {
6510
+ const child = children[i];
6511
+ if (child.nodeType !== 1) continue;
6512
+ if (localName(child) === tagLocalName) result.push(child);
6513
+ walk(child);
6514
+ }
6515
+ };
6516
+ walk(node);
6517
+ return result;
6518
+ }
6519
+ function findDirectChildren(parent, tagLocalName) {
6520
+ const result = [];
6521
+ const children = parent.childNodes;
6522
+ if (!children) return result;
6523
+ for (let i = 0; i < children.length; i++) {
6524
+ const child = children[i];
6525
+ if (child.nodeType === 1 && localName(child) === tagLocalName) {
6526
+ result.push(child);
6527
+ }
6528
+ }
6529
+ return result;
6530
+ }
6531
+ function isInsideTable(el) {
6532
+ let parent = el.parentNode;
6533
+ while (parent) {
6534
+ if (parent.nodeType === 1 && localName(parent) === "tbl") return true;
6535
+ parent = parent.parentNode;
6536
+ }
6537
+ return false;
6538
+ }
6539
+ function extractCellText(tcEl) {
6540
+ const parts = [];
6541
+ const walk = (node) => {
6542
+ const children = node.childNodes;
6543
+ if (!children) return;
6544
+ for (let i = 0; i < children.length; i++) {
6545
+ const child = children[i];
6546
+ if (child.nodeType === 3) {
6547
+ parts.push(child.textContent || "");
6548
+ } else if (child.nodeType === 1) {
6549
+ const tag = localName(child);
6550
+ if (tag === "t") walk(child);
6551
+ else if (tag === "run" || tag === "r" || tag === "p" || tag === "subList") walk(child);
6552
+ else if (tag === "tab") parts.push(" ");
6553
+ else if (tag === "br") parts.push("\n");
6554
+ }
6555
+ }
6556
+ };
6557
+ walk(tcEl);
6558
+ return parts.join("");
6559
+ }
6560
+ function prependCellText(tcEl, text) {
6561
+ const tElements = findAllElements(tcEl, "t");
6562
+ if (tElements.length === 0) return;
6563
+ const firstT = tElements[0];
6564
+ const existing = firstT.textContent || "";
6565
+ clearChildren(firstT);
6566
+ firstT.appendChild(firstT.ownerDocument.createTextNode(text + " " + existing));
6567
+ }
6568
+ function replaceCellText(tcEl, newValue) {
6569
+ const paragraphs = findAllElements(tcEl, "p");
6570
+ if (paragraphs.length === 0) return;
6571
+ const firstP = paragraphs[0];
6572
+ const runs = findAllElements(firstP, "run").concat(findAllElements(firstP, "r"));
6573
+ if (runs.length > 0) {
6574
+ setRunText(runs[0], newValue);
6575
+ for (let i = 1; i < runs.length; i++) {
6576
+ setRunText(runs[i], "");
6577
+ }
6578
+ } else {
6579
+ const tElements = findAllElements(firstP, "t");
6580
+ if (tElements.length > 0) {
6581
+ clearChildren(tElements[0]);
6582
+ tElements[0].appendChild(tElements[0].ownerDocument.createTextNode(newValue));
6583
+ for (let i = 1; i < tElements.length; i++) {
6584
+ clearChildren(tElements[i]);
6585
+ }
6586
+ }
6587
+ }
6588
+ for (let i = 1; i < paragraphs.length; i++) {
6589
+ const p = paragraphs[i];
6590
+ if (p.parentNode) {
6591
+ const pRuns = findAllElements(p, "run").concat(findAllElements(p, "r"));
6592
+ for (const run of pRuns) setRunText(run, "");
6593
+ const pTs = findAllElements(p, "t");
6594
+ for (const t of pTs) clearChildren(t);
6595
+ }
6596
+ }
6286
6597
  }
6287
- function extractFromTable(table) {
6288
- const fields = [];
6289
- if (table.cols >= 2) {
6290
- for (let r = 0; r < table.rows; r++) {
6291
- for (let c = 0; c < table.cols - 1; c++) {
6292
- const labelCell = table.cells[r][c];
6293
- const valueCell = table.cells[r][c + 1];
6294
- if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
6295
- fields.push({
6296
- label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6297
- value: valueCell.text.trim(),
6298
- row: r,
6299
- col: c
6300
- });
6301
- }
6302
- }
6598
+ function setRunText(runEl, text) {
6599
+ const tElements = findAllElements(runEl, "t");
6600
+ if (tElements.length > 0) {
6601
+ clearChildren(tElements[0]);
6602
+ tElements[0].appendChild(tElements[0].ownerDocument.createTextNode(text));
6603
+ for (let i = 1; i < tElements.length; i++) {
6604
+ clearChildren(tElements[i]);
6303
6605
  }
6304
6606
  }
6305
- if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
6306
- const headerRow = table.cells[0];
6307
- const allLabels = headerRow.every((cell) => {
6308
- const t = cell.text.trim();
6309
- return t.length > 0 && t.length <= 20;
6310
- });
6311
- if (allLabels) {
6312
- for (let r = 1; r < table.rows; r++) {
6313
- for (let c = 0; c < table.cols; c++) {
6314
- const label = headerRow[c].text.trim();
6315
- const value = table.cells[r][c].text.trim();
6316
- if (label && value) {
6317
- fields.push({ label, value, row: r, col: c });
6318
- }
6319
- }
6320
- }
6321
- }
6607
+ }
6608
+ function clearChildren(el) {
6609
+ while (el.firstChild) el.removeChild(el.firstChild);
6610
+ }
6611
+ function collectTextNodes(pEl) {
6612
+ const tElements = findAllElements(pEl, "t");
6613
+ const result = [];
6614
+ let offset = 0;
6615
+ for (const t of tElements) {
6616
+ const text = t.textContent || "";
6617
+ result.push({ element: t, text, offset });
6618
+ offset += text.length;
6322
6619
  }
6323
- return fields;
6620
+ return result;
6324
6621
  }
6325
- function extractInlineFields(text) {
6326
- const fields = [];
6327
- const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
6328
- let match;
6329
- while ((match = pattern.exec(text)) !== null) {
6330
- const label = match[1].trim();
6331
- const value = match[2].trim();
6332
- if (value) {
6333
- fields.push({ label, value, row: -1, col: -1 });
6622
+ function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
6623
+ let replaced = false;
6624
+ for (const node of tNodes) {
6625
+ const nodeStart = node.offset;
6626
+ const nodeEnd = node.offset + node.text.length;
6627
+ if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
6628
+ const localStart = Math.max(0, globalStart - nodeStart);
6629
+ const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
6630
+ if (!replaced) {
6631
+ const before = node.text.slice(0, localStart);
6632
+ const after = node.text.slice(localEnd);
6633
+ const newText = before + newValue + after;
6634
+ clearChildren(node.element);
6635
+ node.element.appendChild(node.element.ownerDocument.createTextNode(newText));
6636
+ replaced = true;
6637
+ } else {
6638
+ const before = node.text.slice(0, localStart);
6639
+ const after = node.text.slice(localEnd);
6640
+ const newText = before + after;
6641
+ clearChildren(node.element);
6642
+ node.element.appendChild(node.element.ownerDocument.createTextNode(newText));
6334
6643
  }
6335
6644
  }
6336
- return fields;
6645
+ }
6646
+ function collectCellTextNodes(tcEl) {
6647
+ const tElements = findAllElements(tcEl, "t");
6648
+ const result = [];
6649
+ let offset = 0;
6650
+ for (const t of tElements) {
6651
+ const text = t.textContent || "";
6652
+ result.push({ element: t, text, offset });
6653
+ offset += text.length;
6654
+ }
6655
+ return result;
6656
+ }
6657
+ function applyTextReplacements(tNodes, originalFull, replacedFull) {
6658
+ if (originalFull === replacedFull) return;
6659
+ if (tNodes.length === 1) {
6660
+ clearChildren(tNodes[0].element);
6661
+ tNodes[0].element.appendChild(
6662
+ tNodes[0].element.ownerDocument.createTextNode(replacedFull)
6663
+ );
6664
+ return;
6665
+ }
6666
+ let diffStart = 0;
6667
+ while (diffStart < originalFull.length && diffStart < replacedFull.length && originalFull[diffStart] === replacedFull[diffStart]) {
6668
+ diffStart++;
6669
+ }
6670
+ let diffEndOrig = originalFull.length;
6671
+ let diffEndRepl = replacedFull.length;
6672
+ while (diffEndOrig > diffStart && diffEndRepl > diffStart && originalFull[diffEndOrig - 1] === replacedFull[diffEndRepl - 1]) {
6673
+ diffEndOrig--;
6674
+ diffEndRepl--;
6675
+ }
6676
+ const newPart = replacedFull.slice(diffStart, diffEndRepl);
6677
+ replaceTextRange(tNodes, diffStart, diffEndOrig, newPart);
6337
6678
  }
6338
6679
 
6339
6680
  // src/hwpx/generator.ts
6340
- var import_jszip5 = __toESM(require("jszip"), 1);
6681
+ var import_jszip6 = __toESM(require("jszip"), 1);
6341
6682
  var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
6342
6683
  var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
6343
6684
  var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
@@ -6364,7 +6705,7 @@ var PARA_LIST = 7;
6364
6705
  async function markdownToHwpx(markdown) {
6365
6706
  const blocks = parseMarkdownToBlocks(markdown);
6366
6707
  const sectionXml = blocksToSectionXml(blocks);
6367
- const zip = new import_jszip5.default();
6708
+ const zip = new import_jszip6.default();
6368
6709
  zip.file("mimetype", "application/hwp+zip", { compression: "STORE" });
6369
6710
  zip.file("META-INF/container.xml", generateContainerXml());
6370
6711
  zip.file("Contents/content.hpf", generateManifest());
@@ -6724,6 +7065,183 @@ function blocksToSectionXml(blocks) {
6724
7065
  </hs:sec>`;
6725
7066
  }
6726
7067
 
7068
+ // src/diff/text-diff.ts
7069
/**
 * Scores how alike two strings are on a 0..1 scale, as
 * `1 - levenshtein(a, b) / longerLength`.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 1 for strictly equal inputs, 0 when exactly one side is
 *   empty/falsy, otherwise the edit-distance based ratio.
 */
function similarity(a, b) {
  if (a === b) {
    return 1;
  }
  if (!(a && b)) {
    return 0;
  }
  const longest = Math.max(a.length, b.length);
  if (longest === 0) {
    return 1;
  }
  const distance = levenshtein(a, b);
  return 1 - distance / longest;
}
7076
/**
 * Compares two strings with similarity() after passing each through
 * normalize().
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The similarity() score of the normalized inputs.
 */
function normalizedSimilarity(a, b) {
  const left = normalize(a);
  const right = normalize(b);
  return similarity(left, right);
}
7079
/**
 * Collapses every run of whitespace into a single space and strips leading
 * and trailing whitespace.
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The whitespace-normalized text.
 */
function normalize(s) {
  const collapsed = s.replace(/\s+/g, " ");
  return collapsed.trim();
}
7082
// Combined input length above which levenshtein() switches to a sampled estimate.
var MAX_LEVENSHTEIN_LEN = 1e4;
/**
 * Edit distance between two strings, counted in UTF-16 code units.
 *
 * Inputs whose combined length is at most MAX_LEVENSHTEIN_LEN get the exact
 * distance from a two-row dynamic-programming table. Longer inputs get an
 * estimate: up to 500 positions, spread evenly over the shared length, are
 * compared one-to-one and the observed mismatch rate is scaled to that length,
 * then the length difference is added.
 *
 * The samples are spread over the whole shared length (not just the first 500
 * characters) so that strings with a long common prefix are not reported as
 * identical, and the estimate is never 0 for strings that actually differ.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Exact or estimated edit distance; 0 only when a === b.
 */
function levenshtein(a, b) {
  if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
    const sharedLen = Math.min(a.length, b.length);
    const sampleLen = Math.min(500, sharedLen);
    let diffs = 0;
    if (sampleLen > 0) {
      // stride >= 1, so sampled positions are distinct and stay below sharedLen.
      const stride = sharedLen / sampleLen;
      for (let k = 0; k < sampleLen; k++) {
        const pos = Math.floor(k * stride);
        if (a[pos] !== b[pos]) diffs++;
      }
    }
    const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
    const estimate = Math.abs(a.length - b.length) + Math.round(sharedLen * sampleRate);
    // Sampling can miss every differing position; distance 0 must mean equality.
    if (estimate === 0 && a !== b) return 1;
    return estimate;
  }

  // Keep the DP rows sized by the shorter string to minimize memory.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];
  const m = shorter.length;
  const n = longer.length;
  let prev = Array.from({ length: m + 1 }, (_, i) => i);
  let curr = new Array(m + 1);
  for (let j = 1; j <= n; j++) {
    curr[0] = j;
    for (let i = 1; i <= m; i++) {
      if (shorter[i - 1] === longer[j - 1]) {
        curr[i] = prev[i - 1];
      } else {
        curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
      }
    }
    // Reuse the two row buffers instead of allocating a new row per column.
    [prev, curr] = [curr, prev];
  }
  return prev[m];
}
7110
+
7111
+ // src/diff/compare.ts
7112
// Minimum blockSimilarity() score for two blocks to be paired during alignment.
var SIMILARITY_THRESHOLD = 0.4;
/**
 * Parses two documents concurrently and returns the block-level diff of the
 * results.
 *
 * @param {*} bufferA - First document, in any form accepted by parse().
 * @param {*} bufferB - Second document, in any form accepted by parse().
 * @param {object} [options] - Options forwarded unchanged to both parse() calls.
 * @returns {Promise<object>} The value produced by diffBlocks().
 * @throws {Error} When either document fails to parse.
 */
async function compare(bufferA, bufferB, options) {
  const pendingA = parse(bufferA, options);
  const pendingB = parse(bufferB, options);
  const [parsedA, parsedB] = await Promise.all([pendingA, pendingB]);
  if (!parsedA.success) {
    throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${parsedA.error}`);
  }
  if (!parsedB.success) {
    throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${parsedB.error}`);
  }
  return diffBlocks(parsedA.blocks, parsedB.blocks);
}
7122
/**
 * Builds a block-level diff between two block lists.
 *
 * Blocks are first paired by alignBlocks(). A pair scoring at least 0.99 is
 * reported as "unchanged" (with similarity 1); any other pair is "modified"
 * and, when both sides are tables carrying table data, also gets per-cell
 * diffs. Blocks without a partner are "removed" or "added".
 *
 * @param {Array<object>} blocksA - Blocks of the "before" document.
 * @param {Array<object>} blocksB - Blocks of the "after" document.
 * @returns {{stats: {added: number, removed: number, modified: number, unchanged: number}, diffs: Array<object>}}
 */
function diffBlocks(blocksA, blocksB) {
  const pairs = alignBlocks(blocksA, blocksB);
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];

  for (const [before, after] of pairs) {
    if (before && after) {
      const score = blockSimilarity(before, after);
      if (score >= 0.99) {
        diffs.push({ type: "unchanged", before, after, similarity: 1 });
        stats.unchanged += 1;
        continue;
      }
      const entry = { type: "modified", before, after, similarity: score };
      const bothTables =
        before.type === "table" && after.type === "table" && before.table && after.table;
      if (bothTables) {
        entry.cellDiffs = diffTableCells(before.table, after.table);
      }
      diffs.push(entry);
      stats.modified += 1;
      continue;
    }

    if (before) {
      diffs.push({ type: "removed", before });
      stats.removed += 1;
    } else if (after) {
      diffs.push({ type: "added", after });
      stats.added += 1;
    }
  }

  return { stats, diffs };
}
7150
/**
 * Aligns two block lists with a longest-common-subsequence table in which two
 * blocks "match" when blockSimilarity() reaches SIMILARITY_THRESHOLD.
 *
 * When the table would exceed 1e7 cells the work is delegated to
 * fallbackAlign() instead.
 *
 * @param {Array<object>} a - Blocks of the first document.
 * @param {Array<object>} b - Blocks of the second document.
 * @returns {Array<[object|null, object|null]>} Ordered pairs; an unmatched
 *   block is paired with null on the opposite side.
 */
function alignBlocks(a, b) {
  const m = a.length;
  const n = b.length;
  if (m * n > 1e7) {
    return fallbackAlign(a, b);
  }

  // Memoize scores: each pair is needed by both the fill and the backtrack.
  const memo = new Map();
  const scoreOf = (row, col) => {
    const key = `${row},${col}`;
    const cached = memo.get(key);
    if (cached !== undefined) {
      return cached;
    }
    const fresh = blockSimilarity(a[row], b[col]);
    memo.set(key, fresh);
    return fresh;
  };
  const isMatch = (row, col) => scoreOf(row, col) >= SIMILARITY_THRESHOLD;

  // lcs[r][c] = number of matched pairs using the first r blocks of a and c of b.
  const lcs = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
  for (let row = 1; row <= m; row++) {
    for (let col = 1; col <= n; col++) {
      lcs[row][col] = isMatch(row - 1, col - 1)
        ? lcs[row - 1][col - 1] + 1
        : Math.max(lcs[row - 1][col], lcs[row][col - 1]);
    }
  }

  // Backtrack from the bottom-right corner to recover the matched index pairs.
  const matched = [];
  let row = m;
  let col = n;
  while (row > 0 && col > 0) {
    if (isMatch(row - 1, col - 1) && lcs[row][col] === lcs[row - 1][col - 1] + 1) {
      matched.push([row - 1, col - 1]);
      row -= 1;
      col -= 1;
    } else if (lcs[row - 1][col] >= lcs[row][col - 1]) {
      row -= 1;
    } else {
      col -= 1;
    }
  }
  matched.reverse();

  // Interleave: unmatched a-blocks, then unmatched b-blocks, then the matched pair.
  const aligned = [];
  let nextA = 0;
  let nextB = 0;
  const flushA = (limit) => {
    for (; nextA < limit; nextA++) {
      aligned.push([a[nextA], null]);
    }
  };
  const flushB = (limit) => {
    for (; nextB < limit; nextB++) {
      aligned.push([null, b[nextB]]);
    }
  };
  for (const [matchA, matchB] of matched) {
    flushA(matchA);
    flushB(matchB);
    aligned.push([a[nextA], b[nextB]]);
    nextA += 1;
    nextB += 1;
  }
  flushA(m);
  flushB(n);
  return aligned;
}
7198
/**
 * Pairs two lists purely by position, with no similarity check.
 *
 * @param {Array<object>} a - First list.
 * @param {Array<object>} b - Second list.
 * @returns {Array<[object|null, object|null]>} One pair per index up to the
 *   longer list's length; a missing (or falsy) entry becomes null.
 */
function fallbackAlign(a, b) {
  const longest = Math.max(a.length, b.length);
  return Array.from({ length: longest }, (_, idx) => [a[idx] || null, b[idx] || null]);
}
7206
/**
 * Scores how alike two blocks are on a 0..1 scale.
 *
 * Blocks of different types score 0. When both carry a `text` property the
 * texts are compared; otherwise two tables that both carry table data are
 * compared as tables; any remaining same-type pair scores 1.
 *
 * @param {object} a - First block.
 * @param {object} b - Second block.
 * @returns {number} Similarity score.
 */
function blockSimilarity(a, b) {
  if (a.type !== b.type) {
    return 0;
  }

  const bothHaveText = a.text !== undefined && b.text !== undefined;
  if (bothHaveText) {
    return normalizedSimilarity(a.text || "", b.text || "");
  }

  const bothHaveTables = a.type === "table" && a.table && b.table;
  if (bothHaveTables) {
    return tableSimilarity(a.table, b.table);
  }

  return a.type === b.type ? 1 : 0;
}
7217
/**
 * Scores how alike two tables are, weighting size agreement at 0.3 and text
 * agreement at 0.7.
 *
 * Size agreement compares the two `rows * cols` areas; text agreement compares
 * all cell texts joined with single spaces.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a - First table.
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b - Second table.
 * @returns {number} Weighted similarity score.
 */
function tableSimilarity(a, b) {
  const areaA = a.rows * a.cols;
  const areaB = b.rows * b.cols;
  // The trailing 1 keeps the divisor non-zero when both tables are empty.
  const dimSim = 1 - Math.abs(areaA - areaB) / Math.max(areaA, areaB, 1);

  const joinCellText = (table) => table.cells.flat().map((cell) => cell.text).join(" ");
  const contentSim = normalizedSimilarity(joinCellText(a), joinCellText(b));

  return dimSim * 0.3 + contentSim * 0.7;
}
7224
/**
 * Compares two tables cell by cell over the union of their dimensions.
 *
 * A position outside a table's declared `rows`/`cols` counts as absent for
 * that table. A position inside the declared size whose row or cell is
 * missing from `cells` (a ragged or short grid) is also treated as absent
 * instead of raising a TypeError.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a - "Before" table.
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b - "After" table.
 * @returns {Array<Array<{type: string, before: (string|undefined), after: (string|undefined)}>>}
 *   A grid of cell diffs whose `type` is "added", "removed", "unchanged" or "modified".
 */
function diffTableCells(a, b) {
  const maxRows = Math.max(a.rows, b.rows);
  const maxCols = Math.max(a.cols, b.cols);

  // Optional chaining tolerates grids that are smaller than rows x cols claims.
  const textAt = (table, r, c) =>
    r < table.rows && c < table.cols ? table.cells?.[r]?.[c]?.text : undefined;

  const result = [];
  for (let r = 0; r < maxRows; r++) {
    const row = [];
    for (let c = 0; c < maxCols; c++) {
      const cellA = textAt(a, r, c);
      const cellB = textAt(b, r, c);
      let type;
      if (cellA === undefined) type = "added";
      else if (cellB === undefined) type = "removed";
      else if (cellA === cellB) type = "unchanged";
      else type = "modified";
      row.push({ type, before: cellA, after: cellB });
    }
    result.push(row);
  }
  return result;
}
7244
+
6727
7245
  // src/index.ts
6728
7246
  async function parse(input, options) {
6729
7247
  let buffer;
@@ -6800,6 +7318,45 @@ async function parseDocx(buffer, options) {
6800
7318
  return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
6801
7319
  }
6802
7320
  }
7321
/**
 * Fills a form document with the supplied values and returns it in the
 * requested output format.
 *
 * - "hwpx-preserve": the input must be detected as HWPX; it is filled via
 *   fillHwpx() and the resulting buffer is returned.
 * - "hwpx": the input is parsed, filled at block level, rendered to markdown
 *   and converted with markdownToHwpx().
 * - anything else: the filled markdown itself is returned.
 *
 * @param {string|Buffer|ArrayBuffer} input - File path (read from disk), Node
 *   Buffer, or a value that is used as the buffer as-is.
 * @param {object} values - Field values passed through to the fill helpers.
 * @param {string} [outputFormat="markdown"] - Desired output format.
 * @returns {Promise<{output: *, format: string, fill: object}>} Output plus fill report.
 * @throws {Error} When "hwpx-preserve" is requested for non-HWPX input, or
 *   when parsing the form fails.
 */
async function fillForm(input, values, outputFormat = "markdown") {
  let buffer;
  if (typeof input === "string") {
    buffer = toArrayBuffer(await (0, import_promises.readFile)(input));
  } else {
    buffer = Buffer.isBuffer(input) ? toArrayBuffer(input) : input;
  }

  if (outputFormat === "hwpx-preserve") {
    // A buffer sniffed as "hwpx" is double-checked against its ZIP contents.
    const sniffed = detectFormat(buffer);
    const detected = sniffed === "hwpx" ? await detectZipFormat(buffer) : sniffed;
    if (detected !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${detected})`);
    }
    const { buffer: output, filled, unmatched } = await fillHwpx(buffer, values);
    return { output, format: "hwpx-preserve", fill: { filled, unmatched } };
  }

  const parsed = await parse(buffer);
  if (!parsed.success) {
    throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
  }

  const fill = fillFormFields(parsed.blocks, values);
  const markdown = blocksToMarkdown(fill.blocks);
  if (outputFormat !== "hwpx") {
    return { output: markdown, format: "markdown", fill };
  }
  const hwpxBuffer = await markdownToHwpx(markdown);
  return { output: hwpxBuffer, format: "hwpx", fill };
}
6803
7360
  // Annotate the CommonJS export names for ESM import in node:
6804
7361
  0 && (module.exports = {
6805
7362
  VERSION,
@@ -6809,7 +7366,11 @@ async function parseDocx(buffer, options) {
6809
7366
  detectZipFormat,
6810
7367
  diffBlocks,
6811
7368
  extractFormFields,
7369
+ fillForm,
7370
+ fillFormFields,
7371
+ fillHwpx,
6812
7372
  isHwpxFile,
7373
+ isLabelCell,
6813
7374
  isOldHwpFile,
6814
7375
  isPdfFile,
6815
7376
  isZipFile,