kordoc 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -121,7 +121,11 @@ __export(index_exports, {
121
121
  detectZipFormat: () => detectZipFormat,
122
122
  diffBlocks: () => diffBlocks,
123
123
  extractFormFields: () => extractFormFields,
124
+ fillForm: () => fillForm,
125
+ fillFormFields: () => fillFormFields,
126
+ fillHwpx: () => fillHwpx,
124
127
  isHwpxFile: () => isHwpxFile,
128
+ isLabelCell: () => isLabelCell,
125
129
  isOldHwpFile: () => isOldHwpFile,
126
130
  isPdfFile: () => isPdfFile,
127
131
  isZipFile: () => isZipFile,
@@ -183,7 +187,7 @@ var import_zlib = require("zlib");
183
187
  var import_xmldom = require("@xmldom/xmldom");
184
188
 
185
189
  // src/utils.ts
186
- var VERSION = true ? "2.2.2" : "0.0.0-dev";
190
+ var VERSION = true ? "2.2.4" : "0.0.0-dev";
187
191
  function toArrayBuffer(buf) {
188
192
  if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
189
193
  return buf.buffer;
@@ -501,9 +505,47 @@ function blocksToMarkdown(blocks) {
501
505
  }
502
506
  return lines.join("\n").trim();
503
507
  }
508
+ function hasMergedCells(table) {
509
+ for (const row of table.cells) {
510
+ for (const cell of row) {
511
+ if (cell.colSpan > 1 || cell.rowSpan > 1) return true;
512
+ }
513
+ }
514
+ return false;
515
+ }
516
+ function tableToHtml(table) {
517
+ const { cells, rows: numRows, cols: numCols } = table;
518
+ const skip = /* @__PURE__ */ new Set();
519
+ const lines = ["<table>"];
520
+ for (let r = 0; r < numRows; r++) {
521
+ const tag = r === 0 ? "th" : "td";
522
+ const rowHtml = [];
523
+ for (let c = 0; c < numCols; c++) {
524
+ if (skip.has(`${r},${c}`)) continue;
525
+ const cell = cells[r]?.[c];
526
+ if (!cell) continue;
527
+ for (let dr = 0; dr < cell.rowSpan; dr++) {
528
+ for (let dc = 0; dc < cell.colSpan; dc++) {
529
+ if (dr === 0 && dc === 0) continue;
530
+ if (r + dr < numRows && c + dc < numCols) skip.add(`${r + dr},${c + dc}`);
531
+ }
532
+ }
533
+ const text = sanitizeText(cell.text).replace(/\n/g, "<br>");
534
+ const attrs = [];
535
+ if (cell.colSpan > 1) attrs.push(`colspan="${cell.colSpan}"`);
536
+ if (cell.rowSpan > 1) attrs.push(`rowspan="${cell.rowSpan}"`);
537
+ const attrStr = attrs.length ? " " + attrs.join(" ") : "";
538
+ rowHtml.push(`<${tag}${attrStr}>${text}</${tag}>`);
539
+ }
540
+ if (rowHtml.length) lines.push(`<tr>${rowHtml.join("")}</tr>`);
541
+ }
542
+ lines.push("</table>");
543
+ return lines.join("\n");
544
+ }
504
545
  function tableToMarkdown(table) {
505
546
  if (table.rows === 0 || table.cols === 0) return "";
506
547
  const { cells, rows: numRows, cols: numCols } = table;
548
+ if (hasMergedCells(table)) return tableToHtml(table);
507
549
  if (numRows === 1 && numCols === 1) {
508
550
  const content = sanitizeText(cells[0][0].text);
509
551
  if (!content) return "";
@@ -5570,21 +5612,21 @@ async function parseXlsxDocument(buffer, options) {
5570
5612
  var import_jszip4 = __toESM(require("jszip"), 1);
5571
5613
  var import_xmldom3 = require("@xmldom/xmldom");
5572
5614
  var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
5573
- function getChildElements(parent, localName) {
5615
+ function getChildElements(parent, localName2) {
5574
5616
  const result = [];
5575
5617
  const children = parent.childNodes;
5576
5618
  for (let i = 0; i < children.length; i++) {
5577
5619
  const node = children[i];
5578
5620
  if (node.nodeType === 1) {
5579
5621
  const el = node;
5580
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5622
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5581
5623
  result.push(el);
5582
5624
  }
5583
5625
  }
5584
5626
  }
5585
5627
  return result;
5586
5628
  }
5587
- function findElements(parent, localName) {
5629
+ function findElements(parent, localName2) {
5588
5630
  const result = [];
5589
5631
  const walk = (node) => {
5590
5632
  const children = node.childNodes;
@@ -5592,7 +5634,7 @@ function findElements(parent, localName) {
5592
5634
  const child = children[i];
5593
5635
  if (child.nodeType === 1) {
5594
5636
  const el = child;
5595
- if (el.localName === localName || el.tagName?.endsWith(`:${localName}`)) {
5637
+ if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
5596
5638
  result.push(el);
5597
5639
  }
5598
5640
  walk(el);
@@ -5602,11 +5644,11 @@ function findElements(parent, localName) {
5602
5644
  walk(parent);
5603
5645
  return result;
5604
5646
  }
5605
- function getAttr(el, localName) {
5647
+ function getAttr(el, localName2) {
5606
5648
  const attrs = el.attributes;
5607
5649
  for (let i = 0; i < attrs.length; i++) {
5608
5650
  const attr = attrs[i];
5609
- if (attr.localName === localName || attr.name === localName) return attr.value;
5651
+ if (attr.localName === localName2 || attr.name === localName2) return attr.value;
5610
5652
  }
5611
5653
  return null;
5612
5654
  }
@@ -5953,11 +5995,11 @@ async function parseDocxDocument(buffer, options) {
5953
5995
  const node = children[i];
5954
5996
  if (node.nodeType !== 1) continue;
5955
5997
  const el = node;
5956
- const localName = el.localName ?? el.tagName?.split(":").pop();
5957
- if (localName === "p") {
5998
+ const localName2 = el.localName ?? el.tagName?.split(":").pop();
5999
+ if (localName2 === "p") {
5958
6000
  const block = parseParagraph(el, styles, numbering, footnotes, rels);
5959
6001
  if (block) blocks.push(block);
5960
- } else if (localName === "tbl") {
6002
+ } else if (localName2 === "tbl") {
5961
6003
  const block = parseTable(el, styles, numbering, footnotes, rels);
5962
6004
  if (block) blocks.push(block);
5963
6005
  }
@@ -5995,183 +6037,6 @@ async function parseDocxDocument(buffer, options) {
5995
6037
  };
5996
6038
  }
5997
6039
 
5998
- // src/diff/text-diff.ts
5999
- function similarity(a, b) {
6000
- if (a === b) return 1;
6001
- if (!a || !b) return 0;
6002
- const maxLen = Math.max(a.length, b.length);
6003
- if (maxLen === 0) return 1;
6004
- return 1 - levenshtein(a, b) / maxLen;
6005
- }
6006
- function normalizedSimilarity(a, b) {
6007
- return similarity(normalize(a), normalize(b));
6008
- }
6009
- function normalize(s) {
6010
- return s.replace(/\s+/g, " ").trim();
6011
- }
6012
- var MAX_LEVENSHTEIN_LEN = 1e4;
6013
- function levenshtein(a, b) {
6014
- if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
6015
- const sampleLen = Math.min(500, a.length, b.length);
6016
- let diffs = 0;
6017
- for (let i = 0; i < sampleLen; i++) if (a[i] !== b[i]) diffs++;
6018
- const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
6019
- return Math.abs(a.length - b.length) + Math.round(Math.min(a.length, b.length) * sampleRate);
6020
- }
6021
- if (a.length > b.length) [a, b] = [b, a];
6022
- const m = a.length;
6023
- const n = b.length;
6024
- let prev = Array.from({ length: m + 1 }, (_, i) => i);
6025
- let curr = new Array(m + 1);
6026
- for (let j = 1; j <= n; j++) {
6027
- curr[0] = j;
6028
- for (let i = 1; i <= m; i++) {
6029
- if (a[i - 1] === b[j - 1]) {
6030
- curr[i] = prev[i - 1];
6031
- } else {
6032
- curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
6033
- }
6034
- }
6035
- ;
6036
- [prev, curr] = [curr, prev];
6037
- }
6038
- return prev[m];
6039
- }
6040
-
6041
- // src/diff/compare.ts
6042
- var SIMILARITY_THRESHOLD = 0.4;
6043
- async function compare(bufferA, bufferB, options) {
6044
- const [resultA, resultB] = await Promise.all([
6045
- parse(bufferA, options),
6046
- parse(bufferB, options)
6047
- ]);
6048
- if (!resultA.success) throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
6049
- if (!resultB.success) throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
6050
- return diffBlocks(resultA.blocks, resultB.blocks);
6051
- }
6052
- function diffBlocks(blocksA, blocksB) {
6053
- const aligned = alignBlocks(blocksA, blocksB);
6054
- const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
6055
- const diffs = [];
6056
- for (const [a, b] of aligned) {
6057
- if (a && b) {
6058
- const sim = blockSimilarity(a, b);
6059
- if (sim >= 0.99) {
6060
- diffs.push({ type: "unchanged", before: a, after: b, similarity: 1 });
6061
- stats.unchanged++;
6062
- } else {
6063
- const diff = { type: "modified", before: a, after: b, similarity: sim };
6064
- if (a.type === "table" && b.type === "table" && a.table && b.table) {
6065
- diff.cellDiffs = diffTableCells(a.table, b.table);
6066
- }
6067
- diffs.push(diff);
6068
- stats.modified++;
6069
- }
6070
- } else if (a) {
6071
- diffs.push({ type: "removed", before: a });
6072
- stats.removed++;
6073
- } else if (b) {
6074
- diffs.push({ type: "added", after: b });
6075
- stats.added++;
6076
- }
6077
- }
6078
- return { stats, diffs };
6079
- }
6080
- function alignBlocks(a, b) {
6081
- const m = a.length, n = b.length;
6082
- if (m * n > 1e7) return fallbackAlign(a, b);
6083
- const simCache = /* @__PURE__ */ new Map();
6084
- const getSim = (i2, j2) => {
6085
- const key = `${i2},${j2}`;
6086
- let v = simCache.get(key);
6087
- if (v === void 0) {
6088
- v = blockSimilarity(a[i2], b[j2]);
6089
- simCache.set(key, v);
6090
- }
6091
- return v;
6092
- };
6093
- const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
6094
- for (let i2 = 1; i2 <= m; i2++) {
6095
- for (let j2 = 1; j2 <= n; j2++) {
6096
- if (getSim(i2 - 1, j2 - 1) >= SIMILARITY_THRESHOLD) {
6097
- dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
6098
- } else {
6099
- dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
6100
- }
6101
- }
6102
- }
6103
- const pairs = [];
6104
- let i = m, j = n;
6105
- while (i > 0 && j > 0) {
6106
- if (getSim(i - 1, j - 1) >= SIMILARITY_THRESHOLD && dp[i][j] === dp[i - 1][j - 1] + 1) {
6107
- pairs.push([i - 1, j - 1]);
6108
- i--;
6109
- j--;
6110
- } else if (dp[i - 1][j] >= dp[i][j - 1]) {
6111
- i--;
6112
- } else {
6113
- j--;
6114
- }
6115
- }
6116
- pairs.reverse();
6117
- const result = [];
6118
- let ai = 0, bi = 0;
6119
- for (const [pi, pj] of pairs) {
6120
- while (ai < pi) result.push([a[ai++], null]);
6121
- while (bi < pj) result.push([null, b[bi++]]);
6122
- result.push([a[ai++], b[bi++]]);
6123
- }
6124
- while (ai < m) result.push([a[ai++], null]);
6125
- while (bi < n) result.push([null, b[bi++]]);
6126
- return result;
6127
- }
6128
- function fallbackAlign(a, b) {
6129
- const result = [];
6130
- const len = Math.max(a.length, b.length);
6131
- for (let i = 0; i < len; i++) {
6132
- result.push([a[i] || null, b[i] || null]);
6133
- }
6134
- return result;
6135
- }
6136
- function blockSimilarity(a, b) {
6137
- if (a.type !== b.type) return 0;
6138
- if (a.text !== void 0 && b.text !== void 0) {
6139
- return normalizedSimilarity(a.text || "", b.text || "");
6140
- }
6141
- if (a.type === "table" && a.table && b.table) {
6142
- return tableSimilarity(a.table, b.table);
6143
- }
6144
- if (a.type === b.type) return 1;
6145
- return 0;
6146
- }
6147
- function tableSimilarity(a, b) {
6148
- const dimSim = 1 - Math.abs(a.rows * a.cols - b.rows * b.cols) / Math.max(a.rows * a.cols, b.rows * b.cols, 1);
6149
- const textsA = a.cells.flat().map((c) => c.text).join(" ");
6150
- const textsB = b.cells.flat().map((c) => c.text).join(" ");
6151
- const contentSim = normalizedSimilarity(textsA, textsB);
6152
- return dimSim * 0.3 + contentSim * 0.7;
6153
- }
6154
- function diffTableCells(a, b) {
6155
- const maxRows = Math.max(a.rows, b.rows);
6156
- const maxCols = Math.max(a.cols, b.cols);
6157
- const result = [];
6158
- for (let r = 0; r < maxRows; r++) {
6159
- const row = [];
6160
- for (let c = 0; c < maxCols; c++) {
6161
- const cellA = r < a.rows && c < a.cols ? a.cells[r][c].text : void 0;
6162
- const cellB = r < b.rows && c < b.cols ? b.cells[r][c].text : void 0;
6163
- let type;
6164
- if (cellA === void 0) type = "added";
6165
- else if (cellB === void 0) type = "removed";
6166
- else if (cellA === cellB) type = "unchanged";
6167
- else type = "modified";
6168
- row.push({ type, before: cellA, after: cellB });
6169
- }
6170
- result.push(row);
6171
- }
6172
- return result;
6173
- }
6174
-
6175
6040
  // src/form/recognize.ts
6176
6041
  var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6177
6042
  "\uC131\uBA85",
@@ -6212,15 +6077,20 @@ var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
6212
6077
  "\uB2E8\uAC00",
6213
6078
  "\uD569\uACC4",
6214
6079
  "\uACC4",
6215
- "\uC18C\uACC4"
6080
+ "\uC18C\uACC4",
6081
+ "\uB4F1\uB85D\uAE30\uC900\uC9C0",
6082
+ "\uBCF8\uC801",
6083
+ "\uC704\uC784\uC778",
6084
+ "\uCCAD\uAD6C\uC0AC\uC720",
6085
+ "\uC18C\uBA85\uC790\uB8CC"
6216
6086
  ]);
6217
6087
  function isLabelCell(text) {
6218
- const trimmed = text.trim();
6088
+ const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
6219
6089
  if (!trimmed || trimmed.length > 30) return false;
6220
6090
  for (const kw of LABEL_KEYWORDS) {
6221
6091
  if (trimmed.includes(kw)) return true;
6222
6092
  }
6223
- if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
6093
+ if (/^[가-힣\s()()·::]+$/.test(trimmed) && trimmed.replace(/\s/g, "").length >= 2 && trimmed.replace(/\s/g, "").length <= 8 && !/\d/.test(trimmed)) return true;
6224
6094
  if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
6225
6095
  return false;
6226
6096
  }
@@ -6253,7 +6123,7 @@ function extractFromTable(table) {
6253
6123
  for (let c = 0; c < table.cols - 1; c++) {
6254
6124
  const labelCell = table.cells[r][c];
6255
6125
  const valueCell = table.cells[r][c + 1];
6256
- if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
6126
+ if (isLabelCell(labelCell.text)) {
6257
6127
  fields.push({
6258
6128
  label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6259
6129
  value: valueCell.text.trim(),
@@ -6298,65 +6168,574 @@ function extractInlineFields(text) {
6298
6168
  return fields;
6299
6169
  }
6300
6170
 
6301
- // src/hwpx/generator.ts
6302
- var import_jszip5 = __toESM(require("jszip"), 1);
6303
- var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
6304
- var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
6305
- var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
6306
- var NS_OPF = "http://www.idpf.org/2007/opf/";
6307
- var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
6308
- var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";
6309
- var CHAR_NORMAL = 0;
6310
- var CHAR_BOLD = 1;
6311
- var CHAR_ITALIC = 2;
6312
- var CHAR_BOLD_ITALIC = 3;
6313
- var CHAR_CODE = 4;
6314
- var CHAR_H1 = 5;
6315
- var CHAR_H2 = 6;
6316
- var CHAR_H3 = 7;
6317
- var CHAR_H4 = 8;
6318
- var PARA_NORMAL = 0;
6319
- var PARA_H1 = 1;
6320
- var PARA_H2 = 2;
6321
- var PARA_H3 = 3;
6322
- var PARA_H4 = 4;
6323
- var PARA_CODE = 5;
6324
- var PARA_QUOTE = 6;
6325
- var PARA_LIST = 7;
6326
- async function markdownToHwpx(markdown) {
6327
- const blocks = parseMarkdownToBlocks(markdown);
6328
- const sectionXml = blocksToSectionXml(blocks);
6329
- const zip = new import_jszip5.default();
6330
- zip.file("mimetype", "application/hwp+zip", { compression: "STORE" });
6331
- zip.file("META-INF/container.xml", generateContainerXml());
6332
- zip.file("Contents/content.hpf", generateManifest());
6333
- zip.file("Contents/header.xml", generateHeaderXml());
6334
- zip.file("Contents/section0.xml", sectionXml);
6335
- return await zip.generateAsync({ type: "arraybuffer" });
6171
+ // src/form/match.ts
6172
+ function normalizeLabel(label) {
6173
+ return label.trim().replace(/[::\s()()·]/g, "");
6336
6174
  }
6337
- function parseMarkdownToBlocks(md) {
6338
- const lines = md.split("\n");
6339
- const blocks = [];
6340
- let i = 0;
6341
- while (i < lines.length) {
6342
- const line = lines[i];
6343
- if (!line.trim()) {
6344
- i++;
6345
- continue;
6346
- }
6347
- const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
6348
- if (fenceMatch) {
6349
- const fence = fenceMatch[1];
6350
- const lang = fenceMatch[2].trim();
6351
- const codeLines = [];
6352
- i++;
6353
- while (i < lines.length && !lines[i].startsWith(fence)) {
6354
- codeLines.push(lines[i]);
6355
- i++;
6175
+ function findMatchingKey(cellLabel, values) {
6176
+ if (values.has(cellLabel)) return cellLabel;
6177
+ let bestKey;
6178
+ let bestLen = 0;
6179
+ for (const key of values.keys()) {
6180
+ if (cellLabel.startsWith(key)) {
6181
+ if (key.length >= cellLabel.length * 0.6 && key.length > bestLen) {
6182
+ bestLen = key.length;
6183
+ bestKey = key;
6184
+ }
6185
+ } else if (key.startsWith(cellLabel)) {
6186
+ if (cellLabel.length >= key.length * 0.6 && cellLabel.length > bestLen) {
6187
+ bestLen = cellLabel.length;
6188
+ bestKey = key;
6356
6189
  }
6357
- if (i < lines.length) i++;
6358
- blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
6359
- continue;
6190
+ }
6191
+ }
6192
+ return bestKey;
6193
+ }
6194
+ function isKeywordLabel(text) {
6195
+ const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
6196
+ if (!trimmed || trimmed.length > 15) return false;
6197
+ for (const kw of LABEL_KEYWORDS) {
6198
+ if (trimmed.includes(kw)) return true;
6199
+ }
6200
+ return false;
6201
+ }
6202
+ function fillInCellPatterns(cellText, values, matchedLabels) {
6203
+ let text = cellText;
6204
+ const matches = [];
6205
+ text = text.replace(
6206
+ /([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
6207
+ (match, prefix, suffix) => {
6208
+ const label = prefix + suffix;
6209
+ const normalizedLabel = normalizeLabel(label);
6210
+ const matchKey = values.has(normalizedLabel) ? normalizedLabel : values.has(normalizeLabel(prefix)) ? normalizeLabel(prefix) : void 0;
6211
+ if (matchKey === void 0) return match;
6212
+ const newValue = values.get(matchKey);
6213
+ matchedLabels.add(matchKey);
6214
+ matches.push({ key: matchKey, label, value: newValue });
6215
+ return `${prefix}(${newValue})${suffix}`;
6216
+ }
6217
+ );
6218
+ text = text.replace(
6219
+ /□([가-힣A-Za-z]+)/g,
6220
+ (match, keyword) => {
6221
+ const normalizedKw = normalizeLabel(keyword);
6222
+ const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
6223
+ if (matchKey === void 0) return match;
6224
+ const val = values.get(matchKey);
6225
+ const isTruthy = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"].includes(val.trim()) || val.trim() === "";
6226
+ if (!isTruthy) return match;
6227
+ matchedLabels.add(matchKey);
6228
+ matches.push({ key: matchKey, label: `\u25A1${keyword}`, value: "\u2611" });
6229
+ return `\u2611${keyword}`;
6230
+ }
6231
+ );
6232
+ text = text.replace(
6233
+ /\(([가-힣A-Za-z]+)[::]\s{1,}\)/g,
6234
+ (match, keyword) => {
6235
+ const normalizedKw = normalizeLabel(keyword);
6236
+ const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
6237
+ if (matchKey === void 0) return match;
6238
+ const newValue = values.get(matchKey);
6239
+ matchedLabels.add(matchKey);
6240
+ matches.push({ key: matchKey, label: keyword, value: newValue });
6241
+ return `(${keyword}\uFF1A${newValue})`;
6242
+ }
6243
+ );
6244
+ return matches.length > 0 ? { text, matches } : null;
6245
+ }
6246
+ function normalizeValues(values) {
6247
+ const map = /* @__PURE__ */ new Map();
6248
+ for (const [label, value] of Object.entries(values)) {
6249
+ map.set(normalizeLabel(label), value);
6250
+ }
6251
+ return map;
6252
+ }
6253
+ function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
6254
+ return [...normalizedValues.keys()].filter((k) => !matchedLabels.has(k)).map((k) => {
6255
+ for (const orig of Object.keys(originalValues)) {
6256
+ if (normalizeLabel(orig) === k) return orig;
6257
+ }
6258
+ return k;
6259
+ });
6260
+ }
6261
+
6262
+ // src/form/filler.ts
6263
+ function fillFormFields(blocks, values) {
6264
+ const cloned = structuredClone(blocks);
6265
+ const filled = [];
6266
+ const matchedLabels = /* @__PURE__ */ new Set();
6267
+ const normalizedValues = normalizeValues(values);
6268
+ const patternFilledCells = /* @__PURE__ */ new Set();
6269
+ for (const block of cloned) {
6270
+ if (block.type !== "table" || !block.table) continue;
6271
+ for (let r = 0; r < block.table.rows; r++) {
6272
+ for (let c = 0; c < block.table.cols; c++) {
6273
+ const cell = block.table.cells[r]?.[c];
6274
+ if (!cell) continue;
6275
+ const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
6276
+ if (result) {
6277
+ cell.text = result.text;
6278
+ patternFilledCells.add(`${r},${c}`);
6279
+ for (const m of result.matches) {
6280
+ filled.push({ label: m.label, value: m.value, row: r, col: c });
6281
+ }
6282
+ }
6283
+ }
6284
+ }
6285
+ }
6286
+ for (const block of cloned) {
6287
+ if (block.type !== "table" || !block.table) continue;
6288
+ fillTable(block.table, normalizedValues, filled, matchedLabels, patternFilledCells);
6289
+ }
6290
+ for (const block of cloned) {
6291
+ if (block.type !== "paragraph" || !block.text) continue;
6292
+ const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
6293
+ if (newText !== block.text) block.text = newText;
6294
+ }
6295
+ const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
6296
+ return { blocks: cloned, filled, unmatched };
6297
+ }
6298
+ function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
6299
+ if (table.cols < 2) return;
6300
+ for (let r = 0; r < table.rows; r++) {
6301
+ for (let c = 0; c < table.cols - 1; c++) {
6302
+ const labelCell = table.cells[r][c];
6303
+ const valueCell = table.cells[r][c + 1];
6304
+ if (!labelCell || !valueCell) continue;
6305
+ if (!isLabelCell(labelCell.text)) continue;
6306
+ if (isKeywordLabel(valueCell.text)) continue;
6307
+ const normalizedCellLabel = normalizeLabel(labelCell.text);
6308
+ if (!normalizedCellLabel) continue;
6309
+ const matchKey = findMatchingKey(normalizedCellLabel, values);
6310
+ if (matchKey === void 0) continue;
6311
+ const newValue = values.get(matchKey);
6312
+ if (patternFilledCells?.has(`${r},${c + 1}`)) {
6313
+ valueCell.text = newValue + " " + valueCell.text;
6314
+ } else {
6315
+ valueCell.text = newValue;
6316
+ }
6317
+ matchedLabels.add(matchKey);
6318
+ filled.push({
6319
+ label: labelCell.text.trim().replace(/[::]\s*$/, ""),
6320
+ value: newValue,
6321
+ row: r,
6322
+ col: c
6323
+ });
6324
+ }
6325
+ }
6326
+ if (table.rows >= 2 && table.cols >= 2) {
6327
+ const headerRow = table.cells[0];
6328
+ const allLabels = headerRow.every((cell) => {
6329
+ const t = cell.text.trim();
6330
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
6331
+ });
6332
+ if (!allLabels) return;
6333
+ for (let r = 1; r < table.rows; r++) {
6334
+ for (let c = 0; c < table.cols; c++) {
6335
+ const headerLabel = normalizeLabel(headerRow[c].text);
6336
+ const matchKey = findMatchingKey(headerLabel, values);
6337
+ if (matchKey === void 0) continue;
6338
+ if (matchedLabels.has(matchKey)) continue;
6339
+ const newValue = values.get(matchKey);
6340
+ table.cells[r][c].text = newValue;
6341
+ matchedLabels.add(matchKey);
6342
+ filled.push({
6343
+ label: headerRow[c].text.trim(),
6344
+ value: newValue,
6345
+ row: r,
6346
+ col: c
6347
+ });
6348
+ }
6349
+ }
6350
+ }
6351
+ }
6352
+ function fillInlineFields(text, values, filled, matchedLabels) {
6353
+ return text.replace(
6354
+ /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g,
6355
+ (match, rawLabel, _oldValue) => {
6356
+ const normalized = normalizeLabel(rawLabel);
6357
+ const matchKey = findMatchingKey(normalized, values);
6358
+ if (matchKey === void 0) return match;
6359
+ const newValue = values.get(matchKey);
6360
+ matchedLabels.add(matchKey);
6361
+ filled.push({
6362
+ label: rawLabel.trim(),
6363
+ value: newValue,
6364
+ row: -1,
6365
+ col: -1
6366
+ });
6367
+ return `${rawLabel}: ${newValue}`;
6368
+ }
6369
+ );
6370
+ }
6371
+
6372
+ // src/form/filler-hwpx.ts
6373
+ var import_jszip5 = __toESM(require("jszip"), 1);
6374
+ var import_xmldom4 = require("@xmldom/xmldom");
6375
+ async function fillHwpx(hwpxBuffer, values) {
6376
+ const zip = await import_jszip5.default.loadAsync(hwpxBuffer);
6377
+ const filled = [];
6378
+ const matchedLabels = /* @__PURE__ */ new Set();
6379
+ const normalizedValues = normalizeValues(values);
6380
+ const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
6381
+ if (sectionFiles.length === 0) {
6382
+ throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
6383
+ }
6384
+ const xmlParser = new import_xmldom4.DOMParser();
6385
+ const xmlSerializer = new import_xmldom4.XMLSerializer();
6386
+ for (const sectionPath of sectionFiles) {
6387
+ const zipEntry = zip.file(sectionPath);
6388
+ if (!zipEntry) continue;
6389
+ const rawXml = await zipEntry.async("text");
6390
+ const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
6391
+ if (!doc.documentElement) continue;
6392
+ let modified = false;
6393
+ const tables = findAllElements(doc.documentElement, "tbl");
6394
+ const cellPatternApplied = /* @__PURE__ */ new Set();
6395
+ for (const tblEl of tables) {
6396
+ const allCells = findAllElements(tblEl, "tc");
6397
+ for (const tcEl of allCells) {
6398
+ const tNodes = collectCellTextNodes(tcEl);
6399
+ const fullText = tNodes.map((n) => n.text).join("");
6400
+ const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
6401
+ if (!result) continue;
6402
+ applyTextReplacements(tNodes, fullText, result.text);
6403
+ cellPatternApplied.add(tcEl);
6404
+ for (const m of result.matches) {
6405
+ filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
6406
+ }
6407
+ modified = true;
6408
+ }
6409
+ }
6410
+ for (const tblEl of tables) {
6411
+ const rows = findDirectChildren(tblEl, "tr");
6412
+ for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
6413
+ const trEl = rows[rowIdx];
6414
+ const cells = findDirectChildren(trEl, "tc");
6415
+ for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
6416
+ const labelText = extractCellText(cells[colIdx]);
6417
+ if (!isLabelCell(labelText)) continue;
6418
+ const valueCell = cells[colIdx + 1];
6419
+ const valueText = extractCellText(valueCell);
6420
+ if (isKeywordLabel(valueText)) continue;
6421
+ const normalizedCellLabel = normalizeLabel(labelText);
6422
+ if (!normalizedCellLabel) continue;
6423
+ const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
6424
+ if (matchKey === void 0) continue;
6425
+ const newValue = normalizedValues.get(matchKey);
6426
+ if (cellPatternApplied.has(valueCell)) {
6427
+ prependCellText(valueCell, newValue);
6428
+ } else {
6429
+ replaceCellText(valueCell, newValue);
6430
+ }
6431
+ matchedLabels.add(matchKey);
6432
+ filled.push({
6433
+ label: labelText.trim().replace(/[::]\s*$/, ""),
6434
+ value: newValue,
6435
+ row: rowIdx,
6436
+ col: colIdx
6437
+ });
6438
+ modified = true;
6439
+ }
6440
+ }
6441
+ if (rows.length >= 2) {
6442
+ const headerCells = findDirectChildren(rows[0], "tc");
6443
+ const allLabels = headerCells.every((cell) => {
6444
+ const t = extractCellText(cell).trim();
6445
+ return t.length > 0 && t.length <= 20 && isLabelCell(t);
6446
+ });
6447
+ if (allLabels) {
6448
+ for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
6449
+ const dataCells = findDirectChildren(rows[rowIdx], "tc");
6450
+ for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
6451
+ const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
6452
+ const matchKey = findMatchingKey(headerLabel, normalizedValues);
6453
+ if (matchKey === void 0) continue;
6454
+ if (matchedLabels.has(matchKey)) continue;
6455
+ const newValue = normalizedValues.get(matchKey);
6456
+ replaceCellText(dataCells[colIdx], newValue);
6457
+ matchedLabels.add(matchKey);
6458
+ filled.push({
6459
+ label: extractCellText(headerCells[colIdx]).trim(),
6460
+ value: newValue,
6461
+ row: rowIdx,
6462
+ col: colIdx
6463
+ });
6464
+ modified = true;
6465
+ }
6466
+ }
6467
+ }
6468
+ }
6469
+ }
6470
+ const allParagraphs = findAllElements(doc.documentElement, "p");
6471
+ for (const pEl of allParagraphs) {
6472
+ if (isInsideTable(pEl)) continue;
6473
+ const tNodes = collectTextNodes(pEl);
6474
+ const fullText = tNodes.map((n) => n.text).join("");
6475
+ const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
6476
+ let match;
6477
+ while ((match = pattern.exec(fullText)) !== null) {
6478
+ const rawLabel = match[1];
6479
+ const normalized = normalizeLabel(rawLabel);
6480
+ const matchKey = findMatchingKey(normalized, normalizedValues);
6481
+ if (matchKey === void 0) continue;
6482
+ const newValue = normalizedValues.get(matchKey);
6483
+ const valueStart = match.index + match[0].length - match[2].length;
6484
+ const valueEnd = match.index + match[0].length;
6485
+ replaceTextRange(tNodes, valueStart, valueEnd, newValue);
6486
+ matchedLabels.add(matchKey);
6487
+ filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
6488
+ modified = true;
6489
+ break;
6490
+ }
6491
+ }
6492
+ if (modified) {
6493
+ const newXml = xmlSerializer.serializeToString(doc);
6494
+ zip.file(sectionPath, newXml);
6495
+ }
6496
+ }
6497
+ const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
6498
+ const buffer = await zip.generateAsync({ type: "arraybuffer" });
6499
+ return { buffer, filled, unmatched };
6500
+ }
6501
+ function localName(el) {
6502
+ return (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
6503
+ }
6504
+ function findAllElements(node, tagLocalName) {
6505
+ const result = [];
6506
+ const walk = (n) => {
6507
+ const children = n.childNodes;
6508
+ if (!children) return;
6509
+ for (let i = 0; i < children.length; i++) {
6510
+ const child = children[i];
6511
+ if (child.nodeType !== 1) continue;
6512
+ if (localName(child) === tagLocalName) result.push(child);
6513
+ walk(child);
6514
+ }
6515
+ };
6516
+ walk(node);
6517
+ return result;
6518
+ }
6519
+ function findDirectChildren(parent, tagLocalName) {
6520
+ const result = [];
6521
+ const children = parent.childNodes;
6522
+ if (!children) return result;
6523
+ for (let i = 0; i < children.length; i++) {
6524
+ const child = children[i];
6525
+ if (child.nodeType === 1 && localName(child) === tagLocalName) {
6526
+ result.push(child);
6527
+ }
6528
+ }
6529
+ return result;
6530
+ }
6531
+ function isInsideTable(el) {
6532
+ let parent = el.parentNode;
6533
+ while (parent) {
6534
+ if (parent.nodeType === 1 && localName(parent) === "tbl") return true;
6535
+ parent = parent.parentNode;
6536
+ }
6537
+ return false;
6538
+ }
6539
+ function extractCellText(tcEl) {
6540
+ const parts = [];
6541
+ const walk = (node) => {
6542
+ const children = node.childNodes;
6543
+ if (!children) return;
6544
+ for (let i = 0; i < children.length; i++) {
6545
+ const child = children[i];
6546
+ if (child.nodeType === 3) {
6547
+ parts.push(child.textContent || "");
6548
+ } else if (child.nodeType === 1) {
6549
+ const tag = localName(child);
6550
+ if (tag === "t") walk(child);
6551
+ else if (tag === "run" || tag === "r" || tag === "p" || tag === "subList") walk(child);
6552
+ else if (tag === "tab") parts.push(" ");
6553
+ else if (tag === "br") parts.push("\n");
6554
+ }
6555
+ }
6556
+ };
6557
+ walk(tcEl);
6558
+ return parts.join("");
6559
+ }
6560
/**
 * Puts `text` plus a single space in front of the existing content of the
 * first "t" element found under `tcEl`. Does nothing when the cell has no
 * "t" element.
 *
 * @param {object} tcEl - Cell element to modify in place.
 * @param {string} text - Text to prepend.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const target = findAllElements(tcEl, "t")[0];
  if (target === undefined) return;
  const previous = target.textContent || "";
  const combined = text + " " + previous;
  // Replace the element's children with one text node holding the result.
  clearChildren(target);
  const replacement = target.ownerDocument.createTextNode(combined);
  target.appendChild(replacement);
}
6568
/**
 * Replaces the text of a table cell with `newValue`.
 *
 * The value is written into the first paragraph of the cell; all other text
 * in the cell (remaining runs of that paragraph and every later paragraph)
 * is blanked. Elements are kept in place, only their text changes.
 *
 * The target run is the first run of the first paragraph that actually
 * contains a "t" element. Writing unconditionally to the very first run
 * would drop the value whenever that run carries no "t" (setRunText is a
 * no-op for such a run) while the following runs are still blanked.
 * If no run in the first paragraph has a "t" element, nothing is written.
 *
 * @param {object} tcEl - Cell element to modify in place.
 * @param {string} newValue - Text to store in the cell.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const paragraphs = findAllElements(tcEl, "p");
  if (paragraphs.length === 0) return;
  const firstP = paragraphs[0];
  const runs = findAllElements(firstP, "run").concat(findAllElements(firstP, "r"));
  if (runs.length > 0) {
    const firstWithText = runs.findIndex((run) => findAllElements(run, "t").length > 0);
    // Fall back to index 0 so the call sequence matches the historical one
    // when no run can hold text at all.
    const targetIndex = firstWithText === -1 ? 0 : firstWithText;
    runs.forEach((run, index) => {
      setRunText(run, index === targetIndex ? newValue : "");
    });
  } else {
    // No runs: write straight into the paragraph's "t" elements.
    const tElements = findAllElements(firstP, "t");
    if (tElements.length > 0) {
      clearChildren(tElements[0]);
      tElements[0].appendChild(tElements[0].ownerDocument.createTextNode(newValue));
      for (let i = 1; i < tElements.length; i++) {
        clearChildren(tElements[i]);
      }
    }
  }
  // Blank every remaining paragraph of the cell.
  for (let i = 1; i < paragraphs.length; i++) {
    const p = paragraphs[i];
    if (!p.parentNode) continue;
    const pRuns = findAllElements(p, "run").concat(findAllElements(p, "r"));
    for (const run of pRuns) setRunText(run, "");
    for (const t of findAllElements(p, "t")) clearChildren(t);
  }
}
6598
/**
 * Sets the text of a run: the first "t" element under `runEl` receives a
 * single text node containing `text`, and every further "t" element is
 * emptied. A run without any "t" element is left untouched.
 *
 * @param {object} runEl - Run element to modify in place.
 * @param {string} text - New text for the run.
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [head, ...others] = findAllElements(runEl, "t");
  if (head === undefined) return;
  clearChildren(head);
  head.appendChild(head.ownerDocument.createTextNode(text));
  others.forEach((extra) => {
    clearChildren(extra);
  });
}
6608
/**
 * Removes every child node of `el`, front to back.
 *
 * @param {object} el - Node exposing `firstChild` and `removeChild`.
 * @returns {void}
 */
function clearChildren(el) {
  let child = el.firstChild;
  while (child) {
    el.removeChild(child);
    child = el.firstChild;
  }
}
6611
/**
 * Lists the "t" elements under a paragraph together with their text and the
 * position of that text within the concatenation of all of them.
 *
 * @param {object} pEl - Paragraph element to scan.
 * @returns {Array<{element: object, text: string, offset: number}>} One
 *   entry per "t" element, in the order findAllElements returns them;
 *   `offset` is the summed length of all preceding texts.
 */
function collectTextNodes(pEl) {
  let cursor = 0;
  return findAllElements(pEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: cursor };
    cursor += text.length;
    return entry;
  });
}
6622
/**
 * Replaces the characters in [globalStart, globalEnd) of the text that is
 * spread over `tNodes` with `newValue`.
 *
 * Offsets are relative to the concatenation of all node texts (the `offset`
 * and `text` fields of each entry). The first node that overlaps the range
 * receives `newValue` in place of its covered part; every further overlapping
 * node just loses its covered part. Nodes outside the range are untouched.
 * The `text` / `offset` fields of the entries are not updated.
 *
 * A zero-width range (globalStart === globalEnd) is a pure insertion. Such a
 * range overlaps no node when it sits at offset 0, on the boundary between
 * two nodes, or at the very end, so it is handled separately: the value is
 * inserted into the first node whose span [start, end] contains the position
 * (the last node if the position lies past the end). Without this, appended
 * or prepended text would be silently dropped.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 *   Entries describing the "t" elements, in text order.
 * @param {number} globalStart - Inclusive start offset.
 * @param {number} globalEnd - Exclusive end offset.
 * @param {string} newValue - Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  // Swap the element's children for one text node holding `value`.
  const writeText = (element, value) => {
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(value));
  };
  let replaced = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = nodeStart + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    // Only the first overlapping node carries the replacement text.
    const inserted = replaced ? "" : newValue;
    writeText(node.element, node.text.slice(0, localStart) + inserted + node.text.slice(localEnd));
    replaced = true;
  }
  // Insertion fallback: only for empty ranges that matched no node above.
  if (replaced || globalEnd > globalStart || newValue === "" || tNodes.length === 0) return;
  const host = tNodes.find((node) => globalStart <= node.offset + node.text.length) ?? tNodes[tNodes.length - 1];
  const at = Math.min(Math.max(0, globalStart - host.offset), host.text.length);
  writeText(host.element, host.text.slice(0, at) + newValue + host.text.slice(at));
}
6646
/**
 * Lists the "t" elements under a table cell together with their text and the
 * position of that text within the concatenation of all of them.
 *
 * @param {object} tcEl - Cell element to scan.
 * @returns {Array<{element: object, text: string, offset: number}>} One
 *   entry per "t" element, in the order findAllElements returns them;
 *   `offset` is the summed length of all preceding texts.
 */
function collectCellTextNodes(tcEl) {
  const entries = [];
  const elements = findAllElements(tcEl, "t");
  let consumed = 0;
  for (let idx = 0; idx < elements.length; idx++) {
    const element = elements[idx];
    const text = element.textContent || "";
    entries.push({ element, text, offset: consumed });
    consumed += text.length;
  }
  return entries;
}
6657
/**
 * Applies the difference between `originalFull` and `replacedFull` to the
 * "t" elements described by `tNodes`.
 *
 * - Identical strings: nothing happens.
 * - Exactly one node: its content becomes `replacedFull`.
 * - Otherwise the common prefix and common suffix of the two strings are
 *   trimmed off and only the differing middle section is handed to
 *   replaceTextRange, so untouched nodes keep their content.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 *   Entries describing the "t" elements, in text order.
 * @param {string} originalFull - Concatenated text before the change.
 * @param {string} replacedFull - Concatenated text after the change.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;
  if (tNodes.length === 1) {
    const only = tNodes[0].element;
    clearChildren(only);
    only.appendChild(only.ownerDocument.createTextNode(replacedFull));
    return;
  }
  // Length of the shared prefix.
  const shortest = Math.min(originalFull.length, replacedFull.length);
  let prefix = 0;
  while (prefix < shortest && originalFull[prefix] === replacedFull[prefix]) {
    prefix += 1;
  }
  // Walk back from both ends while the tails agree, never crossing the prefix.
  let origEnd = originalFull.length;
  let replEnd = replacedFull.length;
  while (origEnd > prefix && replEnd > prefix && originalFull[origEnd - 1] === replacedFull[replEnd - 1]) {
    origEnd -= 1;
    replEnd -= 1;
  }
  replaceTextRange(tNodes, prefix, origEnd, replacedFull.slice(prefix, replEnd));
}
6679
+
6680
+ // src/hwpx/generator.ts
6681
+ var import_jszip6 = __toESM(require("jszip"), 1);
6682
+ var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section"; // NS_*: XML namespace URI strings; presumably emitted into the generated HWPX parts - confirm against the generate* helpers
6683
+ var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
6684
+ var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
6685
+ var NS_OPF = "http://www.idpf.org/2007/opf/";
6686
+ var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
6687
+ var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";
6688
+ var CHAR_NORMAL = 0; // CHAR_*: consecutive integer ids 0-8; presumably character-shape ids referenced by generated runs - TODO confirm
6689
+ var CHAR_BOLD = 1;
6690
+ var CHAR_ITALIC = 2;
6691
+ var CHAR_BOLD_ITALIC = 3;
6692
+ var CHAR_CODE = 4;
6693
+ var CHAR_H1 = 5;
6694
+ var CHAR_H2 = 6;
6695
+ var CHAR_H3 = 7;
6696
+ var CHAR_H4 = 8;
6697
+ var PARA_NORMAL = 0; // PARA_*: consecutive integer ids 0-7; presumably paragraph-shape ids referenced by generated paragraphs - TODO confirm
6698
+ var PARA_H1 = 1;
6699
+ var PARA_H2 = 2;
6700
+ var PARA_H3 = 3;
6701
+ var PARA_H4 = 4;
6702
+ var PARA_CODE = 5;
6703
+ var PARA_QUOTE = 6;
6704
+ var PARA_LIST = 7;
6705
/**
 * Builds an HWPX archive from Markdown text.
 *
 * The Markdown is parsed into blocks, rendered to section XML, and written
 * into a new zip together with the mimetype entry (stored uncompressed),
 * the container file, the manifest and the header.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Promise<ArrayBuffer>} The generated archive.
 */
async function markdownToHwpx(markdown) {
  const sectionXml = blocksToSectionXml(parseMarkdownToBlocks(markdown));
  const archive = new import_jszip6.default();
  archive.file("mimetype", "application/hwp+zip", { compression: "STORE" });
  // Each producer is invoked right before its entry is added, in this order.
  const parts = [
    ["META-INF/container.xml", generateContainerXml],
    ["Contents/content.hpf", generateManifest],
    ["Contents/header.xml", generateHeaderXml],
    ["Contents/section0.xml", () => sectionXml],
  ];
  for (const [path, produce] of parts) {
    archive.file(path, produce());
  }
  const bytes = await archive.generateAsync({ type: "arraybuffer" });
  return bytes;
}
6716
+ function parseMarkdownToBlocks(md) {
6717
+ const lines = md.split("\n");
6718
+ const blocks = [];
6719
+ let i = 0;
6720
+ while (i < lines.length) {
6721
+ const line = lines[i];
6722
+ if (!line.trim()) {
6723
+ i++;
6724
+ continue;
6725
+ }
6726
+ const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
6727
+ if (fenceMatch) {
6728
+ const fence = fenceMatch[1];
6729
+ const lang = fenceMatch[2].trim();
6730
+ const codeLines = [];
6731
+ i++;
6732
+ while (i < lines.length && !lines[i].startsWith(fence)) {
6733
+ codeLines.push(lines[i]);
6734
+ i++;
6735
+ }
6736
+ if (i < lines.length) i++;
6737
+ blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
6738
+ continue;
6360
6739
  }
6361
6740
  if (/^(\*{3,}|-{3,}|_{3,})\s*$/.test(line.trim())) {
6362
6741
  blocks.push({ type: "hr" });
@@ -6686,6 +7065,183 @@ function blocksToSectionXml(blocks) {
6686
7065
  </hs:sec>`;
6687
7066
  }
6688
7067
 
7068
+ // src/diff/text-diff.ts
7069
/**
 * Similarity of two strings in [0, 1], derived from the edit distance:
 * 1 - levenshtein(a, b) / max(length).
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 1 for identical inputs, 0 when exactly one side is
 *   empty/falsy, otherwise the normalized distance complement.
 */
function similarity(a, b) {
  if (a === b) return 1;
  if (!(a && b)) return 0;
  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - levenshtein(a, b) / longest;
}
7076
/**
 * Similarity of two strings after both have been passed through normalize().
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Result of similarity() on the normalized inputs.
 */
function normalizedSimilarity(a, b) {
  const left = normalize(a);
  const right = normalize(b);
  return similarity(left, right);
}
7079
/**
 * Collapses every run of whitespace to a single space and strips leading
 * and trailing whitespace.
 *
 * @param {string} s - Input text.
 * @returns {string} Whitespace-normalized text.
 */
function normalize(s) {
  // Splitting on whitespace runs leaves empty strings at the edges when the
  // input starts or ends with whitespace; dropping them equals trimming.
  const words = s.split(/\s+/).filter((word) => word !== "");
  return words.join(" ");
}
7082
// Combined input length above which levenshtein() switches to an estimate.
var MAX_LEVENSHTEIN_LEN = 1e4;
/**
 * Edit distance between two strings (insert / delete / substitute, cost 1).
 *
 * When the combined length exceeds MAX_LEVENSHTEIN_LEN the exact computation
 * is skipped: the first (up to) 500 positions are compared to get a mismatch
 * rate, and the result is the length difference plus that rate applied to
 * the shorter length. Otherwise a two-row dynamic program is run with rows
 * as wide as the shorter string.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Exact distance, or the sampled estimate for long inputs.
 */
function levenshtein(a, b) {
  const lenA = a.length;
  const lenB = b.length;
  if (lenA + lenB > MAX_LEVENSHTEIN_LEN) {
    const sampleLen = Math.min(500, lenA, lenB);
    let mismatches = 0;
    for (let k = 0; k < sampleLen; k++) {
      if (a[k] !== b[k]) mismatches += 1;
    }
    const mismatchRate = sampleLen > 0 ? mismatches / sampleLen : 1;
    return Math.abs(lenA - lenB) + Math.round(Math.min(lenA, lenB) * mismatchRate);
  }
  // Keep the rows as short as possible: index them by the shorter string.
  const [shorter, longer] = lenA > lenB ? [b, a] : [a, b];
  const width = shorter.length;
  let above = Array.from({ length: width + 1 }, (_, k) => k);
  let row = new Array(width + 1);
  for (let j = 1; j <= longer.length; j++) {
    const ch = longer[j - 1];
    row[0] = j;
    for (let i = 1; i <= width; i++) {
      row[i] = shorter[i - 1] === ch
        ? above[i - 1]
        : 1 + Math.min(above[i - 1], above[i], row[i - 1]);
    }
    // The finished row becomes the reference row for the next character.
    const spare = above;
    above = row;
    row = spare;
  }
  return above[width];
}
7110
+
7111
+ // src/diff/compare.ts
7112
+ var SIMILARITY_THRESHOLD = 0.4; // minimum blockSimilarity score at which alignBlocks pairs two blocks
7113
/**
 * Parses two documents and returns the block-level diff between them.
 *
 * Both inputs are parsed concurrently with the same options.
 *
 * @param {*} bufferA - First document, passed to parse().
 * @param {*} bufferB - Second document, passed to parse().
 * @param {object} [options] - Options forwarded to parse() for both inputs.
 * @returns {Promise<object>} Result of diffBlocks() on the parsed blocks.
 * @throws {Error} If either document fails to parse.
 */
async function compare(bufferA, bufferB, options) {
  const pending = [bufferA, bufferB].map((source) => parse(source, options));
  const [resultA, resultB] = await Promise.all(pending);
  if (!resultA.success) {
    throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
  }
  if (!resultB.success) {
    throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
  }
  return diffBlocks(resultA.blocks, resultB.blocks);
}
7122
/**
 * Computes a block-level diff between two block lists.
 *
 * Blocks are first paired by alignBlocks(). A pair scoring at least 0.99 is
 * reported as "unchanged" (similarity fixed to 1), any other pair as
 * "modified" with its score; modified table pairs additionally carry
 * `cellDiffs`. One-sided entries become "removed" or "added".
 *
 * @param {Array<object>} blocksA - Blocks of the "before" document.
 * @param {Array<object>} blocksB - Blocks of the "after" document.
 * @returns {{stats: {added: number, removed: number, modified: number, unchanged: number}, diffs: Array<object>}}
 *   Per-type counters and the ordered list of diff entries.
 */
function diffBlocks(blocksA, blocksB) {
  const pairs = alignBlocks(blocksA, blocksB);
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];
  for (const [before, after] of pairs) {
    if (before && after) {
      const score = blockSimilarity(before, after);
      if (score >= 0.99) {
        diffs.push({ type: "unchanged", before, after, similarity: 1 });
        stats.unchanged += 1;
        continue;
      }
      const entry = { type: "modified", before, after, similarity: score };
      const bothTables = before.type === "table" && after.type === "table" && before.table && after.table;
      if (bothTables) {
        entry.cellDiffs = diffTableCells(before.table, after.table);
      }
      diffs.push(entry);
      stats.modified += 1;
      continue;
    }
    if (before) {
      diffs.push({ type: "removed", before });
      stats.removed += 1;
      continue;
    }
    if (after) {
      diffs.push({ type: "added", after });
      stats.added += 1;
    }
  }
  return { stats, diffs };
}
7150
/**
 * Pairs up the blocks of two lists while preserving their order.
 *
 * Two blocks may be paired when blockSimilarity() is at least
 * SIMILARITY_THRESHOLD. A longest-common-subsequence style table selects the
 * pairs; blocks left over on either side are emitted alone, with `null` in
 * the other slot. When the table would exceed 1e7 cells the work is handed
 * to fallbackAlign() instead.
 *
 * @param {Array<object>} a - Blocks of the first document.
 * @param {Array<object>} b - Blocks of the second document.
 * @returns {Array<Array>} Ordered [blockA | null, blockB | null] pairs.
 */
function alignBlocks(a, b) {
  const lenA = a.length;
  const lenB = b.length;
  if (lenA * lenB > 1e7) return fallbackAlign(a, b);

  // Similarity is needed in both passes below, so each score is memoized.
  const scores = new Map();
  const isMatch = (ia, ib) => {
    const slot = `${ia},${ib}`;
    let score = scores.get(slot);
    if (score === void 0) {
      score = blockSimilarity(a[ia], b[ib]);
      scores.set(slot, score);
    }
    return score >= SIMILARITY_THRESHOLD;
  };

  // Forward pass: table[r][c] = number of pairs for a[0..r) and b[0..c).
  const table = [];
  for (let r = 0; r <= lenA; r++) {
    table.push(new Array(lenB + 1).fill(0));
  }
  for (let r = 1; r <= lenA; r++) {
    for (let c = 1; c <= lenB; c++) {
      table[r][c] = isMatch(r - 1, c - 1)
        ? table[r - 1][c - 1] + 1
        : Math.max(table[r - 1][c], table[r][c - 1]);
    }
  }

  // Backward pass: recover the chosen pairs, last pair first.
  const matched = [];
  let i = lenA;
  let j = lenB;
  while (i > 0 && j > 0) {
    if (isMatch(i - 1, j - 1) && table[i][j] === table[i - 1][j - 1] + 1) {
      matched.push([i - 1, j - 1]);
      i -= 1;
      j -= 1;
    } else if (table[i - 1][j] >= table[i][j - 1]) {
      i -= 1;
    } else {
      j -= 1;
    }
  }

  // Merge: unpaired blocks before each pair go out alone, a-side first.
  const aligned = [];
  let nextA = 0;
  let nextB = 0;
  for (let k = matched.length - 1; k >= 0; k--) {
    const [ia, ib] = matched[k];
    for (; nextA < ia; nextA++) aligned.push([a[nextA], null]);
    for (; nextB < ib; nextB++) aligned.push([null, b[nextB]]);
    aligned.push([a[nextA], b[nextB]]);
    nextA += 1;
    nextB += 1;
  }
  for (; nextA < lenA; nextA++) aligned.push([a[nextA], null]);
  for (; nextB < lenB; nextB++) aligned.push([null, b[nextB]]);
  return aligned;
}
7198
/**
 * Pairs blocks purely by position: index i of `a` with index i of `b`.
 * Positions missing on one side (or holding a falsy value) yield `null`.
 *
 * @param {Array} a - Blocks of the first document.
 * @param {Array} b - Blocks of the second document.
 * @returns {Array<Array>} One [a[i] | null, b[i] | null] pair per index up
 *   to the longer length.
 */
function fallbackAlign(a, b) {
  const pairCount = Math.max(a.length, b.length);
  return Array.from({ length: pairCount }, (_, idx) => [a[idx] || null, b[idx] || null]);
}
7206
/**
 * Scores how alike two blocks are.
 *
 * - Different `type`: 0.
 * - Both blocks define `text`: whitespace-normalized text similarity.
 * - Both are tables carrying a `table` object: tableSimilarity().
 * - Same type with nothing further to compare: 1.
 *
 * @param {object} a - First block.
 * @param {object} b - Second block.
 * @returns {number} Similarity score.
 */
function blockSimilarity(a, b) {
  if (a.type !== b.type) return 0;
  const bothHaveText = a.text !== void 0 && b.text !== void 0;
  if (bothHaveText) {
    return normalizedSimilarity(a.text || "", b.text || "");
  }
  const bothTables = a.type === "table" && a.table && b.table;
  if (bothTables) {
    return tableSimilarity(a.table, b.table);
  }
  // Types are known to be equal here (checked by the first guard).
  return 1;
}
7217
/**
 * Scores how alike two tables are: 30% from how close their cell counts
 * (rows * cols) are and 70% from the similarity of all cell texts joined
 * with spaces.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a
 *   First table.
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b
 *   Second table.
 * @returns {number} Weighted similarity score.
 */
function tableSimilarity(a, b) {
  const areaA = a.rows * a.cols;
  const areaB = b.rows * b.cols;
  // The trailing 1 guards the division when both tables have zero area.
  const dimSim = 1 - Math.abs(areaA - areaB) / Math.max(areaA, areaB, 1);
  const joinedText = (table) => table.cells.flat().map((cell) => cell.text).join(" ");
  const contentSim = normalizedSimilarity(joinedText(a), joinedText(b));
  return dimSim * 0.3 + contentSim * 0.7;
}
7224
/**
 * Compares two tables cell by cell.
 *
 * The result grid spans the larger row and column count of the two tables.
 * For each position the cell texts are compared:
 *   - present only in `b`  -> "added"
 *   - present only in `a`  -> "removed"
 *   - equal text, or missing from both tables -> "unchanged"
 *   - otherwise            -> "modified"
 *
 * A cell counts as missing when the position lies outside the table's
 * declared rows/cols or when the `cells` array has no entry there (ragged
 * rows), so such tables no longer raise a TypeError.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} a
 *   Table from the "before" document.
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} b
 *   Table from the "after" document.
 * @returns {Array<Array<{type: string, before: (string|undefined), after: (string|undefined)}>>}
 *   Grid of per-cell diff entries.
 */
function diffTableCells(a, b) {
  const maxRows = Math.max(a.rows, b.rows);
  const maxCols = Math.max(a.cols, b.cols);
  // Text at (r, c), or undefined when the table has no cell there.
  const textAt = (table, r, c) => (r < table.rows && c < table.cols ? table.cells[r]?.[c]?.text : void 0);
  const result = [];
  for (let r = 0; r < maxRows; r++) {
    const row = [];
    for (let c = 0; c < maxCols; c++) {
      const cellA = textAt(a, r, c);
      const cellB = textAt(b, r, c);
      let type;
      if (cellA === cellB) type = "unchanged";
      else if (cellA === void 0) type = "added";
      else if (cellB === void 0) type = "removed";
      else type = "modified";
      row.push({ type, before: cellA, after: cellB });
    }
    result.push(row);
  }
  return result;
}
7244
+
6689
7245
  // src/index.ts
6690
7246
  async function parse(input, options) {
6691
7247
  let buffer;
@@ -6762,6 +7318,45 @@ async function parseDocx(buffer, options) {
6762
7318
  return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
6763
7319
  }
6764
7320
  }
7321
/**
 * Fills the form fields of a document with `values` and returns the result
 * in the requested output format.
 *
 * Input handling: a string is treated as a file path and read from disk, a
 * Node Buffer is converted with toArrayBuffer(), anything else is used as
 * the buffer directly.
 *
 * Output formats:
 *   - "hwpx-preserve": the input must be detected as HWPX (both by
 *     detectFormat and detectZipFormat); the original archive is filled in
 *     place via fillHwpx().
 *   - "hwpx": the document is parsed, filled, rendered to Markdown and
 *     converted with markdownToHwpx().
 *   - anything else (default "markdown"): the filled Markdown is returned.
 *
 * @param {*} input - File path, Buffer, or buffer-like input.
 * @param {object} values - Field values passed to the fill routines.
 * @param {string} [outputFormat="markdown"] - Requested output format.
 * @returns {Promise<{output: *, format: string, fill: object}>} The produced
 *   output, the format label, and the fill report.
 * @throws {Error} If "hwpx-preserve" is requested for a non-HWPX input, or
 *   if parsing the document fails.
 */
async function fillForm(input, values, outputFormat = "markdown") {
  let buffer = input;
  if (typeof input === "string") {
    const fileBytes = await (0, import_promises.readFile)(input);
    buffer = toArrayBuffer(fileBytes);
  } else if (Buffer.isBuffer(input)) {
    buffer = toArrayBuffer(input);
  }

  if (outputFormat === "hwpx-preserve") {
    // The zip-level check only runs once the outer format already says HWPX;
    // whichever detector disagrees supplies the name shown in the error.
    const format = detectFormat(buffer);
    const detected = format === "hwpx" ? await detectZipFormat(buffer) : format;
    if (detected !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${detected})`);
    }
    const hwpxResult = await fillHwpx(buffer, values);
    const { buffer: output, filled, unmatched } = hwpxResult;
    return { output, format: "hwpx-preserve", fill: { filled, unmatched } };
  }

  const parsed = await parse(buffer);
  if (!parsed.success) {
    throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
  }
  const fill = fillFormFields(parsed.blocks, values);
  const markdown = blocksToMarkdown(fill.blocks);
  if (outputFormat !== "hwpx") {
    return { output: markdown, format: "markdown", fill };
  }
  const hwpxBuffer = await markdownToHwpx(markdown);
  return { output: hwpxBuffer, format: "hwpx", fill };
}
6765
7360
  // Annotate the CommonJS export names for ESM import in node:
6766
7361
  0 && (module.exports = {
6767
7362
  VERSION,
@@ -6771,7 +7366,11 @@ async function parseDocx(buffer, options) {
6771
7366
  detectZipFormat,
6772
7367
  diffBlocks,
6773
7368
  extractFormFields,
7369
+ fillForm,
7370
+ fillFormFields,
7371
+ fillHwpx,
6774
7372
  isHwpxFile,
7373
+ isLabelCell,
6775
7374
  isOldHwpFile,
6776
7375
  isPdfFile,
6777
7376
  isZipFile,