kordoc 2.2.3 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-AIG7SDWU.js → chunk-SY2RFVLW.js} +1051 -149
- package/dist/chunk-SY2RFVLW.js.map +1 -0
- package/dist/cli.js +149 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +799 -238
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +97 -7
- package/dist/index.d.ts +97 -7
- package/dist/index.js +795 -238
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +126 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{watch-H672QAW2.js → watch-5P7DJ3HG.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-AIG7SDWU.js.map +0 -1
- package/dist/{watch-H672QAW2.js.map → watch-5P7DJ3HG.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -121,7 +121,11 @@ __export(index_exports, {
|
|
|
121
121
|
detectZipFormat: () => detectZipFormat,
|
|
122
122
|
diffBlocks: () => diffBlocks,
|
|
123
123
|
extractFormFields: () => extractFormFields,
|
|
124
|
+
fillForm: () => fillForm,
|
|
125
|
+
fillFormFields: () => fillFormFields,
|
|
126
|
+
fillHwpx: () => fillHwpx,
|
|
124
127
|
isHwpxFile: () => isHwpxFile,
|
|
128
|
+
isLabelCell: () => isLabelCell,
|
|
125
129
|
isOldHwpFile: () => isOldHwpFile,
|
|
126
130
|
isPdfFile: () => isPdfFile,
|
|
127
131
|
isZipFile: () => isZipFile,
|
|
@@ -183,7 +187,7 @@ var import_zlib = require("zlib");
|
|
|
183
187
|
var import_xmldom = require("@xmldom/xmldom");
|
|
184
188
|
|
|
185
189
|
// src/utils.ts
|
|
186
|
-
var VERSION = true ? "2.2.3" : "0.0.0-dev";
|
|
190
|
+
var VERSION = true ? "2.2.4" : "0.0.0-dev";
|
|
187
191
|
function toArrayBuffer(buf) {
|
|
188
192
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
189
193
|
return buf.buffer;
|
|
@@ -5608,21 +5612,21 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
5608
5612
|
var import_jszip4 = __toESM(require("jszip"), 1);
|
|
5609
5613
|
var import_xmldom3 = require("@xmldom/xmldom");
|
|
5610
5614
|
var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
|
|
5611
|
-
function getChildElements(parent, localName) {
|
|
5615
|
+
function getChildElements(parent, localName2) {
|
|
5612
5616
|
const result = [];
|
|
5613
5617
|
const children = parent.childNodes;
|
|
5614
5618
|
for (let i = 0; i < children.length; i++) {
|
|
5615
5619
|
const node = children[i];
|
|
5616
5620
|
if (node.nodeType === 1) {
|
|
5617
5621
|
const el = node;
|
|
5618
|
-
if (el.localName ===
|
|
5622
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5619
5623
|
result.push(el);
|
|
5620
5624
|
}
|
|
5621
5625
|
}
|
|
5622
5626
|
}
|
|
5623
5627
|
return result;
|
|
5624
5628
|
}
|
|
5625
|
-
function findElements(parent, localName) {
|
|
5629
|
+
function findElements(parent, localName2) {
|
|
5626
5630
|
const result = [];
|
|
5627
5631
|
const walk = (node) => {
|
|
5628
5632
|
const children = node.childNodes;
|
|
@@ -5630,7 +5634,7 @@ function findElements(parent, localName) {
|
|
|
5630
5634
|
const child = children[i];
|
|
5631
5635
|
if (child.nodeType === 1) {
|
|
5632
5636
|
const el = child;
|
|
5633
|
-
if (el.localName ===
|
|
5637
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5634
5638
|
result.push(el);
|
|
5635
5639
|
}
|
|
5636
5640
|
walk(el);
|
|
@@ -5640,11 +5644,11 @@ function findElements(parent, localName) {
|
|
|
5640
5644
|
walk(parent);
|
|
5641
5645
|
return result;
|
|
5642
5646
|
}
|
|
5643
|
-
function getAttr(el, localName) {
|
|
5647
|
+
function getAttr(el, localName2) {
|
|
5644
5648
|
const attrs = el.attributes;
|
|
5645
5649
|
for (let i = 0; i < attrs.length; i++) {
|
|
5646
5650
|
const attr = attrs[i];
|
|
5647
|
-
if (attr.localName ===
|
|
5651
|
+
if (attr.localName === localName2 || attr.name === localName2) return attr.value;
|
|
5648
5652
|
}
|
|
5649
5653
|
return null;
|
|
5650
5654
|
}
|
|
@@ -5991,11 +5995,11 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5991
5995
|
const node = children[i];
|
|
5992
5996
|
if (node.nodeType !== 1) continue;
|
|
5993
5997
|
const el = node;
|
|
5994
|
-
const
|
|
5995
|
-
if (
|
|
5998
|
+
const localName2 = el.localName ?? el.tagName?.split(":").pop();
|
|
5999
|
+
if (localName2 === "p") {
|
|
5996
6000
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
5997
6001
|
if (block) blocks.push(block);
|
|
5998
|
-
} else if (
|
|
6002
|
+
} else if (localName2 === "tbl") {
|
|
5999
6003
|
const block = parseTable(el, styles, numbering, footnotes, rels);
|
|
6000
6004
|
if (block) blocks.push(block);
|
|
6001
6005
|
}
|
|
@@ -6033,183 +6037,6 @@ async function parseDocxDocument(buffer, options) {
|
|
|
6033
6037
|
};
|
|
6034
6038
|
}
|
|
6035
6039
|
|
|
6036
|
-
// src/diff/text-diff.ts
|
|
6037
|
-
function similarity(a, b) {
|
|
6038
|
-
if (a === b) return 1;
|
|
6039
|
-
if (!a || !b) return 0;
|
|
6040
|
-
const maxLen = Math.max(a.length, b.length);
|
|
6041
|
-
if (maxLen === 0) return 1;
|
|
6042
|
-
return 1 - levenshtein(a, b) / maxLen;
|
|
6043
|
-
}
|
|
6044
|
-
function normalizedSimilarity(a, b) {
|
|
6045
|
-
return similarity(normalize(a), normalize(b));
|
|
6046
|
-
}
|
|
6047
|
-
function normalize(s) {
|
|
6048
|
-
return s.replace(/\s+/g, " ").trim();
|
|
6049
|
-
}
|
|
6050
|
-
var MAX_LEVENSHTEIN_LEN = 1e4;
|
|
6051
|
-
function levenshtein(a, b) {
|
|
6052
|
-
if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
|
|
6053
|
-
const sampleLen = Math.min(500, a.length, b.length);
|
|
6054
|
-
let diffs = 0;
|
|
6055
|
-
for (let i = 0; i < sampleLen; i++) if (a[i] !== b[i]) diffs++;
|
|
6056
|
-
const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
|
|
6057
|
-
return Math.abs(a.length - b.length) + Math.round(Math.min(a.length, b.length) * sampleRate);
|
|
6058
|
-
}
|
|
6059
|
-
if (a.length > b.length) [a, b] = [b, a];
|
|
6060
|
-
const m = a.length;
|
|
6061
|
-
const n = b.length;
|
|
6062
|
-
let prev = Array.from({ length: m + 1 }, (_, i) => i);
|
|
6063
|
-
let curr = new Array(m + 1);
|
|
6064
|
-
for (let j = 1; j <= n; j++) {
|
|
6065
|
-
curr[0] = j;
|
|
6066
|
-
for (let i = 1; i <= m; i++) {
|
|
6067
|
-
if (a[i - 1] === b[j - 1]) {
|
|
6068
|
-
curr[i] = prev[i - 1];
|
|
6069
|
-
} else {
|
|
6070
|
-
curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
|
|
6071
|
-
}
|
|
6072
|
-
}
|
|
6073
|
-
;
|
|
6074
|
-
[prev, curr] = [curr, prev];
|
|
6075
|
-
}
|
|
6076
|
-
return prev[m];
|
|
6077
|
-
}
|
|
6078
|
-
|
|
6079
|
-
// src/diff/compare.ts
|
|
6080
|
-
var SIMILARITY_THRESHOLD = 0.4;
|
|
6081
|
-
async function compare(bufferA, bufferB, options) {
|
|
6082
|
-
const [resultA, resultB] = await Promise.all([
|
|
6083
|
-
parse(bufferA, options),
|
|
6084
|
-
parse(bufferB, options)
|
|
6085
|
-
]);
|
|
6086
|
-
if (!resultA.success) throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
|
|
6087
|
-
if (!resultB.success) throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
|
|
6088
|
-
return diffBlocks(resultA.blocks, resultB.blocks);
|
|
6089
|
-
}
|
|
6090
|
-
function diffBlocks(blocksA, blocksB) {
|
|
6091
|
-
const aligned = alignBlocks(blocksA, blocksB);
|
|
6092
|
-
const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
|
|
6093
|
-
const diffs = [];
|
|
6094
|
-
for (const [a, b] of aligned) {
|
|
6095
|
-
if (a && b) {
|
|
6096
|
-
const sim = blockSimilarity(a, b);
|
|
6097
|
-
if (sim >= 0.99) {
|
|
6098
|
-
diffs.push({ type: "unchanged", before: a, after: b, similarity: 1 });
|
|
6099
|
-
stats.unchanged++;
|
|
6100
|
-
} else {
|
|
6101
|
-
const diff = { type: "modified", before: a, after: b, similarity: sim };
|
|
6102
|
-
if (a.type === "table" && b.type === "table" && a.table && b.table) {
|
|
6103
|
-
diff.cellDiffs = diffTableCells(a.table, b.table);
|
|
6104
|
-
}
|
|
6105
|
-
diffs.push(diff);
|
|
6106
|
-
stats.modified++;
|
|
6107
|
-
}
|
|
6108
|
-
} else if (a) {
|
|
6109
|
-
diffs.push({ type: "removed", before: a });
|
|
6110
|
-
stats.removed++;
|
|
6111
|
-
} else if (b) {
|
|
6112
|
-
diffs.push({ type: "added", after: b });
|
|
6113
|
-
stats.added++;
|
|
6114
|
-
}
|
|
6115
|
-
}
|
|
6116
|
-
return { stats, diffs };
|
|
6117
|
-
}
|
|
6118
|
-
function alignBlocks(a, b) {
|
|
6119
|
-
const m = a.length, n = b.length;
|
|
6120
|
-
if (m * n > 1e7) return fallbackAlign(a, b);
|
|
6121
|
-
const simCache = /* @__PURE__ */ new Map();
|
|
6122
|
-
const getSim = (i2, j2) => {
|
|
6123
|
-
const key = `${i2},${j2}`;
|
|
6124
|
-
let v = simCache.get(key);
|
|
6125
|
-
if (v === void 0) {
|
|
6126
|
-
v = blockSimilarity(a[i2], b[j2]);
|
|
6127
|
-
simCache.set(key, v);
|
|
6128
|
-
}
|
|
6129
|
-
return v;
|
|
6130
|
-
};
|
|
6131
|
-
const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
|
|
6132
|
-
for (let i2 = 1; i2 <= m; i2++) {
|
|
6133
|
-
for (let j2 = 1; j2 <= n; j2++) {
|
|
6134
|
-
if (getSim(i2 - 1, j2 - 1) >= SIMILARITY_THRESHOLD) {
|
|
6135
|
-
dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
|
|
6136
|
-
} else {
|
|
6137
|
-
dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
|
|
6138
|
-
}
|
|
6139
|
-
}
|
|
6140
|
-
}
|
|
6141
|
-
const pairs = [];
|
|
6142
|
-
let i = m, j = n;
|
|
6143
|
-
while (i > 0 && j > 0) {
|
|
6144
|
-
if (getSim(i - 1, j - 1) >= SIMILARITY_THRESHOLD && dp[i][j] === dp[i - 1][j - 1] + 1) {
|
|
6145
|
-
pairs.push([i - 1, j - 1]);
|
|
6146
|
-
i--;
|
|
6147
|
-
j--;
|
|
6148
|
-
} else if (dp[i - 1][j] >= dp[i][j - 1]) {
|
|
6149
|
-
i--;
|
|
6150
|
-
} else {
|
|
6151
|
-
j--;
|
|
6152
|
-
}
|
|
6153
|
-
}
|
|
6154
|
-
pairs.reverse();
|
|
6155
|
-
const result = [];
|
|
6156
|
-
let ai = 0, bi = 0;
|
|
6157
|
-
for (const [pi, pj] of pairs) {
|
|
6158
|
-
while (ai < pi) result.push([a[ai++], null]);
|
|
6159
|
-
while (bi < pj) result.push([null, b[bi++]]);
|
|
6160
|
-
result.push([a[ai++], b[bi++]]);
|
|
6161
|
-
}
|
|
6162
|
-
while (ai < m) result.push([a[ai++], null]);
|
|
6163
|
-
while (bi < n) result.push([null, b[bi++]]);
|
|
6164
|
-
return result;
|
|
6165
|
-
}
|
|
6166
|
-
function fallbackAlign(a, b) {
|
|
6167
|
-
const result = [];
|
|
6168
|
-
const len = Math.max(a.length, b.length);
|
|
6169
|
-
for (let i = 0; i < len; i++) {
|
|
6170
|
-
result.push([a[i] || null, b[i] || null]);
|
|
6171
|
-
}
|
|
6172
|
-
return result;
|
|
6173
|
-
}
|
|
6174
|
-
function blockSimilarity(a, b) {
|
|
6175
|
-
if (a.type !== b.type) return 0;
|
|
6176
|
-
if (a.text !== void 0 && b.text !== void 0) {
|
|
6177
|
-
return normalizedSimilarity(a.text || "", b.text || "");
|
|
6178
|
-
}
|
|
6179
|
-
if (a.type === "table" && a.table && b.table) {
|
|
6180
|
-
return tableSimilarity(a.table, b.table);
|
|
6181
|
-
}
|
|
6182
|
-
if (a.type === b.type) return 1;
|
|
6183
|
-
return 0;
|
|
6184
|
-
}
|
|
6185
|
-
function tableSimilarity(a, b) {
|
|
6186
|
-
const dimSim = 1 - Math.abs(a.rows * a.cols - b.rows * b.cols) / Math.max(a.rows * a.cols, b.rows * b.cols, 1);
|
|
6187
|
-
const textsA = a.cells.flat().map((c) => c.text).join(" ");
|
|
6188
|
-
const textsB = b.cells.flat().map((c) => c.text).join(" ");
|
|
6189
|
-
const contentSim = normalizedSimilarity(textsA, textsB);
|
|
6190
|
-
return dimSim * 0.3 + contentSim * 0.7;
|
|
6191
|
-
}
|
|
6192
|
-
function diffTableCells(a, b) {
|
|
6193
|
-
const maxRows = Math.max(a.rows, b.rows);
|
|
6194
|
-
const maxCols = Math.max(a.cols, b.cols);
|
|
6195
|
-
const result = [];
|
|
6196
|
-
for (let r = 0; r < maxRows; r++) {
|
|
6197
|
-
const row = [];
|
|
6198
|
-
for (let c = 0; c < maxCols; c++) {
|
|
6199
|
-
const cellA = r < a.rows && c < a.cols ? a.cells[r][c].text : void 0;
|
|
6200
|
-
const cellB = r < b.rows && c < b.cols ? b.cells[r][c].text : void 0;
|
|
6201
|
-
let type;
|
|
6202
|
-
if (cellA === void 0) type = "added";
|
|
6203
|
-
else if (cellB === void 0) type = "removed";
|
|
6204
|
-
else if (cellA === cellB) type = "unchanged";
|
|
6205
|
-
else type = "modified";
|
|
6206
|
-
row.push({ type, before: cellA, after: cellB });
|
|
6207
|
-
}
|
|
6208
|
-
result.push(row);
|
|
6209
|
-
}
|
|
6210
|
-
return result;
|
|
6211
|
-
}
|
|
6212
|
-
|
|
6213
6040
|
// src/form/recognize.ts
|
|
6214
6041
|
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
|
|
6215
6042
|
"\uC131\uBA85",
|
|
@@ -6250,15 +6077,20 @@ var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
|
|
|
6250
6077
|
"\uB2E8\uAC00",
|
|
6251
6078
|
"\uD569\uACC4",
|
|
6252
6079
|
"\uACC4",
|
|
6253
|
-
"\uC18C\uACC4"
|
|
6080
|
+
"\uC18C\uACC4",
|
|
6081
|
+
"\uB4F1\uB85D\uAE30\uC900\uC9C0",
|
|
6082
|
+
"\uBCF8\uC801",
|
|
6083
|
+
"\uC704\uC784\uC778",
|
|
6084
|
+
"\uCCAD\uAD6C\uC0AC\uC720",
|
|
6085
|
+
"\uC18C\uBA85\uC790\uB8CC"
|
|
6254
6086
|
]);
|
|
6255
6087
|
function isLabelCell(text) {
|
|
6256
|
-
const trimmed = text.trim();
|
|
6088
|
+
const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
|
|
6257
6089
|
if (!trimmed || trimmed.length > 30) return false;
|
|
6258
6090
|
for (const kw of LABEL_KEYWORDS) {
|
|
6259
6091
|
if (trimmed.includes(kw)) return true;
|
|
6260
6092
|
}
|
|
6261
|
-
if (/^[가-힣\s()
|
|
6093
|
+
if (/^[가-힣\s()()·::]+$/.test(trimmed) && trimmed.replace(/\s/g, "").length >= 2 && trimmed.replace(/\s/g, "").length <= 8 && !/\d/.test(trimmed)) return true;
|
|
6262
6094
|
if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
|
|
6263
6095
|
return false;
|
|
6264
6096
|
}
|
|
@@ -6281,63 +6113,572 @@ function extractFormFields(blocks) {
|
|
|
6281
6113
|
fields.push(...inlineFields);
|
|
6282
6114
|
}
|
|
6283
6115
|
}
|
|
6284
|
-
const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
|
|
6285
|
-
return { fields, confidence: Math.min(confidence, 1) };
|
|
6116
|
+
const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
|
|
6117
|
+
return { fields, confidence: Math.min(confidence, 1) };
|
|
6118
|
+
}
|
|
6119
|
+
function extractFromTable(table) {
|
|
6120
|
+
const fields = [];
|
|
6121
|
+
if (table.cols >= 2) {
|
|
6122
|
+
for (let r = 0; r < table.rows; r++) {
|
|
6123
|
+
for (let c = 0; c < table.cols - 1; c++) {
|
|
6124
|
+
const labelCell = table.cells[r][c];
|
|
6125
|
+
const valueCell = table.cells[r][c + 1];
|
|
6126
|
+
if (isLabelCell(labelCell.text)) {
|
|
6127
|
+
fields.push({
|
|
6128
|
+
label: labelCell.text.trim().replace(/[::]\s*$/, ""),
|
|
6129
|
+
value: valueCell.text.trim(),
|
|
6130
|
+
row: r,
|
|
6131
|
+
col: c
|
|
6132
|
+
});
|
|
6133
|
+
}
|
|
6134
|
+
}
|
|
6135
|
+
}
|
|
6136
|
+
}
|
|
6137
|
+
if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
|
|
6138
|
+
const headerRow = table.cells[0];
|
|
6139
|
+
const allLabels = headerRow.every((cell) => {
|
|
6140
|
+
const t = cell.text.trim();
|
|
6141
|
+
return t.length > 0 && t.length <= 20;
|
|
6142
|
+
});
|
|
6143
|
+
if (allLabels) {
|
|
6144
|
+
for (let r = 1; r < table.rows; r++) {
|
|
6145
|
+
for (let c = 0; c < table.cols; c++) {
|
|
6146
|
+
const label = headerRow[c].text.trim();
|
|
6147
|
+
const value = table.cells[r][c].text.trim();
|
|
6148
|
+
if (label && value) {
|
|
6149
|
+
fields.push({ label, value, row: r, col: c });
|
|
6150
|
+
}
|
|
6151
|
+
}
|
|
6152
|
+
}
|
|
6153
|
+
}
|
|
6154
|
+
}
|
|
6155
|
+
return fields;
|
|
6156
|
+
}
|
|
6157
|
+
function extractInlineFields(text) {
|
|
6158
|
+
const fields = [];
|
|
6159
|
+
const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
|
|
6160
|
+
let match;
|
|
6161
|
+
while ((match = pattern.exec(text)) !== null) {
|
|
6162
|
+
const label = match[1].trim();
|
|
6163
|
+
const value = match[2].trim();
|
|
6164
|
+
if (value) {
|
|
6165
|
+
fields.push({ label, value, row: -1, col: -1 });
|
|
6166
|
+
}
|
|
6167
|
+
}
|
|
6168
|
+
return fields;
|
|
6169
|
+
}
|
|
6170
|
+
|
|
6171
|
+
// src/form/match.ts
|
|
6172
|
+
function normalizeLabel(label) {
|
|
6173
|
+
return label.trim().replace(/[::\s()()·]/g, "");
|
|
6174
|
+
}
|
|
6175
|
+
function findMatchingKey(cellLabel, values) {
|
|
6176
|
+
if (values.has(cellLabel)) return cellLabel;
|
|
6177
|
+
let bestKey;
|
|
6178
|
+
let bestLen = 0;
|
|
6179
|
+
for (const key of values.keys()) {
|
|
6180
|
+
if (cellLabel.startsWith(key)) {
|
|
6181
|
+
if (key.length >= cellLabel.length * 0.6 && key.length > bestLen) {
|
|
6182
|
+
bestLen = key.length;
|
|
6183
|
+
bestKey = key;
|
|
6184
|
+
}
|
|
6185
|
+
} else if (key.startsWith(cellLabel)) {
|
|
6186
|
+
if (cellLabel.length >= key.length * 0.6 && cellLabel.length > bestLen) {
|
|
6187
|
+
bestLen = cellLabel.length;
|
|
6188
|
+
bestKey = key;
|
|
6189
|
+
}
|
|
6190
|
+
}
|
|
6191
|
+
}
|
|
6192
|
+
return bestKey;
|
|
6193
|
+
}
|
|
6194
|
+
function isKeywordLabel(text) {
|
|
6195
|
+
const trimmed = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
|
|
6196
|
+
if (!trimmed || trimmed.length > 15) return false;
|
|
6197
|
+
for (const kw of LABEL_KEYWORDS) {
|
|
6198
|
+
if (trimmed.includes(kw)) return true;
|
|
6199
|
+
}
|
|
6200
|
+
return false;
|
|
6201
|
+
}
|
|
6202
|
+
function fillInCellPatterns(cellText, values, matchedLabels) {
|
|
6203
|
+
let text = cellText;
|
|
6204
|
+
const matches = [];
|
|
6205
|
+
text = text.replace(
|
|
6206
|
+
/([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g,
|
|
6207
|
+
(match, prefix, suffix) => {
|
|
6208
|
+
const label = prefix + suffix;
|
|
6209
|
+
const normalizedLabel = normalizeLabel(label);
|
|
6210
|
+
const matchKey = values.has(normalizedLabel) ? normalizedLabel : values.has(normalizeLabel(prefix)) ? normalizeLabel(prefix) : void 0;
|
|
6211
|
+
if (matchKey === void 0) return match;
|
|
6212
|
+
const newValue = values.get(matchKey);
|
|
6213
|
+
matchedLabels.add(matchKey);
|
|
6214
|
+
matches.push({ key: matchKey, label, value: newValue });
|
|
6215
|
+
return `${prefix}(${newValue})${suffix}`;
|
|
6216
|
+
}
|
|
6217
|
+
);
|
|
6218
|
+
text = text.replace(
|
|
6219
|
+
/□([가-힣A-Za-z]+)/g,
|
|
6220
|
+
(match, keyword) => {
|
|
6221
|
+
const normalizedKw = normalizeLabel(keyword);
|
|
6222
|
+
const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
|
|
6223
|
+
if (matchKey === void 0) return match;
|
|
6224
|
+
const val = values.get(matchKey);
|
|
6225
|
+
const isTruthy = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"].includes(val.trim()) || val.trim() === "";
|
|
6226
|
+
if (!isTruthy) return match;
|
|
6227
|
+
matchedLabels.add(matchKey);
|
|
6228
|
+
matches.push({ key: matchKey, label: `\u25A1${keyword}`, value: "\u2611" });
|
|
6229
|
+
return `\u2611${keyword}`;
|
|
6230
|
+
}
|
|
6231
|
+
);
|
|
6232
|
+
text = text.replace(
|
|
6233
|
+
/\(([가-힣A-Za-z]+)[::]\s{1,}\)/g,
|
|
6234
|
+
(match, keyword) => {
|
|
6235
|
+
const normalizedKw = normalizeLabel(keyword);
|
|
6236
|
+
const matchKey = values.has(normalizedKw) ? normalizedKw : void 0;
|
|
6237
|
+
if (matchKey === void 0) return match;
|
|
6238
|
+
const newValue = values.get(matchKey);
|
|
6239
|
+
matchedLabels.add(matchKey);
|
|
6240
|
+
matches.push({ key: matchKey, label: keyword, value: newValue });
|
|
6241
|
+
return `(${keyword}\uFF1A${newValue})`;
|
|
6242
|
+
}
|
|
6243
|
+
);
|
|
6244
|
+
return matches.length > 0 ? { text, matches } : null;
|
|
6245
|
+
}
|
|
6246
|
+
function normalizeValues(values) {
|
|
6247
|
+
const map = /* @__PURE__ */ new Map();
|
|
6248
|
+
for (const [label, value] of Object.entries(values)) {
|
|
6249
|
+
map.set(normalizeLabel(label), value);
|
|
6250
|
+
}
|
|
6251
|
+
return map;
|
|
6252
|
+
}
|
|
6253
|
+
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
|
|
6254
|
+
return [...normalizedValues.keys()].filter((k) => !matchedLabels.has(k)).map((k) => {
|
|
6255
|
+
for (const orig of Object.keys(originalValues)) {
|
|
6256
|
+
if (normalizeLabel(orig) === k) return orig;
|
|
6257
|
+
}
|
|
6258
|
+
return k;
|
|
6259
|
+
});
|
|
6260
|
+
}
|
|
6261
|
+
|
|
6262
|
+
// src/form/filler.ts
|
|
6263
|
+
function fillFormFields(blocks, values) {
|
|
6264
|
+
const cloned = structuredClone(blocks);
|
|
6265
|
+
const filled = [];
|
|
6266
|
+
const matchedLabels = /* @__PURE__ */ new Set();
|
|
6267
|
+
const normalizedValues = normalizeValues(values);
|
|
6268
|
+
const patternFilledCells = /* @__PURE__ */ new Set();
|
|
6269
|
+
for (const block of cloned) {
|
|
6270
|
+
if (block.type !== "table" || !block.table) continue;
|
|
6271
|
+
for (let r = 0; r < block.table.rows; r++) {
|
|
6272
|
+
for (let c = 0; c < block.table.cols; c++) {
|
|
6273
|
+
const cell = block.table.cells[r]?.[c];
|
|
6274
|
+
if (!cell) continue;
|
|
6275
|
+
const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
|
|
6276
|
+
if (result) {
|
|
6277
|
+
cell.text = result.text;
|
|
6278
|
+
patternFilledCells.add(`${r},${c}`);
|
|
6279
|
+
for (const m of result.matches) {
|
|
6280
|
+
filled.push({ label: m.label, value: m.value, row: r, col: c });
|
|
6281
|
+
}
|
|
6282
|
+
}
|
|
6283
|
+
}
|
|
6284
|
+
}
|
|
6285
|
+
}
|
|
6286
|
+
for (const block of cloned) {
|
|
6287
|
+
if (block.type !== "table" || !block.table) continue;
|
|
6288
|
+
fillTable(block.table, normalizedValues, filled, matchedLabels, patternFilledCells);
|
|
6289
|
+
}
|
|
6290
|
+
for (const block of cloned) {
|
|
6291
|
+
if (block.type !== "paragraph" || !block.text) continue;
|
|
6292
|
+
const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
|
|
6293
|
+
if (newText !== block.text) block.text = newText;
|
|
6294
|
+
}
|
|
6295
|
+
const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
|
|
6296
|
+
return { blocks: cloned, filled, unmatched };
|
|
6297
|
+
}
|
|
6298
|
+
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
|
|
6299
|
+
if (table.cols < 2) return;
|
|
6300
|
+
for (let r = 0; r < table.rows; r++) {
|
|
6301
|
+
for (let c = 0; c < table.cols - 1; c++) {
|
|
6302
|
+
const labelCell = table.cells[r][c];
|
|
6303
|
+
const valueCell = table.cells[r][c + 1];
|
|
6304
|
+
if (!labelCell || !valueCell) continue;
|
|
6305
|
+
if (!isLabelCell(labelCell.text)) continue;
|
|
6306
|
+
if (isKeywordLabel(valueCell.text)) continue;
|
|
6307
|
+
const normalizedCellLabel = normalizeLabel(labelCell.text);
|
|
6308
|
+
if (!normalizedCellLabel) continue;
|
|
6309
|
+
const matchKey = findMatchingKey(normalizedCellLabel, values);
|
|
6310
|
+
if (matchKey === void 0) continue;
|
|
6311
|
+
const newValue = values.get(matchKey);
|
|
6312
|
+
if (patternFilledCells?.has(`${r},${c + 1}`)) {
|
|
6313
|
+
valueCell.text = newValue + " " + valueCell.text;
|
|
6314
|
+
} else {
|
|
6315
|
+
valueCell.text = newValue;
|
|
6316
|
+
}
|
|
6317
|
+
matchedLabels.add(matchKey);
|
|
6318
|
+
filled.push({
|
|
6319
|
+
label: labelCell.text.trim().replace(/[::]\s*$/, ""),
|
|
6320
|
+
value: newValue,
|
|
6321
|
+
row: r,
|
|
6322
|
+
col: c
|
|
6323
|
+
});
|
|
6324
|
+
}
|
|
6325
|
+
}
|
|
6326
|
+
if (table.rows >= 2 && table.cols >= 2) {
|
|
6327
|
+
const headerRow = table.cells[0];
|
|
6328
|
+
const allLabels = headerRow.every((cell) => {
|
|
6329
|
+
const t = cell.text.trim();
|
|
6330
|
+
return t.length > 0 && t.length <= 20 && isLabelCell(t);
|
|
6331
|
+
});
|
|
6332
|
+
if (!allLabels) return;
|
|
6333
|
+
for (let r = 1; r < table.rows; r++) {
|
|
6334
|
+
for (let c = 0; c < table.cols; c++) {
|
|
6335
|
+
const headerLabel = normalizeLabel(headerRow[c].text);
|
|
6336
|
+
const matchKey = findMatchingKey(headerLabel, values);
|
|
6337
|
+
if (matchKey === void 0) continue;
|
|
6338
|
+
if (matchedLabels.has(matchKey)) continue;
|
|
6339
|
+
const newValue = values.get(matchKey);
|
|
6340
|
+
table.cells[r][c].text = newValue;
|
|
6341
|
+
matchedLabels.add(matchKey);
|
|
6342
|
+
filled.push({
|
|
6343
|
+
label: headerRow[c].text.trim(),
|
|
6344
|
+
value: newValue,
|
|
6345
|
+
row: r,
|
|
6346
|
+
col: c
|
|
6347
|
+
});
|
|
6348
|
+
}
|
|
6349
|
+
}
|
|
6350
|
+
}
|
|
6351
|
+
}
|
|
6352
|
+
function fillInlineFields(text, values, filled, matchedLabels) {
|
|
6353
|
+
return text.replace(
|
|
6354
|
+
/([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g,
|
|
6355
|
+
(match, rawLabel, _oldValue) => {
|
|
6356
|
+
const normalized = normalizeLabel(rawLabel);
|
|
6357
|
+
const matchKey = findMatchingKey(normalized, values);
|
|
6358
|
+
if (matchKey === void 0) return match;
|
|
6359
|
+
const newValue = values.get(matchKey);
|
|
6360
|
+
matchedLabels.add(matchKey);
|
|
6361
|
+
filled.push({
|
|
6362
|
+
label: rawLabel.trim(),
|
|
6363
|
+
value: newValue,
|
|
6364
|
+
row: -1,
|
|
6365
|
+
col: -1
|
|
6366
|
+
});
|
|
6367
|
+
return `${rawLabel}: ${newValue}`;
|
|
6368
|
+
}
|
|
6369
|
+
);
|
|
6370
|
+
}
|
|
6371
|
+
|
|
6372
|
+
// src/form/filler-hwpx.ts
|
|
6373
|
+
var import_jszip5 = __toESM(require("jszip"), 1);
|
|
6374
|
+
var import_xmldom4 = require("@xmldom/xmldom");
|
|
6375
|
+
async function fillHwpx(hwpxBuffer, values) {
|
|
6376
|
+
const zip = await import_jszip5.default.loadAsync(hwpxBuffer);
|
|
6377
|
+
const filled = [];
|
|
6378
|
+
const matchedLabels = /* @__PURE__ */ new Set();
|
|
6379
|
+
const normalizedValues = normalizeValues(values);
|
|
6380
|
+
const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
|
|
6381
|
+
if (sectionFiles.length === 0) {
|
|
6382
|
+
throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
|
|
6383
|
+
}
|
|
6384
|
+
const xmlParser = new import_xmldom4.DOMParser();
|
|
6385
|
+
const xmlSerializer = new import_xmldom4.XMLSerializer();
|
|
6386
|
+
for (const sectionPath of sectionFiles) {
|
|
6387
|
+
const zipEntry = zip.file(sectionPath);
|
|
6388
|
+
if (!zipEntry) continue;
|
|
6389
|
+
const rawXml = await zipEntry.async("text");
|
|
6390
|
+
const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
|
|
6391
|
+
if (!doc.documentElement) continue;
|
|
6392
|
+
let modified = false;
|
|
6393
|
+
const tables = findAllElements(doc.documentElement, "tbl");
|
|
6394
|
+
const cellPatternApplied = /* @__PURE__ */ new Set();
|
|
6395
|
+
for (const tblEl of tables) {
|
|
6396
|
+
const allCells = findAllElements(tblEl, "tc");
|
|
6397
|
+
for (const tcEl of allCells) {
|
|
6398
|
+
const tNodes = collectCellTextNodes(tcEl);
|
|
6399
|
+
const fullText = tNodes.map((n) => n.text).join("");
|
|
6400
|
+
const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
|
|
6401
|
+
if (!result) continue;
|
|
6402
|
+
applyTextReplacements(tNodes, fullText, result.text);
|
|
6403
|
+
cellPatternApplied.add(tcEl);
|
|
6404
|
+
for (const m of result.matches) {
|
|
6405
|
+
filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
|
|
6406
|
+
}
|
|
6407
|
+
modified = true;
|
|
6408
|
+
}
|
|
6409
|
+
}
|
|
6410
|
+
for (const tblEl of tables) {
|
|
6411
|
+
const rows = findDirectChildren(tblEl, "tr");
|
|
6412
|
+
for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
|
|
6413
|
+
const trEl = rows[rowIdx];
|
|
6414
|
+
const cells = findDirectChildren(trEl, "tc");
|
|
6415
|
+
for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
|
|
6416
|
+
const labelText = extractCellText(cells[colIdx]);
|
|
6417
|
+
if (!isLabelCell(labelText)) continue;
|
|
6418
|
+
const valueCell = cells[colIdx + 1];
|
|
6419
|
+
const valueText = extractCellText(valueCell);
|
|
6420
|
+
if (isKeywordLabel(valueText)) continue;
|
|
6421
|
+
const normalizedCellLabel = normalizeLabel(labelText);
|
|
6422
|
+
if (!normalizedCellLabel) continue;
|
|
6423
|
+
const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
|
|
6424
|
+
if (matchKey === void 0) continue;
|
|
6425
|
+
const newValue = normalizedValues.get(matchKey);
|
|
6426
|
+
if (cellPatternApplied.has(valueCell)) {
|
|
6427
|
+
prependCellText(valueCell, newValue);
|
|
6428
|
+
} else {
|
|
6429
|
+
replaceCellText(valueCell, newValue);
|
|
6430
|
+
}
|
|
6431
|
+
matchedLabels.add(matchKey);
|
|
6432
|
+
filled.push({
|
|
6433
|
+
label: labelText.trim().replace(/[::]\s*$/, ""),
|
|
6434
|
+
value: newValue,
|
|
6435
|
+
row: rowIdx,
|
|
6436
|
+
col: colIdx
|
|
6437
|
+
});
|
|
6438
|
+
modified = true;
|
|
6439
|
+
}
|
|
6440
|
+
}
|
|
6441
|
+
if (rows.length >= 2) {
|
|
6442
|
+
const headerCells = findDirectChildren(rows[0], "tc");
|
|
6443
|
+
const allLabels = headerCells.every((cell) => {
|
|
6444
|
+
const t = extractCellText(cell).trim();
|
|
6445
|
+
return t.length > 0 && t.length <= 20 && isLabelCell(t);
|
|
6446
|
+
});
|
|
6447
|
+
if (allLabels) {
|
|
6448
|
+
for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
|
|
6449
|
+
const dataCells = findDirectChildren(rows[rowIdx], "tc");
|
|
6450
|
+
for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
|
|
6451
|
+
const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
|
|
6452
|
+
const matchKey = findMatchingKey(headerLabel, normalizedValues);
|
|
6453
|
+
if (matchKey === void 0) continue;
|
|
6454
|
+
if (matchedLabels.has(matchKey)) continue;
|
|
6455
|
+
const newValue = normalizedValues.get(matchKey);
|
|
6456
|
+
replaceCellText(dataCells[colIdx], newValue);
|
|
6457
|
+
matchedLabels.add(matchKey);
|
|
6458
|
+
filled.push({
|
|
6459
|
+
label: extractCellText(headerCells[colIdx]).trim(),
|
|
6460
|
+
value: newValue,
|
|
6461
|
+
row: rowIdx,
|
|
6462
|
+
col: colIdx
|
|
6463
|
+
});
|
|
6464
|
+
modified = true;
|
|
6465
|
+
}
|
|
6466
|
+
}
|
|
6467
|
+
}
|
|
6468
|
+
}
|
|
6469
|
+
}
|
|
6470
|
+
const allParagraphs = findAllElements(doc.documentElement, "p");
|
|
6471
|
+
for (const pEl of allParagraphs) {
|
|
6472
|
+
if (isInsideTable(pEl)) continue;
|
|
6473
|
+
const tNodes = collectTextNodes(pEl);
|
|
6474
|
+
const fullText = tNodes.map((n) => n.text).join("");
|
|
6475
|
+
const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
|
|
6476
|
+
let match;
|
|
6477
|
+
while ((match = pattern.exec(fullText)) !== null) {
|
|
6478
|
+
const rawLabel = match[1];
|
|
6479
|
+
const normalized = normalizeLabel(rawLabel);
|
|
6480
|
+
const matchKey = findMatchingKey(normalized, normalizedValues);
|
|
6481
|
+
if (matchKey === void 0) continue;
|
|
6482
|
+
const newValue = normalizedValues.get(matchKey);
|
|
6483
|
+
const valueStart = match.index + match[0].length - match[2].length;
|
|
6484
|
+
const valueEnd = match.index + match[0].length;
|
|
6485
|
+
replaceTextRange(tNodes, valueStart, valueEnd, newValue);
|
|
6486
|
+
matchedLabels.add(matchKey);
|
|
6487
|
+
filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
|
|
6488
|
+
modified = true;
|
|
6489
|
+
break;
|
|
6490
|
+
}
|
|
6491
|
+
}
|
|
6492
|
+
if (modified) {
|
|
6493
|
+
const newXml = xmlSerializer.serializeToString(doc);
|
|
6494
|
+
zip.file(sectionPath, newXml);
|
|
6495
|
+
}
|
|
6496
|
+
}
|
|
6497
|
+
const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
|
|
6498
|
+
const buffer = await zip.generateAsync({ type: "arraybuffer" });
|
|
6499
|
+
return { buffer, filled, unmatched };
|
|
6500
|
+
}
|
|
6501
|
+
function localName(el) {
|
|
6502
|
+
return (el.tagName || el.localName || "").replace(/^[^:]+:/, "");
|
|
6503
|
+
}
|
|
6504
|
+
function findAllElements(node, tagLocalName) {
|
|
6505
|
+
const result = [];
|
|
6506
|
+
const walk = (n) => {
|
|
6507
|
+
const children = n.childNodes;
|
|
6508
|
+
if (!children) return;
|
|
6509
|
+
for (let i = 0; i < children.length; i++) {
|
|
6510
|
+
const child = children[i];
|
|
6511
|
+
if (child.nodeType !== 1) continue;
|
|
6512
|
+
if (localName(child) === tagLocalName) result.push(child);
|
|
6513
|
+
walk(child);
|
|
6514
|
+
}
|
|
6515
|
+
};
|
|
6516
|
+
walk(node);
|
|
6517
|
+
return result;
|
|
6518
|
+
}
|
|
6519
|
+
/**
 * Returns the immediate element children of `parent` whose prefix-stripped
 * tag name equals `tagLocalName`. Deeper descendants are not inspected.
 *
 * @param {object} parent - DOM-like node exposing `childNodes`.
 * @param {string} tagLocalName - Tag name to match, without namespace prefix.
 * @returns {object[]} Matching children in their original order; empty when
 *   `parent` has no `childNodes`.
 */
function findDirectChildren(parent, tagLocalName) {
  const kids = parent.childNodes;
  if (!kids) return [];
  return Array.from(kids).filter(
    (kid) => kid.nodeType === 1 && localName(kid) === tagLocalName
  );
}
|
|
6531
|
+
/**
 * Reports whether any ancestor of `el` is an element whose prefix-stripped
 * tag name is `tbl`. The element itself is not considered.
 *
 * @param {object} el - DOM-like node exposing `parentNode`.
 * @returns {boolean} `true` when a `tbl` ancestor exists, otherwise `false`.
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
|
|
6539
|
+
/**
 * Concatenates the text found inside a table cell element.
 *
 * Starting at `tcEl`, the walk descends only into `t`, `run`, `r`, `p` and
 * `subList` elements. Text nodes met at any visited level are appended as-is,
 * a `tab` element contributes a single space and a `br` element a newline.
 * Every other element (and its subtree) is ignored.
 *
 * @param {object} tcEl - DOM-like cell element exposing `childNodes`.
 * @returns {string} The collected text, possibly empty.
 */
function extractCellText(tcEl) {
  const pieces = [];
  const visit = (parent) => {
    const kids = parent.childNodes;
    if (!kids) return;
    for (let idx = 0; idx < kids.length; idx++) {
      const kid = kids[idx];
      if (kid.nodeType === 3) {
        pieces.push(kid.textContent || "");
        continue;
      }
      if (kid.nodeType !== 1) continue;
      switch (localName(kid)) {
        case "t":
        case "run":
        case "r":
        case "p":
        case "subList":
          visit(kid);
          break;
        case "tab":
          pieces.push(" ");
          break;
        case "br":
          pieces.push("\n");
          break;
        default:
          break;
      }
    }
  };
  visit(tcEl);
  return pieces.join("");
}
|
|
6560
|
+
/**
 * Puts `text` followed by a single space in front of the existing content of
 * the first `t` element found under `tcEl`. Does nothing when the cell has no
 * `t` element.
 *
 * @param {object} tcEl - DOM-like cell element.
 * @param {string} text - Text to place before the current content.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (firstT === undefined) return;
  // Capture the current text before the element's children are removed.
  const previous = firstT.textContent || "";
  clearChildren(firstT);
  const combined = firstT.ownerDocument.createTextNode(text + " " + previous);
  firstT.appendChild(combined);
}
|
|
6568
|
+
/**
 * Replaces the text of a table cell with `newValue`.
 *
 * The value is written into the first paragraph (`p`) found under `tcEl`:
 * - when that paragraph has `run`/`r` elements, the value goes into the first
 *   run that actually contains a `t` element and every other run is blanked;
 * - otherwise the value goes into the paragraph's first `t` element and the
 *   remaining `t` elements are emptied.
 * All later paragraphs in the cell have their runs blanked and their `t`
 * elements emptied.
 *
 * Previously the value was always written to the very first run; if that run
 * had no `t` child, nothing was written while the later runs were still
 * cleared, so the value was lost and the old text erased.
 *
 * Limitation: no `t` element is created. If the first paragraph has no `t`
 * element at all, the value is not written.
 *
 * @param {object} tcEl - DOM-like cell element.
 * @param {string} newValue - Text to store in the cell.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const paragraphs = findAllElements(tcEl, "p");
  if (paragraphs.length === 0) return;
  // Same ordering as before: all `run` elements first, then all `r` elements.
  const collectRuns = (p) => findAllElements(p, "run").concat(findAllElements(p, "r"));
  const firstP = paragraphs[0];
  const runs = collectRuns(firstP);
  if (runs.length > 0) {
    // Pick the first run that can hold text; -1 means no run has a `t`.
    const targetIndex = runs.findIndex((run) => findAllElements(run, "t").length > 0);
    for (let i = 0; i < runs.length; i++) {
      setRunText(runs[i], i === targetIndex ? newValue : "");
    }
  } else {
    const tElements = findAllElements(firstP, "t");
    if (tElements.length > 0) {
      clearChildren(tElements[0]);
      tElements[0].appendChild(tElements[0].ownerDocument.createTextNode(newValue));
      for (let i = 1; i < tElements.length; i++) {
        clearChildren(tElements[i]);
      }
    }
  }
  for (let i = 1; i < paragraphs.length; i++) {
    const p = paragraphs[i];
    if (!p.parentNode) continue;
    for (const run of collectRuns(p)) setRunText(run, "");
    for (const t of findAllElements(p, "t")) clearChildren(t);
  }
}
|
|
6287
|
-
function
|
|
6288
|
-
const
|
|
6289
|
-
if (
|
|
6290
|
-
|
|
6291
|
-
|
|
6292
|
-
|
|
6293
|
-
|
|
6294
|
-
if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
|
|
6295
|
-
fields.push({
|
|
6296
|
-
label: labelCell.text.trim().replace(/[::]\s*$/, ""),
|
|
6297
|
-
value: valueCell.text.trim(),
|
|
6298
|
-
row: r,
|
|
6299
|
-
col: c
|
|
6300
|
-
});
|
|
6301
|
-
}
|
|
6302
|
-
}
|
|
6598
|
+
/**
 * Sets the text carried by a run element.
 *
 * The first `t` element under `runEl` is emptied and given a single text node
 * holding `text`; every further `t` element under the run is emptied. When
 * the run has no `t` element the call does nothing.
 *
 * @param {object} runEl - DOM-like run element.
 * @param {string} text - Text to store in the run.
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [head, ...rest] = findAllElements(runEl, "t");
  if (head === undefined) return;
  clearChildren(head);
  head.appendChild(head.ownerDocument.createTextNode(text));
  rest.forEach((extra) => clearChildren(extra));
}
|
|
6608
|
+
/**
 * Removes every child node from `el`, front to back.
 *
 * @param {object} el - DOM-like node exposing `firstChild` and `removeChild`.
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
|
|
6611
|
+
/**
 * Lists the `t` elements under a paragraph together with their text and the
 * position of that text within the concatenation of all of them.
 *
 * @param {object} pEl - DOM-like paragraph element.
 * @returns {{element: object, text: string, offset: number}[]} One entry per
 *   `t` element, in the order `findAllElements` returns them; `offset` is the
 *   summed text length of all preceding entries.
 */
function collectTextNodes(pEl) {
  let cursor = 0;
  return findAllElements(pEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: cursor };
    cursor += text.length;
    return entry;
  });
}
|
|
6325
|
-
function
|
|
6326
|
-
|
|
6327
|
-
const
|
|
6328
|
-
|
|
6329
|
-
|
|
6330
|
-
|
|
6331
|
-
const
|
|
6332
|
-
|
|
6333
|
-
|
|
6622
|
+
/**
 * Replaces the characters in `[globalStart, globalEnd)` of the text formed by
 * concatenating `tNodes` with `newValue`, rewriting the affected `t` elements.
 *
 * For a non-empty range, the first overlapping node receives its untouched
 * prefix, then `newValue`, then its untouched suffix; every later overlapping
 * node keeps only the part of its text outside the range.
 *
 * For an empty range (`globalStart === globalEnd`) the call is a pure
 * insertion: `newValue` is inserted at that position in the first node whose
 * span contains it, boundaries included. Previously an insertion point that
 * fell exactly on a node boundary, or at the very end of the text, matched no
 * node and the value was silently dropped.
 *
 * Offsets are taken from the `offset`/`text` fields of the entries; those
 * fields are not updated by this call.
 *
 * @param {Iterable<{element: object, text: string, offset: number}>} tNodes
 *   Entries as produced by `collectTextNodes` / `collectCellTextNodes`.
 * @param {number} globalStart - Inclusive start offset in the joined text.
 * @param {number} globalEnd - Exclusive end offset in the joined text.
 * @param {string} newValue - Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  const writeText = (node, text) => {
    clearChildren(node.element);
    node.element.appendChild(node.element.ownerDocument.createTextNode(text));
  };
  if (globalStart === globalEnd) {
    // Pure insertion: the overlap test below can never match an empty range
    // that sits on a node boundary, so locate the host node explicitly.
    for (const node of tNodes) {
      const nodeEnd = node.offset + node.text.length;
      if (node.offset <= globalStart && globalStart <= nodeEnd) {
        const at = globalStart - node.offset;
        writeText(node, node.text.slice(0, at) + newValue + node.text.slice(at));
        return;
      }
    }
    return;
  }
  let replaced = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = nodeStart + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    const before = node.text.slice(0, localStart);
    const after = node.text.slice(localEnd);
    // Only the first overlapping node carries the replacement text.
    writeText(node, replaced ? before + after : before + newValue + after);
    replaced = true;
  }
}
|
|
6646
|
+
/**
 * Lists the `t` elements under a table cell together with their text and the
 * position of that text within the concatenation of all of them.
 *
 * @param {object} tcEl - DOM-like cell element.
 * @returns {{element: object, text: string, offset: number}[]} One entry per
 *   `t` element, in the order `findAllElements` returns them; `offset` is the
 *   summed text length of all preceding entries.
 */
function collectCellTextNodes(tcEl) {
  const entries = [];
  findAllElements(tcEl, "t").reduce((offset, element) => {
    const text = element.textContent || "";
    entries.push({ element, text, offset });
    return offset + text.length;
  }, 0);
  return entries;
}
|
|
6657
|
+
/**
 * Applies an already-computed text change to the `t` elements that hold it.
 *
 * Nothing happens when the two strings are equal. With exactly one entry the
 * element's content is replaced wholesale by `replacedFull`. Otherwise the
 * longest common prefix and (non-overlapping) common suffix of the two
 * strings are trimmed away and only the differing middle section is handed
 * to `replaceTextRange`.
 *
 * @param {{element: object, text: string, offset: number}[]} tNodes - Entries
 *   describing the `t` elements whose joined text is `originalFull`.
 * @param {string} originalFull - Joined text before the change.
 * @param {string} replacedFull - Joined text after the change.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;
  if (tNodes.length === 1) {
    const [{ element }] = tNodes;
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(replacedFull));
    return;
  }
  const shorter = Math.min(originalFull.length, replacedFull.length);
  let prefix = 0;
  while (prefix < shorter && originalFull[prefix] === replacedFull[prefix]) {
    prefix++;
  }
  // The suffix may not reach back into the shared prefix of either string.
  const suffixLimit = shorter - prefix;
  let suffix = 0;
  while (
    suffix < suffixLimit &&
    originalFull[originalFull.length - 1 - suffix] === replacedFull[replacedFull.length - 1 - suffix]
  ) {
    suffix++;
  }
  const changed = replacedFull.slice(prefix, replacedFull.length - suffix);
  replaceTextRange(tNodes, prefix, originalFull.length - suffix, changed);
}
|
|
6338
6679
|
|
|
6339
6680
|
// src/hwpx/generator.ts
|
|
6340
|
-
var
|
|
6681
|
+
var import_jszip6 = __toESM(require("jszip"), 1);
|
|
6341
6682
|
var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
|
|
6342
6683
|
var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
|
|
6343
6684
|
var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
|
|
@@ -6364,7 +6705,7 @@ var PARA_LIST = 7;
|
|
|
6364
6705
|
async function markdownToHwpx(markdown) {
|
|
6365
6706
|
const blocks = parseMarkdownToBlocks(markdown);
|
|
6366
6707
|
const sectionXml = blocksToSectionXml(blocks);
|
|
6367
|
-
const zip = new
|
|
6708
|
+
const zip = new import_jszip6.default();
|
|
6368
6709
|
zip.file("mimetype", "application/hwp+zip", { compression: "STORE" });
|
|
6369
6710
|
zip.file("META-INF/container.xml", generateContainerXml());
|
|
6370
6711
|
zip.file("Contents/content.hpf", generateManifest());
|
|
@@ -6724,6 +7065,183 @@ function blocksToSectionXml(blocks) {
|
|
|
6724
7065
|
</hs:sec>`;
|
|
6725
7066
|
}
|
|
6726
7067
|
|
|
7068
|
+
// src/diff/text-diff.ts
|
|
7069
|
+
/**
 * Scores how alike two strings are on a 0..1 scale.
 *
 * Identical inputs score 1; if either input is falsy (and they are not
 * identical) the score is 0. Otherwise the score is one minus the edit
 * distance from `levenshtein` divided by the longer input's length.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score.
 */
function similarity(a, b) {
  if (a === b) return 1;
  if (!a || !b) return 0;
  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - levenshtein(a, b) / longest;
}
|
|
7076
|
+
/**
 * Compares two strings with `similarity` after passing each through
 * `normalize`.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The similarity of the normalized strings.
 */
function normalizedSimilarity(a, b) {
  const left = normalize(a);
  const right = normalize(b);
  return similarity(left, right);
}
|
|
7079
|
+
/**
 * Collapses every run of whitespace in `s` to a single space and removes
 * leading and trailing whitespace.
 *
 * @param {string} s - Input text.
 * @returns {string} The whitespace-normalized text.
 */
function normalize(s) {
  const collapsed = s.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
7082
|
+
// Combined input length above which levenshtein() switches to an estimate.
var MAX_LEVENSHTEIN_LEN = 1e4;
/**
 * Computes the Levenshtein edit distance between two strings, comparing
 * UTF-16 code units.
 *
 * When the combined length exceeds `MAX_LEVENSHTEIN_LEN` an estimate is
 * returned instead: the length difference plus the shorter length scaled by
 * the mismatch rate over the first (at most 500) aligned positions.
 *
 * The exact path keeps two rows sized by the shorter string.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Exact or estimated edit distance.
 */
function levenshtein(a, b) {
  if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
    const shorterLen = Math.min(a.length, b.length);
    const sampleLen = Math.min(500, shorterLen);
    let mismatches = 0;
    for (let k = 0; k < sampleLen; k++) {
      if (a[k] !== b[k]) mismatches += 1;
    }
    const mismatchRate = sampleLen > 0 ? mismatches / sampleLen : 1;
    return Math.abs(a.length - b.length) + Math.round(shorterLen * mismatchRate);
  }
  // Rows are indexed by the shorter string.
  const [shortStr, longStr] = a.length > b.length ? [b, a] : [a, b];
  const width = shortStr.length;
  let previousRow = Array.from({ length: width + 1 }, (_, idx) => idx);
  let currentRow = new Array(width + 1);
  for (let row = 1; row <= longStr.length; row++) {
    const longChar = longStr[row - 1];
    currentRow[0] = row;
    for (let col = 1; col <= width; col++) {
      currentRow[col] =
        shortStr[col - 1] === longChar
          ? previousRow[col - 1]
          : 1 + Math.min(previousRow[col - 1], previousRow[col], currentRow[col - 1]);
    }
    const finished = currentRow;
    currentRow = previousRow;
    previousRow = finished;
  }
  return previousRow[width];
}
|
|
7110
|
+
|
|
7111
|
+
// src/diff/compare.ts
|
|
7112
|
+
// Minimum block similarity compared against in alignBlocks().
var SIMILARITY_THRESHOLD = 0.4;
/**
 * Parses two documents and returns the block-level diff between them.
 *
 * Both inputs are handed to `parse` concurrently with the same `options`.
 *
 * @param {*} bufferA - First document, in whatever form `parse` accepts.
 * @param {*} bufferB - Second document, in whatever form `parse` accepts.
 * @param {object} [options] - Options forwarded to `parse`.
 * @returns {Promise<object>} The result of `diffBlocks` on the parsed blocks.
 * @throws {Error} When either parse result reports `success` as falsy; the
 *   first document is checked first.
 */
async function compare(bufferA, bufferB, options) {
  const pendingA = parse(bufferA, options);
  const pendingB = parse(bufferB, options);
  const [resultA, resultB] = await Promise.all([pendingA, pendingB]);
  if (!resultA.success) {
    throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
  }
  if (!resultB.success) {
    throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
  }
  return diffBlocks(resultA.blocks, resultB.blocks);
}
|
|
7122
|
+
/**
 * Produces a block-by-block diff of two block lists.
 *
 * Blocks are paired by `alignBlocks`. A pair whose `blockSimilarity` is at
 * least 0.99 is reported as `unchanged` (with similarity 1); any other pair
 * is `modified` and, when both sides are tables carrying a `table` object,
 * also gets `cellDiffs` from `diffTableCells`. A block present on only one
 * side is `removed` or `added`.
 *
 * @param {object[]} blocksA - Blocks of the "before" document.
 * @param {object[]} blocksB - Blocks of the "after" document.
 * @returns {{stats: {added: number, removed: number, modified: number, unchanged: number}, diffs: object[]}}
 *   Per-type counters and the ordered list of diff entries.
 */
function diffBlocks(blocksA, blocksB) {
  const pairs = alignBlocks(blocksA, blocksB);
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];
  for (const [before, after] of pairs) {
    if (before && after) {
      const sim = blockSimilarity(before, after);
      if (sim >= 0.99) {
        diffs.push({ type: "unchanged", before, after, similarity: 1 });
        stats.unchanged += 1;
        continue;
      }
      const entry = { type: "modified", before, after, similarity: sim };
      const bothTables = before.type === "table" && after.type === "table";
      if (bothTables && before.table && after.table) {
        entry.cellDiffs = diffTableCells(before.table, after.table);
      }
      diffs.push(entry);
      stats.modified += 1;
    } else if (before) {
      diffs.push({ type: "removed", before });
      stats.removed += 1;
    } else if (after) {
      diffs.push({ type: "added", after });
      stats.added += 1;
    }
  }
  return { stats, diffs };
}
|
|
7150
|
+
/**
 * Pairs up the blocks of two lists, preserving order on both sides.
 *
 * Two blocks count as a match when their `blockSimilarity` reaches
 * `SIMILARITY_THRESHOLD`; a longest-common-subsequence table over that
 * relation selects the matched pairs. Blocks left unmatched are emitted
 * alone: before each match, pending blocks of `a` come first and pending
 * blocks of `b` second. When `a.length * b.length` exceeds 1e7 the work is
 * delegated to `fallbackAlign`.
 *
 * @param {object[]} a - Blocks of the first document.
 * @param {object[]} b - Blocks of the second document.
 * @returns {Array<[object|null, object|null]>} Ordered `[fromA, fromB]`
 *   pairs; an unmatched side is `null`.
 */
function alignBlocks(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows * cols > 1e7) return fallbackAlign(a, b);

  // Similarities are memoized: the table fill and the traceback both ask
  // for the same (row, column) pairs.
  const memo = new Map();
  const isMatch = (ai, bi) => {
    const key = `${ai},${bi}`;
    let score = memo.get(key);
    if (score === void 0) {
      score = blockSimilarity(a[ai], b[bi]);
      memo.set(key, score);
    }
    return score >= SIMILARITY_THRESHOLD;
  };

  // lcs[r][c] = number of matched pairs using the first r blocks of `a`
  // and the first c blocks of `b`.
  const lcs = Array.from({ length: rows + 1 }, () => new Array(cols + 1).fill(0));
  for (let r = 1; r <= rows; r++) {
    for (let c = 1; c <= cols; c++) {
      lcs[r][c] = isMatch(r - 1, c - 1)
        ? lcs[r - 1][c - 1] + 1
        : Math.max(lcs[r - 1][c], lcs[r][c - 1]);
    }
  }

  // Walk back from the bottom-right corner to recover the matched index pairs.
  const matches = [];
  let r = rows;
  let c = cols;
  while (r > 0 && c > 0) {
    if (isMatch(r - 1, c - 1) && lcs[r][c] === lcs[r - 1][c - 1] + 1) {
      matches.push([r - 1, c - 1]);
      r--;
      c--;
    } else if (lcs[r - 1][c] >= lcs[r][c - 1]) {
      r--;
    } else {
      c--;
    }
  }
  matches.reverse();

  // Interleave unmatched blocks around the matched pairs.
  const aligned = [];
  let nextA = 0;
  let nextB = 0;
  const flushUntil = (stopA, stopB) => {
    while (nextA < stopA) aligned.push([a[nextA++], null]);
    while (nextB < stopB) aligned.push([null, b[nextB++]]);
  };
  for (const [matchA, matchB] of matches) {
    flushUntil(matchA, matchB);
    aligned.push([a[nextA++], b[nextB++]]);
  }
  flushUntil(rows, cols);
  return aligned;
}
|
|
7198
|
+
/**
 * Pairs blocks purely by position: entry `i` holds `a[i]` and `b[i]`, with
 * `null` standing in for a missing or falsy value on either side.
 *
 * @param {object[]} a - Blocks of the first document.
 * @param {object[]} b - Blocks of the second document.
 * @returns {Array<[object|null, object|null]>} One pair per index up to the
 *   longer list's length.
 */
function fallbackAlign(a, b) {
  const longest = Math.max(a.length, b.length);
  return Array.from({ length: longest }, (_, idx) => [a[idx] || null, b[idx] || null]);
}
|
|
7206
|
+
/**
 * Scores how alike two blocks are.
 *
 * Blocks of different `type` score 0. When both blocks define `text`, the
 * score is the normalized text similarity. Otherwise, for table blocks that
 * both carry a `table` object, the score comes from `tableSimilarity`. Any
 * remaining same-type pair scores 1.
 *
 * @param {object} a - First block.
 * @param {object} b - Second block.
 * @returns {number} Similarity score.
 */
function blockSimilarity(a, b) {
  if (a.type !== b.type) return 0;
  const bothHaveText = a.text !== void 0 && b.text !== void 0;
  if (bothHaveText) {
    return normalizedSimilarity(a.text || "", b.text || "");
  }
  const comparableTables = a.type === "table" && a.table && b.table;
  if (comparableTables) {
    return tableSimilarity(a.table, b.table);
  }
  // Types are known to be equal here, so the pair counts as identical.
  return 1;
}
|
|
7217
|
+
/**
 * Scores how alike two tables are by blending size and content.
 *
 * The size component is one minus the relative difference of the two
 * `rows * cols` areas; the content component is the normalized similarity of
 * all cell texts joined with spaces. They are weighted 0.3 and 0.7.
 *
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} a - First table.
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} b - Second table.
 * @returns {number} Weighted similarity score.
 */
function tableSimilarity(a, b) {
  const areaA = a.rows * a.cols;
  const areaB = b.rows * b.cols;
  const dimSim = 1 - Math.abs(areaA - areaB) / Math.max(areaA, areaB, 1);
  const joinCellText = (table) => table.cells.flat().map((cell) => cell.text).join(" ");
  const contentSim = normalizedSimilarity(joinCellText(a), joinCellText(b));
  return dimSim * 0.3 + contentSim * 0.7;
}
|
|
7224
|
+
/**
 * Compares two tables cell by cell over the larger of their dimensions.
 *
 * A position outside a table's `rows`/`cols` bounds has no text on that
 * side. A cell missing on the `a` side is `added`, missing on the `b` side is
 * `removed`, equal texts are `unchanged`, and anything else is `modified`.
 *
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} a - "Before" table.
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} b - "After" table.
 * @returns {{type: string, before: (string|undefined), after: (string|undefined)}[][]}
 *   A grid of per-cell diff entries.
 */
function diffTableCells(a, b) {
  const rowCount = Math.max(a.rows, b.rows);
  const colCount = Math.max(a.cols, b.cols);
  const textAt = (table, r, c) =>
    r < table.rows && c < table.cols ? table.cells[r][c].text : void 0;
  const classify = (before, after) => {
    if (before === void 0) return "added";
    if (after === void 0) return "removed";
    return before === after ? "unchanged" : "modified";
  };
  return Array.from({ length: rowCount }, (_row, r) =>
    Array.from({ length: colCount }, (_col, c) => {
      const before = textAt(a, r, c);
      const after = textAt(b, r, c);
      return { type: classify(before, after), before, after };
    })
  );
}
|
|
7244
|
+
|
|
6727
7245
|
// src/index.ts
|
|
6728
7246
|
async function parse(input, options) {
|
|
6729
7247
|
let buffer;
|
|
@@ -6800,6 +7318,45 @@ async function parseDocx(buffer, options) {
|
|
|
6800
7318
|
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
|
|
6801
7319
|
}
|
|
6802
7320
|
}
|
|
7321
|
+
/**
 * Fills a form document with the given values and returns it in the
 * requested output format.
 *
 * `input` may be a file path (read with `readFile`), a Node `Buffer`, or a
 * value that is used as the buffer directly.
 *
 * - `"hwpx-preserve"`: the input must be detected as HWPX by both
 *   `detectFormat` and `detectZipFormat`; the original package is then filled
 *   by `fillHwpx` and its buffer returned.
 * - `"hwpx"`: the document is parsed, filled with `fillFormFields`, rendered
 *   to Markdown and converted by `markdownToHwpx`.
 * - anything else (default `"markdown"`): the filled Markdown is returned.
 *
 * @param {string|Buffer|ArrayBuffer} input - Path or document bytes.
 * @param {object} values - Label-to-value data handed to the fill routine.
 * @param {string} [outputFormat="markdown"] - Desired output format.
 * @returns {Promise<{output: *, format: string, fill: object}>} The produced
 *   output, the format label, and the fill report.
 * @throws {Error} When `"hwpx-preserve"` is requested for a non-HWPX input,
 *   or when parsing the document fails.
 */
async function fillForm(input, values, outputFormat = "markdown") {
  let buffer;
  if (typeof input === "string") {
    const fileBytes = await (0, import_promises.readFile)(input);
    buffer = toArrayBuffer(fileBytes);
  } else {
    buffer = Buffer.isBuffer(input) ? toArrayBuffer(input) : input;
  }

  if (outputFormat === "hwpx-preserve") {
    // The cheap format check runs first; the zip-level check only runs for
    // inputs that already look like HWPX.
    const format = detectFormat(buffer);
    if (format !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${format})`);
    }
    const zipFormat = await detectZipFormat(buffer);
    if (zipFormat !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${zipFormat})`);
    }
    const { buffer: output, filled, unmatched } = await fillHwpx(buffer, values);
    return { output, format: "hwpx-preserve", fill: { filled, unmatched } };
  }

  const parsed = await parse(buffer);
  if (!parsed.success) {
    throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
  }
  const fill = fillFormFields(parsed.blocks, values);
  const markdown = blocksToMarkdown(fill.blocks);
  if (outputFormat !== "hwpx") {
    return { output: markdown, format: "markdown", fill };
  }
  const hwpxBuffer = await markdownToHwpx(markdown);
  return { output: hwpxBuffer, format: "hwpx", fill };
}
|
|
6803
7360
|
// Annotate the CommonJS export names for ESM import in node:
|
|
6804
7361
|
0 && (module.exports = {
|
|
6805
7362
|
VERSION,
|
|
@@ -6809,7 +7366,11 @@ async function parseDocx(buffer, options) {
|
|
|
6809
7366
|
detectZipFormat,
|
|
6810
7367
|
diffBlocks,
|
|
6811
7368
|
extractFormFields,
|
|
7369
|
+
fillForm,
|
|
7370
|
+
fillFormFields,
|
|
7371
|
+
fillHwpx,
|
|
6812
7372
|
isHwpxFile,
|
|
7373
|
+
isLabelCell,
|
|
6813
7374
|
isOldHwpFile,
|
|
6814
7375
|
isPdfFile,
|
|
6815
7376
|
isZipFile,
|