kordoc 2.2.3 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-AIG7SDWU.js → chunk-SY2RFVLW.js} +1051 -149
- package/dist/chunk-SY2RFVLW.js.map +1 -0
- package/dist/cli.js +149 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +799 -238
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +97 -7
- package/dist/index.d.ts +97 -7
- package/dist/index.js +795 -238
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +126 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{watch-H672QAW2.js → watch-5P7DJ3HG.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-AIG7SDWU.js.map +0 -1
- package/dist/{watch-H672QAW2.js.map → watch-5P7DJ3HG.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -139,7 +139,7 @@ import { inflateRawSync } from "zlib";
|
|
|
139
139
|
import { DOMParser } from "@xmldom/xmldom";
|
|
140
140
|
|
|
141
141
|
// src/utils.ts
|
|
142
|
-
var VERSION = true ? "2.2.
|
|
142
|
+
var VERSION = true ? "2.2.4" : "0.0.0-dev";
|
|
143
143
|
function toArrayBuffer(buf) {
|
|
144
144
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
145
145
|
return buf.buffer;
|
|
@@ -5563,21 +5563,21 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
5563
5563
|
import JSZip4 from "jszip";
|
|
5564
5564
|
import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
|
|
5565
5565
|
var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
|
|
5566
|
-
function getChildElements(parent,
|
|
5566
|
+
/**
 * Return the direct element children of `parent` whose name matches.
 *
 * A child matches when its `localName` equals `localName2`, or when its
 * `tagName` ends with `:<localName2>` (prefixed names such as `x:row`).
 *
 * @param {{childNodes: ArrayLike<object>}} parent - Node whose children are scanned.
 * @param {string} localName2 - Unprefixed element name to look for.
 * @returns {object[]} Matching element children, in document order.
 */
function getChildElements(parent, localName2) {
  const prefixedSuffix = `:${localName2}`;
  const isWantedElement = (candidate) =>
    candidate.nodeType === 1 &&
    (candidate.localName === localName2 || candidate.tagName?.endsWith(prefixedSuffix));
  // Array.prototype.filter only needs `length` and indexed access, the same
  // contract the index loop relied on.
  return Array.prototype.filter.call(parent.childNodes, isWantedElement);
}
|
|
5580
|
-
function findElements(parent,
|
|
5580
|
+
function findElements(parent, localName2) {
|
|
5581
5581
|
const result = [];
|
|
5582
5582
|
const walk = (node) => {
|
|
5583
5583
|
const children = node.childNodes;
|
|
@@ -5585,7 +5585,7 @@ function findElements(parent, localName) {
|
|
|
5585
5585
|
const child = children[i];
|
|
5586
5586
|
if (child.nodeType === 1) {
|
|
5587
5587
|
const el = child;
|
|
5588
|
-
if (el.localName ===
|
|
5588
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5589
5589
|
result.push(el);
|
|
5590
5590
|
}
|
|
5591
5591
|
walk(el);
|
|
@@ -5595,11 +5595,11 @@ function findElements(parent, localName) {
|
|
|
5595
5595
|
walk(parent);
|
|
5596
5596
|
return result;
|
|
5597
5597
|
}
|
|
5598
|
-
function getAttr(el,
|
|
5598
|
+
/**
 * Look up an attribute value by unprefixed or full name.
 *
 * @param {{attributes: ArrayLike<object>}} el - Element whose attributes are scanned.
 * @param {string} localName2 - Name compared against each attribute's
 *   `localName` and `name`.
 * @returns {*} The `value` of the first matching attribute, or `null` when
 *   no attribute matches.
 */
function getAttr(el, localName2) {
  const hit = Array.prototype.find.call(
    el.attributes,
    (candidate) => candidate.localName === localName2 || candidate.name === localName2
  );
  return hit === undefined ? null : hit.value;
}
|
|
@@ -5946,11 +5946,11 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5946
5946
|
const node = children[i];
|
|
5947
5947
|
if (node.nodeType !== 1) continue;
|
|
5948
5948
|
const el = node;
|
|
5949
|
-
const
|
|
5950
|
-
if (
|
|
5949
|
+
const localName2 = el.localName ?? el.tagName?.split(":").pop();
|
|
5950
|
+
if (localName2 === "p") {
|
|
5951
5951
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
5952
5952
|
if (block) blocks.push(block);
|
|
5953
|
-
} else if (
|
|
5953
|
+
} else if (localName2 === "tbl") {
|
|
5954
5954
|
const block = parseTable(el, styles, numbering, footnotes, rels);
|
|
5955
5955
|
if (block) blocks.push(block);
|
|
5956
5956
|
}
|
|
@@ -5988,183 +5988,6 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5988
5988
|
};
|
|
5989
5989
|
}
|
|
5990
5990
|
|
|
5991
|
-
// src/diff/text-diff.ts
|
|
5992
|
-
function similarity(a, b) {
|
|
5993
|
-
if (a === b) return 1;
|
|
5994
|
-
if (!a || !b) return 0;
|
|
5995
|
-
const maxLen = Math.max(a.length, b.length);
|
|
5996
|
-
if (maxLen === 0) return 1;
|
|
5997
|
-
return 1 - levenshtein(a, b) / maxLen;
|
|
5998
|
-
}
|
|
5999
|
-
function normalizedSimilarity(a, b) {
|
|
6000
|
-
return similarity(normalize(a), normalize(b));
|
|
6001
|
-
}
|
|
6002
|
-
function normalize(s) {
|
|
6003
|
-
return s.replace(/\s+/g, " ").trim();
|
|
6004
|
-
}
|
|
6005
|
-
var MAX_LEVENSHTEIN_LEN = 1e4;
|
|
6006
|
-
function levenshtein(a, b) {
|
|
6007
|
-
if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
|
|
6008
|
-
const sampleLen = Math.min(500, a.length, b.length);
|
|
6009
|
-
let diffs = 0;
|
|
6010
|
-
for (let i = 0; i < sampleLen; i++) if (a[i] !== b[i]) diffs++;
|
|
6011
|
-
const sampleRate = sampleLen > 0 ? diffs / sampleLen : 1;
|
|
6012
|
-
return Math.abs(a.length - b.length) + Math.round(Math.min(a.length, b.length) * sampleRate);
|
|
6013
|
-
}
|
|
6014
|
-
if (a.length > b.length) [a, b] = [b, a];
|
|
6015
|
-
const m = a.length;
|
|
6016
|
-
const n = b.length;
|
|
6017
|
-
let prev = Array.from({ length: m + 1 }, (_, i) => i);
|
|
6018
|
-
let curr = new Array(m + 1);
|
|
6019
|
-
for (let j = 1; j <= n; j++) {
|
|
6020
|
-
curr[0] = j;
|
|
6021
|
-
for (let i = 1; i <= m; i++) {
|
|
6022
|
-
if (a[i - 1] === b[j - 1]) {
|
|
6023
|
-
curr[i] = prev[i - 1];
|
|
6024
|
-
} else {
|
|
6025
|
-
curr[i] = 1 + Math.min(prev[i - 1], prev[i], curr[i - 1]);
|
|
6026
|
-
}
|
|
6027
|
-
}
|
|
6028
|
-
;
|
|
6029
|
-
[prev, curr] = [curr, prev];
|
|
6030
|
-
}
|
|
6031
|
-
return prev[m];
|
|
6032
|
-
}
|
|
6033
|
-
|
|
6034
|
-
// src/diff/compare.ts
|
|
6035
|
-
var SIMILARITY_THRESHOLD = 0.4;
|
|
6036
|
-
async function compare(bufferA, bufferB, options) {
|
|
6037
|
-
const [resultA, resultB] = await Promise.all([
|
|
6038
|
-
parse(bufferA, options),
|
|
6039
|
-
parse(bufferB, options)
|
|
6040
|
-
]);
|
|
6041
|
-
if (!resultA.success) throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
|
|
6042
|
-
if (!resultB.success) throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
|
|
6043
|
-
return diffBlocks(resultA.blocks, resultB.blocks);
|
|
6044
|
-
}
|
|
6045
|
-
function diffBlocks(blocksA, blocksB) {
|
|
6046
|
-
const aligned = alignBlocks(blocksA, blocksB);
|
|
6047
|
-
const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
|
|
6048
|
-
const diffs = [];
|
|
6049
|
-
for (const [a, b] of aligned) {
|
|
6050
|
-
if (a && b) {
|
|
6051
|
-
const sim = blockSimilarity(a, b);
|
|
6052
|
-
if (sim >= 0.99) {
|
|
6053
|
-
diffs.push({ type: "unchanged", before: a, after: b, similarity: 1 });
|
|
6054
|
-
stats.unchanged++;
|
|
6055
|
-
} else {
|
|
6056
|
-
const diff = { type: "modified", before: a, after: b, similarity: sim };
|
|
6057
|
-
if (a.type === "table" && b.type === "table" && a.table && b.table) {
|
|
6058
|
-
diff.cellDiffs = diffTableCells(a.table, b.table);
|
|
6059
|
-
}
|
|
6060
|
-
diffs.push(diff);
|
|
6061
|
-
stats.modified++;
|
|
6062
|
-
}
|
|
6063
|
-
} else if (a) {
|
|
6064
|
-
diffs.push({ type: "removed", before: a });
|
|
6065
|
-
stats.removed++;
|
|
6066
|
-
} else if (b) {
|
|
6067
|
-
diffs.push({ type: "added", after: b });
|
|
6068
|
-
stats.added++;
|
|
6069
|
-
}
|
|
6070
|
-
}
|
|
6071
|
-
return { stats, diffs };
|
|
6072
|
-
}
|
|
6073
|
-
function alignBlocks(a, b) {
|
|
6074
|
-
const m = a.length, n = b.length;
|
|
6075
|
-
if (m * n > 1e7) return fallbackAlign(a, b);
|
|
6076
|
-
const simCache = /* @__PURE__ */ new Map();
|
|
6077
|
-
const getSim = (i2, j2) => {
|
|
6078
|
-
const key = `${i2},${j2}`;
|
|
6079
|
-
let v = simCache.get(key);
|
|
6080
|
-
if (v === void 0) {
|
|
6081
|
-
v = blockSimilarity(a[i2], b[j2]);
|
|
6082
|
-
simCache.set(key, v);
|
|
6083
|
-
}
|
|
6084
|
-
return v;
|
|
6085
|
-
};
|
|
6086
|
-
const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
|
|
6087
|
-
for (let i2 = 1; i2 <= m; i2++) {
|
|
6088
|
-
for (let j2 = 1; j2 <= n; j2++) {
|
|
6089
|
-
if (getSim(i2 - 1, j2 - 1) >= SIMILARITY_THRESHOLD) {
|
|
6090
|
-
dp[i2][j2] = dp[i2 - 1][j2 - 1] + 1;
|
|
6091
|
-
} else {
|
|
6092
|
-
dp[i2][j2] = Math.max(dp[i2 - 1][j2], dp[i2][j2 - 1]);
|
|
6093
|
-
}
|
|
6094
|
-
}
|
|
6095
|
-
}
|
|
6096
|
-
const pairs = [];
|
|
6097
|
-
let i = m, j = n;
|
|
6098
|
-
while (i > 0 && j > 0) {
|
|
6099
|
-
if (getSim(i - 1, j - 1) >= SIMILARITY_THRESHOLD && dp[i][j] === dp[i - 1][j - 1] + 1) {
|
|
6100
|
-
pairs.push([i - 1, j - 1]);
|
|
6101
|
-
i--;
|
|
6102
|
-
j--;
|
|
6103
|
-
} else if (dp[i - 1][j] >= dp[i][j - 1]) {
|
|
6104
|
-
i--;
|
|
6105
|
-
} else {
|
|
6106
|
-
j--;
|
|
6107
|
-
}
|
|
6108
|
-
}
|
|
6109
|
-
pairs.reverse();
|
|
6110
|
-
const result = [];
|
|
6111
|
-
let ai = 0, bi = 0;
|
|
6112
|
-
for (const [pi, pj] of pairs) {
|
|
6113
|
-
while (ai < pi) result.push([a[ai++], null]);
|
|
6114
|
-
while (bi < pj) result.push([null, b[bi++]]);
|
|
6115
|
-
result.push([a[ai++], b[bi++]]);
|
|
6116
|
-
}
|
|
6117
|
-
while (ai < m) result.push([a[ai++], null]);
|
|
6118
|
-
while (bi < n) result.push([null, b[bi++]]);
|
|
6119
|
-
return result;
|
|
6120
|
-
}
|
|
6121
|
-
function fallbackAlign(a, b) {
|
|
6122
|
-
const result = [];
|
|
6123
|
-
const len = Math.max(a.length, b.length);
|
|
6124
|
-
for (let i = 0; i < len; i++) {
|
|
6125
|
-
result.push([a[i] || null, b[i] || null]);
|
|
6126
|
-
}
|
|
6127
|
-
return result;
|
|
6128
|
-
}
|
|
6129
|
-
function blockSimilarity(a, b) {
|
|
6130
|
-
if (a.type !== b.type) return 0;
|
|
6131
|
-
if (a.text !== void 0 && b.text !== void 0) {
|
|
6132
|
-
return normalizedSimilarity(a.text || "", b.text || "");
|
|
6133
|
-
}
|
|
6134
|
-
if (a.type === "table" && a.table && b.table) {
|
|
6135
|
-
return tableSimilarity(a.table, b.table);
|
|
6136
|
-
}
|
|
6137
|
-
if (a.type === b.type) return 1;
|
|
6138
|
-
return 0;
|
|
6139
|
-
}
|
|
6140
|
-
function tableSimilarity(a, b) {
|
|
6141
|
-
const dimSim = 1 - Math.abs(a.rows * a.cols - b.rows * b.cols) / Math.max(a.rows * a.cols, b.rows * b.cols, 1);
|
|
6142
|
-
const textsA = a.cells.flat().map((c) => c.text).join(" ");
|
|
6143
|
-
const textsB = b.cells.flat().map((c) => c.text).join(" ");
|
|
6144
|
-
const contentSim = normalizedSimilarity(textsA, textsB);
|
|
6145
|
-
return dimSim * 0.3 + contentSim * 0.7;
|
|
6146
|
-
}
|
|
6147
|
-
function diffTableCells(a, b) {
|
|
6148
|
-
const maxRows = Math.max(a.rows, b.rows);
|
|
6149
|
-
const maxCols = Math.max(a.cols, b.cols);
|
|
6150
|
-
const result = [];
|
|
6151
|
-
for (let r = 0; r < maxRows; r++) {
|
|
6152
|
-
const row = [];
|
|
6153
|
-
for (let c = 0; c < maxCols; c++) {
|
|
6154
|
-
const cellA = r < a.rows && c < a.cols ? a.cells[r][c].text : void 0;
|
|
6155
|
-
const cellB = r < b.rows && c < b.cols ? b.cells[r][c].text : void 0;
|
|
6156
|
-
let type;
|
|
6157
|
-
if (cellA === void 0) type = "added";
|
|
6158
|
-
else if (cellB === void 0) type = "removed";
|
|
6159
|
-
else if (cellA === cellB) type = "unchanged";
|
|
6160
|
-
else type = "modified";
|
|
6161
|
-
row.push({ type, before: cellA, after: cellB });
|
|
6162
|
-
}
|
|
6163
|
-
result.push(row);
|
|
6164
|
-
}
|
|
6165
|
-
return result;
|
|
6166
|
-
}
|
|
6167
|
-
|
|
6168
5991
|
// src/form/recognize.ts
|
|
6169
5992
|
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
|
|
6170
5993
|
"\uC131\uBA85",
|
|
@@ -6205,15 +6028,20 @@ var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
|
|
|
6205
6028
|
"\uB2E8\uAC00",
|
|
6206
6029
|
"\uD569\uACC4",
|
|
6207
6030
|
"\uACC4",
|
|
6208
|
-
"\uC18C\uACC4"
|
|
6031
|
+
"\uC18C\uACC4",
|
|
6032
|
+
"\uB4F1\uB85D\uAE30\uC900\uC9C0",
|
|
6033
|
+
"\uBCF8\uC801",
|
|
6034
|
+
"\uC704\uC784\uC778",
|
|
6035
|
+
"\uCCAD\uAD6C\uC0AC\uC720",
|
|
6036
|
+
"\uC18C\uBA85\uC790\uB8CC"
|
|
6209
6037
|
]);
|
|
6210
6038
|
/**
 * Decide whether a table cell's text looks like a form label.
 *
 * Trailing footnote markers (superscript digits, `*`, `※`) are stripped
 * first. The remaining text is a label when it is non-empty, at most 30
 * characters, and one of the following holds:
 *   - it contains any entry of LABEL_KEYWORDS;
 *   - it consists only of Hangul, whitespace, parentheses, middle dots and
 *     colons, with 2 to 8 non-whitespace characters;
 *   - it is Hangul/Latin text ending in a colon.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean} True when the text is treated as a label.
 */
function isLabelCell(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate.length === 0 || candidate.length > 30) {
    return false;
  }
  if ([...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword))) {
    return true;
  }
  const compactLength = candidate.replace(/\s/g, "").length;
  const isShortHangulLabel =
    /^[가-힣\s()()·::]+$/.test(candidate) &&
    compactLength >= 2 &&
    compactLength <= 8 &&
    !/\d/.test(candidate);
  return isShortHangulLabel || /^[가-힣A-Za-z\s]+[::]$/.test(candidate);
}
|
|
@@ -6236,63 +6064,572 @@ function extractFormFields(blocks) {
|
|
|
6236
6064
|
fields.push(...inlineFields);
|
|
6237
6065
|
}
|
|
6238
6066
|
}
|
|
6239
|
-
const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
|
|
6240
|
-
return { fields, confidence: Math.min(confidence, 1) };
|
|
6067
|
+
const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
|
|
6068
|
+
return { fields, confidence: Math.min(confidence, 1) };
|
|
6069
|
+
}
|
|
6070
|
+
/**
 * Extract label/value form fields from a parsed table.
 *
 * Strategy 1 (key/value pairs): every cell accepted by `isLabelCell` is
 * paired with the cell to its right. Strategy 2 (header row) runs only when
 * strategy 1 found nothing and the table is at least 2x2: if every cell of
 * the first row holds 1-20 characters of text, each non-empty data cell
 * below is emitted with its column header as the label.
 *
 * Missing rows or cells (ragged tables) are skipped instead of throwing.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols - 1; c++) {
        // `rows`/`cols` describe the grid, but `cells` may be ragged.
        const labelCell = table.cells[r]?.[c];
        const valueCell = table.cells[r]?.[c + 1];
        if (!labelCell || !valueCell) continue;
        if (isLabelCell(labelCell.text)) {
          fields.push({
            // Drop a trailing colon so "Name:" and "Name" yield the same label.
            label: labelCell.text.trim().replace(/[::]\s*$/, ""),
            value: valueCell.text.trim(),
            row: r,
            col: c
          });
        }
      }
    }
  }
  if (fields.length > 0 || table.rows < 2 || table.cols < 2) return fields;

  const headerRow = table.cells[0];
  if (!headerRow) return fields;
  const allLabels = headerRow.every((cell) => {
    const t = cell?.text.trim() ?? "";
    return t.length > 0 && t.length <= 20;
  });
  if (!allLabels) return fields;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const label = headerRow[c]?.text.trim() ?? "";
      const value = table.cells[r]?.[c]?.text.trim() ?? "";
      if (label && value) {
        fields.push({ label, value, row: r, col: c });
      }
    }
  }
  return fields;
}
|
|
6108
|
+
/**
 * Extract inline "label: value" pairs from free text.
 *
 * A label is 2-10 Hangul/Latin letters followed by a colon; the value runs up
 * to the next newline, comma or semicolon (at most 100 characters). Pairs
 * whose value is blank after trimming are dropped. Inline fields have no
 * table position, so `row` and `col` are both -1.
 *
 * @param {string} text - Paragraph text to scan.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  const inlineField = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
  const found = [];
  for (let hit = inlineField.exec(text); hit !== null; hit = inlineField.exec(text)) {
    const [, rawLabel, rawValue] = hit;
    const value = rawValue.trim();
    if (!value) {
      continue;
    }
    found.push({ label: rawLabel.trim(), value, row: -1, col: -1 });
  }
  return found;
}
|
|
6121
|
+
|
|
6122
|
+
// src/form/match.ts
|
|
6123
|
+
/**
 * Canonicalize a label for matching: trim it, then remove every colon,
 * whitespace character, parenthesis and middle dot.
 *
 * @param {string} label - Raw label text.
 * @returns {string} The label with decoration characters removed.
 */
function normalizeLabel(label) {
  const decoration = /[::\s()()·]/g;
  const trimmed = label.trim();
  return trimmed.replace(decoration, "");
}
|
|
6126
|
+
/**
 * Find the key in `values` that best corresponds to a normalized cell label.
 *
 * An exact key wins outright. Otherwise a key qualifies when one string is a
 * prefix of the other and the shorter one is at least 60% as long as the
 * longer one. Among qualifying keys, the one with the longest shared prefix
 * wins; ties keep the key encountered first.
 *
 * @param {string} cellLabel - Normalized label text from the document.
 * @param {Map<string, *>} values - Map keyed by normalized labels.
 * @returns {string|undefined} The matching key, or `undefined` if none qualifies.
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) {
    return cellLabel;
  }
  let winner;
  let winnerOverlap = 0;
  for (const candidate of values.keys()) {
    let overlap = 0;
    if (cellLabel.startsWith(candidate)) {
      // The key is a prefix of the label.
      if (candidate.length >= cellLabel.length * 0.6) overlap = candidate.length;
    } else if (candidate.startsWith(cellLabel)) {
      // The label is a prefix of the key.
      if (cellLabel.length >= candidate.length * 0.6) overlap = cellLabel.length;
    }
    if (overlap > winnerOverlap) {
      winnerOverlap = overlap;
      winner = candidate;
    }
  }
  return winner;
}
|
|
6145
|
+
/**
 * Stricter label test than `isLabelCell`: after stripping trailing footnote
 * markers, the text must be non-empty, at most 15 characters long, and
 * contain an entry of LABEL_KEYWORDS.
 *
 * @param {string} text - Raw cell text.
 * @returns {boolean} True when the text contains a known label keyword.
 */
function isKeywordLabel(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate.length === 0 || candidate.length > 15) {
    return false;
  }
  return [...LABEL_KEYWORDS].some((keyword) => candidate.includes(keyword));
}
|
|
6153
|
+
/**
 * Fill blanks embedded inside a single cell's text.
 *
 * Three patterns are rewritten, in this order:
 *   1. `label(  )suffix` - empty parentheses after a word. The key is looked
 *      up as prefix+suffix first, then as the prefix alone.
 *   2. `□keyword` - an unchecked box. It is ticked when the supplied value,
 *      trimmed, is empty or one of the recognised "checked" tokens.
 *   3. `(keyword:  )` - a parenthesised label followed by a blank.
 *
 * Each key that is used is added to `matchedLabels`.
 *
 * @param {string} cellText - Original cell text.
 * @param {Map<string, string>} values - Values keyed by normalized label.
 * @param {Set<string>} matchedLabels - Mutated: receives every key used.
 * @returns {{text: string, matches: Array<{key: string, label: string, value: string}>}|null}
 *   The rewritten text and the substitutions made, or `null` if nothing matched.
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const matches = [];
  const checkedTokens = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"];

  const record = (key, label, value) => {
    matchedLabels.add(key);
    matches.push({ key, label, value });
  };

  const fillEmptyParens = (whole, prefix, suffix) => {
    const label = prefix + suffix;
    const key = [normalizeLabel(label), normalizeLabel(prefix)].find((candidate) => values.has(candidate));
    if (key === undefined) {
      return whole;
    }
    const replacement = values.get(key);
    record(key, label, replacement);
    return `${prefix}(${replacement})${suffix}`;
  };

  const tickCheckbox = (whole, keyword) => {
    const key = normalizeLabel(keyword);
    if (!values.has(key)) {
      return whole;
    }
    const flag = values.get(key).trim();
    if (flag !== "" && !checkedTokens.includes(flag)) {
      return whole;
    }
    record(key, `\u25A1${keyword}`, "\u2611");
    return `\u2611${keyword}`;
  };

  const fillLabelledParens = (whole, keyword) => {
    const key = normalizeLabel(keyword);
    if (!values.has(key)) {
      return whole;
    }
    const replacement = values.get(key);
    record(key, keyword, replacement);
    return `(${keyword}\uFF1A${replacement})`;
  };

  const text = cellText
    .replace(/([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g, fillEmptyParens)
    .replace(/□([가-힣A-Za-z]+)/g, tickCheckbox)
    .replace(/\(([가-힣A-Za-z]+)[::]\s{1,}\)/g, fillLabelledParens);

  return matches.length === 0 ? null : { text, matches };
}
|
|
6197
|
+
/**
 * Build a Map of the caller's values keyed by normalized label.
 *
 * If two labels normalize to the same key, the later entry overwrites the
 * earlier one.
 *
 * @param {Object<string, *>} values - Values keyed by raw label.
 * @returns {Map<string, *>} Values keyed by `normalizeLabel(label)`.
 */
function normalizeValues(values) {
  const normalizedEntries = Object.entries(values).map(
    ([rawLabel, value]) => [normalizeLabel(rawLabel), value]
  );
  return new Map(normalizedEntries);
}
|
|
6204
|
+
/**
 * List the labels that were supplied but never used.
 *
 * Each unused normalized key is reported as the first original label that
 * normalizes to it, falling back to the normalized key itself.
 *
 * @param {Map<string, *>} normalizedValues - Values keyed by normalized label.
 * @param {Set<string>} matchedLabels - Normalized keys that were filled.
 * @param {Object<string, *>} originalValues - The caller's original values object.
 * @returns {string[]} Unused labels, in the Map's key order.
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  const originalLabels = Object.keys(originalValues);
  const unused = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) {
      continue;
    }
    const source = originalLabels.find((rawLabel) => normalizeLabel(rawLabel) === key);
    unused.push(source ?? key);
  }
  return unused;
}
|
|
6212
|
+
|
|
6213
|
+
// src/form/filler.ts
|
|
6214
|
+
/**
 * Fill form fields in a block list and return the filled copy.
 *
 * `blocks` is deep-cloned first; the input is never mutated. Filling runs in
 * three passes over the clone:
 *   1. in-cell patterns in every table cell (`fillInCellPatterns`);
 *   2. label/value and header-row filling per table (`fillTable`);
 *   3. inline "label: value" pairs in paragraph text (`fillInlineFields`).
 *
 * Pass 1 finishes for all tables before pass 2 starts, so keys consumed by
 * in-cell patterns are already in `matchedLabels` when `fillTable` runs.
 *
 * @param {Array<object>} blocks - Parsed document blocks.
 * @param {Object<string, string>} values - Values keyed by label.
 * @returns {{blocks: Array<object>, filled: Array<object>, unmatched: string[]}}
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = new Set();
  const normalizedValues = normalizeValues(values);

  // Pattern-filled coordinates are tracked PER TABLE. A single set keyed only
  // by "row,col" would make a pattern-filled cell in one table force the
  // prepend path in `fillTable` for the same coordinates in every other table.
  const patternFilledByTable = new Map();
  const tableBlocks = cloned.filter((block) => block.type === "table" && block.table);

  for (const { table } of tableBlocks) {
    let patternFilledCells = patternFilledByTable.get(table);
    if (!patternFilledCells) {
      patternFilledCells = new Set();
      patternFilledByTable.set(table, patternFilledCells);
    }
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols; c++) {
        const cell = table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (!result) continue;
        cell.text = result.text;
        patternFilledCells.add(`${r},${c}`);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: r, col: c });
        }
      }
    }
  }

  for (const { table } of tableBlocks) {
    fillTable(table, normalizedValues, filled, matchedLabels, patternFilledByTable.get(table));
  }

  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
|
|
6249
|
+
/**
 * Fill one table in place.
 *
 * Pass 1 (label/value pairs): each cell accepted by `isLabelCell` is matched
 * against `values`; on a match the cell to its right receives the value.
 * If that right-hand cell is listed in `patternFilledCells`, the value is
 * prepended to its existing text instead of replacing it. Pairs whose value
 * cell is itself a keyword label are skipped.
 *
 * Pass 2 (header row): when every first-row cell is a label of 1-20
 * characters, data cells are filled by column header. Keys already in
 * `matchedLabels` are skipped, so each key is written at most once here.
 *
 * Tables narrower than two columns are left untouched. Missing rows or cells
 * (ragged tables) are skipped instead of throwing.
 *
 * @param {{rows: number, cols: number, cells: Array<Array<{text: string}>>}} table - Mutated.
 * @param {Map<string, string>} values - Values keyed by normalized label.
 * @param {Array<object>} filled - Mutated: receives one record per filled cell.
 * @param {Set<string>} matchedLabels - Mutated: receives every key used.
 * @param {Set<string>} [patternFilledCells] - "row,col" keys of cells already
 *   rewritten by in-cell pattern filling.
 * @returns {void}
 */
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
  if (table.cols < 2) return;

  for (let r = 0; r < table.rows; r++) {
    for (let c = 0; c < table.cols - 1; c++) {
      // Guard the row as well as the cell: `cells` may be ragged.
      const labelCell = table.cells[r]?.[c];
      const valueCell = table.cells[r]?.[c + 1];
      if (!labelCell || !valueCell) continue;
      if (!isLabelCell(labelCell.text)) continue;
      // Two adjacent labels: the right-hand cell is not a value slot.
      if (isKeywordLabel(valueCell.text)) continue;
      const normalizedCellLabel = normalizeLabel(labelCell.text);
      if (!normalizedCellLabel) continue;
      const matchKey = findMatchingKey(normalizedCellLabel, values);
      if (matchKey === void 0) continue;
      const newValue = values.get(matchKey);
      if (patternFilledCells?.has(`${r},${c + 1}`)) {
        // Keep the text already produced by in-cell pattern filling.
        valueCell.text = newValue + " " + valueCell.text;
      } else {
        valueCell.text = newValue;
      }
      matchedLabels.add(matchKey);
      filled.push({
        label: labelCell.text.trim().replace(/[::]\s*$/, ""),
        value: newValue,
        row: r,
        col: c
      });
    }
  }

  if (table.rows < 2) return;
  const headerRow = table.cells[0];
  if (!headerRow) return;
  const allLabels = headerRow.every((cell) => {
    const t = cell?.text.trim() ?? "";
    return t.length > 0 && t.length <= 20 && isLabelCell(t);
  });
  if (!allLabels) return;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const dataCell = table.cells[r]?.[c];
      if (!headerCell || !dataCell) continue;
      const headerLabel = normalizeLabel(headerCell.text);
      const matchKey = findMatchingKey(headerLabel, values);
      if (matchKey === void 0) continue;
      if (matchedLabels.has(matchKey)) continue;
      const newValue = values.get(matchKey);
      dataCell.text = newValue;
      matchedLabels.add(matchKey);
      filled.push({
        label: headerCell.text.trim(),
        value: newValue,
        row: r,
        col: c
      });
    }
  }
}
|
|
6303
|
+
/**
 * Replace the value part of inline "label: value" pairs in paragraph text.
 *
 * A label is 2-10 Hangul/Latin letters followed by a colon; the existing
 * value (0-100 characters up to a newline, comma or semicolon) is discarded.
 * Pairs whose label has no matching key are left exactly as they were.
 * A replaced pair is always rewritten as `<label>: <value>`.
 *
 * @param {string} text - Paragraph text.
 * @param {Map<string, string>} values - Values keyed by normalized label.
 * @param {Array<object>} filled - Mutated: receives one record per replacement
 *   (`row` and `col` are -1).
 * @param {Set<string>} matchedLabels - Mutated: receives every key used.
 * @returns {string} The text with matched values substituted.
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  const inlineField = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;

  const substitute = (whole, rawLabel) => {
    const key = findMatchingKey(normalizeLabel(rawLabel), values);
    if (key === undefined) {
      return whole;
    }
    const replacement = values.get(key);
    matchedLabels.add(key);
    filled.push({ label: rawLabel.trim(), value: replacement, row: -1, col: -1 });
    return `${rawLabel}: ${replacement}`;
  };

  return text.replace(inlineField, substitute);
}
|
|
6322
|
+
|
|
6323
|
+
// src/form/filler-hwpx.ts
|
|
6324
|
+
import JSZip5 from "jszip";
|
|
6325
|
+
import { DOMParser as DOMParser4, XMLSerializer } from "@xmldom/xmldom";
|
|
6326
|
+
/**
 * Fill form fields directly inside an HWPX archive and return the new archive.
 *
 * Every `section<N>.xml` entry is parsed, filled, and written back to the zip
 * only if it was modified. Sections are processed in ascending numeric order
 * (section2 before section10), so first-match-wins filling and the order of
 * `filled` follow document order. Within a section:
 *   1. in-cell patterns are applied to every table cell;
 *   2. label/value pairs are filled row by row, then header-row tables;
 *   3. the first matching inline "label: value" pair of each paragraph outside
 *      a table is replaced.
 *
 * @param {ArrayBuffer|Uint8Array} hwpxBuffer - Archive contents, as accepted by
 *   `JSZip.loadAsync`.
 * @param {Object<string, string>} values - Values keyed by label.
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array<object>, unmatched: string[]}>}
 * @throws {KordocError} When the archive contains no section file.
 */
async function fillHwpx(hwpxBuffer, values) {
  const zip = await JSZip5.loadAsync(hwpxBuffer);
  const filled = [];
  const matchedLabels = new Set();
  const normalizedValues = normalizeValues(values);

  // A plain lexicographic sort would put "section10.xml" before
  // "section2.xml"; order by section number, then by full path for ties.
  const sectionNumber = (name) => Number.parseInt(/(\d+)\.xml$/i.exec(name)[1], 10);
  const sectionFiles = Object.keys(zip.files)
    .filter((name) => /[Ss]ection\d+\.xml$/i.test(name))
    .sort((a, b) => sectionNumber(a) - sectionNumber(b) || (a < b ? -1 : a > b ? 1 : 0));
  if (sectionFiles.length === 0) {
    throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }

  const xmlParser = new DOMParser4();
  const xmlSerializer = new XMLSerializer();

  for (const sectionPath of sectionFiles) {
    const zipEntry = zip.file(sectionPath);
    if (!zipEntry) continue;
    const rawXml = await zipEntry.async("text");
    const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
    if (!doc.documentElement) continue;
    let modified = false;
    const tables = findAllElements(doc.documentElement, "tbl");

    // Pass 1: in-cell patterns. Cells touched here are remembered by element
    // identity so pass 2 prepends to them instead of overwriting.
    const cellPatternApplied = new Set();
    for (const tblEl of tables) {
      const allCells = findAllElements(tblEl, "tc");
      for (const tcEl of allCells) {
        const tNodes = collectCellTextNodes(tcEl);
        const fullText = tNodes.map((n) => n.text).join("");
        const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
        if (!result) continue;
        applyTextReplacements(tNodes, fullText, result.text);
        cellPatternApplied.add(tcEl);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
        }
        modified = true;
      }
    }

    // Pass 2: label/value pairs, then header-row tables.
    for (const tblEl of tables) {
      const rows = findDirectChildren(tblEl, "tr");
      for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        const trEl = rows[rowIdx];
        const cells = findDirectChildren(trEl, "tc");
        for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
          const labelText = extractCellText(cells[colIdx]);
          if (!isLabelCell(labelText)) continue;
          const valueCell = cells[colIdx + 1];
          const valueText = extractCellText(valueCell);
          // Two adjacent labels: the right-hand cell is not a value slot.
          if (isKeywordLabel(valueText)) continue;
          const normalizedCellLabel = normalizeLabel(labelText);
          if (!normalizedCellLabel) continue;
          const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
          if (matchKey === void 0) continue;
          const newValue = normalizedValues.get(matchKey);
          if (cellPatternApplied.has(valueCell)) {
            prependCellText(valueCell, newValue);
          } else {
            replaceCellText(valueCell, newValue);
          }
          matchedLabels.add(matchKey);
          filled.push({
            label: labelText.trim().replace(/[::]\s*$/, ""),
            value: newValue,
            row: rowIdx,
            col: colIdx
          });
          modified = true;
        }
      }
      if (rows.length >= 2) {
        const headerCells = findDirectChildren(rows[0], "tc");
        const allLabels = headerCells.every((cell) => {
          const t = extractCellText(cell).trim();
          return t.length > 0 && t.length <= 20 && isLabelCell(t);
        });
        if (allLabels) {
          for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
            const dataCells = findDirectChildren(rows[rowIdx], "tc");
            for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
              const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
              const matchKey = findMatchingKey(headerLabel, normalizedValues);
              if (matchKey === void 0) continue;
              // Each key is written at most once by header-based filling.
              if (matchedLabels.has(matchKey)) continue;
              const newValue = normalizedValues.get(matchKey);
              replaceCellText(dataCells[colIdx], newValue);
              matchedLabels.add(matchKey);
              filled.push({
                label: extractCellText(headerCells[colIdx]).trim(),
                value: newValue,
                row: rowIdx,
                col: colIdx
              });
              modified = true;
            }
          }
        }
      }
    }

    // Pass 3: inline "label: value" pairs in paragraphs outside tables.
    const allParagraphs = findAllElements(doc.documentElement, "p");
    for (const pEl of allParagraphs) {
      if (isInsideTable(pEl)) continue;
      const tNodes = collectTextNodes(pEl);
      const fullText = tNodes.map((n) => n.text).join("");
      const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
      let match;
      while ((match = pattern.exec(fullText)) !== null) {
        const rawLabel = match[1];
        const normalized = normalizeLabel(rawLabel);
        const matchKey = findMatchingKey(normalized, normalizedValues);
        if (matchKey === void 0) continue;
        const newValue = normalizedValues.get(matchKey);
        // The value is the tail of the match; the label and colon are kept.
        const valueStart = match.index + match[0].length - match[2].length;
        const valueEnd = match.index + match[0].length;
        replaceTextRange(tNodes, valueStart, valueEnd, newValue);
        matchedLabels.add(matchKey);
        filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
        modified = true;
        // Offsets into `fullText` are stale after the replacement, so only
        // the first matching pair of a paragraph is filled.
        break;
      }
    }

    if (modified) {
      const newXml = xmlSerializer.serializeToString(doc);
      zip.file(sectionPath, newXml);
    }
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
  return { buffer, filled, unmatched };
}
|
|
6452
|
+
/**
 * Return an element's name without its namespace prefix.
 *
 * Uses `tagName` when truthy, otherwise `localName`, otherwise the empty
 * string, and strips a leading `prefix:`.
 *
 * @param {{tagName?: string, localName?: string}} el
 * @returns {string} The unprefixed name.
 */
function localName(el) {
  const qualifiedName = el.tagName || el.localName || "";
  return qualifiedName.replace(/^[^:]+:/, "");
}
|
|
6455
|
+
/**
 * Collect every descendant element of `node` whose unprefixed name equals
 * `tagLocalName`, in document (pre-order) order. `node` itself is never
 * included, and only element nodes are descended into.
 *
 * @param {object} node - Root of the search.
 * @param {string} tagLocalName - Unprefixed element name to match.
 * @returns {object[]} Matching descendant elements.
 */
function findAllElements(node, tagLocalName) {
  const found = [];
  const pending = [];

  // Push element children in reverse so they pop off in document order.
  const enqueueElementChildren = (owner) => {
    const kids = owner.childNodes;
    if (!kids) {
      return;
    }
    for (let idx = kids.length - 1; idx >= 0; idx -= 1) {
      if (kids[idx].nodeType === 1) {
        pending.push(kids[idx]);
      }
    }
  };

  enqueueElementChildren(node);
  while (pending.length > 0) {
    const element = pending.pop();
    if (localName(element) === tagLocalName) {
      found.push(element);
    }
    enqueueElementChildren(element);
  }
  return found;
}
|
|
6470
|
+
/**
 * Return the direct element children of `parent` whose unprefixed name
 * equals `tagLocalName`.
 *
 * @param {object} parent - Node whose immediate children are scanned.
 * @param {string} tagLocalName - Unprefixed element name to match.
 * @returns {object[]} Matching children in document order; empty when
 *   `parent` has no `childNodes`.
 */
function findDirectChildren(parent, tagLocalName) {
  const kids = parent.childNodes;
  if (!kids) {
    return [];
  }
  return Array.prototype.filter.call(
    kids,
    (kid) => kid.nodeType === 1 && localName(kid) === tagLocalName
  );
}
|
|
6482
|
+
/**
 * Report whether `el` has an ancestor element whose unprefixed name is `tbl`.
 *
 * @param {{parentNode: object|null}} el - Node to test; `el` itself is not checked.
 * @returns {boolean} True when some ancestor is a table element.
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") {
      return true;
    }
  }
  return false;
}
|
|
6490
|
+
/**
 * Flattens the text of a table cell element into a single string.
 *
 * Text nodes (nodeType 3) are appended as-is. Elements named "t", "run", "r",
 * "p" or "subList" are descended into; a "tab" element contributes a single
 * whitespace separator and a "br" element a newline. Every other element is
 * skipped together with its subtree.
 *
 * @param {object} tcEl - Cell element to read.
 * @returns {string} Concatenated text in document order.
 */
function extractCellText(tcEl) {
  const containerTags = new Set(["t", "run", "r", "p", "subList"]);
  const pieces = [];
  const visit = (parent) => {
    const kids = parent.childNodes;
    if (!kids) return;
    for (let idx = 0; idx < kids.length; idx++) {
      const kid = kids[idx];
      if (kid.nodeType === 3) {
        pieces.push(kid.textContent || "");
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (containerTags.has(tag)) {
        visit(kid);
      } else if (tag === "tab") {
        pieces.push(" ");
      } else if (tag === "br") {
        pieces.push("\n");
      }
    }
  };
  visit(tcEl);
  return pieces.join("");
}
|
|
6511
|
+
/**
 * Prefixes the first "t" element found under `tcEl` with `text` followed by a
 * single separator character, keeping that element's existing text after it.
 *
 * Does nothing when the cell contains no "t" element.
 *
 * @param {object} tcEl - Cell element to modify in place.
 * @param {string} text - Text to put in front of the existing content.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (firstT === undefined) return;
  const previous = firstT.textContent || "";
  clearChildren(firstT);
  const combined = firstT.ownerDocument.createTextNode(text + " " + previous);
  firstT.appendChild(combined);
}
|
|
6519
|
+
/**
 * Replaces the text of a table cell with `newValue`.
 *
 * The value is written into the first paragraph ("p") found under the cell:
 * into its first "run"/"r" element when it has any (remaining runs are set to
 * empty text), otherwise into its first "t" element (remaining "t" elements
 * are emptied). All later paragraphs are kept in the tree but their runs and
 * "t" elements are emptied. Does nothing when the cell has no paragraph.
 *
 * @param {object} tcEl - Cell element to modify in place.
 * @param {string} newValue - Replacement text.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const [firstP, ...laterParagraphs] = findAllElements(tcEl, "p");
  if (firstP === undefined) return;

  // "run" matches are listed before "r" matches, as two separate searches.
  const runsIn = (p) => findAllElements(p, "run").concat(findAllElements(p, "r"));

  const firstRuns = runsIn(firstP);
  if (firstRuns.length > 0) {
    firstRuns.forEach((run, idx) => {
      setRunText(run, idx === 0 ? newValue : "");
    });
  } else {
    const [headT, ...otherTs] = findAllElements(firstP, "t");
    if (headT !== undefined) {
      clearChildren(headT);
      headT.appendChild(headT.ownerDocument.createTextNode(newValue));
      for (const t of otherTs) clearChildren(t);
    }
  }

  for (const paragraph of laterParagraphs) {
    if (!paragraph.parentNode) continue;
    for (const run of runsIn(paragraph)) setRunText(run, "");
    for (const t of findAllElements(paragraph, "t")) clearChildren(t);
  }
}
|
|
6242
|
-
function
|
|
6243
|
-
const
|
|
6244
|
-
if (
|
|
6245
|
-
|
|
6246
|
-
|
|
6247
|
-
|
|
6248
|
-
|
|
6249
|
-
if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
|
|
6250
|
-
fields.push({
|
|
6251
|
-
label: labelCell.text.trim().replace(/[::]\s*$/, ""),
|
|
6252
|
-
value: valueCell.text.trim(),
|
|
6253
|
-
row: r,
|
|
6254
|
-
col: c
|
|
6255
|
-
});
|
|
6256
|
-
}
|
|
6257
|
-
}
|
|
6549
|
+
/**
 * Sets the text carried by a run element.
 *
 * The first "t" element under `runEl` receives a single text node holding
 * `text`; every further "t" element under it is emptied. Does nothing when
 * the run has no "t" element.
 *
 * @param {object} runEl - Run element to modify in place.
 * @param {string} text - New text for the run.
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [primaryT, ...extraTs] = findAllElements(runEl, "t");
  if (primaryT === undefined) return;
  clearChildren(primaryT);
  primaryT.appendChild(primaryT.ownerDocument.createTextNode(text));
  for (const extra of extraTs) clearChildren(extra);
}
|
|
6559
|
+
/**
 * Removes every child node of `el`, one at a time from the front.
 *
 * @param {object} el - Node exposing `firstChild` and `removeChild`.
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
|
|
6562
|
+
/**
 * Lists the "t" elements under `pEl` together with their text and the
 * position of that text within the concatenation of all listed texts.
 *
 * @param {object} pEl - Element to search.
 * @returns {{element: object, text: string, offset: number}[]} One entry per
 *   "t" element in document order; `offset` is the summed length of the
 *   preceding entries' text.
 */
function collectTextNodes(pEl) {
  let cursor = 0;
  return findAllElements(pEl, "t").map((element) => {
    const text = element.textContent || "";
    const entry = { element, text, offset: cursor };
    cursor += text.length;
    return entry;
  });
}
|
|
6280
|
-
function
|
|
6281
|
-
|
|
6282
|
-
const
|
|
6283
|
-
|
|
6284
|
-
|
|
6285
|
-
|
|
6286
|
-
const
|
|
6287
|
-
|
|
6288
|
-
|
|
6573
|
+
/**
 * Replaces the characters in [globalStart, globalEnd) of the concatenated
 * text described by `tNodes` with `newValue`, rewriting the affected "t"
 * elements in place.
 *
 * The first node overlapping the range receives `newValue` in place of its
 * covered part; every further overlapping node just loses its covered part.
 *
 * A zero-width range (globalEnd <= globalStart) is treated as an insertion at
 * `globalStart`. This includes positions that fall exactly on a node boundary
 * or at the very end of the text, which an overlap test alone would never
 * match and would therefore silently drop.
 *
 * The `text`/`offset` fields of `tNodes` are not refreshed; collect them again
 * before issuing another replacement against the same elements.
 *
 * @param {{element: object, text: string, offset: number}[]} tNodes - Entries
 *   as produced by collectTextNodes / collectCellTextNodes.
 * @param {number} globalStart - Inclusive start offset in the joined text.
 * @param {number} globalEnd - Exclusive end offset in the joined text.
 * @param {string} newValue - Replacement text.
 * @returns {boolean} True when some element was rewritten, false when the
 *   range touched no node (nothing was changed).
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  const writeText = (node, text) => {
    clearChildren(node.element);
    node.element.appendChild(node.element.ownerDocument.createTextNode(text));
  };

  if (globalEnd <= globalStart) {
    // Pure insertion: use the first node whose span contains or touches the
    // insertion point, so text is appended to the node that ends there.
    for (const node of tNodes) {
      const nodeEnd = node.offset + node.text.length;
      if (node.offset <= globalStart && globalStart <= nodeEnd) {
        const at = globalStart - node.offset;
        writeText(node, node.text.slice(0, at) + newValue + node.text.slice(at));
        return true;
      }
    }
    return false;
  }

  let replaced = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = nodeStart + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    const before = node.text.slice(0, localStart);
    const after = node.text.slice(localEnd);
    // Only the first overlapping node carries the replacement text.
    writeText(node, replaced ? before + after : before + newValue + after);
    replaced = true;
  }
  return replaced;
}
|
|
6597
|
+
/**
 * Lists the "t" elements under a table cell together with their text and
 * running offset.
 *
 * The collection logic is identical for paragraphs and cells, so this
 * delegates to collectTextNodes instead of keeping a second copy of the loop.
 *
 * @param {object} tcEl - Cell element to search.
 * @returns {{element: object, text: string, offset: number}[]} One entry per
 *   "t" element in document order; `offset` is the summed length of the
 *   preceding entries' text.
 */
function collectCellTextNodes(tcEl) {
  return collectTextNodes(tcEl);
}
|
|
6608
|
+
/**
 * Applies the difference between `originalFull` and `replacedFull` to the
 * "t" elements described by `tNodes`.
 *
 * With exactly one node, that element simply receives `replacedFull`.
 * Otherwise the longest common prefix and suffix of the two strings are
 * skipped and only the differing middle section is handed to
 * replaceTextRange, leaving untouched elements as they were.
 *
 * @param {{element: object, text: string, offset: number}[]} tNodes - Entries
 *   whose joined text corresponds to `originalFull`.
 * @param {string} originalFull - Text before replacement.
 * @param {string} replacedFull - Text after replacement.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const only = tNodes[0].element;
    clearChildren(only);
    only.appendChild(only.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const sharedLimit = Math.min(originalFull.length, replacedFull.length);

  let prefixLen = 0;
  while (prefixLen < sharedLimit && originalFull[prefixLen] === replacedFull[prefixLen]) {
    prefixLen += 1;
  }

  // The common suffix may not reach back into the common prefix.
  let suffixLen = 0;
  while (
    suffixLen < sharedLimit - prefixLen &&
    originalFull[originalFull.length - 1 - suffixLen] === replacedFull[replacedFull.length - 1 - suffixLen]
  ) {
    suffixLen += 1;
  }

  const originalEnd = originalFull.length - suffixLen;
  const replacementEnd = replacedFull.length - suffixLen;
  replaceTextRange(tNodes, prefixLen, originalEnd, replacedFull.slice(prefixLen, replacementEnd));
}
|
|
6293
6630
|
|
|
6294
6631
|
// src/hwpx/generator.ts
|
|
6295
|
-
import
|
|
6632
|
+
import JSZip6 from "jszip";
|
|
6296
6633
|
var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
|
|
6297
6634
|
var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
|
|
6298
6635
|
var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
|
|
@@ -6319,7 +6656,7 @@ var PARA_LIST = 7;
|
|
|
6319
6656
|
async function markdownToHwpx(markdown) {
|
|
6320
6657
|
const blocks = parseMarkdownToBlocks(markdown);
|
|
6321
6658
|
const sectionXml = blocksToSectionXml(blocks);
|
|
6322
|
-
const zip = new
|
|
6659
|
+
const zip = new JSZip6();
|
|
6323
6660
|
zip.file("mimetype", "application/hwp+zip", { compression: "STORE" });
|
|
6324
6661
|
zip.file("META-INF/container.xml", generateContainerXml());
|
|
6325
6662
|
zip.file("Contents/content.hpf", generateManifest());
|
|
@@ -6679,6 +7016,183 @@ function blocksToSectionXml(blocks) {
|
|
|
6679
7016
|
</hs:sec>`;
|
|
6680
7017
|
}
|
|
6681
7018
|
|
|
7019
|
+
// src/diff/text-diff.ts
|
|
7020
|
+
/**
 * Scores how alike two strings are on a 0..1 scale using edit distance.
 *
 * Identical inputs score 1. If either input is empty (or otherwise falsy) and
 * the two are not identical, the score is 0. Otherwise the score is
 * `1 - levenshtein(a, b) / max(a.length, b.length)`.
 *
 * The two guards guarantee both strings are non-empty by the time the
 * division runs, so no separate zero-length check is needed.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score; 1 means identical.
 */
function similarity(a, b) {
  if (a === b) return 1;
  if (!a || !b) return 0;
  return 1 - levenshtein(a, b) / Math.max(a.length, b.length);
}
|
|
7027
|
+
/**
 * Compares two strings with `similarity` after passing each through
 * `normalize`.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity score of the normalized strings.
 */
function normalizedSimilarity(a, b) {
  const left = normalize(a);
  const right = normalize(b);
  return similarity(left, right);
}
|
|
7030
|
+
/**
 * Collapses every run of whitespace in `s` to a single space and removes
 * leading and trailing whitespace.
 *
 * @param {string} s - Text to normalize.
 * @returns {string} The whitespace-normalized text.
 */
function normalize(s) {
  // Splitting on whitespace runs leaves empty strings at the edges; drop them.
  const words = s.split(/\s+/).filter((word) => word !== "");
  return words.join(" ");
}
|
|
7033
|
+
// Combined input length above which levenshtein() switches to an estimate.
var MAX_LEVENSHTEIN_LEN = 1e4;

/**
 * Computes the Levenshtein edit distance between two strings, comparing
 * UTF-16 code units.
 *
 * When the combined length exceeds MAX_LEVENSHTEIN_LEN the exact distance is
 * not computed. Instead the first (up to) 500 positions are compared index by
 * index, the mismatch rate is extrapolated over the shorter string, and the
 * length difference is added.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Edit distance, or the estimate described above.
 */
function levenshtein(a, b) {
  if (a.length + b.length > MAX_LEVENSHTEIN_LEN) {
    const shorterLen = Math.min(a.length, b.length);
    const windowLen = Math.min(500, shorterLen);
    let mismatches = 0;
    for (let k = 0; k < windowLen; k++) {
      if (a[k] !== b[k]) mismatches += 1;
    }
    const mismatchRate = windowLen > 0 ? mismatches / windowLen : 1;
    return Math.abs(a.length - b.length) + Math.round(shorterLen * mismatchRate);
  }

  // Keep the row buffers sized by the shorter string.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];
  const width = shorter.length;

  let above = Array.from({ length: width + 1 }, (_, k) => k);
  let row = new Array(width + 1);

  for (let j = 1; j <= longer.length; j++) {
    const target = longer[j - 1];
    row[0] = j;
    for (let i = 1; i <= width; i++) {
      row[i] = shorter[i - 1] === target
        ? above[i - 1]
        : 1 + Math.min(above[i - 1], above[i], row[i - 1]);
    }
    // Reuse the two buffers instead of allocating a new row each pass.
    const spare = above;
    above = row;
    row = spare;
  }
  return above[width];
}
|
|
7061
|
+
|
|
7062
|
+
// src/diff/compare.ts
|
|
7063
|
+
// Minimum blockSimilarity score at which alignBlocks treats two blocks as a matching pair.
var SIMILARITY_THRESHOLD = 0.4;
|
|
7064
|
+
/**
 * Parses two documents concurrently and returns the block-level diff between
 * them.
 *
 * @param {*} bufferA - First document, in any form accepted by `parse`.
 * @param {*} bufferB - Second document, in any form accepted by `parse`.
 * @param {object} [options] - Passed unchanged to both `parse` calls.
 * @returns {Promise<{stats: object, diffs: object[]}>} Result of `diffBlocks`.
 * @throws {Error} When either document fails to parse.
 */
async function compare(bufferA, bufferB, options) {
  const pendingA = parse(bufferA, options);
  const pendingB = parse(bufferB, options);
  const [resultA, resultB] = await Promise.all([pendingA, pendingB]);
  if (!resultA.success) {
    throw new Error(`\uBB38\uC11CA \uD30C\uC2F1 \uC2E4\uD328: ${resultA.error}`);
  }
  if (!resultB.success) {
    throw new Error(`\uBB38\uC11CB \uD30C\uC2F1 \uC2E4\uD328: ${resultB.error}`);
  }
  return diffBlocks(resultA.blocks, resultB.blocks);
}
|
|
7073
|
+
/**
 * Produces a block-by-block diff of two block lists.
 *
 * Blocks are paired by `alignBlocks`. A pair scoring at least 0.99 in
 * `blockSimilarity` is reported as "unchanged" (with similarity 1), any other
 * pair as "modified" (table pairs additionally get `cellDiffs`). Blocks with
 * no partner are reported as "removed" or "added".
 *
 * @param {object[]} blocksA - Blocks of the earlier document.
 * @param {object[]} blocksB - Blocks of the later document.
 * @returns {{stats: {added: number, removed: number, modified: number, unchanged: number}, diffs: object[]}}
 *   Per-type counters and the ordered list of diff entries.
 */
function diffBlocks(blocksA, blocksB) {
  const pairs = alignBlocks(blocksA, blocksB);
  const stats = { added: 0, removed: 0, modified: 0, unchanged: 0 };
  const diffs = [];

  for (const [before, after] of pairs) {
    if (before && after) {
      const score = blockSimilarity(before, after);
      if (score >= 0.99) {
        diffs.push({ type: "unchanged", before, after, similarity: 1 });
        stats.unchanged += 1;
        continue;
      }
      const entry = { type: "modified", before, after, similarity: score };
      const bothTables = before.type === "table" && after.type === "table";
      if (bothTables && before.table && after.table) {
        entry.cellDiffs = diffTableCells(before.table, after.table);
      }
      diffs.push(entry);
      stats.modified += 1;
    } else if (before) {
      diffs.push({ type: "removed", before });
      stats.removed += 1;
    } else if (after) {
      diffs.push({ type: "added", after });
      stats.added += 1;
    }
  }

  return { stats, diffs };
}
|
|
7101
|
+
/**
 * Aligns two block lists with a longest-common-subsequence style dynamic
 * program in which two blocks "match" when their `blockSimilarity` reaches
 * SIMILARITY_THRESHOLD.
 *
 * When the product of the list lengths exceeds 1e7 the table is not built and
 * `fallbackAlign` is used instead.
 *
 * @param {object[]} a - Blocks of the earlier document.
 * @param {object[]} b - Blocks of the later document.
 * @returns {Array<[object|null, object|null]>} Ordered pairs; an unmatched
 *   block appears with `null` on the other side, and unmatched blocks from
 *   `a` are emitted before unmatched blocks from `b` within each gap.
 */
function alignBlocks(a, b) {
  const rows = a.length;
  const cols = b.length;
  if (rows * cols > 1e7) return fallbackAlign(a, b);

  // Each similarity score is computed once; the backtrack below reuses it.
  const scores = new Map();
  const isMatch = (i, j) => {
    const slot = i * cols + j;
    let score = scores.get(slot);
    if (score === undefined) {
      score = blockSimilarity(a[i], b[j]);
      scores.set(slot, score);
    }
    return score >= SIMILARITY_THRESHOLD;
  };

  // table[i][j] = number of matched pairs between a[0..i) and b[0..j).
  const table = Array.from({ length: rows + 1 }, () => new Array(cols + 1).fill(0));
  for (let i = 1; i <= rows; i++) {
    const above = table[i - 1];
    const here = table[i];
    for (let j = 1; j <= cols; j++) {
      here[j] = isMatch(i - 1, j - 1)
        ? above[j - 1] + 1
        : Math.max(above[j], here[j - 1]);
    }
  }

  // Walk back from the bottom-right corner to recover the matched index pairs.
  const matched = [];
  let i = rows;
  let j = cols;
  while (i > 0 && j > 0) {
    if (isMatch(i - 1, j - 1) && table[i][j] === table[i - 1][j - 1] + 1) {
      matched.push([i - 1, j - 1]);
      i -= 1;
      j -= 1;
    } else if (table[i - 1][j] >= table[i][j - 1]) {
      i -= 1;
    } else {
      j -= 1;
    }
  }
  matched.reverse();

  const aligned = [];
  let nextA = 0;
  let nextB = 0;
  for (const [matchA, matchB] of matched) {
    for (; nextA < matchA; nextA++) aligned.push([a[nextA], null]);
    for (; nextB < matchB; nextB++) aligned.push([null, b[nextB]]);
    aligned.push([a[nextA], b[nextB]]);
    nextA += 1;
    nextB += 1;
  }
  for (; nextA < rows; nextA++) aligned.push([a[nextA], null]);
  for (; nextB < cols; nextB++) aligned.push([null, b[nextB]]);
  return aligned;
}
|
|
7149
|
+
/**
 * Pairs two lists purely by index, without comparing their contents.
 *
 * @param {Array} a - First list.
 * @param {Array} b - Second list.
 * @returns {Array<[*, *]>} One pair per index up to the longer length; a
 *   missing (or falsy) entry is represented as `null`.
 */
function fallbackAlign(a, b) {
  const pairCount = Math.max(a.length, b.length);
  return Array.from({ length: pairCount }, (_, idx) => [a[idx] || null, b[idx] || null]);
}
|
|
7157
|
+
/**
 * Scores how alike two blocks are on a 0..1 scale.
 *
 * - Blocks of different `type` score 0.
 * - When both blocks define `text`, the score is the normalized text
 *   similarity (a null/empty text is compared as "").
 * - Otherwise, table blocks that both carry `table` data are scored by
 *   `tableSimilarity`.
 * - Any remaining pair has the same type and nothing further to compare, so
 *   it scores 1.
 *
 * @param {{type: string, text?: string, table?: object}} a - First block.
 * @param {{type: string, text?: string, table?: object}} b - Second block.
 * @returns {number} Similarity score.
 */
function blockSimilarity(a, b) {
  if (a.type !== b.type) return 0;
  if (a.text !== undefined && b.text !== undefined) {
    return normalizedSimilarity(a.text || "", b.text || "");
  }
  if (a.type === "table" && a.table && b.table) {
    return tableSimilarity(a.table, b.table);
  }
  // Types are known to be equal here; there is no content left to compare.
  return 1;
}
|
|
7168
|
+
/**
 * Scores how alike two tables are by blending a size score and a content
 * score.
 *
 * The size score compares the cell counts (`rows * cols`) of the two tables;
 * the content score is the normalized text similarity of all cell texts
 * joined with spaces. The result weights size at 0.3 and content at 0.7.
 *
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} a - First table.
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} b - Second table.
 * @returns {number} Weighted similarity score.
 */
function tableSimilarity(a, b) {
  const areaA = a.rows * a.cols;
  const areaB = b.rows * b.cols;
  // The trailing 1 keeps the divisor non-zero for two empty tables.
  const dimSim = 1 - Math.abs(areaA - areaB) / Math.max(areaA, areaB, 1);

  const joinCellText = (table) => table.cells.flat().map((cell) => cell.text).join(" ");
  const contentSim = normalizedSimilarity(joinCellText(a), joinCellText(b));

  return dimSim * 0.3 + contentSim * 0.7;
}
|
|
7175
|
+
/**
 * Compares two tables cell by cell over the bounding grid
 * `max(rows) x max(cols)`.
 *
 * Each grid position is classified as:
 * - "added"     - present only in `b`
 * - "removed"   - present only in `a`
 * - "unchanged" - same text in both, or present in neither table (this
 *                 happens when the tables differ in both dimensions, e.g.
 *                 1x2 vs 2x1; such a position was not added by either side)
 * - "modified"  - present in both with different text
 *
 * A cell counts as absent when it lies outside the table's declared
 * `rows`/`cols`, when the `cells` array has no entry for it (ragged rows),
 * or when its `text` is undefined.
 *
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} a - Table before.
 * @param {{rows: number, cols: number, cells: {text: string}[][]}} b - Table after.
 * @returns {{type: string, before: (string|undefined), after: (string|undefined)}[][]}
 *   Row-major grid of per-cell diff entries.
 */
function diffTableCells(a, b) {
  // Text at (r, c), or undefined when the table has no such cell.
  const textAt = (table, r, c) => {
    if (r >= table.rows || c >= table.cols) return undefined;
    const rowCells = table.cells ? table.cells[r] : undefined;
    const cell = rowCells ? rowCells[c] : undefined;
    return cell ? cell.text : undefined;
  };
  const classify = (before, after) => {
    if (before === undefined && after === undefined) return "unchanged";
    if (before === undefined) return "added";
    if (after === undefined) return "removed";
    return before === after ? "unchanged" : "modified";
  };

  const maxRows = Math.max(a.rows, b.rows);
  const maxCols = Math.max(a.cols, b.cols);
  const result = [];
  for (let r = 0; r < maxRows; r++) {
    const row = [];
    for (let c = 0; c < maxCols; c++) {
      const before = textAt(a, r, c);
      const after = textAt(b, r, c);
      row.push({ type: classify(before, after), before, after });
    }
    result.push(row);
  }
  return result;
}
|
|
7195
|
+
|
|
6682
7196
|
// src/index.ts
|
|
6683
7197
|
async function parse(input, options) {
|
|
6684
7198
|
let buffer;
|
|
@@ -6755,6 +7269,45 @@ async function parseDocx(buffer, options) {
|
|
|
6755
7269
|
return { success: false, fileType: "docx", error: err instanceof Error ? err.message : "DOCX \uD30C\uC2F1 \uC2E4\uD328", code: classifyError(err) };
|
|
6756
7270
|
}
|
|
6757
7271
|
}
|
|
7272
|
+
/**
 * Fills the form fields of a document and returns it in the requested format.
 *
 * - "hwpx-preserve": the input must be detected as HWPX (first by
 *   `detectFormat`, then by `detectZipFormat`); the original package is
 *   filled in place by `fillHwpx` and returned as a buffer.
 * - "hwpx": the document is parsed, filled at block level, rendered to
 *   markdown and converted back with `markdownToHwpx`.
 * - anything else (default "markdown"): the filled markdown is returned.
 *
 * @param {string|Buffer|ArrayBuffer} input - File path, Node Buffer, or raw
 *   buffer; a string is read from disk with `readFile`.
 * @param {object} values - Label-to-value mapping handed to the fill routine.
 * @param {string} [outputFormat="markdown"] - Desired output format.
 * @returns {Promise<{output: *, format: string, fill: object}>} The produced
 *   document, the format actually produced, and the fill report.
 * @throws {Error} When "hwpx-preserve" is requested for a non-HWPX input, or
 *   when the document cannot be parsed.
 */
async function fillForm(input, values, outputFormat = "markdown") {
  let buffer;
  if (typeof input === "string") {
    buffer = toArrayBuffer(await readFile(input));
  } else if (Buffer.isBuffer(input)) {
    buffer = toArrayBuffer(input);
  } else {
    buffer = input;
  }

  if (outputFormat === "hwpx-preserve") {
    // Only look inside the zip container once the outer signature says HWPX.
    const sniffed = detectFormat(buffer);
    const detected = sniffed === "hwpx" ? await detectZipFormat(buffer) : sniffed;
    if (detected !== "hwpx") {
      throw new Error(`hwpx-preserve \uD3EC\uB9F7\uC740 HWPX \uC785\uB825\uB9CC \uC9C0\uC6D0\uD569\uB2C8\uB2E4 (\uAC10\uC9C0\uB41C \uD3EC\uB9F7: ${detected})`);
    }
    const hwpxResult = await fillHwpx(buffer, values);
    const report = { filled: hwpxResult.filled, unmatched: hwpxResult.unmatched };
    return { output: hwpxResult.buffer, format: "hwpx-preserve", fill: report };
  }

  const parsed = await parse(buffer);
  if (!parsed.success) {
    throw new Error(`\uC11C\uC2DD \uD30C\uC2F1 \uC2E4\uD328: ${parsed.error}`);
  }

  const fill = fillFormFields(parsed.blocks, values);
  const markdown = blocksToMarkdown(fill.blocks);
  if (outputFormat !== "hwpx") {
    return { output: markdown, format: "markdown", fill };
  }
  const hwpxBuffer = await markdownToHwpx(markdown);
  return { output: hwpxBuffer, format: "hwpx", fill };
}
|
|
6758
7311
|
export {
|
|
6759
7312
|
VERSION,
|
|
6760
7313
|
blocksToMarkdown,
|
|
@@ -6763,7 +7316,11 @@ export {
|
|
|
6763
7316
|
detectZipFormat,
|
|
6764
7317
|
diffBlocks,
|
|
6765
7318
|
extractFormFields,
|
|
7319
|
+
fillForm,
|
|
7320
|
+
fillFormFields,
|
|
7321
|
+
fillHwpx,
|
|
6766
7322
|
isHwpxFile,
|
|
7323
|
+
isLabelCell,
|
|
6767
7324
|
isOldHwpFile,
|
|
6768
7325
|
isPdfFile,
|
|
6769
7326
|
isZipFile,
|