kordoc 2.2.2 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-R34CFFNV.js → chunk-SY2RFVLW.js} +1089 -149
- package/dist/chunk-SY2RFVLW.js.map +1 -0
- package/dist/cli.js +149 -4
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +847 -248
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +97 -7
- package/dist/index.d.ts +97 -7
- package/dist/index.js +843 -248
- package/dist/index.js.map +1 -1
- package/dist/mcp.js +126 -6
- package/dist/mcp.js.map +1 -1
- package/dist/{watch-VNJDVUVQ.js → watch-5P7DJ3HG.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-R34CFFNV.js.map +0 -1
- /package/dist/{watch-VNJDVUVQ.js.map → watch-5P7DJ3HG.js.map} +0 -0
|
@@ -8,7 +8,7 @@ import {
|
|
|
8
8
|
} from "./chunk-MOL7MDBG.js";
|
|
9
9
|
|
|
10
10
|
// src/utils.ts
|
|
11
|
-
var VERSION = true ? "2.2.
|
|
11
|
+
var VERSION = true ? "2.2.4" : "0.0.0-dev";
|
|
12
12
|
function toArrayBuffer(buf) {
|
|
13
13
|
if (buf.byteOffset === 0 && buf.byteLength === buf.buffer.byteLength) {
|
|
14
14
|
return buf.buffer;
|
|
@@ -330,9 +330,47 @@ function blocksToMarkdown(blocks) {
|
|
|
330
330
|
}
|
|
331
331
|
return lines.join("\n").trim();
|
|
332
332
|
}
|
|
333
|
+
/**
 * Reports whether any cell of the table spans more than one column or row.
 *
 * @param {{cells: Array<Array<{colSpan: number, rowSpan: number}>>}} table
 * @returns {boolean} true as soon as one merged cell is found.
 */
function hasMergedCells(table) {
  const isMerged = (cell) => cell.colSpan > 1 || cell.rowSpan > 1;
  return table.cells.some((row) => row.some(isMerged));
}
|
|
341
|
+
/**
 * Renders a table as an HTML <table> string, emitting colspan/rowspan
 * attributes for merged cells. Row 0 is rendered with <th>, every other
 * row with <td>. Rows that end up with no rendered cell are omitted.
 *
 * @param {{cells: Array, rows: number, cols: number}} table
 * @returns {string} HTML markup, one line per row.
 */
function tableToHtml(table) {
  const { cells, rows: numRows, cols: numCols } = table;
  // Grid positions already covered by a span of an earlier cell.
  const covered = new Set();
  const out = ["<table>"];
  for (let r = 0; r < numRows; r++) {
    const tag = r === 0 ? "th" : "td";
    let rowMarkup = "";
    for (let c = 0; c < numCols; c++) {
      if (covered.has(`${r},${c}`)) continue;
      const cell = cells[r]?.[c];
      if (!cell) continue;
      // Reserve every in-bounds position this cell spans, except its own.
      for (let rr = r; rr < r + cell.rowSpan; rr++) {
        for (let cc = c; cc < c + cell.colSpan; cc++) {
          if (rr === r && cc === c) continue;
          if (rr < numRows && cc < numCols) covered.add(`${rr},${cc}`);
        }
      }
      const body = sanitizeText(cell.text).replace(/\n/g, "<br>");
      let attrs = "";
      if (cell.colSpan > 1) attrs += ` colspan="${cell.colSpan}"`;
      if (cell.rowSpan > 1) attrs += ` rowspan="${cell.rowSpan}"`;
      rowMarkup += `<${tag}${attrs}>${body}</${tag}>`;
    }
    if (rowMarkup) out.push(`<tr>${rowMarkup}</tr>`);
  }
  out.push("</table>");
  return out.join("\n");
}
|
|
333
370
|
function tableToMarkdown(table) {
|
|
334
371
|
if (table.rows === 0 || table.cols === 0) return "";
|
|
335
372
|
const { cells, rows: numRows, cols: numCols } = table;
|
|
373
|
+
if (hasMergedCells(table)) return tableToHtml(table);
|
|
336
374
|
if (numRows === 1 && numCols === 1) {
|
|
337
375
|
const content = sanitizeText(cells[0][0].text);
|
|
338
376
|
if (!content) return "";
|
|
@@ -5151,12 +5189,1040 @@ function mergeKoreanLines(text) {
|
|
|
5151
5189
|
return result.join("\n");
|
|
5152
5190
|
}
|
|
5153
5191
|
|
|
5192
|
+
// src/form/recognize.ts
|
|
5193
|
+
// Korean form-label keywords. A cell whose text contains any of these is
// treated as a field label by isLabelCell / isKeywordLabel.
// Trailing comments show the decoded Hangul with an approximate English gloss.
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
  "\uC131\uBA85", // 성명 (full name)
  "\uC774\uB984", // 이름 (name)
  "\uC8FC\uC18C", // 주소 (address)
  "\uC804\uD654", // 전화 (telephone)
  "\uC804\uD654\uBC88\uD638", // 전화번호 (telephone number)
  "\uD734\uB300\uD3F0", // 휴대폰 (mobile phone)
  "\uD578\uB4DC\uD3F0", // 핸드폰 (mobile phone)
  "\uC5F0\uB77D\uCC98", // 연락처 (contact)
  "\uC0DD\uB144\uC6D4\uC77C", // 생년월일 (date of birth)
  "\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638", // 주민등록번호 (resident registration number)
  "\uC18C\uC18D", // 소속 (affiliation)
  "\uC9C1\uC704", // 직위 (position)
  "\uC9C1\uAE09", // 직급 (rank)
  "\uBD80\uC11C", // 부서 (department)
  "\uC774\uBA54\uC77C", // 이메일 (email)
  "\uD329\uC2A4", // 팩스 (fax)
  "\uD559\uAD50", // 학교 (school)
  "\uD559\uB144", // 학년 (school year)
  "\uBC18", // 반 (class)
  "\uBC88\uD638", // 번호 (number)
  "\uC2E0\uCCAD\uC778", // 신청인 (applicant)
  "\uB300\uD45C\uC790", // 대표자 (representative)
  "\uB2F4\uB2F9\uC790", // 담당자 (person in charge)
  "\uC791\uC131\uC790", // 작성자 (author)
  "\uD655\uC778\uC790", // 확인자 (verifier)
  "\uC2B9\uC778\uC790", // 승인자 (approver)
  "\uC77C\uC2DC", // 일시 (date and time)
  "\uB0A0\uC9DC", // 날짜 (date)
  "\uAE30\uAC04", // 기간 (period)
  "\uC7A5\uC18C", // 장소 (place)
  "\uBAA9\uC801", // 목적 (purpose)
  "\uC0AC\uC720", // 사유 (reason)
  "\uBE44\uACE0", // 비고 (remarks)
  "\uAE08\uC561", // 금액 (amount)
  "\uC218\uB7C9", // 수량 (quantity)
  "\uB2E8\uAC00", // 단가 (unit price)
  "\uD569\uACC4", // 합계 (total)
  "\uACC4", // 계 (sum)
  "\uC18C\uACC4", // 소계 (subtotal)
  "\uB4F1\uB85D\uAE30\uC900\uC9C0", // 등록기준지 (registered domicile)
  "\uBCF8\uC801", // 본적 (permanent domicile)
  "\uC704\uC784\uC778", // 위임인 (delegator)
  "\uCCAD\uAD6C\uC0AC\uC720", // 청구사유 (reason for claim)
  "\uC18C\uBA85\uC790\uB8CC" // 소명자료 (supporting material)
]);
|
|
5239
|
+
/**
 * Heuristically decides whether a cell's text is a form label.
 *
 * After stripping trailing footnote markers, a text of at most 30 characters
 * counts as a label when it (1) contains a LABEL_KEYWORDS entry, (2) consists
 * only of Hangul, whitespace, parentheses, middle dots and colons with 2-8
 * non-space characters, or (3) is Hangul/Latin text ending in a colon.
 *
 * @param {string} text Raw cell text.
 * @returns {boolean}
 */
function isLabelCell(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate === "" || candidate.length > 30) return false;

  const hasKeyword = [...LABEL_KEYWORDS].some((kw) => candidate.includes(kw));
  if (hasKeyword) return true;

  const compactLength = candidate.replace(/\s/g, "").length;
  const isShortKoreanPhrase =
    /^[가-힣\s()()·::]+$/.test(candidate) &&
    compactLength >= 2 &&
    compactLength <= 8 &&
    !/\d/.test(candidate);
  if (isShortKoreanPhrase) return true;

  return /^[가-힣A-Za-z\s]+[::]$/.test(candidate);
}
|
|
5249
|
+
/**
 * Collects label/value fields from a list of blocks.
 *
 * Table blocks are scanned first, then paragraph blocks, so table fields
 * always precede inline fields in the result. Confidence is the share of
 * tables that yielded at least one field; with no tables at all it is 0.3
 * when any inline field was found and 0 otherwise.
 *
 * @param {Array<object>} blocks Parsed document blocks.
 * @returns {{fields: Array<object>, confidence: number}}
 */
function extractFormFields(blocks) {
  const fields = [];
  let tableCount = 0;
  let formTableCount = 0;

  for (const block of blocks) {
    if (block.type === "table" && block.table) {
      tableCount += 1;
      const found = extractFromTable(block.table);
      if (found.length === 0) continue;
      formTableCount += 1;
      fields.push(...found);
    }
  }

  for (const block of blocks) {
    if (block.type !== "paragraph" || !block.text) continue;
    fields.push(...extractInlineFields(block.text));
  }

  let confidence;
  if (tableCount > 0) {
    confidence = formTableCount / tableCount;
  } else {
    confidence = fields.length > 0 ? 0.3 : 0;
  }
  return { fields, confidence: Math.min(confidence, 1) };
}
|
|
5271
|
+
/**
 * Extracts label/value fields from a single table.
 *
 * Strategy 1: every cell recognised by isLabelCell is paired with the cell
 * to its right. Strategy 2 (only when strategy 1 found nothing): if every
 * cell of the first row holds 1-20 characters of text, that row is treated
 * as a header and each non-empty data cell becomes a field labelled by its
 * column header.
 *
 * Missing rows or cells (ragged grids) are skipped instead of throwing,
 * matching the guards used by fillTable and tableToHtml.
 *
 * @param {{cells: Array, rows: number, cols: number}} table
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractFromTable(table) {
  const fields = [];
  if (table.cols >= 2) {
    for (let r = 0; r < table.rows; r++) {
      const row = table.cells[r];
      if (!row) continue;
      for (let c = 0; c < table.cols - 1; c++) {
        const labelCell = row[c];
        const valueCell = row[c + 1];
        if (!labelCell || !valueCell) continue;
        if (!isLabelCell(labelCell.text)) continue;
        fields.push({
          label: labelCell.text.trim().replace(/[::]\s*$/, ""),
          value: valueCell.text.trim(),
          row: r,
          col: c
        });
      }
    }
  }
  if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
    const headerRow = table.cells[0] ?? [];
    // An empty header row must not count as "all labels" (every() is vacuously true).
    const allLabels = headerRow.length > 0 && headerRow.every((cell) => {
      const t = cell?.text?.trim() ?? "";
      return t.length > 0 && t.length <= 20;
    });
    if (allLabels) {
      for (let r = 1; r < table.rows; r++) {
        for (let c = 0; c < table.cols; c++) {
          // The header row may be shorter than table.cols; data rows may be ragged.
          const label = headerRow[c]?.text.trim();
          const value = table.cells[r]?.[c]?.text.trim();
          if (label && value) {
            fields.push({ label, value, row: r, col: c });
          }
        }
      }
    }
  }
  return fields;
}
|
|
5309
|
+
/**
 * Finds inline "label: value" pairs inside free text.
 *
 * A label is 2-10 Hangul/Latin letters followed by a colon; the value runs
 * up to the next newline, comma or semicolon (at most 100 characters).
 * Pairs whose value is blank after trimming are dropped. Inline fields have
 * no table position, so row and col are always -1.
 *
 * @param {string} text Paragraph text.
 * @returns {Array<{label: string, value: string, row: number, col: number}>}
 */
function extractInlineFields(text) {
  const labelValue = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
  return Array.from(text.matchAll(labelValue), ([, rawLabel, rawValue]) => ({
    label: rawLabel.trim(),
    value: rawValue.trim(),
    row: -1,
    col: -1
  })).filter((field) => field.value);
}
|
|
5322
|
+
|
|
5323
|
+
// src/form/match.ts
|
|
5324
|
+
/**
 * Canonicalises a label for lookup by deleting colons, whitespace,
 * parentheses and middle dots anywhere in the string.
 *
 * @param {string} label
 * @returns {string} The label with all separator characters removed.
 */
function normalizeLabel(label) {
  // \s covers the same characters String.prototype.trim removes, so a
  // separate trim step is unnecessary.
  const separators = /[::\s()()·]/g;
  return label.replace(separators, "");
}
|
|
5327
|
+
/**
 * Finds the key in `values` that best matches a normalised cell label.
 *
 * An exact key wins immediately. Otherwise a key qualifies when one string
 * is a prefix of the other and the shorter one is at least 60% as long as
 * the longer one; among qualifying keys the one with the longest shorter
 * side wins (the first one on ties).
 *
 * @param {string} cellLabel Normalised label text.
 * @param {Map<string, *>} values Map keyed by normalised labels.
 * @returns {string|undefined} The matching key, or undefined when none qualifies.
 */
function findMatchingKey(cellLabel, values) {
  if (values.has(cellLabel)) return cellLabel;

  let winner;
  let winnerScore = 0;
  for (const candidate of values.keys()) {
    let shorter;
    let longer;
    if (cellLabel.startsWith(candidate)) {
      shorter = candidate;
      longer = cellLabel;
    } else if (candidate.startsWith(cellLabel)) {
      shorter = cellLabel;
      longer = candidate;
    } else {
      continue;
    }
    if (shorter.length >= longer.length * 0.6 && shorter.length > winnerScore) {
      winnerScore = shorter.length;
      winner = candidate;
    }
  }
  return winner;
}
|
|
5346
|
+
/**
 * Stricter variant of the label check: true only when the text (after
 * stripping trailing footnote markers) is at most 15 characters long and
 * contains one of the LABEL_KEYWORDS entries.
 *
 * @param {string} text Raw cell text.
 * @returns {boolean}
 */
function isKeywordLabel(text) {
  const candidate = text.trim().replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰*※]+$/g, "").trim();
  if (candidate === "" || candidate.length > 15) return false;
  return [...LABEL_KEYWORDS].some((kw) => candidate.includes(kw));
}
|
|
5354
|
+
/**
 * Fills placeholder patterns that live inside a single cell's text.
 *
 * Three passes run in order, each on the output of the previous one:
 *   1. `label(   )suffix` - blank parentheses after a label are filled with
 *      the value for `label + suffix`, or else for `label` alone.
 *   2. `□keyword` - the empty box becomes a checked box when the value for
 *      `keyword` is a recognised "checked" marker or blank.
 *   3. `(keyword:   )` - a blank after the colon inside parentheses is
 *      filled with the value for `keyword`.
 *
 * Every key that was used is added to `matchedLabels`.
 *
 * @param {string} cellText Original cell text.
 * @param {Map<string, string>} values Values keyed by normalised label.
 * @param {Set<string>} matchedLabels Mutated: receives each matched key.
 * @returns {{text: string, matches: Array<{key: string, label: string, value: string}>}|null}
 *   The rewritten text and the matches, or null when nothing matched.
 */
function fillInCellPatterns(cellText, values, matchedLabels) {
  const matches = [];
  const checkedMarkers = ["\u2611", "\u2713", "\u2714", "v", "V", "true", "1", "yes", "o", "O"];

  const fillBlankParens = (whole, prefix, suffix) => {
    const label = prefix + suffix;
    const fullKey = normalizeLabel(label);
    let matchKey;
    if (values.has(fullKey)) {
      matchKey = fullKey;
    } else {
      const prefixKey = normalizeLabel(prefix);
      if (values.has(prefixKey)) matchKey = prefixKey;
    }
    if (matchKey === undefined) return whole;
    const newValue = values.get(matchKey);
    matchedLabels.add(matchKey);
    matches.push({ key: matchKey, label, value: newValue });
    return `${prefix}(${newValue})${suffix}`;
  };

  const tickCheckbox = (whole, keyword) => {
    const matchKey = normalizeLabel(keyword);
    if (!values.has(matchKey)) return whole;
    const marker = values.get(matchKey).trim();
    // A blank value also counts as "checked".
    if (marker !== "" && !checkedMarkers.includes(marker)) return whole;
    matchedLabels.add(matchKey);
    matches.push({ key: matchKey, label: `\u25A1${keyword}`, value: "\u2611" });
    return `\u2611${keyword}`;
  };

  const fillParenColon = (whole, keyword) => {
    const matchKey = normalizeLabel(keyword);
    if (!values.has(matchKey)) return whole;
    const newValue = values.get(matchKey);
    matchedLabels.add(matchKey);
    matches.push({ key: matchKey, label: keyword, value: newValue });
    return `(${keyword}\uFF1A${newValue})`;
  };

  const afterParens = cellText.replace(/([가-힣A-Za-z]+)\(\s{1,}\)([가-힣A-Za-z]*)/g, fillBlankParens);
  const afterBoxes = afterParens.replace(/□([가-힣A-Za-z]+)/g, tickCheckbox);
  const text = afterBoxes.replace(/\(([가-힣A-Za-z]+)[::]\s{1,}\)/g, fillParenColon);

  if (matches.length === 0) return null;
  return { text, matches };
}
|
|
5398
|
+
/**
 * Converts a plain label -> value object into a Map keyed by normalised
 * label. When two labels normalise to the same key, the later value wins.
 *
 * @param {Object<string, string>} values
 * @returns {Map<string, string>}
 */
function normalizeValues(values) {
  const pairs = Object.entries(values).map(([label, value]) => [normalizeLabel(label), value]);
  return new Map(pairs);
}
|
|
5405
|
+
/**
 * Lists the caller's labels that were never matched during filling.
 *
 * Each unmatched normalised key is mapped back to the first original label
 * that normalises to it; if none does, the normalised key itself is used.
 *
 * @param {Map<string, string>} normalizedValues Values keyed by normalised label.
 * @param {Set<string>} matchedLabels Normalised keys that were used.
 * @param {Object<string, string>} originalValues The caller's original object.
 * @returns {string[]} Unmatched labels, in `normalizedValues` key order.
 */
function resolveUnmatched(normalizedValues, matchedLabels, originalValues) {
  // Reverse lookup built once; the first original label per key is kept.
  const originalByKey = new Map();
  for (const original of Object.keys(originalValues)) {
    const key = normalizeLabel(original);
    if (!originalByKey.has(key)) originalByKey.set(key, original);
  }

  const unmatched = [];
  for (const key of normalizedValues.keys()) {
    if (matchedLabels.has(key)) continue;
    unmatched.push(originalByKey.has(key) ? originalByKey.get(key) : key);
  }
  return unmatched;
}
|
|
5413
|
+
|
|
5414
|
+
// src/form/filler.ts
|
|
5415
|
+
/**
 * Fills form fields in a deep copy of `blocks` using the given values.
 *
 * Three passes run in order over the copy:
 *   1. in-cell placeholder patterns in every table cell (fillInCellPatterns),
 *   2. label/value and header-row filling per table (fillTable),
 *   3. inline "label: value" pairs in paragraphs (fillInlineFields).
 *
 * The cells touched by pass 1 are tracked per table. Previously a single
 * set keyed only by "row,col" was shared by all tables, so a pattern fill
 * in one table made fillTable prepend instead of replace at the same
 * coordinates in every other table.
 *
 * @param {Array<object>} blocks Parsed document blocks (not mutated).
 * @param {Object<string, string>} values Label -> value pairs to fill in.
 * @returns {{blocks: Array<object>, filled: Array<object>, unmatched: string[]}}
 */
function fillFormFields(blocks, values) {
  const cloned = structuredClone(blocks);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);
  // table object -> Set of "row,col" keys whose text was rewritten in pass 1.
  const patternFilledByTable = /* @__PURE__ */ new Map();

  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    const { table } = block;
    const patternFilledCells = /* @__PURE__ */ new Set();
    patternFilledByTable.set(table, patternFilledCells);
    for (let r = 0; r < table.rows; r++) {
      for (let c = 0; c < table.cols; c++) {
        const cell = table.cells[r]?.[c];
        if (!cell) continue;
        const result = fillInCellPatterns(cell.text, normalizedValues, matchedLabels);
        if (!result) continue;
        cell.text = result.text;
        patternFilledCells.add(`${r},${c}`);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: r, col: c });
        }
      }
    }
  }

  // Run only after pass 1 has finished for ALL tables: fillTable's header
  // pass skips keys that are already present in matchedLabels.
  for (const block of cloned) {
    if (block.type !== "table" || !block.table) continue;
    fillTable(block.table, normalizedValues, filled, matchedLabels, patternFilledByTable.get(block.table));
  }

  for (const block of cloned) {
    if (block.type !== "paragraph" || !block.text) continue;
    const newText = fillInlineFields(block.text, normalizedValues, filled, matchedLabels);
    if (newText !== block.text) block.text = newText;
  }

  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  return { blocks: cloned, filled, unmatched };
}
|
|
5450
|
+
/**
 * Fills one table in place.
 *
 * Pass 1 (label -> right neighbour): for every label cell whose right
 * neighbour is not itself a keyword label, the neighbour's text is replaced
 * by the matching value. If that neighbour was already rewritten by the
 * in-cell pattern pass, the value is prepended instead of overwriting it.
 *
 * Pass 2 (header row): when every cell of row 0 is a short label, each
 * column whose header matches a not-yet-matched key has its value written
 * into the first data row that has a cell in that column.
 *
 * Missing rows or cells (ragged grids) are skipped instead of throwing.
 *
 * @param {{cells: Array, rows: number, cols: number}} table Mutated in place.
 * @param {Map<string, string>} values Values keyed by normalised label.
 * @param {Array<object>} filled Mutated: receives one record per filled cell.
 * @param {Set<string>} matchedLabels Mutated: receives each matched key.
 * @param {Set<string>} [patternFilledCells] "row,col" keys rewritten by the pattern pass.
 * @returns {void}
 */
function fillTable(table, values, filled, matchedLabels, patternFilledCells) {
  if (table.cols < 2) return;
  for (let r = 0; r < table.rows; r++) {
    const row = table.cells[r];
    if (!row) continue;
    for (let c = 0; c < table.cols - 1; c++) {
      const labelCell = row[c];
      const valueCell = row[c + 1];
      if (!labelCell || !valueCell) continue;
      if (!isLabelCell(labelCell.text)) continue;
      // A keyword label on the right is another label, not a value slot.
      if (isKeywordLabel(valueCell.text)) continue;
      const normalizedCellLabel = normalizeLabel(labelCell.text);
      if (!normalizedCellLabel) continue;
      const matchKey = findMatchingKey(normalizedCellLabel, values);
      if (matchKey === void 0) continue;
      const newValue = values.get(matchKey);
      if (patternFilledCells?.has(`${r},${c + 1}`)) {
        valueCell.text = newValue + " " + valueCell.text;
      } else {
        valueCell.text = newValue;
      }
      matchedLabels.add(matchKey);
      filled.push({
        label: labelCell.text.trim().replace(/[::]\s*$/, ""),
        value: newValue,
        row: r,
        col: c
      });
    }
  }

  if (table.rows < 2) return;
  const headerRow = table.cells[0];
  // An empty header row would pass every() vacuously; treat it as "no header".
  if (!headerRow || headerRow.length === 0) return;
  const allLabels = headerRow.every((cell) => {
    const t = cell?.text?.trim() ?? "";
    return t.length > 0 && t.length <= 20 && isLabelCell(t);
  });
  if (!allLabels) return;

  for (let r = 1; r < table.rows; r++) {
    for (let c = 0; c < table.cols; c++) {
      const headerCell = headerRow[c];
      const targetCell = table.cells[r]?.[c];
      if (!headerCell || !targetCell) continue;
      const headerLabel = normalizeLabel(headerCell.text);
      const matchKey = findMatchingKey(headerLabel, values);
      if (matchKey === void 0) continue;
      if (matchedLabels.has(matchKey)) continue;
      const newValue = values.get(matchKey);
      targetCell.text = newValue;
      matchedLabels.add(matchKey);
      filled.push({
        label: headerCell.text.trim(),
        value: newValue,
        row: r,
        col: c
      });
    }
  }
}
|
|
5504
|
+
/**
 * Rewrites inline "label: value" pairs in paragraph text.
 *
 * Each pair whose label matches a key in `values` is replaced by
 * `label: newValue` (the separator is normalised to ": "); unmatched pairs
 * are left untouched.
 *
 * @param {string} text Paragraph text.
 * @param {Map<string, string>} values Values keyed by normalised label.
 * @param {Array<object>} filled Mutated: receives one record per replacement (row/col -1).
 * @param {Set<string>} matchedLabels Mutated: receives each matched key.
 * @returns {string} The rewritten text.
 */
function fillInlineFields(text, values, filled, matchedLabels) {
  const inlinePair = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;

  const substitute = (whole, rawLabel) => {
    const matchKey = findMatchingKey(normalizeLabel(rawLabel), values);
    if (matchKey === undefined) return whole;
    const newValue = values.get(matchKey);
    matchedLabels.add(matchKey);
    filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
    return `${rawLabel}: ${newValue}`;
  };

  return text.replace(inlinePair, substitute);
}
|
|
5523
|
+
|
|
5524
|
+
// src/form/filler-hwpx.ts
|
|
5525
|
+
import JSZip2 from "jszip";
|
|
5526
|
+
import { DOMParser as DOMParser2, XMLSerializer } from "@xmldom/xmldom";
|
|
5527
|
+
/**
 * Fills form fields directly inside an HWPX (zip + XML) document.
 *
 * For every section XML file in the archive, three passes run in order:
 *   1. in-cell placeholder patterns for every table cell,
 *   2. per table: label -> right-neighbour filling, then header-row filling,
 *   3. inline "label: value" pairs in paragraphs that are not inside a table.
 * Only sections that were actually modified are re-serialised into the zip.
 *
 * @param {*} hwpxBuffer HWPX file contents, passed to JSZip.loadAsync.
 * @param {Object<string, string>} values Label -> value pairs to fill in.
 * @returns {Promise<{buffer: ArrayBuffer, filled: Array<object>, unmatched: string[]}>}
 * @throws {KordocError} When the archive contains no section XML file.
 */
async function fillHwpx(hwpxBuffer, values) {
  const zip = await JSZip2.loadAsync(hwpxBuffer);
  const filled = [];
  const matchedLabels = /* @__PURE__ */ new Set();
  const normalizedValues = normalizeValues(values);
  // Plain lexicographic sort of the matching entry names.
  const sectionFiles = Object.keys(zip.files).filter((name) => /[Ss]ection\d+\.xml$/i.test(name)).sort();
  if (sectionFiles.length === 0) {
    // Message decodes to "HWPX에서 섹션 파일을 찾을 수 없습니다" (no section file found in the HWPX).
    throw new KordocError("HWPX\uC5D0\uC11C \uC139\uC158 \uD30C\uC77C\uC744 \uCC3E\uC744 \uC218 \uC5C6\uC2B5\uB2C8\uB2E4");
  }
  const xmlParser = new DOMParser2();
  const xmlSerializer = new XMLSerializer();
  for (const sectionPath of sectionFiles) {
    const zipEntry = zip.file(sectionPath);
    if (!zipEntry) continue;
    const rawXml = await zipEntry.async("text");
    const doc = xmlParser.parseFromString(stripDtd(rawXml), "text/xml");
    if (!doc.documentElement) continue;
    let modified = false;
    const tables = findAllElements(doc.documentElement, "tbl");
    // Cell elements whose text was rewritten by pass 1; pass 2 prepends to these instead of replacing.
    const cellPatternApplied = /* @__PURE__ */ new Set();
    // Pass 1: in-cell placeholder patterns.
    for (const tblEl of tables) {
      const allCells = findAllElements(tblEl, "tc");
      for (const tcEl of allCells) {
        const tNodes = collectCellTextNodes(tcEl);
        const fullText = tNodes.map((n) => n.text).join("");
        const result = fillInCellPatterns(fullText, normalizedValues, matchedLabels);
        if (!result) continue;
        applyTextReplacements(tNodes, fullText, result.text);
        cellPatternApplied.add(tcEl);
        for (const m of result.matches) {
          filled.push({ label: m.label, value: m.value, row: -1, col: -1 });
        }
        modified = true;
      }
    }
    // Pass 2: per table, label -> right neighbour, then header-row filling.
    for (const tblEl of tables) {
      const rows = findDirectChildren(tblEl, "tr");
      for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
        const trEl = rows[rowIdx];
        const cells = findDirectChildren(trEl, "tc");
        for (let colIdx = 0; colIdx < cells.length - 1; colIdx++) {
          const labelText = extractCellText(cells[colIdx]);
          if (!isLabelCell(labelText)) continue;
          const valueCell = cells[colIdx + 1];
          const valueText = extractCellText(valueCell);
          // A keyword label on the right is another label, not a value slot.
          if (isKeywordLabel(valueText)) continue;
          const normalizedCellLabel = normalizeLabel(labelText);
          if (!normalizedCellLabel) continue;
          const matchKey = findMatchingKey(normalizedCellLabel, normalizedValues);
          if (matchKey === void 0) continue;
          const newValue = normalizedValues.get(matchKey);
          if (cellPatternApplied.has(valueCell)) {
            prependCellText(valueCell, newValue);
          } else {
            replaceCellText(valueCell, newValue);
          }
          matchedLabels.add(matchKey);
          filled.push({
            label: labelText.trim().replace(/[::]\s*$/, ""),
            value: newValue,
            row: rowIdx,
            col: colIdx
          });
          modified = true;
        }
      }
      if (rows.length >= 2) {
        const headerCells = findDirectChildren(rows[0], "tc");
        const allLabels = headerCells.every((cell) => {
          const t = extractCellText(cell).trim();
          return t.length > 0 && t.length <= 20 && isLabelCell(t);
        });
        if (allLabels) {
          for (let rowIdx = 1; rowIdx < rows.length; rowIdx++) {
            const dataCells = findDirectChildren(rows[rowIdx], "tc");
            // Columns are paired by position in the row's <tc> list.
            for (let colIdx = 0; colIdx < Math.min(headerCells.length, dataCells.length); colIdx++) {
              const headerLabel = normalizeLabel(extractCellText(headerCells[colIdx]));
              const matchKey = findMatchingKey(headerLabel, normalizedValues);
              if (matchKey === void 0) continue;
              // Each key is filled at most once across the whole document.
              if (matchedLabels.has(matchKey)) continue;
              const newValue = normalizedValues.get(matchKey);
              replaceCellText(dataCells[colIdx], newValue);
              matchedLabels.add(matchKey);
              filled.push({
                label: extractCellText(headerCells[colIdx]).trim(),
                value: newValue,
                row: rowIdx,
                col: colIdx
              });
              modified = true;
            }
          }
        }
      }
    }
    // Pass 3: inline "label: value" pairs in paragraphs outside tables.
    const allParagraphs = findAllElements(doc.documentElement, "p");
    for (const pEl of allParagraphs) {
      if (isInsideTable(pEl)) continue;
      const tNodes = collectTextNodes(pEl);
      const fullText = tNodes.map((n) => n.text).join("");
      const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{0,100})/g;
      let match;
      while ((match = pattern.exec(fullText)) !== null) {
        const rawLabel = match[1];
        const normalized = normalizeLabel(rawLabel);
        const matchKey = findMatchingKey(normalized, normalizedValues);
        if (matchKey === void 0) continue;
        const newValue = normalizedValues.get(matchKey);
        // The value is the tail of the match, so its range is derived from the match end.
        const valueStart = match.index + match[0].length - match[2].length;
        const valueEnd = match.index + match[0].length;
        replaceTextRange(tNodes, valueStart, valueEnd, newValue);
        matchedLabels.add(matchKey);
        filled.push({ label: rawLabel.trim(), value: newValue, row: -1, col: -1 });
        modified = true;
        // At most one replacement per paragraph: fullText and the tNodes
        // offsets are not refreshed after the DOM has been edited.
        break;
      }
    }
    if (modified) {
      const newXml = xmlSerializer.serializeToString(doc);
      zip.file(sectionPath, newXml);
    }
  }
  const unmatched = resolveUnmatched(normalizedValues, matchedLabels, values);
  const buffer = await zip.generateAsync({ type: "arraybuffer" });
  return { buffer, filled, unmatched };
}
|
|
5653
|
+
/**
 * Returns an element's name with its namespace prefix (everything up to
 * and including the first colon) removed.
 *
 * @param {{tagName?: string, localName?: string}} el
 * @returns {string} Prefix-free name, or "" when the element has no name.
 */
function localName(el) {
  const qualified = el.tagName || el.localName || "";
  return qualified.replace(/^[^:]+:/, "");
}
|
|
5656
|
+
/**
 * Collects every descendant element of `node` whose prefix-free tag name
 * equals `tagLocalName`, in document (pre-order) order. `node` itself is
 * never included.
 *
 * @param {object} node Root DOM node to search below.
 * @param {string} tagLocalName Tag name without namespace prefix.
 * @returns {Array<object>} Matching elements.
 */
function findAllElements(node, tagLocalName) {
  const found = [];
  // Explicit stack; children are pushed in reverse so they pop in document order.
  const pending = [];
  const pushChildren = (parent) => {
    const kids = parent.childNodes;
    if (!kids) return;
    for (let i = kids.length - 1; i >= 0; i--) pending.push(kids[i]);
  };

  pushChildren(node);
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.nodeType !== 1) continue; // only element nodes are inspected and descended into
    if (localName(current) === tagLocalName) found.push(current);
    pushChildren(current);
  }
  return found;
}
|
|
5671
|
+
/**
 * Returns the immediate child elements of `parent` whose prefix-free tag
 * name equals `tagLocalName`, in document order.
 *
 * @param {object} parent DOM node whose children are inspected.
 * @param {string} tagLocalName Tag name without namespace prefix.
 * @returns {Array<object>} Matching child elements (empty when there are none).
 */
function findDirectChildren(parent, tagLocalName) {
  const matches = [];
  const kids = parent.childNodes;
  if (!kids) return matches;
  for (let i = 0; i < kids.length; i++) {
    const kid = kids[i];
    if (kid.nodeType !== 1) continue;
    if (localName(kid) !== tagLocalName) continue;
    matches.push(kid);
  }
  return matches;
}
|
|
5683
|
+
/**
 * Tells whether an element has a `tbl` element among its ancestors.
 *
 * @param {object} el DOM element.
 * @returns {boolean}
 */
function isInsideTable(el) {
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeType === 1 && localName(ancestor) === "tbl") return true;
  }
  return false;
}
|
|
5691
|
+
/**
 * Concatenates the text of a table cell element.
 *
 * Text nodes are collected while descending only through `t`, `run`, `r`,
 * `p` and `subList` elements; a `tab` element contributes a space and a
 * `br` element a newline. Every other element is ignored.
 *
 * @param {object} tcEl Cell element.
 * @returns {string}
 */
function extractCellText(tcEl) {
  const descendInto = new Set(["t", "run", "r", "p", "subList"]);
  let text = "";

  const visit = (node) => {
    const kids = node.childNodes;
    if (!kids) return;
    for (let i = 0; i < kids.length; i++) {
      const kid = kids[i];
      if (kid.nodeType === 3) {
        text += kid.textContent || "";
        continue;
      }
      if (kid.nodeType !== 1) continue;
      const tag = localName(kid);
      if (descendInto.has(tag)) {
        visit(kid);
      } else if (tag === "tab") {
        text += " ";
      } else if (tag === "br") {
        text += "\n";
      }
    }
  };

  visit(tcEl);
  return text;
}
|
|
5712
|
+
/**
 * Puts `text` plus a space in front of the existing content of the first
 * `t` element found in the cell. Does nothing when the cell has no `t`
 * element.
 *
 * @param {object} tcEl Cell element.
 * @param {string} text Text to prepend.
 * @returns {void}
 */
function prependCellText(tcEl, text) {
  const [firstT] = findAllElements(tcEl, "t");
  if (firstT === undefined) return;
  // Read the current content before the children are removed.
  const combined = text + " " + (firstT.textContent || "");
  clearChildren(firstT);
  firstT.appendChild(firstT.ownerDocument.createTextNode(combined));
}
|
|
5720
|
+
/**
 * Replaces the whole text of a cell with `newValue`.
 *
 * The value goes into the first run of the first paragraph (or, when that
 * paragraph has no run, into its first `t` element); all other runs and
 * `t` elements of that paragraph and of every later paragraph in the cell
 * are emptied. Elements are never removed, only their text.
 *
 * @param {object} tcEl Cell element.
 * @param {string} newValue Replacement text.
 * @returns {void}
 */
function replaceCellText(tcEl, newValue) {
  const [firstP, ...laterParagraphs] = findAllElements(tcEl, "p");
  if (firstP === undefined) return;

  // Runs may be named either `run` or `r`; `run` matches are listed first.
  const runsOf = (p) => [...findAllElements(p, "run"), ...findAllElements(p, "r")];
  const overwrite = (tEl, text) => {
    clearChildren(tEl);
    tEl.appendChild(tEl.ownerDocument.createTextNode(text));
  };

  const firstRuns = runsOf(firstP);
  if (firstRuns.length > 0) {
    firstRuns.forEach((run, index) => setRunText(run, index === 0 ? newValue : ""));
  } else {
    const [headT, ...otherTs] = findAllElements(firstP, "t");
    if (headT !== undefined) {
      overwrite(headT, newValue);
      for (const t of otherTs) clearChildren(t);
    }
  }

  for (const p of laterParagraphs) {
    if (!p.parentNode) continue;
    for (const run of runsOf(p)) setRunText(run, "");
    for (const t of findAllElements(p, "t")) clearChildren(t);
  }
}
|
|
5750
|
+
/**
 * Sets the text of a run: the first `t` element below it receives `text`
 * and every further `t` element is emptied.
 *
 * NOTE(review): a run that contains no `t` element is left untouched, so
 * `text` is silently dropped in that case - confirm whether such runs can
 * occur in value cells.
 *
 * @param {object} runEl Run element.
 * @param {string} text New text for the run.
 * @returns {void}
 */
function setRunText(runEl, text) {
  const [headT, ...otherTs] = findAllElements(runEl, "t");
  if (headT === undefined) return;
  clearChildren(headT);
  headT.appendChild(headT.ownerDocument.createTextNode(text));
  for (const t of otherTs) clearChildren(t);
}
|
|
5760
|
+
/**
 * Removes every child node of `el`, leaving the element itself in place.
 *
 * @param {object} el DOM node exposing `firstChild` and `removeChild`.
 * @returns {void}
 */
function clearChildren(el) {
  for (let child = el.firstChild; child; child = el.firstChild) {
    el.removeChild(child);
  }
}
|
|
5763
|
+
/**
 * Lists the `t` elements below a paragraph together with their text and the
 * offset of that text within the concatenation of all collected texts.
 *
 * @param {object} pEl Paragraph element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectTextNodes(pEl) {
  const entries = [];
  for (const element of findAllElements(pEl, "t")) {
    const text = element.textContent || "";
    const previous = entries[entries.length - 1];
    // Each entry starts where the previous one ended.
    const offset = previous ? previous.offset + previous.text.length : 0;
    entries.push({ element, text, offset });
  }
  return entries;
}
|
|
5774
|
+
/**
 * Replaces the characters in [globalStart, globalEnd) of the concatenated
 * text of `tNodes` with `newValue`, editing the underlying `t` elements.
 *
 * The first node overlapping the range receives `newValue`; the overlapping
 * part of every further node is deleted. An empty range
 * (globalStart === globalEnd) is a pure insertion and is applied to the
 * first node that contains or touches that position. Previously an
 * insertion falling exactly on a node boundary (for example right after a
 * trailing "Label: ") was silently dropped.
 *
 * The `text`/`offset` fields of `tNodes` are NOT updated, so the entries are
 * stale after the call.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {number} globalStart Start offset (inclusive) in the joined text.
 * @param {number} globalEnd End offset (exclusive) in the joined text.
 * @param {string} newValue Replacement text.
 * @returns {void}
 */
function replaceTextRange(tNodes, globalStart, globalEnd, newValue) {
  // Overwrites an element's content with a single text node.
  const setText = (element, text) => {
    while (element.firstChild) element.removeChild(element.firstChild);
    element.appendChild(element.ownerDocument.createTextNode(text));
  };

  if (globalStart === globalEnd) {
    // Pure insertion: node boundaries count as belonging to the node.
    const target = tNodes.find(
      (node) => node.offset <= globalStart && globalStart <= node.offset + node.text.length
    );
    if (target) {
      const at = globalStart - target.offset;
      setText(target.element, target.text.slice(0, at) + newValue + target.text.slice(at));
    }
    return;
  }

  let replaced = false;
  for (const node of tNodes) {
    const nodeStart = node.offset;
    const nodeEnd = nodeStart + node.text.length;
    if (nodeEnd <= globalStart || nodeStart >= globalEnd) continue;
    const localStart = Math.max(0, globalStart - nodeStart);
    const localEnd = Math.min(node.text.length, globalEnd - nodeStart);
    // Only the first overlapping node carries the new value.
    const insertion = replaced ? "" : newValue;
    setText(node.element, node.text.slice(0, localStart) + insertion + node.text.slice(localEnd));
    replaced = true;
  }
}
|
|
5798
|
+
/**
 * Lists the `t` elements below a table cell together with their text and
 * running offset. The collection logic is the same as for a paragraph, so
 * this delegates to collectTextNodes.
 *
 * @param {object} tcEl Cell element.
 * @returns {Array<{element: object, text: string, offset: number}>}
 */
function collectCellTextNodes(tcEl) {
  return collectTextNodes(tcEl);
}
|
|
5809
|
+
/**
 * Writes a rewritten cell text back into the cell's `t` elements.
 *
 * With a single `t` element the whole text is replaced. Otherwise the
 * common prefix and common suffix of the old and new text are skipped and
 * only the differing middle is replaced via replaceTextRange.
 *
 * @param {Array<{element: object, text: string, offset: number}>} tNodes
 * @param {string} originalFull Concatenated text before rewriting.
 * @param {string} replacedFull Concatenated text after rewriting.
 * @returns {void}
 */
function applyTextReplacements(tNodes, originalFull, replacedFull) {
  if (originalFull === replacedFull) return;

  if (tNodes.length === 1) {
    const [{ element }] = tNodes;
    clearChildren(element);
    element.appendChild(element.ownerDocument.createTextNode(replacedFull));
    return;
  }

  const shared = Math.min(originalFull.length, replacedFull.length);

  let prefixLen = 0;
  while (prefixLen < shared && originalFull[prefixLen] === replacedFull[prefixLen]) {
    prefixLen++;
  }

  // The suffix may not overlap the prefix in either string.
  let suffixLen = 0;
  while (
    suffixLen < shared - prefixLen &&
    originalFull[originalFull.length - 1 - suffixLen] === replacedFull[replacedFull.length - 1 - suffixLen]
  ) {
    suffixLen++;
  }

  const changed = replacedFull.slice(prefixLen, replacedFull.length - suffixLen);
  replaceTextRange(tNodes, prefixLen, originalFull.length - suffixLen, changed);
}
|
|
5831
|
+
|
|
5832
|
+
// src/hwpx/generator.ts
|
|
5833
|
+
import JSZip3 from "jszip";
|
|
5834
|
+
// XML namespace URIs for the HWPX generator. Their consumers lie outside
// this view; the names suggest section, paragraph, head, OPF package,
// HPF and OCF container documents - TODO confirm against the generators.
var NS_SECTION = "http://www.hancom.co.kr/hwpml/2011/section";
var NS_PARA = "http://www.hancom.co.kr/hwpml/2011/paragraph";
var NS_HEAD = "http://www.hancom.co.kr/hwpml/2011/head";
var NS_OPF = "http://www.idpf.org/2007/opf/";
var NS_HPF = "http://www.hancom.co.kr/schema/2011/hpf";
var NS_OCF = "urn:oasis:names:tc:opendocument:xmlns:container";
// Numeric ids for character styles (normal, bold, italic, bold+italic,
// code, headings 1-4). Presumably indexes into the character-property
// table written by generateHeaderXml - verify there.
var CHAR_NORMAL = 0;
var CHAR_BOLD = 1;
var CHAR_ITALIC = 2;
var CHAR_BOLD_ITALIC = 3;
var CHAR_CODE = 4;
var CHAR_H1 = 5;
var CHAR_H2 = 6;
var CHAR_H3 = 7;
var CHAR_H4 = 8;
// Numeric ids for paragraph styles (normal, headings 1-4, code, quote,
// list). Presumably indexes into the paragraph-property table written by
// generateHeaderXml - verify there.
var PARA_NORMAL = 0;
var PARA_H1 = 1;
var PARA_H2 = 2;
var PARA_H3 = 3;
var PARA_H4 = 4;
var PARA_CODE = 5;
var PARA_QUOTE = 6;
var PARA_LIST = 7;
|
|
5857
|
+
/**
 * Converts Markdown text into an HWPX package.
 *
 * The Markdown is parsed into blocks, rendered to a single section XML, and
 * zipped together with the generated container, manifest and header parts.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Promise<ArrayBuffer>} The zip archive bytes.
 */
async function markdownToHwpx(markdown) {
  const sectionXml = blocksToSectionXml(parseMarkdownToBlocks(markdown));

  const archive = new JSZip3();
  // The mimetype entry is added first and stored without compression.
  archive.file("mimetype", "application/hwp+zip", { compression: "STORE" });

  const parts = [
    ["META-INF/container.xml", generateContainerXml()],
    ["Contents/content.hpf", generateManifest()],
    ["Contents/header.xml", generateHeaderXml()],
    ["Contents/section0.xml", sectionXml],
  ];
  for (const [path, xml] of parts) {
    archive.file(path, xml);
  }

  return await archive.generateAsync({ type: "arraybuffer" });
}
|
|
5868
|
+
/**
 * Splits Markdown source into a flat list of block descriptors.
 *
 * Recognised blocks, tested in this order for each non-blank line:
 *   - fenced code (``` or ~~~)  -> { type: "code_block", text, lang }
 *   - horizontal rule           -> { type: "hr" }
 *   - ATX heading (# .. ######) -> { type: "heading", text, level }
 *   - pipe table                -> { type: "table", rows: string[][] }
 *   - blockquote                -> one { type: "blockquote", text } per line
 *   - list item                 -> { type: "list_item", text, ordered, indent }
 *   - anything else             -> { type: "paragraph", text } (one per line)
 *
 * @param {string} md - Markdown source; LF and CRLF line endings are accepted.
 * @returns {object[]} Block descriptors in document order.
 */
function parseMarkdownToBlocks(md) {
  // Split on CRLF as well as LF: a trailing "\r" would stop the `$`-anchored
  // heading and fence patterns below from matching, because `.` does not
  // match "\r".
  const lines = md.split(/\r?\n/);
  const blocks = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (!line.trim()) {
      i++;
      continue;
    }

    const fenceMatch = line.match(/^(`{3,}|~{3,})(.*)$/);
    if (fenceMatch) {
      const fence = fenceMatch[1];
      const lang = fenceMatch[2].trim();
      const codeLines = [];
      i++;
      while (i < lines.length && !lines[i].startsWith(fence)) {
        codeLines.push(lines[i]);
        i++;
      }
      // Step over the closing fence; an unterminated fence runs to the end.
      if (i < lines.length) i++;
      blocks.push({ type: "code_block", text: codeLines.join("\n"), lang });
      continue;
    }

    if (/^(\*{3,}|-{3,}|_{3,})\s*$/.test(line.trim())) {
      blocks.push({ type: "hr" });
      i++;
      continue;
    }

    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      blocks.push({ type: "heading", text: headingMatch[2].trim(), level: headingMatch[1].length });
      i++;
      continue;
    }

    if (line.trimStart().startsWith("|")) {
      const tableRows = [];
      while (i < lines.length && lines[i].trimStart().startsWith("|")) {
        const row = lines[i];
        // Delimiter rows such as |---|:--:| carry no cell content.
        if (/^[\s|:\-]+$/.test(row)) {
          i++;
          continue;
        }
        // Strip the leading pipe and the trailing pipe only when present, so
        // a row written without a closing pipe keeps its last cell.
        const cells = row
          .trim()
          .replace(/^\|/, "")
          .replace(/\|$/, "")
          .split("|")
          .map((c) => c.trim());
        if (cells.length > 0) tableRows.push(cells);
        i++;
      }
      if (tableRows.length > 0) blocks.push({ type: "table", rows: tableRows });
      continue;
    }

    if (line.trimStart().startsWith("> ")) {
      // Continuation lines may use a bare ">" (an empty quoted line).
      while (i < lines.length && lines[i].trimStart().startsWith(">")) {
        // Drop leading indentation along with the marker; the entry test
        // above ignores indentation, so the strip must as well.
        const quoted = lines[i].replace(/^\s*>\s?/, "");
        blocks.push({ type: "blockquote", text: quoted.trim() });
        i++;
      }
      continue;
    }

    const listMatch = line.match(/^(\s*)([-*+]|\d+[.)]) (.+)$/);
    if (listMatch) {
      // Two characters of leading whitespace count as one nesting level.
      const indent = Math.floor(listMatch[1].length / 2);
      const ordered = /\d/.test(listMatch[2]);
      blocks.push({ type: "list_item", text: listMatch[3].trim(), ordered, indent });
      i++;
      continue;
    }

    blocks.push({ type: "paragraph", text: line.trim() });
    i++;
  }
  return blocks;
}
|
|
5942
|
+
/**
 * Splits one line of Markdown into styled text spans.
 *
 * Images and links are reduced to their label (a link with an empty label
 * falls back to its URL) and strikethrough markers are removed before the
 * text is scanned for inline code, bold, italic and bold-italic runs.
 *
 * @param {string} text - Inline Markdown.
 * @returns {{text: string, bold: boolean, italic: boolean, code: boolean}[]}
 *   Spans in reading order; always at least one span, even for empty input.
 */
function parseInlineMarkdown(text) {
  const plain = text
    .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
    .replace(/\[([^\]]*)\]\(([^)]*)\)/g, (_, label, url) => label || url)
    .replace(/~~([^~]+)~~/g, "$1");

  // Underscore emphasis gets a `___` alternative (the bold-italic branch
  // below tests for it) and must not start or end inside a word, so
  // identifiers such as snake_case_name stay literal.
  const inline =
    /(`[^`]+`|\*{3}[^*]+\*{3}|\*{2}[^*]+\*{2}|\*[^*]+\*|(?<![\p{L}\p{N}_])(?:_{3}[^_]+_{3}|_{2}[^_]+_{2}|_[^_]+_)(?![\p{L}\p{N}_]))/gu;

  const spans = [];
  const pushSpan = (value, bold, italic, code) => {
    spans.push({ text: value, bold, italic, code });
  };

  let lastIdx = 0;
  for (const match of plain.matchAll(inline)) {
    const raw = match[0];
    const idx = match.index;
    // Unstyled text between the previous match and this one.
    if (idx > lastIdx) pushSpan(plain.slice(lastIdx, idx), false, false, false);

    if (raw.startsWith("`")) {
      pushSpan(raw.slice(1, -1), false, false, true);
    } else if (raw.startsWith("***") || raw.startsWith("___")) {
      pushSpan(raw.slice(3, -3), true, true, false);
    } else if (raw.startsWith("**") || raw.startsWith("__")) {
      pushSpan(raw.slice(2, -2), true, false, false);
    } else {
      pushSpan(raw.slice(1, -1), false, true, false);
    }
    lastIdx = idx + raw.length;
  }

  if (lastIdx < plain.length) pushSpan(plain.slice(lastIdx), false, false, false);
  // Empty input yields no spans above; callers still get one (empty) span.
  if (spans.length === 0) pushSpan(plain, false, false, false);
  return spans;
}
|
|
5974
|
+
/**
 * Picks the character-property id for a styled span.
 *
 * Code takes precedence over bold/italic; otherwise the id reflects the
 * bold/italic combination, falling back to CHAR_NORMAL.
 *
 * @param {{bold: boolean, italic: boolean, code: boolean}} span
 * @returns {number} One of the CHAR_* ids.
 */
function spanToCharPrId(span) {
  if (span.code) return CHAR_CODE;
  const { bold, italic } = span;
  if (bold) return italic ? CHAR_BOLD_ITALIC : CHAR_BOLD;
  return italic ? CHAR_ITALIC : CHAR_NORMAL;
}
|
|
5981
|
+
/**
 * Escapes the characters that are markup-significant in XML text and
 * double-quoted attribute values.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeXml(text) {
  // `&` must be handled first so the entities produced by the later
  // replacements are not escaped a second time.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
|
5984
|
+
/**
 * Renders inline Markdown as a sequence of <hp:run> elements.
 *
 * Spans carrying code, bold or italic styling use the id from
 * spanToCharPrId; unstyled spans use `defaultCharPr`.
 *
 * @param {string} text - Inline Markdown.
 * @param {number} [defaultCharPr=CHAR_NORMAL] - charPr id for unstyled spans.
 * @returns {string} Concatenated run XML.
 */
function generateRuns(text, defaultCharPr = CHAR_NORMAL) {
  let runsXml = "";
  for (const span of parseInlineMarkdown(text)) {
    const styled = span.code || span.bold || span.italic;
    const charId = styled ? spanToCharPrId(span) : defaultCharPr;
    runsXml += `<hp:run charPrIDRef="${charId}"><hp:t>${escapeXml(span.text)}</hp:t></hp:run>`;
  }
  return runsXml;
}
|
|
5991
|
+
/**
 * Renders one <hp:p> paragraph.
 *
 * Code paragraphs (PARA_CODE) are emitted as a single CHAR_CODE run with the
 * text escaped verbatim; every other paragraph goes through generateRuns so
 * inline Markdown is honoured.
 *
 * @param {string} text - Paragraph text.
 * @param {number} [paraPrId=PARA_NORMAL] - paraPr id for the paragraph.
 * @param {number} [charPrId=CHAR_NORMAL] - charPr id for unstyled spans.
 * @returns {string} Paragraph XML.
 */
function generateParagraph(text, paraPrId = PARA_NORMAL, charPrId = CHAR_NORMAL) {
  const content =
    paraPrId === PARA_CODE
      ? `<hp:run charPrIDRef="${CHAR_CODE}"><hp:t>${escapeXml(text)}</hp:t></hp:run>`
      : generateRuns(text, charPrId);
  return `<hp:p paraPrIDRef="${paraPrId}" styleIDRef="0">${content}</hp:p>`;
}
|
|
5998
|
+
/**
 * Maps a heading level to its paragraph-property id.
 *
 * @param {number} level - Heading level.
 * @returns {number} PARA_H1..PARA_H3 for levels 1-3, PARA_H4 for anything else.
 */
function headingParaPrId(level) {
  switch (level) {
    case 1:
      return PARA_H1;
    case 2:
      return PARA_H2;
    case 3:
      return PARA_H3;
    default:
      return PARA_H4;
  }
}
|
|
6004
|
+
/**
 * Maps a heading level to its character-property id.
 *
 * @param {number} level - Heading level.
 * @returns {number} CHAR_H1..CHAR_H3 for levels 1-3, CHAR_H4 for anything else.
 */
function headingCharPrId(level) {
  switch (level) {
    case 1:
      return CHAR_H1;
    case 2:
      return CHAR_H2;
    case 3:
      return CHAR_H3;
    default:
      return CHAR_H4;
  }
}
|
|
6010
|
+
/**
 * Builds the META-INF/container.xml part, which names
 * Contents/content.hpf as the package root file.
 *
 * @returns {string} Container XML.
 */
function generateContainerXml() {
  const xmlLines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<ocf:container xmlns:ocf="${NS_OCF}" xmlns:hpf="${NS_HPF}">`,
    `<ocf:rootfiles>`,
    `<ocf:rootfile full-path="Contents/content.hpf" media-type="application/hwpml-package+xml"/>`,
    `</ocf:rootfiles>`,
    `</ocf:container>`,
  ];
  return xmlLines.join("\n");
}
|
|
6018
|
+
/**
 * Builds the Contents/content.hpf part: a manifest listing the header and
 * section parts, and a spine that references both.
 *
 * @returns {string} Package manifest XML.
 */
function generateManifest() {
  const xmlLines = [
    `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>`,
    `<opf:package xmlns:opf="${NS_OPF}" xmlns:hpf="${NS_HPF}" xmlns:hh="${NS_HEAD}">`,
    `<opf:manifest>`,
    `<opf:item id="header" href="Contents/header.xml" media-type="application/xml"/>`,
    `<opf:item id="section0" href="Contents/section0.xml" media-type="application/xml"/>`,
    `</opf:manifest>`,
    `<opf:spine>`,
    `<opf:itemref idref="header" linear="no"/>`,
    `<opf:itemref idref="section0" linear="yes"/>`,
    `</opf:spine>`,
    `</opf:package>`,
  ];
  return xmlLines.join("\n");
}
|
|
6031
|
+
/**
 * Builds one <hh:charPr> entry for the header's character-property list.
 *
 * @param {number} id - charPr id.
 * @param {number} height - Value written to the `height` attribute.
 * @param {boolean} bold - Adds `bold="1"` when truthy.
 * @param {boolean} italic - Adds `italic="1"` when truthy.
 * @param {number} [fontId=0] - Font id used for every script in <hh:fontRef>.
 * @returns {string} charPr XML.
 */
function charPr(id, height, bold, italic, fontId = 0) {
  const styleAttrs = (bold ? ` bold="1"` : "") + (italic ? ` italic="1"` : "");

  // Emits the same value for each of the seven per-script attributes.
  const scripts = ["hangul", "latin", "hanja", "japanese", "other", "symbol", "user"];
  const perScript = (value) => scripts.map((script) => `${script}="${value}"`).join(" ");

  return [
    ` <hh:charPr id="${id}" height="${height}" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="0"${styleAttrs}>`,
    `<hh:fontRef ${perScript(fontId)}/>`,
    `<hh:ratio ${perScript(100)}/>`,
    `<hh:spacing ${perScript(0)}/>`,
    `<hh:relSz ${perScript(100)}/>`,
    `<hh:offset ${perScript(0)}/>`,
    `</hh:charPr>`,
  ].join("\n");
}
|
|
6042
|
+
/**
 * Builds one <hh:paraPr> entry for the header's paragraph-property list.
 *
 * @param {number} id - paraPr id.
 * @param {object} [opts] - Optional overrides.
 * @param {string} [opts.align="JUSTIFY"] - Horizontal alignment.
 * @param {number} [opts.spaceBefore=0] - Written to the margin `prev` attribute.
 * @param {number} [opts.spaceAfter=0] - Written to the margin `next` attribute.
 * @param {number} [opts.lineSpacing=160] - Line spacing, emitted with type PERCENT.
 * @param {number} [opts.indent=0] - Written to the margin `indent` attribute.
 * @returns {string} paraPr XML.
 */
function paraPr(id, opts = {}) {
  const {
    align: horizontal = "JUSTIFY",
    spaceBefore: marginPrev = 0,
    spaceAfter: marginNext = 0,
    lineSpacing: spacingValue = 160,
    indent: marginIndent = 0,
  } = opts;

  return [
    ` <hh:paraPr id="${id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="1" suppressLineNumbers="0" checked="0" textDir="AUTO">`,
    `<hh:align horizontal="${horizontal}" vertical="BASELINE"/>`,
    `<hh:heading type="NONE" idRef="0" level="0"/>`,
    `<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="BREAK_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>`,
    `<hh:autoSpacing eAsianEng="0" eAsianNum="0"/>`,
    `<hh:margin indent="${marginIndent}" left="0" right="0" prev="${marginPrev}" next="${marginNext}"/>`,
    `<hh:lineSpacing type="PERCENT" value="${spacingValue}"/>`,
    `<hh:border borderFillIDRef="0" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>`,
    `</hh:paraPr>`,
  ].join("\n");
}
|
|
6054
|
+
/**
 * Builds the Contents/header.xml part: font faces for seven script groups,
 * one border fill, nine character properties (ids 0-8), eight paragraph
 * properties (ids 0-7) and a single default paragraph style.
 *
 * @returns {string} Header XML.
 */
function generateHeaderXml() {
  // <hh:typeInfo> variants shared by several fonts.
  const gothicProportional =
    `<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="4" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>`;
  const gothicFixed =
    `<hh:typeInfo familyType="FCAT_GOTHIC" weight="6" proportion="0" contrast="0" strokeVariation="1" armStyle="1" letterform="1" midline="1" xHeight="1"/>`;
  const oldStyle =
    `<hh:typeInfo familyType="FCAT_OLDSTYLE" weight="5" proportion="4" contrast="2" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="4"/>`;
  const modern =
    `<hh:typeInfo familyType="FCAT_MODERN" weight="5" proportion="0" contrast="0" strokeVariation="0" armStyle="0" letterform="0" midline="0" xHeight="0"/>`;

  const font = (id, face, typeInfo) =>
    `<hh:font id="${id}" face="${face}" type="TTF" isEmbedded="0">\n${typeInfo}\n</hh:font>`;
  const fontface = (lang, fonts) =>
    `<hh:fontface lang="${lang}" fontCnt="${fonts.length}">\n${fonts.join("\n")}\n</hh:fontface>`;

  const batang = "\uD568\uCD08\uB86C\uBC14\uD0D5";
  const dotum = "\uD568\uCD08\uB86C\uB3CB\uC6C0";
  const gulim = "\uAD74\uB9BC";

  // NOTE(review): the charPr entries with ids 4-8 pass fontId 1 for every
  // script, but only the HANGUL and LATIN groups declare a font with id 1.
  // Confirm the remaining groups are meant to have a single font.
  const fontfaces = [
    fontface("HANGUL", [font(0, batang, gothicProportional), font(1, dotum, gothicProportional)]),
    fontface("LATIN", [font(0, "Times New Roman", oldStyle), font(1, "Consolas", modern)]),
    fontface("HANJA", [font(0, batang, gothicProportional)]),
    fontface("JAPANESE", [font(0, gulim, gothicFixed)]),
    fontface("OTHER", [font(0, gulim, gothicFixed)]),
    fontface("SYMBOL", [font(0, "Symbol", gothicFixed)]),
    fontface("USER", [font(0, gulim, gothicFixed)]),
  ];

  const charProps = [
    charPr(0, 1e3, false, false), // CHAR_NORMAL
    charPr(1, 1e3, true, false), // CHAR_BOLD
    charPr(2, 1e3, false, true), // CHAR_ITALIC
    charPr(3, 1e3, true, true), // CHAR_BOLD_ITALIC
    charPr(4, 900, false, false, 1), // CHAR_CODE
    charPr(5, 1800, true, false, 1), // CHAR_H1
    charPr(6, 1400, true, false, 1), // CHAR_H2
    charPr(7, 1200, true, false, 1), // CHAR_H3
    charPr(8, 1100, true, false, 1), // CHAR_H4
  ];

  const paraProps = [
    paraPr(0), // PARA_NORMAL
    paraPr(1, { align: "LEFT", spaceBefore: 800, spaceAfter: 200, lineSpacing: 180 }), // PARA_H1
    paraPr(2, { align: "LEFT", spaceBefore: 600, spaceAfter: 150, lineSpacing: 170 }), // PARA_H2
    paraPr(3, { align: "LEFT", spaceBefore: 400, spaceAfter: 100, lineSpacing: 160 }), // PARA_H3
    paraPr(4, { align: "LEFT", spaceBefore: 300, spaceAfter: 100, lineSpacing: 160 }), // PARA_H4
    paraPr(5, { align: "LEFT", lineSpacing: 130, indent: 400 }), // PARA_CODE
    paraPr(6, { align: "LEFT", lineSpacing: 150, indent: 600 }), // PARA_QUOTE
    paraPr(7, { align: "LEFT", lineSpacing: 160, indent: 600 }), // PARA_LIST
  ];

  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<hh:head xmlns:hh="${NS_HEAD}" xmlns:hp="${NS_PARA}" version="1.4" secCnt="1">
<hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>
<hh:refList>
<hh:fontfaces itemCnt="${fontfaces.length}">
${fontfaces.join("\n")}
</hh:fontfaces>
<hh:borderFills itemCnt="1">
<hh:borderFill id="0" threeD="0" shadow="0" centerLine="0" breakCellSeparateLine="0">
<hh:slash type="NONE" Crooked="0" isCounter="0"/>
<hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
<hh:leftBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:rightBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:topBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:bottomBorder type="NONE" width="0.1mm" color="#000000"/>
<hh:diagonal type="NONE" width="0.1mm" color="#000000"/>
<hh:fillInfo/>
</hh:borderFill>
</hh:borderFills>
<hh:charProperties itemCnt="${charProps.length}">
${charProps.join("\n")}
</hh:charProperties>
<hh:tabProperties itemCnt="0"/>
<hh:numberings itemCnt="0"/>
<hh:bullets itemCnt="0"/>
<hh:paraProperties itemCnt="${paraProps.length}">
${paraProps.join("\n")}
</hh:paraProperties>
<hh:styles itemCnt="1">
<hh:style id="0" type="PARA" name="\uBC14\uD0D5\uAE00" engName="Normal" paraPrIDRef="0" charPrIDRef="0" nextStyleIDRef="0" langIDRef="1042" lockForm="0"/>
</hh:styles>
</hh:refList>
<hh:compatibleDocument targetProgram="HWP2018"/>
</hh:head>`;
}
|
|
6145
|
+
/**
 * Builds the <hp:secPr> section-properties element (grid, start numbers,
 * visibility, page size and margins, footnote and endnote settings) as a
 * single line of XML with no whitespace between elements.
 *
 * @returns {string} secPr XML.
 */
function generateSecPr() {
  const pagePr = [
    `<hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY">`,
    `<hp:margin header="2835" footer="2835" gutter="0" left="5670" right="4252" top="8504" bottom="4252"/>`,
    `</hp:pagePr>`,
  ];
  const footNotePr = [
    `<hp:footNotePr>`,
    `<hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/>`,
    `<hp:noteLine length="-1" type="SOLID" width="0.12 mm" color="#000000"/>`,
    `<hp:noteSpacing betweenNotes="283" belowLine="567" aboveLine="850"/>`,
    `<hp:numbering type="CONTINUOUS" newNum="1"/>`,
    `<hp:placement place="EACH_COLUMN" beneathText="0"/>`,
    `</hp:footNotePr>`,
  ];
  const endNotePr = [
    `<hp:endNotePr>`,
    `<hp:autoNumFormat type="DIGIT" userChar="" prefixChar="" suffixChar=")" supscript="0"/>`,
    `<hp:noteLine length="14692344" type="SOLID" width="0.12 mm" color="#000000"/>`,
    `<hp:noteSpacing betweenNotes="0" belowLine="567" aboveLine="850"/>`,
    `<hp:numbering type="CONTINUOUS" newNum="1"/>`,
    `<hp:placement place="END_OF_DOCUMENT" beneathText="0"/>`,
    `</hp:endNotePr>`,
  ];
  return [
    `<hp:secPr textDirection="HORIZONTAL" spaceColumns="1134" tabStop="8000" outlineShapeIDRef="0" memoShapeIDRef="0" textVerticalWidthHead="0" masterPageCnt="0">`,
    `<hp:grid lineGrid="0" charGrid="0" wonggojiFormat="0"/>`,
    `<hp:startNum pageStartsOn="BOTH" page="0" pic="0" tbl="0" equation="0"/>`,
    `<hp:visibility hideFirstHeader="0" hideFirstFooter="0" hideFirstMasterPage="0" border="SHOW_ALL" fill="SHOW_ALL" hideFirstPageNum="0" hideFirstEmptyLine="0" showLineNumber="0"/>`,
    ...pagePr,
    ...footNotePr,
    ...endNotePr,
    `</hp:secPr>`,
  ].join("");
}
|
|
6148
|
+
/**
 * Renders a grid of cell texts as an <hp:tbl> element. Every cell is
 * emitted with a 1x1 span and a single paragraph whose runs come from
 * generateRuns.
 *
 * @param {string[][]} rows - Cell texts, row by row.
 * @returns {string} Table XML.
 */
function generateTable(rows) {
  let rowsXml = "";
  for (const row of rows) {
    let cellsXml = "";
    for (const cell of row) {
      cellsXml += `<hp:tc><hp:cellSpan colSpan="1" rowSpan="1"/><hp:p paraPrIDRef="0" styleIDRef="0">${generateRuns(cell)}</hp:p></hp:tc>`;
    }
    rowsXml += `<hp:tr>${cellsXml}</hp:tr>`;
  }
  return `<hp:tbl>${rowsXml}</hp:tbl>`;
}
|
|
6158
|
+
/**
 * Renders parsed Markdown blocks as the section XML of the package.
 *
 * The section properties (generateSecPr) are injected into the first run of
 * the first emitted paragraph. When the first emitted block is a table, or
 * when nothing is emitted at all, an empty carrier paragraph holding the
 * section properties is emitted instead.
 *
 * Ordered list items are numbered with a running counter per indent level.
 * A counter restarts when a non-list block, a shallower item, or a bullet
 * at the same level intervenes.
 *
 * @param {object[]} blocks - Blocks as produced by parseMarkdownToBlocks.
 * @returns {string} Section XML document.
 */
function blocksToSectionXml(blocks) {
  const paraXmls = [];
  // orderedCounters[d] is the number of the last ordered item seen at depth d.
  const orderedCounters = [];
  let isFirst = true;
  for (const block of blocks) {
    // Any non-list block ends every list that was in progress.
    if (block.type !== "list_item") orderedCounters.length = 0;

    let xml = "";
    switch (block.type) {
      case "heading": {
        const pId = headingParaPrId(block.level || 1);
        const cId = headingCharPrId(block.level || 1);
        xml = generateParagraph(block.text || "", pId, cId);
        break;
      }
      case "paragraph":
        xml = generateParagraph(block.text || "");
        break;
      case "code_block": {
        // One paragraph per source line; a blank line becomes a single space.
        const codeLines = (block.text || "").split("\n");
        xml = codeLines.map((line) => generateParagraph(line || " ", PARA_CODE)).join("\n ");
        break;
      }
      case "blockquote":
        xml = generateParagraph(block.text || "", PARA_QUOTE);
        break;
      case "list_item": {
        const depth = block.indent || 0;
        let marker;
        if (block.ordered) {
          // Forget deeper levels, then advance this level's counter.
          orderedCounters.length = depth + 1;
          orderedCounters[depth] = (orderedCounters[depth] || 0) + 1;
          marker = `${orderedCounters[depth]}. `;
        } else {
          // A bullet ends any numbering at this level and below.
          orderedCounters.length = Math.min(orderedCounters.length, depth);
          marker = "\xB7 ";
        }
        const indentPrefix = " ".repeat(depth);
        xml = generateParagraph(indentPrefix + marker + (block.text || ""), PARA_LIST);
        break;
      }
      case "hr":
        xml = `<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0"><hp:t>\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500</hp:t></hp:run></hp:p>`;
        break;
      case "table":
        if (block.rows) {
          if (isFirst) {
            // A table has no leading run to host the section properties, so
            // an empty paragraph carrying them is emitted ahead of it.
            const secRun = `<hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run>`;
            paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0">${secRun}</hp:p>`);
            isFirst = false;
          }
          xml = generateTable(block.rows);
        }
        break;
    }
    if (!xml) continue;
    if (isFirst && block.type !== "table") {
      // Insert the section properties right after the first run's open tag.
      xml = xml.replace(
        /<hp:run charPrIDRef="(\d+)">/,
        `<hp:run charPrIDRef="$1">${generateSecPr()}`
      );
      isFirst = false;
    }
    paraXmls.push(xml);
  }
  if (paraXmls.length === 0) {
    paraXmls.push(`<hp:p paraPrIDRef="0" styleIDRef="0"><hp:run charPrIDRef="0">${generateSecPr()}<hp:t></hp:t></hp:run></hp:p>`);
  }
  return `<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<hs:sec xmlns:hs="${NS_SECTION}" xmlns:hp="${NS_PARA}">
${paraXmls.join("\n ")}
</hs:sec>`;
}
|
|
6219
|
+
|
|
5154
6220
|
// src/index.ts
|
|
5155
6221
|
import { readFile } from "fs/promises";
|
|
5156
6222
|
|
|
5157
6223
|
// src/xlsx/parser.ts
|
|
5158
|
-
import
|
|
5159
|
-
import { DOMParser as
|
|
6224
|
+
import JSZip4 from "jszip";
|
|
6225
|
+
import { DOMParser as DOMParser3 } from "@xmldom/xmldom";
|
|
5160
6226
|
var MAX_SHEETS = 100;
|
|
5161
6227
|
var MAX_DECOMPRESS_SIZE3 = 100 * 1024 * 1024;
|
|
5162
6228
|
var MAX_ROWS2 = 1e4;
|
|
@@ -5193,7 +6259,7 @@ function getTextContent(el) {
|
|
|
5193
6259
|
return el.textContent?.trim() ?? "";
|
|
5194
6260
|
}
|
|
5195
6261
|
function parseXml(text) {
|
|
5196
|
-
return new
|
|
6262
|
+
return new DOMParser3().parseFromString(stripDtd(text), "text/xml");
|
|
5197
6263
|
}
|
|
5198
6264
|
function parseSharedStrings(xml) {
|
|
5199
6265
|
const doc = parseXml(xml);
|
|
@@ -5346,7 +6412,7 @@ function sheetToBlocks(sheetName, grid, merges, maxRow, maxCol, sheetIndex) {
|
|
|
5346
6412
|
}
|
|
5347
6413
|
async function parseXlsxDocument(buffer, options) {
|
|
5348
6414
|
precheckZipSize(buffer, MAX_DECOMPRESS_SIZE3);
|
|
5349
|
-
const zip = await
|
|
6415
|
+
const zip = await JSZip4.loadAsync(buffer);
|
|
5350
6416
|
const warnings = [];
|
|
5351
6417
|
const workbookFile = zip.file("xl/workbook.xml");
|
|
5352
6418
|
if (!workbookFile) {
|
|
@@ -5436,24 +6502,24 @@ async function parseXlsxDocument(buffer, options) {
|
|
|
5436
6502
|
}
|
|
5437
6503
|
|
|
5438
6504
|
// src/docx/parser.ts
|
|
5439
|
-
import
|
|
5440
|
-
import { DOMParser as
|
|
6505
|
+
import JSZip5 from "jszip";
|
|
6506
|
+
import { DOMParser as DOMParser4 } from "@xmldom/xmldom";
|
|
5441
6507
|
var MAX_DECOMPRESS_SIZE4 = 100 * 1024 * 1024;
|
|
5442
|
-
function getChildElements(parent,
|
|
6508
|
+
function getChildElements(parent, localName2) {
|
|
5443
6509
|
const result = [];
|
|
5444
6510
|
const children = parent.childNodes;
|
|
5445
6511
|
for (let i = 0; i < children.length; i++) {
|
|
5446
6512
|
const node = children[i];
|
|
5447
6513
|
if (node.nodeType === 1) {
|
|
5448
6514
|
const el = node;
|
|
5449
|
-
if (el.localName ===
|
|
6515
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5450
6516
|
result.push(el);
|
|
5451
6517
|
}
|
|
5452
6518
|
}
|
|
5453
6519
|
}
|
|
5454
6520
|
return result;
|
|
5455
6521
|
}
|
|
5456
|
-
function findElements(parent,
|
|
6522
|
+
function findElements(parent, localName2) {
|
|
5457
6523
|
const result = [];
|
|
5458
6524
|
const walk = (node) => {
|
|
5459
6525
|
const children = node.childNodes;
|
|
@@ -5461,7 +6527,7 @@ function findElements(parent, localName) {
|
|
|
5461
6527
|
const child = children[i];
|
|
5462
6528
|
if (child.nodeType === 1) {
|
|
5463
6529
|
const el = child;
|
|
5464
|
-
if (el.localName ===
|
|
6530
|
+
if (el.localName === localName2 || el.tagName?.endsWith(`:${localName2}`)) {
|
|
5465
6531
|
result.push(el);
|
|
5466
6532
|
}
|
|
5467
6533
|
walk(el);
|
|
@@ -5471,16 +6537,16 @@ function findElements(parent, localName) {
|
|
|
5471
6537
|
walk(parent);
|
|
5472
6538
|
return result;
|
|
5473
6539
|
}
|
|
5474
|
-
function getAttr(el,
|
|
6540
|
+
function getAttr(el, localName2) {
|
|
5475
6541
|
const attrs = el.attributes;
|
|
5476
6542
|
for (let i = 0; i < attrs.length; i++) {
|
|
5477
6543
|
const attr = attrs[i];
|
|
5478
|
-
if (attr.localName ===
|
|
6544
|
+
if (attr.localName === localName2 || attr.name === localName2) return attr.value;
|
|
5479
6545
|
}
|
|
5480
6546
|
return null;
|
|
5481
6547
|
}
|
|
5482
6548
|
function parseXml2(text) {
|
|
5483
|
-
return new
|
|
6549
|
+
return new DOMParser4().parseFromString(stripDtd(text), "text/xml");
|
|
5484
6550
|
}
|
|
5485
6551
|
function parseStyles(xml) {
|
|
5486
6552
|
const doc = parseXml2(xml);
|
|
@@ -5774,7 +6840,7 @@ async function extractImages(zip, rels, doc) {
|
|
|
5774
6840
|
}
|
|
5775
6841
|
async function parseDocxDocument(buffer, options) {
|
|
5776
6842
|
precheckZipSize(buffer, MAX_DECOMPRESS_SIZE4);
|
|
5777
|
-
const zip = await
|
|
6843
|
+
const zip = await JSZip5.loadAsync(buffer);
|
|
5778
6844
|
const warnings = [];
|
|
5779
6845
|
const docFile = zip.file("word/document.xml");
|
|
5780
6846
|
if (!docFile) {
|
|
@@ -5822,11 +6888,11 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5822
6888
|
const node = children[i];
|
|
5823
6889
|
if (node.nodeType !== 1) continue;
|
|
5824
6890
|
const el = node;
|
|
5825
|
-
const
|
|
5826
|
-
if (
|
|
6891
|
+
const localName2 = el.localName ?? el.tagName?.split(":").pop();
|
|
6892
|
+
if (localName2 === "p") {
|
|
5827
6893
|
const block = parseParagraph(el, styles, numbering, footnotes, rels);
|
|
5828
6894
|
if (block) blocks.push(block);
|
|
5829
|
-
} else if (
|
|
6895
|
+
} else if (localName2 === "tbl") {
|
|
5830
6896
|
const block = parseTable(el, styles, numbering, footnotes, rels);
|
|
5831
6897
|
if (block) blocks.push(block);
|
|
5832
6898
|
}
|
|
@@ -5864,135 +6930,6 @@ async function parseDocxDocument(buffer, options) {
|
|
|
5864
6930
|
};
|
|
5865
6931
|
}
|
|
5866
6932
|
|
|
5867
|
-
// src/form/recognize.ts
|
|
5868
|
-
var LABEL_KEYWORDS = /* @__PURE__ */ new Set([
|
|
5869
|
-
"\uC131\uBA85",
|
|
5870
|
-
"\uC774\uB984",
|
|
5871
|
-
"\uC8FC\uC18C",
|
|
5872
|
-
"\uC804\uD654",
|
|
5873
|
-
"\uC804\uD654\uBC88\uD638",
|
|
5874
|
-
"\uD734\uB300\uD3F0",
|
|
5875
|
-
"\uD578\uB4DC\uD3F0",
|
|
5876
|
-
"\uC5F0\uB77D\uCC98",
|
|
5877
|
-
"\uC0DD\uB144\uC6D4\uC77C",
|
|
5878
|
-
"\uC8FC\uBBFC\uB4F1\uB85D\uBC88\uD638",
|
|
5879
|
-
"\uC18C\uC18D",
|
|
5880
|
-
"\uC9C1\uC704",
|
|
5881
|
-
"\uC9C1\uAE09",
|
|
5882
|
-
"\uBD80\uC11C",
|
|
5883
|
-
"\uC774\uBA54\uC77C",
|
|
5884
|
-
"\uD329\uC2A4",
|
|
5885
|
-
"\uD559\uAD50",
|
|
5886
|
-
"\uD559\uB144",
|
|
5887
|
-
"\uBC18",
|
|
5888
|
-
"\uBC88\uD638",
|
|
5889
|
-
"\uC2E0\uCCAD\uC778",
|
|
5890
|
-
"\uB300\uD45C\uC790",
|
|
5891
|
-
"\uB2F4\uB2F9\uC790",
|
|
5892
|
-
"\uC791\uC131\uC790",
|
|
5893
|
-
"\uD655\uC778\uC790",
|
|
5894
|
-
"\uC2B9\uC778\uC790",
|
|
5895
|
-
"\uC77C\uC2DC",
|
|
5896
|
-
"\uB0A0\uC9DC",
|
|
5897
|
-
"\uAE30\uAC04",
|
|
5898
|
-
"\uC7A5\uC18C",
|
|
5899
|
-
"\uBAA9\uC801",
|
|
5900
|
-
"\uC0AC\uC720",
|
|
5901
|
-
"\uBE44\uACE0",
|
|
5902
|
-
"\uAE08\uC561",
|
|
5903
|
-
"\uC218\uB7C9",
|
|
5904
|
-
"\uB2E8\uAC00",
|
|
5905
|
-
"\uD569\uACC4",
|
|
5906
|
-
"\uACC4",
|
|
5907
|
-
"\uC18C\uACC4"
|
|
5908
|
-
]);
|
|
5909
|
-
function isLabelCell(text) {
|
|
5910
|
-
const trimmed = text.trim();
|
|
5911
|
-
if (!trimmed || trimmed.length > 30) return false;
|
|
5912
|
-
for (const kw of LABEL_KEYWORDS) {
|
|
5913
|
-
if (trimmed.includes(kw)) return true;
|
|
5914
|
-
}
|
|
5915
|
-
if (/^[가-힣\s()·:]{2,8}$/.test(trimmed) && !/\d/.test(trimmed)) return true;
|
|
5916
|
-
if (/^[가-힣A-Za-z\s]+[::]$/.test(trimmed)) return true;
|
|
5917
|
-
return false;
|
|
5918
|
-
}
|
|
5919
|
-
function extractFormFields(blocks) {
|
|
5920
|
-
const fields = [];
|
|
5921
|
-
let totalTables = 0;
|
|
5922
|
-
let formTables = 0;
|
|
5923
|
-
for (const block of blocks) {
|
|
5924
|
-
if (block.type !== "table" || !block.table) continue;
|
|
5925
|
-
totalTables++;
|
|
5926
|
-
const tableFields = extractFromTable(block.table);
|
|
5927
|
-
if (tableFields.length > 0) {
|
|
5928
|
-
formTables++;
|
|
5929
|
-
fields.push(...tableFields);
|
|
5930
|
-
}
|
|
5931
|
-
}
|
|
5932
|
-
for (const block of blocks) {
|
|
5933
|
-
if (block.type === "paragraph" && block.text) {
|
|
5934
|
-
const inlineFields = extractInlineFields(block.text);
|
|
5935
|
-
fields.push(...inlineFields);
|
|
5936
|
-
}
|
|
5937
|
-
}
|
|
5938
|
-
const confidence = totalTables > 0 ? formTables / totalTables : fields.length > 0 ? 0.3 : 0;
|
|
5939
|
-
return { fields, confidence: Math.min(confidence, 1) };
|
|
5940
|
-
}
|
|
5941
|
-
function extractFromTable(table) {
|
|
5942
|
-
const fields = [];
|
|
5943
|
-
if (table.cols >= 2) {
|
|
5944
|
-
for (let r = 0; r < table.rows; r++) {
|
|
5945
|
-
for (let c = 0; c < table.cols - 1; c++) {
|
|
5946
|
-
const labelCell = table.cells[r][c];
|
|
5947
|
-
const valueCell = table.cells[r][c + 1];
|
|
5948
|
-
if (isLabelCell(labelCell.text) && valueCell.text.trim()) {
|
|
5949
|
-
fields.push({
|
|
5950
|
-
label: labelCell.text.trim().replace(/[::]\s*$/, ""),
|
|
5951
|
-
value: valueCell.text.trim(),
|
|
5952
|
-
row: r,
|
|
5953
|
-
col: c
|
|
5954
|
-
});
|
|
5955
|
-
}
|
|
5956
|
-
}
|
|
5957
|
-
}
|
|
5958
|
-
}
|
|
5959
|
-
if (fields.length === 0 && table.rows >= 2 && table.cols >= 2) {
|
|
5960
|
-
const headerRow = table.cells[0];
|
|
5961
|
-
const allLabels = headerRow.every((cell) => {
|
|
5962
|
-
const t = cell.text.trim();
|
|
5963
|
-
return t.length > 0 && t.length <= 20;
|
|
5964
|
-
});
|
|
5965
|
-
if (allLabels) {
|
|
5966
|
-
for (let r = 1; r < table.rows; r++) {
|
|
5967
|
-
for (let c = 0; c < table.cols; c++) {
|
|
5968
|
-
const label = headerRow[c].text.trim();
|
|
5969
|
-
const value = table.cells[r][c].text.trim();
|
|
5970
|
-
if (label && value) {
|
|
5971
|
-
fields.push({ label, value, row: r, col: c });
|
|
5972
|
-
}
|
|
5973
|
-
}
|
|
5974
|
-
}
|
|
5975
|
-
}
|
|
5976
|
-
}
|
|
5977
|
-
return fields;
|
|
5978
|
-
}
|
|
5979
|
-
function extractInlineFields(text) {
|
|
5980
|
-
const fields = [];
|
|
5981
|
-
const pattern = /([가-힣A-Za-z]{2,10})\s*[::]\s*([^\n,;]{1,100})/g;
|
|
5982
|
-
let match;
|
|
5983
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
5984
|
-
const label = match[1].trim();
|
|
5985
|
-
const value = match[2].trim();
|
|
5986
|
-
if (value) {
|
|
5987
|
-
fields.push({ label, value, row: -1, col: -1 });
|
|
5988
|
-
}
|
|
5989
|
-
}
|
|
5990
|
-
return fields;
|
|
5991
|
-
}
|
|
5992
|
-
|
|
5993
|
-
// src/hwpx/generator.ts
|
|
5994
|
-
import JSZip4 from "jszip";
|
|
5995
|
-
|
|
5996
6933
|
// src/index.ts
|
|
5997
6934
|
async function parse(input, options) {
|
|
5998
6935
|
let buffer;
|
|
@@ -6256,8 +7193,11 @@ export {
|
|
|
6256
7193
|
extractHwpxMetadataOnly,
|
|
6257
7194
|
extractHwp5MetadataOnly,
|
|
6258
7195
|
extractPdfMetadataOnly,
|
|
6259
|
-
compare,
|
|
6260
7196
|
extractFormFields,
|
|
7197
|
+
fillFormFields,
|
|
7198
|
+
fillHwpx,
|
|
7199
|
+
markdownToHwpx,
|
|
7200
|
+
compare,
|
|
6261
7201
|
parse
|
|
6262
7202
|
};
|
|
6263
|
-
//# sourceMappingURL=chunk-
|
|
7203
|
+
//# sourceMappingURL=chunk-SY2RFVLW.js.map
|